├── 12306.py
├── Netease
    ├── Netease.py
    └── music_list.txt
├── README.md
├── baiduwenku.py
├── baiduwenku_pro_1.py
├── baiwan
    ├── app.js
    ├── baiwan.py
    ├── file.txt
    ├── index.html
    └── question.txt
├── bilibili
    ├── README.md
    ├── bilibili.py
    └── xml2ass.py
├── biqukan.py
├── cartoon
    ├── cartoon
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-34.pyc
    │   │   ├── items.cpython-34.pyc
    │   │   ├── pipelines.cpython-34.pyc
    │   │   └── settings.cpython-34.pyc
    │   ├── items.py
    │   ├── middlewares.py
    │   ├── pipelines.py
    │   ├── settings.py
    │   └── spiders
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │       ├── __init__.cpython-34.pyc
    │   │       └── comic_spider.cpython-34.pyc
    │   │   └── comic_spider.py
    └── scrapy.cfg
├── daili.py
├── dingdong
    ├── README.md
    └── jd.py
├── douyin.py
├── douyin_pro.py
├── douyin_pro_2.py
├── downloader.py
├── financical.py
├── geetest.py
├── hero.py
├── one_hour_spider
    ├── biqukan.py
    ├── unsplash.py
    └── vidoe_downloader.py
├── shuaia.py
└── video_downloader
    ├── Images
        ├── bg.png
        └── qrcode.png
    ├── MyQR
        ├── __init__.py
        ├── __pycache__
        │   ├── __init__.cpython-34.pyc
        │   ├── __init__.cpython-35.pyc
        │   ├── myqr.cpython-34.pyc
        │   ├── myqr.cpython-35.pyc
        │   └── terminal.cpython-35.pyc
        ├── mylibs
        │   ├── ECC.py
        │   ├── __init__.py
        │   ├── __pycache__
        │   │   ├── ECC.cpython-34.pyc
        │   │   ├── ECC.cpython-35.pyc
        │   │   ├── __init__.cpython-34.pyc
        │   │   ├── __init__.cpython-35.pyc
        │   │   ├── constant.cpython-34.pyc
        │   │   ├── constant.cpython-35.pyc
        │   │   ├── data.cpython-34.pyc
        │   │   ├── data.cpython-35.pyc
        │   │   ├── draw.cpython-34.pyc
        │   │   ├── draw.cpython-35.pyc
        │   │   ├── matrix.cpython-34.pyc
        │   │   ├── matrix.cpython-35.pyc
        │   │   ├── structure.cpython-34.pyc
        │   │   ├── structure.cpython-35.pyc
        │   │   ├── theqrmodule.cpython-34.pyc
        │   │   └── theqrmodule.cpython-35.pyc
        │   ├── constant.py
        │   ├── data.py
        │   ├── draw.py
        │   ├── matrix.py
        │   ├── structure.py
        │   └── theqrmodule.py
        ├── myqr.py
        └── terminal.py
    ├── requirements.txt
    └── video_downloader.py


/12306.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | @author: liuyw
  4 | """
  5 | from splinter.browser import Browser
  6 | from time import sleep
  7 | import traceback
  8 | import time, sys
  9 | 
 10 | class huoche(object):
 11 | 	driver_name = ''
 12 | 	executable_path = ''
 13 | 	#用户名，密码
 14 | 	username = u"xxx"
 15 | 	passwd = u"xxx"
 16 | 	# cookies值得自己去找, 下面两个分别是沈阳, 哈尔滨
 17 | 	starts = u"%u6C88%u9633%2CSYT"
 18 | 	ends = u"%u54C8%u5C14%u6EE8%2CHBB"
 19 | 	
 20 | 	# 时间格式2018-01-19
 21 | 	dtime = u"2018-01-19"
 22 | 	# 车次，选择第几趟，0则从上之下依次点击
 23 | 	order = 0
 24 | 	###乘客名
 25 | 	users = [u"xxx",u"xxx"]
 26 | 	##席位
 27 | 	xb = u"二等座"
 28 | 	pz = u"成人票"
 29 | 
 30 | 	"""网址"""
 31 | 	ticket_url = "https://kyfw.12306.cn/otn/leftTicket/init"
 32 | 	login_url = "https://kyfw.12306.cn/otn/login/init"
 33 | 	initmy_url = "https://kyfw.12306.cn/otn/index/initMy12306"
 34 | 	buy = "https://kyfw.12306.cn/otn/confirmPassenger/initDc"
 35 | 	
 36 | 	def __init__(self):
 37 | 		self.driver_name = 'chrome'
 38 | 		self.executable_path = 'D:/chromedriver'
 39 | 
 40 | 	def login(self):
 41 | 		self.driver.visit(self.login_url)
 42 | 		self.driver.fill("loginUserDTO.user_name", self.username)
 43 | 		# sleep(1)
 44 | 		self.driver.fill("userDTO.password", self.passwd)
 45 | 		print(u"等待验证码，自行输入...")
 46 | 		while True:
 47 | 			if self.driver.url != self.initmy_url:
 48 | 				sleep(1)
 49 | 			else:
 50 | 				break
 51 | 
 52 | 	def start(self):
 53 | 		self.driver = Browser(driver_name=self.driver_name,executable_path=self.executable_path)
 54 | 		self.driver.driver.set_window_size(1400, 1000)
 55 | 		self.login()
 56 | 		# sleep(1)
 57 | 		self.driver.visit(self.ticket_url)
 58 | 		try:
 59 | 			print(u"购票页面开始...")
 60 | 			# sleep(1)
 61 | 			# 加载查询信息
 62 | 			self.driver.cookies.add({"_jc_save_fromStation": self.starts})
 63 | 			self.driver.cookies.add({"_jc_save_toStation": self.ends})
 64 | 			self.driver.cookies.add({"_jc_save_fromDate": self.dtime})
 65 | 
 66 | 			self.driver.reload()
 67 | 
 68 | 			count = 0
 69 | 			if self.order != 0:
 70 | 				while self.driver.url == self.ticket_url:
 71 | 					self.driver.find_by_text(u"查询").click()
 72 | 					count += 1
 73 | 					print(u"循环点击查询... 第 %s 次" % count)
 74 | 					# sleep(1)
 75 | 					try:
 76 | 						self.driver.find_by_text(u"预订")[self.order - 1].click()
 77 | 					except Exception as e:
 78 | 						print(e)
 79 | 						print(u"还没开始预订")
 80 | 						continue
 81 | 			else:
 82 | 				while self.driver.url == self.ticket_url:
 83 | 					self.driver.find_by_text(u"查询").click()
 84 | 					count += 1
 85 | 					print(u"循环点击查询... 第 %s 次" % count)
 86 | 					# sleep(0.8)
 87 | 					try:
 88 | 						for i in self.driver.find_by_text(u"预订"):
 89 | 							i.click()
 90 | 							sleep(1)
 91 | 					except Exception as e:
 92 | 						print(e)
 93 | 						print(u"还没开始预订 %s" % count)
 94 | 						continue
 95 | 			print(u"开始预订...")
 96 | 			# sleep(3)
 97 | 			# self.driver.reload()
 98 | 			sleep(1)
 99 | 			print(u'开始选择用户...')
100 | 			for user in self.users:
101 | 				self.driver.find_by_text(user).last.click()
102 | 
103 | 			print(u"提交订单...")
104 | 			sleep(1)
105 | 			self.driver.find_by_text(self.pz).click()
106 | 			self.driver.find_by_id('').select(self.pz)
107 | 			# sleep(1)
108 | 			self.driver.find_by_text(self.xb).click()
109 | 			sleep(1)
110 | 			self.driver.find_by_id('submitOrder_id').click()
111 | 			print(u"开始选座...")
112 | 			self.driver.find_by_id('1D').last.click()
113 | 			self.driver.find_by_id('1F').last.click()
114 | 
115 | 			sleep(1.5)
116 | 			print(u"确认选座...")
117 | 			self.driver.find_by_id('qr_submit_id').click()
118 | 
119 | 		except Exception as e:
120 | 			print(e)
121 | 
122 | if __name__ == '__main__':
123 | 	huoche = huoche()
124 | 	huoche.start()


--------------------------------------------------------------------------------
/Netease/Netease.py:
--------------------------------------------------------------------------------
  1 | # -*- coding:utf-8 -*-
  2 | import requests, hashlib, sys, click, re, base64, binascii, json, os
  3 | from Crypto.Cipher import AES
  4 | from http import cookiejar
  5 | 
  6 | """
  7 | Website:http://cuijiahua.com
  8 | Author:Jack Cui
  9 | Refer:https://github.com/darknessomi/musicbox
 10 | """
 11 | 
 12 | class Encrypyed():
 13 | 	"""
 14 | 	解密算法
 15 | 	"""
 16 | 	def __init__(self):
 17 | 		self.modulus = '00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7'
 18 | 		self.nonce = '0CoJUm6Qyw8W8jud'
 19 | 		self.pub_key = '010001'
 20 | 
 21 | 	# 登录加密算法, 基于https://github.com/stkevintan/nw_musicbox脚本实现
 22 | 	def encrypted_request(self, text):
 23 | 		text = json.dumps(text)
 24 | 		sec_key = self.create_secret_key(16)
 25 | 		enc_text = self.aes_encrypt(self.aes_encrypt(text, self.nonce), sec_key.decode('utf-8'))
 26 | 		enc_sec_key = self.rsa_encrpt(sec_key, self.pub_key, self.modulus)
 27 | 		data = {'params': enc_text, 'encSecKey': enc_sec_key}
 28 | 		return data
 29 | 
 30 | 	def aes_encrypt(self, text, secKey):
 31 | 		pad = 16 - len(text) % 16
 32 | 		text = text + chr(pad) * pad
 33 | 		encryptor = AES.new(secKey.encode('utf-8'), AES.MODE_CBC, b'0102030405060708')
 34 | 		ciphertext = encryptor.encrypt(text.encode('utf-8'))
 35 | 		ciphertext = base64.b64encode(ciphertext).decode('utf-8')
 36 | 		return ciphertext
 37 | 
 38 | 	def rsa_encrpt(self, text, pubKey, modulus):
 39 | 		text = text[::-1]
 40 | 		rs = pow(int(binascii.hexlify(text), 16), int(pubKey, 16), int(modulus, 16))
 41 | 		return format(rs, 'x').zfill(256)
 42 | 
 43 | 	def create_secret_key(self, size):
 44 | 		return binascii.hexlify(os.urandom(size))[:16]
 45 | 
 46 | 
 47 | class Song():
 48 | 	"""
 49 | 	歌曲对象，用于存储歌曲的信息
 50 | 	"""
 51 | 	def __init__(self, song_id, song_name, song_num, song_url=None):
 52 | 		self.song_id = song_id
 53 | 		self.song_name = song_name
 54 | 		self.song_num = song_num
 55 | 		self.song_url = '' if song_url is None else song_url
 56 | 
 57 | class Crawler():
 58 | 	"""
 59 | 	网易云爬取API
 60 | 	"""
 61 | 	def __init__(self, timeout=60, cookie_path='.'):
 62 | 		self.headers = {
 63 | 			'Accept': '*/*',
 64 | 			'Accept-Encoding': 'gzip,deflate,sdch',
 65 | 			'Accept-Language': 'zh-CN,zh;q=0.8,gl;q=0.6,zh-TW;q=0.4',
 66 | 			'Connection': 'keep-alive',
 67 | 			'Content-Type': 'application/x-www-form-urlencoded',
 68 | 			'Host': 'music.163.com',
 69 | 			'Referer': 'http://music.163.com/search/',
 70 | 			'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
 71 | 		}
 72 | 		self.session = requests.Session()
 73 | 		self.session.headers.update(self.headers)
 74 | 		self.session.cookies = cookiejar.LWPCookieJar(cookie_path)
 75 | 		self.download_session = requests.Session()
 76 | 		self.timeout = timeout
 77 | 		self.ep = Encrypyed()
 78 | 
 79 | 	def post_request(self, url, params):
 80 | 		"""
 81 | 		Post请求
 82 | 		:return: 字典
 83 | 		"""
 84 | 
 85 | 		data = self.ep.encrypted_request(params)
 86 | 		resp = self.session.post(url, data=data, timeout=self.timeout)
 87 | 		result = resp.json()
 88 | 		if result['code'] != 200:
 89 | 			click.echo('post_request error')
 90 | 		else:
 91 | 		    return result
 92 | 
 93 | 	def search(self, search_content, search_type, limit=9):
 94 | 		"""
 95 | 		搜索API
 96 | 		:params search_content: 搜索内容
 97 | 		:params search_type: 搜索类型
 98 | 		:params limit: 返回结果数量
 99 | 		:return: 字典.
100 | 		"""
101 | 
102 | 		url = 'http://music.163.com/weapi/cloudsearch/get/web?csrf_token='
103 | 		params = {'s': search_content, 'type': search_type, 'offset': 0, 'sub': 'false', 'limit': limit}
104 | 		result = self.post_request(url, params)
105 | 		return result
106 | 
107 | 	def search_song(self, song_name, song_num, quiet=True, limit=9):
108 | 		"""
109 | 		根据音乐名搜索
110 | 		:params song_name: 音乐名
111 | 		:params song_num: 下载的歌曲数
112 | 		:params quiet: 自动选择匹配最优结果
113 | 		:params limit: 返回结果数量
114 | 		:return: Song独享
115 | 		"""
116 | 
117 | 		result = self.search(song_name, search_type=1, limit=limit)
118 | 
119 | 		if result['result']['songCount'] <= 0:
120 | 			click.echo('Song {} not existed.'.format(song_name))
121 | 		else:
122 | 			songs = result['result']['songs']
123 | 			if quiet:
124 | 				song_id, song_name = songs[0]['id'], songs[0]['name']
125 | 				song = Song(song_id=song_id, song_name=song_name, song_num=song_num)
126 | 				return song
127 | 
128 | 	def get_song_url(self, song_id, bit_rate=320000):
129 | 		"""
130 | 		获得歌曲的下载地址
131 | 		:params song_id: 音乐ID<int>.
132 | 		:params bit_rate: {'MD 128k': 128000, 'HD 320k': 320000}
133 | 		:return: 歌曲下载地址
134 | 		"""
135 | 
136 | 		url = 'http://music.163.com/weapi/song/enhance/player/url?csrf_token='
137 | 		csrf = ''
138 | 		params = {'ids': [song_id], 'br': bit_rate, 'csrf_token': csrf}
139 | 		result = self.post_request(url, params)
140 | 		# 歌曲下载地址
141 | 		song_url = result['data'][0]['url']
142 | 
143 | 		# 歌曲不存在
144 | 		if song_url is None:
145 | 			click.echo('Song {} is not available due to copyright issue.'.format(song_id))
146 | 		else:
147 | 			return song_url
148 | 
149 | 	def get_song_by_url(self, song_url, song_name, song_num, folder):
150 | 		"""
151 | 		下载歌曲到本地
152 | 		:params song_url: 歌曲下载地址
153 | 		:params song_name: 歌曲名字
154 | 		:params song_num: 下载的歌曲数
155 | 		:params folder: 保存路径
156 | 		"""
157 | 		if not os.path.exists(folder):
158 | 			os.makedirs(folder)
159 | 		fpath = os.path.join(folder, str(song_num) + '_' + song_name + '.mp3')
160 | 		if sys.platform == 'win32' or sys.platform == 'cygwin':
161 | 			valid_name = re.sub(r'[<>:"/\\|?*]', '', song_name)
162 | 			if valid_name != song_name:
163 | 				click.echo('{} will be saved as: {}.mp3'.format(song_name, valid_name))
164 | 				fpath = os.path.join(folder, str(song_num) + '_' + valid_name + '.mp3')
165 | 		
166 | 		if not os.path.exists(fpath):
167 | 			resp = self.download_session.get(song_url, timeout=self.timeout, stream=True)
168 | 			length = int(resp.headers.get('content-length'))
169 | 			label = 'Downloading {} {}kb'.format(song_name, int(length/1024))
170 | 
171 | 			with click.progressbar(length=length, label=label) as progressbar:
172 | 				with open(fpath, 'wb') as song_file:
173 | 					for chunk in resp.iter_content(chunk_size=1024):
174 | 						if chunk:
175 | 							song_file.write(chunk)
176 | 							progressbar.update(1024)
177 | 
178 | 
179 | class Netease():
180 | 	"""
181 | 	网易云音乐下载
182 | 	"""
183 | 	def __init__(self, timeout, folder, quiet, cookie_path):
184 | 		self.crawler = Crawler(timeout, cookie_path)
185 | 		self.folder = '.' if folder is None else folder
186 | 		self.quiet = quiet
187 | 
188 | 	def download_song_by_search(self, song_name, song_num):
189 | 		"""
190 | 		根据歌曲名进行搜索
191 | 		:params song_name: 歌曲名字
192 | 		:params song_num: 下载的歌曲数
193 | 		"""
194 | 
195 | 		try:
196 | 			song = self.crawler.search_song(song_name, song_num, self.quiet)
197 | 		except:
198 | 			click.echo('download_song_by_serach error')
199 | 		# 如果找到了音乐, 则下载
200 | 		if song != None:
201 | 			self.download_song_by_id(song.song_id, song.song_name, song.song_num, self.folder)
202 | 
203 | 	def download_song_by_id(self, song_id, song_name, song_num, folder='.'):
204 | 		"""
205 | 		通过歌曲的ID下载
206 | 		:params song_id: 歌曲ID
207 | 		:params song_name: 歌曲名
208 | 		:params song_num: 下载的歌曲数
209 | 		:params folder: 保存地址
210 | 		"""
211 | 		try:
212 | 			url = self.crawler.get_song_url(song_id)
213 | 			# 去掉非法字符
214 | 			song_name = song_name.replace('/', '')
215 | 			song_name = song_name.replace('.', '')
216 | 			self.crawler.get_song_by_url(url, song_name, song_num, folder)
217 | 
218 | 		except:
219 | 			click.echo('download_song_by_id error')
220 | 
221 | 
222 | if __name__ == '__main__':
223 | 	timeout = 60
224 | 	output = 'Musics'
225 | 	quiet = True
226 | 	cookie_path = 'Cookie'
227 | 	netease = Netease(timeout, output, quiet, cookie_path)
228 | 	music_list_name = 'music_list.txt'
229 | 	# 如果music列表存在, 那么开始下载
230 | 	if os.path.exists(music_list_name):
231 | 		with open(music_list_name, 'r') as f:
232 | 			music_list = list(map(lambda x: x.strip(), f.readlines()))
233 | 		for song_num, song_name in enumerate(music_list):
234 | 			netease.download_song_by_search(song_name,song_num + 1)
235 | 	else:
236 | 		click.echo('music_list.txt not exist.')


--------------------------------------------------------------------------------
/Netease/music_list.txt:
--------------------------------------------------------------------------------
 1 | 風見鶏
 2 | 外婆的话【不才】
 3 | We Don't Talk Anymore
 4 | 【电吉他】《青鸟》
 5 | 小棋童
 6 | 千本桜(古筝版)
 7 | 妄为
 8 | 借我
 9 | 你到底有没有爱过我
10 | 七月上
11 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Python Spider
  2 | 
  3 | * 贵有恒，何必三更起五更睡；最无益，只怕一日暴十寒。
  4 | * Python3爬虫实战：实战源码+博客讲解
  5 | * [个人网站](http://cuijiahua.com "悬停显示")
  6 | * [CSDN博客](http://blog.csdn.net/c406495762 "悬停显示")
  7 | * [CSDN爬虫专栏](http://blog.csdn.net/column/details/15321.html "悬停显示")<br>
  8 | * 学习交流群【328127489】<a target="_blank" href="//shang.qq.com/wpa/qunwpa?idkey=e70f3fcff3761450fda9b43eadc1910dac308a962ef9e3e87941cd2c681c4bb4"><img border="0" src="https://github.com/Jack-Cherish/Pictures/blob/master/qqgroup.png" alt="Coder" title="Coder"></a><br>
  9 | 
 10 | ## 声明
 11 | 
 12 | * 代码、教程均为Jack Cui本人原创，且仅限于学习交流，请勿用于任何商业用途！
 13 | 
 14 | ### 文章首发声明
 15 | 
 16 | * 文章在自己的个人网站首发，其他平台文章均属转发，如想获得最新更新进展，欢迎关注我的个人网站：http://cuijiahua.com/
 17 | 
 18 | ## 目录
 19 | 
 20 | * [爬虫小工具](#爬虫小工具)
 21 |     * [文件下载小助手](https://github.com/Jack-Cherish/python-spider/blob/master/downloader.py "悬停显示")
 22 | * [爬虫实战](#爬虫实战)
 23 |     * [笔趣看小说下载](https://github.com/Jack-Cherish/python-spider/blob/master/biqukan.py "悬停显示")
 24 |     * [VIP视频下载](https://github.com/Jack-Cherish/python-spider/tree/master/video_downloader "悬停显示")
 25 |     * [百度文库文章下载_rev1](https://github.com/Jack-Cherish/python-spider/blob/master/baiduwenku.py "悬停显示")
 26 |     * [百度文库文章下载_rev2](https://github.com/Jack-Cherish/python-spider/blob/master/baiduwenku_pro_1.py "悬停显示")
 27 |     * [《帅啊》网帅哥图片下载](https://github.com/Jack-Cherish/python-spider/blob/master/shuaia.py "悬停显示")
 28 |     * [构建代理IP池](https://github.com/Jack-Cherish/python-spider/blob/master/daili.py "悬停显示")
 29 |     * [《火影忍者》漫画下载](https://github.com/Jack-Cherish/python-spider/tree/master/cartoon "悬停显示")
 30 |     * [财务报表下载小助手](https://github.com/Jack-Cherish/python-spider/blob/master/financical.py "悬停显示")
 31 |     * [一小时入门网络爬虫](https://github.com/Jack-Cherish/python-spider/tree/master/one_hour_spider "悬停显示")
 32 |     * [抖音App视频下载_rev1](https://github.com/Jack-Cherish/python-spider/blob/master/douyin.py "悬停显示")
 33 |     * [抖音App视频下载_rev2](https://github.com/Jack-Cherish/python-spider/blob/master/douyin_pro.py "悬停显示")
 34 |     * [抖音App视频下载_rev3](https://github.com/Jack-Cherish/python-spider/blob/master/douyin_pro_2.py "悬停显示")
 35 |     * [GEETEST验证码破解](https://github.com/Jack-Cherish/python-spider/blob/master/geetest.py "悬停显示")
 36 |     * [12306抢票小助手](https://github.com/Jack-Cherish/python-spider/blob/master/12306.py "悬停显示")
 37 |     * [百万英雄答题辅助系统](https://github.com/Jack-Cherish/python-spider/tree/master/baiwan "悬停显示")   
 38 |     * [网易云音乐批量下载](https://github.com/Jack-Cherish/python-spider/tree/master/Netease "悬停显示")
 39 |     * [B站视频和弹幕批量下载](https://github.com/Jack-Cherish/python-spider/tree/master/bilibili "悬停显示")
 40 |     * [京东商品晒单图下载](https://github.com/Jack-Cherish/python-spider/tree/master/dingdong "悬停显示")
 41 | * [其它](#其它)
 42 | 
 43 | ## 爬虫小工具
 44 | 
 45 | * downloader.py:文件下载小助手
 46 | 
 47 | 	一个可以用于下载图片、视频、文件的小工具，有下载进度显示功能。稍加修改即可添加到自己的爬虫中。
 48 | 	
 49 | 	动态示意图：
 50 | 	
 51 | 	![image](https://raw.githubusercontent.com/Jack-Cherish/Pictures/master/9.gif)
 52 | 
 53 | ## 爬虫实战
 54 |  
 55 |  * biqukan.py:《笔趣看》盗版小说网站，爬取小说工具
 56 | 
 57 | 	第三方依赖库安装：
 58 | 
 59 | 		pip3 install beautifulsoup4
 60 | 
 61 | 	使用方法：
 62 | 
 63 | 		python biqukan.py
 64 | 
 65 |  * video_downloader：爱奇艺等主流视频网站的VIP视频破解助手(暂只支持PC和手机在线观看VIP视频！)
 66 | 
 67 | 	感谢Python3二维码生成器作者：https://github.com/sylnsfar/qrcode
 68 | 	
 69 | 	编译好的软件下载连接：https://pan.baidu.com/s/1bqSTNJL 密码:p8bs
 70 | 	
 71 | 	解压密码：`cuijiahua.com`
 72 | 	
 73 | 	无需Python3环境，在Windows下，解压即用！[软件使用方法](http://blog.csdn.net/c406495762/article/details/71334633 "悬停显示")
 74 | 	
 75 | 	源码可查看`video_downloader`，运行源码需要搭建Python3环境，并安装相应第三方依赖库：
 76 | 	
 77 | 	在`video_downloader`文件夹下，安装第三方依赖库：
 78 | 
 79 | 		pip3 install -r requirements.txt
 80 | 
 81 | 	使用方法：
 82 | 	
 83 | 		python movie_downloader.py
 84 | 
 85 | 	运行环境：
 86 | 	
 87 | 		Windows, Python3
 88 | 		
 89 | 		Linux, Python3
 90 | 		
 91 | 		Mac, Python3
 92 | 
 93 |  * baiduwenku.py: 百度文库word文章爬取
 94 | 	
 95 | 	原理说明：http://blog.csdn.net/c406495762/article/details/72331737
 96 | 	
 97 | 	代码不完善，没有进行打包，不具通用性，纯属娱乐，以后有时间会完善。
 98 | 	
 99 |  * shuaia.py: 爬取《帅啊》网，帅哥图片
100 | 
101 | 	《帅啊》网URL：http://www.shuaia.net/index.html
102 | 
103 | 	原理说明：http://blog.csdn.net/c406495762/article/details/72597755
104 | 	
105 | 	第三方依赖库安装：
106 | 	
107 | 		pip3 install requests beautifulsoup4
108 | 		
109 |  * daili.py: 构建代理IP池
110 | 
111 | 	原理说明：http://blog.csdn.net/c406495762/article/details/72793480
112 | 	
113 | 	
114 |  * carton: 使用Scrapy爬取《火影忍者》漫画
115 | 
116 | 	代码可以爬取整个《火影忍者》漫画所有章节的内容，保存到本地。更改地址，可以爬取其他漫画。保存地址可以在settings.py中修改。
117 | 	
118 | 	动漫网站：http://comic.kukudm.com/
119 | 	
120 | 	原理说明：http://blog.csdn.net/c406495762/article/details/72858983
121 | 	
122 |  * hero.py: 《王者荣耀》推荐出装查询小助手
123 | 
124 | 	网页爬取已经会了，想过爬取手机APP里的内容吗？
125 | 	
126 | 	原理说明：http://blog.csdn.net/c406495762/article/details/76850843
127 | 	
128 |  * financical.py: 财务报表下载小助手
129 | 
130 | 	爬取的数据存入数据库会吗？《跟股神巴菲特学习炒股之财务报表入库(MySQL)》也许能给你一些思路。
131 | 	
132 | 	原理说明：http://blog.csdn.net/c406495762/article/details/77801899
133 | 	
134 | 	动态示意图：
135 | 	
136 | 	![image](https://raw.githubusercontent.com/Jack-Cherish/Pictures/master/10.gif)
137 | 	
138 |  * one_hour_spider:一小时入门Python3网络爬虫。
139 | 
140 | 	原理说明:
141 | 	
142 | 	 * 知乎：https://zhuanlan.zhihu.com/p/29809609
143 | 	 * CSDN：http://blog.csdn.net/c406495762/article/details/78123502
144 | 	
145 | 	本次实战内容有：
146 | 	
147 | 	 * 网络小说下载(静态网站)-biqukan
148 | 	 * 优美壁纸下载(动态网站)-unsplash
149 | 	 * 爱奇艺VIP视频下载
150 | 	 
151 |  * douyin.py:抖音App视频下载
152 |  
153 | 	抖音App的视频下载，就是普通的App爬取。
154 | 
155 | 	原理说明:
156 | 	
157 | 	 * 个人网站：http://cuijiahua.com/blog/2018/03/spider-5.html
158 | 	
159 |  * douyin_pro:抖音App视频下载（升级版）
160 |  
161 | 	抖音App的视频下载，添加视频解析网站，支持无水印视频下载，使用第三方平台解析。
162 | 
163 | 	原理说明:
164 | 	
165 | 	 * 个人网站：http://cuijiahua.com/blog/2018/03/spider-5.html
166 | 	 
167 |  * douyin_pro_2:抖音App视频下载（升级版2）
168 |  
169 | 	抖音App的视频下载，添加视频解析网站，支持无水印视频下载，通过url解析，无需第三方平台。
170 | 	
171 | 	原理说明:
172 | 	
173 | 	 * 个人网站：http://cuijiahua.com/blog/2018/03/spider-5.html
174 | 	 
175 | 	动态示意图：
176 | 	
177 | 	![image](https://github.com/Jack-Cherish/Pictures/blob/master/14.gif)
178 | 	
179 |  * geetest.py:GEETEST验证码破解
180 |  
181 |  	爬虫最大的敌人之一是什么？没错，验证码！Geetest作为提供验证码服务的行家，市场占有率还是蛮高的。遇到Geetest提供的滑动验证码怎么破？授人予鱼不如授人予渔，接下来就为大家呈现本教程的精彩内容。
182 |  
183 |  	原理说明:
184 | 	
185 | 	 * 个人网站：http://www.cuijiahua.com/blog/2017/11/spider_2_geetest.html
186 | 	 
187 | 	动态示意图：
188 | 	
189 | 	![image](https://github.com/Jack-Cherish/Pictures/blob/master/spider_2_1.gif)
190 | 	
191 |  * 12306.py:用Python抢火车票简单代码
192 |  
193 | 	可以自己慢慢丰富，蛮简单，有爬虫基础很好操作，没有原理说明。
194 | 	
195 |  * baiwan:百万英雄辅助答题
196 |  
197 | 	效果图：
198 | 	
199 | 	![image](https://github.com/Jack-Cherish/Pictures/blob/master/11.gif)
200 | 	
201 | 	原理说明：
202 | 	
203 | 	* 个人网站：http://cuijiahua.com/blog/2018/01/spider_3.html
204 | 	
205 |   	功能介绍：
206 | 	
207 | 	服务器端，使用Python（baiwan.py）通过抓包获得的接口获取答题数据，解析之后通过百度知道搜索接口匹配答案，将最终匹配的结果写入文件（file.txt)。
208 | 	
209 | 	手机抓包不会的朋友，可以看下我的早期[手机APP抓包教程](http://blog.csdn.net/c406495762/article/details/76850843 "悬停显示")。
210 | 	
211 | 	Node.js（app.js）每隔1s读取一次file.txt文件，并将读取结果通过socket.io推送给客户端（index.html）。
212 | 	
213 | 	亲测答题延时在3s左右。
214 | 	
215 | 	声明：没做过后端和前端，花了一天时间，现学现卖弄好的，javascript也是现看现用，百度的程序，调试调试而已。可能有很多用法比较low的地方，用法不对，请勿见怪，有大牛感兴趣，可以自行完善。
216 | 
217 |  * Netease:根据歌单下载网易云音乐
218 |  	
219 | 	效果图：
220 | 	
221 | 	![image](https://github.com/Jack-Cherish/Pictures/blob/master/13.gif)
222 | 	
223 | 	原理说明：
224 | 	
225 | 	暂无
226 | 	
227 | 	功能介绍：
228 | 	
229 | 	根据music_list.txt文件里的歌单的信息下载网易云音乐，将自己喜欢的音乐进行批量下载。
230 | 
231 |  * bilibili：B站视频和弹幕批量下载
232 |  	
233 | 	原理说明：
234 | 	
235 | 	暂无
236 | 	
237 | 	使用说明：
238 | 	
239 |         python bilibili.py -d 猫 -k 猫 -p 10
240 | 
241 |         三个参数：
242 |         -d	保存视频的文件夹名
243 |         -k	B站搜索的关键字
244 |         -p	下载搜索结果前多少页
245 | 
246 |  * jingdong：京东商品晒单图下载
247 |  
248 |  	效果图：
249 | 	
250 | 	![image](https://github.com/Jack-Cherish/Pictures/blob/master/jd.gif)
251 |  	
252 | 	原理说明：
253 | 	
254 | 	暂无
255 | 	
256 | 	使用说明：
257 | 	
258 |         python jd.py -k 芒果
259 | 
260 |         三个参数：
261 |         -d	保存图片的路径，默认为fd.py文件所在文件夹
262 |         -k	搜索关键词
263 |         -n  	下载商品的晒单图个数，即n个商店的晒单图
264 | 
265 | ## 其它
266 | 
267 |  * 欢迎 Pull requests，感谢贡献。
268 | 


--------------------------------------------------------------------------------
/baiduwenku.py:
--------------------------------------------------------------------------------
 1 | # -*- coding:UTF-8 -*-
 2 | from selenium import webdriver
 3 | from bs4 import BeautifulSoup
 4 | import re
 5 | import time
 6 | 
 7 | if __name__ == '__main__':
 8 | 
 9 | 	options = webdriver.ChromeOptions()
10 | 	options.add_argument('user-agent="Mozilla/5.0 (Linux; Android 4.0.4; Galaxy Nexus Build/IMM76B) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.133 Mobile Safari/535.19"')
11 | 	driver = webdriver.Chrome('J:\迅雷下载\chromedriver.exe', chrome_options=options)
12 | 	driver.get('https://wenku.baidu.com/view/aa31a84bcf84b9d528ea7a2c.html')
13 | 
14 | 	html = driver.page_source
15 | 	bf1 = BeautifulSoup(html, 'lxml')
16 | 	result = bf1.find_all(class_='rtcspage')
17 | 	bf2 = BeautifulSoup(str(result[0]), 'lxml')
18 | 	title = bf2.div.div.h1.string
19 | 	pagenum = bf2.find_all(class_='size')
20 | 	pagenum = BeautifulSoup(str(pagenum), 'lxml').span.string
21 | 	pagepattern = re.compile('页数：(\d+)页')
22 | 	num = int(pagepattern.findall(pagenum)[0])
23 | 	print('文章标题：%s' % title)
24 | 	print('文章页数：%d' % num)
25 | 
26 | 
27 | 	while True:
28 | 		num = num / 5.0
29 | 		html = driver.page_source
30 | 		bf1 = BeautifulSoup(html, 'lxml')
31 | 		result = bf1.find_all(class_='rtcspage')
32 | 		for each_result in result:
33 | 			bf2 = BeautifulSoup(str(each_result), 'lxml')
34 | 			texts = bf2.find_all('p')
35 | 			for each_text in texts:
36 | 				main_body = BeautifulSoup(str(each_text), 'lxml')
37 | 				for each in main_body.find_all(True):
38 | 					if each.name == 'span':
39 | 						print(each.string.replace('\xa0',''),end='')
40 | 					elif each.name == 'br':
41 | 						print('')
42 | 			print('\n')
43 | 		if num > 1:
44 | 			page = driver.find_elements_by_xpath("//div[@class='page']")
45 | 			driver.execute_script('arguments[0].scrollIntoView();', page[-1]) #拖动到可见的元素去
46 | 			nextpage = driver.find_element_by_xpath("//a[@data-fun='next']")
47 | 			nextpage.click()
48 | 			time.sleep(3)
49 | 		else:
50 | 			break


--------------------------------------------------------------------------------
/baiduwenku_pro_1.py:
--------------------------------------------------------------------------------
  1 | import requests
  2 | import re
  3 | import json
  4 | import os
  5 | 
  6 | session = requests.session()
  7 | 
  8 | 
  9 | def fetch_url(url):
 10 |     return session.get(url).content.decode('gbk')
 11 | 
 12 | 
 13 | def get_doc_id(url):
 14 |     return re.findall('view/(.*).html', url)[0]
 15 | 
 16 | 
 17 | def parse_type(content):
 18 |     return re.findall(r"docType.*?\:.*?\'(.*?)\'\,", content)[0]
 19 | 
 20 | 
 21 | def parse_title(content):
 22 |     return re.findall(r"title.*?\:.*?\'(.*?)\'\,", content)[0]
 23 | 
 24 | 
 25 | def parse_doc(content):
 26 |     result = ''
 27 |     url_list = re.findall('(https.*?0.json.*?)\\\\x22}', content)
 28 |     url_list = [addr.replace("\\\\\\/", "/") for addr in url_list]
 29 |     for url in url_list[:-5]:
 30 |         content = fetch_url(url)
 31 |         y = 0
 32 |         txtlists = re.findall('"c":"(.*?)".*?"y":(.*?),', content)
 33 |         for item in txtlists:
 34 |             if not y == item[1]:
 35 |                 y = item[1]
 36 |                 n = '\n'
 37 |             else:
 38 |                 n = ''
 39 |             result += n
 40 |             result += item[0].encode('utf-8').decode('unicode_escape', 'ignore')
 41 |     return result
 42 | 
 43 | 
 44 | def parse_txt(doc_id):
 45 |     content_url = 'https://wenku.baidu.com/api/doc/getdocinfo?callback=cb&doc_id=' + doc_id
 46 |     content = fetch_url(content_url)
 47 |     md5 = re.findall('"md5sum":"(.*?)"', content)[0]
 48 |     pn = re.findall('"totalPageNum":"(.*?)"', content)[0]
 49 |     rsign = re.findall('"rsign":"(.*?)"', content)[0]
 50 |     content_url = 'https://wkretype.bdimg.com/retype/text/' + doc_id + '?rn=' + pn + '&type=txt' + md5 + '&rsign=' + rsign
 51 |     content = json.loads(fetch_url(content_url))
 52 |     result = ''
 53 |     for item in content:
 54 |         for i in item['parags']:
 55 |             result += i['c'].replace('\\r', '\r').replace('\\n', '\n')
 56 |     return result
 57 | 
 58 | 
 59 | def parse_other(doc_id):
 60 |     content_url = "https://wenku.baidu.com/browse/getbcsurl?doc_id=" + doc_id + "&pn=1&rn=99999&type=ppt"
 61 |     content = fetch_url(content_url)
 62 |     url_list = re.findall('{"zoom":"(.*?)","page"', content)
 63 |     url_list = [item.replace("\\", '') for item in url_list]
 64 |     if not os.path.exists(doc_id):
 65 |         os.mkdir(doc_id)
 66 |     for index, url in enumerate(url_list):
 67 |         content = session.get(url).content
 68 |         path = os.path.join(doc_id, str(index) + '.jpg')
 69 |         with open(path, 'wb') as f:
 70 |             f.write(content)
 71 |     print("图片保存在" + doc_id + "文件夹")
 72 | 
 73 | 
 74 | def save_file(filename, content):
 75 |     with open(filename, 'w', encoding='utf8') as f:
 76 |         f.write(content)
 77 |         print('已保存为:' + filename)
 78 | 
 79 | 
 80 | # test_txt_url = 'https://wenku.baidu.com/view/cbb4af8b783e0912a3162a89.html?from=search'
 81 | # test_ppt_url = 'https://wenku.baidu.com/view/2b7046e3f78a6529657d5376.html?from=search'
 82 | # test_pdf_url = 'https://wenku.baidu.com/view/dd6e15c1227916888586d795.html?from=search'
 83 | # test_xls_url = 'https://wenku.baidu.com/view/eb4a5bb7312b3169a551a481.html?from=search'
 84 | def main():
 85 |     url = input('请输入要下载的文库URL地址')
 86 |     content = fetch_url(url)
 87 |     doc_id = get_doc_id(url)
 88 |     type = parse_type(content)
 89 |     title = parse_title(content)
 90 |     if type == 'doc':
 91 |         result = parse_doc(content)
 92 |         save_file(title + '.txt', result)
 93 |     elif type == 'txt':
 94 |         result = parse_txt(doc_id)
 95 |         save_file(title + '.txt', result)
 96 |     else:
 97 |         parse_other(doc_id)
 98 | 
 99 | 
100 | if __name__ == "__main__":
101 |     main()
102 | 


--------------------------------------------------------------------------------
/baiwan/app.js:
--------------------------------------------------------------------------------
 1 | var http = require('http');
 2 | var fs = require('fs');
 3 | var schedule = require("node-schedule"); 
 4 | var message = {};
 5 | var count = 0;
 6 | var server = http.createServer(function (req,res){
 7 |     fs.readFile('./index.html',function(error,data){
 8 |         res.writeHead(200,{'Content-Type':'text/html'});
 9 |         res.end(data,'utf-8');
10 |     });
11 | }).listen(80);
12 | console.log('Server running!');
13 | var lineReader = require('line-reader');
14 | function messageGet(){
15 |     lineReader.eachLine('file.txt', function(line, last) {
16 |         count++;
17 |         var name = 'line' + count;
18 |         console.log(name);
19 | 	console.log(line);
20 |         message[name] = line;
21 |     });  
22 |     if(count == 25){
23 |     	count = 0;
24 |     }
25 |     else{
26 |     	for(var i = count+1; i <= 25; i++){
27 |   	    var name = 'line' + i;
28 |             message[name] = 'f';
29 | 	}
30 |   	count = 0;
31 |     }
32 | }
33 | var io = require('socket.io').listen(server);
34 | var rule = new schedule.RecurrenceRule();
35 | var times = [];
36 | for(var i=1; i<1800; i++){
37 |     times.push(i);
38 | }
39 | rule.second = times;
40 | schedule.scheduleJob(rule, function(){
41 |         messageGet();
42 | });
43 | io.sockets.on('connection',function(socket){
44 |        // console.log('User connected' + count + 'user(s) present');
45 |         socket.emit('users',message);
46 |         socket.broadcast.emit('users',message);
47 | 
48 |     socket.on('disconnect',function(){
49 |         console.log('User disconnected');
50 |         //socket.broadcast.emit('users',message);  
51 |     });
52 | });
53 | 


--------------------------------------------------------------------------------
/baiwan/baiwan.py:
--------------------------------------------------------------------------------
  1 | # -*-coding:utf-8 -*-
  2 | import requests
  3 | from lxml import etree
  4 | from bs4 import BeautifulSoup
  5 | import urllib
  6 | import time, re, types, os
  7 | 
  8 | 
  9 | """
 10 | 代码写的匆忙，本来想再重构下，完善好注释再发，但是比较忙，想想算了，所以自行完善吧！写法很不规范，勿见怪。
 11 | 
 12 | 作者：  Jack Cui
 13 | Website:http://cuijiahua.com
 14 | 注:     本软件仅用于学习交流，请勿用于任何商业用途！
 15 | """
 16 | 
 17 | class BaiWan():
 18 | 	def __init__(self):
 19 | 		# 百度知道搜索接口
 20 | 		self.baidu = 'http://zhidao.baidu.com/search?'
 21 | 		# 百万英雄及接口,每个人的接口都不一样，里面包含的手机信息，因此不公布，请自行抓包，有疑问欢迎留言：http://cuijiahua.com/liuyan.html
 22 | 		self.api = 'https://api-spe-ttl.ixigua.com/xxxxxxx={}'.format(int(time.time()*1000))
 23 | 
 24 | 	# 获取答案并解析问题
 25 | 	def get_question(self):
 26 | 		to = True
 27 | 		while to:
 28 | 			list_dir = os.listdir('./')
 29 | 			if 'question.txt' not in list_dir:
 30 | 				fw = open('question.txt', 'w')
 31 | 				fw.write('百万英雄尚未出题请稍后!')
 32 | 				fw.close()		
 33 | 			go = True
 34 | 			while go:
 35 | 				req = requests.get(self.api, verify=False)
 36 | 				req.encoding = 'utf-8'
 37 | 				html = req.text
 38 | 
 39 | 				print(html)
 40 | 				if '*' in html:
 41 | 					question_start = html.index('*')
 42 | 					try:
 43 | 						
 44 | 						question_end = html.index('？')
 45 | 					except:
 46 | 						question_end = html.index('?')
 47 | 					question = html[question_start:question_end][2:]
 48 | 					if question != None:
 49 | 						fr = open('question.txt', 'r')
 50 | 						text = fr.readline()
 51 | 						fr.close()
 52 | 						if text != question:
 53 | 							print(question)
 54 | 							go = False
 55 | 							with open('question.txt', 'w') as f:
 56 | 								f.write(question)
 57 | 						else:
 58 | 							time.sleep(1)
 59 | 					else:
 60 | 						to = False
 61 | 				else:
 62 | 					to = False
 63 | 
 64 | 			temp = re.findall(r'[\u4e00-\u9fa5a-zA-Z0-9\+\-\*/]', html[question_end+1:])
 65 | 			b_index = []
 66 | 			print(temp)
 67 | 
 68 | 			for index, each in enumerate(temp):
 69 | 				if each == 'B':
 70 | 					b_index.append(index)
 71 | 				elif each == 'P' and (len(temp) - index) <= 3 :
 72 | 					b_index.append(index)
 73 | 					break
 74 | 
 75 | 			if len(b_index) == 4:
 76 | 				a = ''.join(temp[b_index[0] + 1:b_index[1]])
 77 | 				b = ''.join(temp[b_index[1] + 1:b_index[2]])
 78 | 				c = ''.join(temp[b_index[2] + 1:b_index[3]])
 79 | 				alternative_answers = [a,b,c]
 80 | 
 81 | 				if '下列' in question:
 82 | 					question = a + ' ' + b + ' ' + c + ' ' + question.replace('下列', '')
 83 | 				elif '以下' in question:
 84 | 					question = a + ' ' + b + ' ' + c + ' ' + question.replace('以下', '')
 85 | 			else:
 86 | 				alternative_answers = []
 87 | 			# 根据问题和备选答案搜索答案
 88 | 			self.search(question, alternative_answers)
 89 | 			time.sleep(1)
 90 | 
 91 | 	def search(self, question, alternative_answers):
 92 | 		print(question)
 93 | 		print(alternative_answers)
 94 | 		infos = {"word":question}
 95 | 		# 调用百度接口
 96 | 		url = self.baidu + 'lm=0&rn=10&pn=0&fr=search&ie=gbk&' + urllib.parse.urlencode(infos, encoding='GB2312')
 97 | 		print(url)
 98 | 		headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36',
 99 | 		}
100 | 		sess = requests.Session()
101 | 		req = sess.get(url = url, headers=headers, verify=False)
102 | 		req.encoding = 'gbk'
103 | 		# print(req.text)
104 | 		bf = BeautifulSoup(req.text, 'lxml')
105 | 		answers = bf.find_all('dd',class_='dd answer')
106 | 		for answer in answers:
107 | 			print(answer.text)
108 | 
109 | 		# 推荐答案
110 | 		recommend = ''
111 | 		if alternative_answers != []:
112 | 			best = []
113 | 			print('\n')
114 | 			for answer in answers:
115 | 				# print(answer.text)
116 | 				for each_answer in alternative_answers:
117 | 					if each_answer in answer.text:
118 | 						best.append(each_answer)
119 | 						print(each_answer,end=' ')
120 | 						# print(answer.text)
121 | 						print('\n')
122 | 						break
123 | 			statistics = {}
124 | 			for each in best:
125 | 				if each not in statistics.keys():
126 | 					statistics[each] = 1
127 | 				else:
128 | 					statistics[each] += 1
129 | 			errors = ['没有', '不是', '不对', '不正确','错误','不包括','不包含','不在','错']
130 | 			error_list = list(map(lambda x: x in question, errors))
131 | 			print(error_list)
132 | 			if sum(error_list) >= 1:
133 | 				for each_answer in alternative_answers:
134 | 					if each_answer not in statistics.items():
135 | 						recommend = each_answer
136 | 						print('推荐答案：', recommend)
137 | 						break
138 | 			elif statistics != {}:
139 | 				recommend = sorted(statistics.items(), key=lambda e:e[1], reverse=True)[0][0]
140 | 				print('推荐答案：', recommend)
141 | 
142 | 		# 写入文件
143 | 		with open('file.txt', 'w') as f:
144 | 			f.write('问题：' + question)
145 | 			f.write('\n')
146 | 			f.write('*' * 50)
147 | 			f.write('\n')
148 | 			if alternative_answers != []:
149 | 				f.write('选项：')
150 | 				for i in range(len(alternative_answers)):
151 | 					f.write(alternative_answers[i])
152 | 					f.write('  ')
153 | 			f.write('\n')
154 | 			f.write('*' * 50)
155 | 			f.write('\n')
156 | 			f.write('参考答案：\n')
157 | 			for answer in answers:
158 | 				f.write(answer.text)
159 | 				f.write('\n')
160 | 			f.write('*' * 50)
161 | 			f.write('\n')
162 | 			if recommend != '':
163 | 				f.write('最终答案请自行斟酌！\t')
164 | 				f.write('推荐答案：' + sorted(statistics.items(), key=lambda e:e[1], reverse=True)[0][0])
165 | 
166 | 
167 | if __name__ == '__main__':
168 | 	bw = BaiWan()
169 | 	bw.get_question()


--------------------------------------------------------------------------------
/baiwan/file.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sukersuker/python--spider/0e743c1503212cff9d2a800b6c5df344d81362b7/baiwan/file.txt


--------------------------------------------------------------------------------
/baiwan/index.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html>
  3 |   <head>
  4 |     <meta charset="utf-8" />
  5 |     <meta http-equiv="refresh" content="2">
  6 |     <title>Jack Cui答题辅助系统</title>
  7 |   </head>
  8 |   <body>
  9 |     <h1>百万英雄答题辅助系统</h1>
 10 |     <p id="line1"></p>
 11 |     <p id="line2"></p>
 12 |     <p id="line3"></p>
 13 |     <p id="line4"></p>
 14 |     <p id="line5"></p>
 15 |     <p id="line6"></p>
 16 |     <p id="line7"></p>
 17 |     <p id="line8"></p>
 18 |     <p id="line9"></p>
 19 |     <p id="line10"></p>
 20 |     <p id="line11"></p>
 21 |     <p id="line12"></p>
 22 |     <p id="line13"></p>
 23 |     <p id="line14"></p>
 24 |     <p id="line15"></p>
 25 |     <p id="line16"></p>
 26 |     <p id="line17"></p>
 27 |     <p id="line18"></p>
 28 |     <p id="line19"></p>
 29 |     <p id="line20"></p>
 30 |     <p id="line21"></p>
 31 |     <p id="line22"></p>
 32 |     <p id="line23"></p>
 33 |     <p id="line24"></p>
 34 |     <p id="line25"></p>
 35 |     <script src="http://222.222.124.77:9001/jquery.min.js"></script>
 36 |     <script src="/socket.io/socket.io.js"></script>
 37 |     <script>
 38 |       var socket = io.connect('http://你的IP:端口');
 39 |       var line1 = document.getElementById('line1');
 40 |       var line2 = document.getElementById('line2');
 41 |       var line3 = document.getElementById('line3');
 42 |       var line4 = document.getElementById('line4');
 43 |       var line5 = document.getElementById('line5');
 44 |       var line6 = document.getElementById('line6');
 45 |       var line7 = document.getElementById('line7');
 46 |       var line8 = document.getElementById('line8');
 47 |       var line9 = document.getElementById('line9');
 48 |       var line10 = document.getElementById('line10');
 49 |       var line11 = document.getElementById('line11');
 50 |       var line12 = document.getElementById('line12');
 51 |       var line13 = document.getElementById('line13');
 52 |       var line14 = document.getElementById('line14');
 53 |       var line15 = document.getElementById('line15');
 54 |       var line16 = document.getElementById('line16');
 55 |       var line17 = document.getElementById('line17');
 56 |       var line18 = document.getElementById('line18');
 57 |       var line19 = document.getElementById('line19');
 58 |       var line20 = document.getElementById('line20');
 59 |       var line21 = document.getElementById('line21');
 60 |       var line22 = document.getElementById('line22');
 61 |       var line23 = document.getElementById('line23');
 62 |       var line24 = document.getElementById('line24');
 63 |       var line25 = document.getElementById('line25');
 64 |       socket.on('users',function(data){
 65 |         if(data.line1 == 'f'){
 66 |            line1.innerHTML = '' 
 67 |         }
 68 |         else{
 69 |            line1.innerHTML = data.line1
 70 | 		}
 71 |         if(data.line2 == 'f'){
 72 |            line2.innerHTML = '' 
 73 |         }
 74 |         else{
 75 |            line2.innerHTML = data.line2
 76 | 		}
 77 |         if(data.line3 == 'f'){
 78 |            line3.innerHTML = '' 
 79 |         }
 80 |         else{
 81 |            line3.innerHTML = data.line3
 82 | 		}
 83 |         if(data.line4 == 'f'){
 84 |            line4.innerHTML = '' 
 85 |         }
 86 |         else{
 87 |            line4.innerHTML = data.line4
 88 |         }
 89 | 		if(data.line5 == 'f'){
 90 |            line5.innerHTML = '' 
 91 |         }
 92 |         else{
 93 |            line5.innerHTML = data.line5
 94 |         }
 95 | 		if(data.line6 == 'f'){
 96 |            line6.innerHTML = '' 
 97 |         }
 98 | 		else{
 99 |            line6.innerHTML = data.line6
100 |         }
101 | 		if(data.line7 == 'f'){
102 |            line7.innerHTML = ''
103 |         }
104 |         else{
105 |            line7.innerHTML = data.line7
106 |         }
107 | 		if(data.line8 == 'f'){
108 |            line8.innerHTML = '' 
109 | 		}
110 | 		else{
111 | 		   line8.innerHTML = data.line8
112 | 		}
113 |         if(data.line9 == 'f'){
114 |            line9.innerHTML = '' 
115 | 		}
116 | 		else{
117 | 		   line9.innerHTML = data.line9
118 | 		}
119 |         if(data.line10 == 'f'){
120 |            line10.innerHTML = '' 
121 |         }
122 |         else{
123 |            line10.innerHTML = data.line10
124 |         }
125 | 		if(data.line11 == 'f'){
126 |            line11.innerHTML = '' 
127 |         }
128 |         else{
129 |            line11.innerHTML = data.line11
130 |         }
131 | 		if(data.line12 == 'f'){
132 |            line12.innerHTML = '' 
133 |         }
134 |         else{
135 |            line12.innerHTML = data.line12
136 |         }
137 | 		if(data.line13 == 'f'){
138 |            line13.innerHTML = '' 
139 |         }
140 |         else{
141 |            line13.innerHTML = data.line13
142 |         }
143 | 		if(data.line14 == 'f'){
144 |            line14.innerHTML = '' 
145 |         }
146 |         else{
147 |            line14.innerHTML = data.line14
148 |         }
149 | 		if(data.line15 == 'f'){
150 |            line15.innerHTML = '' 
151 |         }
152 |         else{
153 |            line15.innerHTML = data.line15
154 |         }
155 | 		if(data.line16 == 'f'){
156 |            line16.innerHTML = ''
157 |         }
158 |         else{
159 |            line16.innerHTML = data.line16
160 | 		}
161 |         if(data.line17 == 'f'){
162 |            line17.innerHTML = '' 
163 | 		}
164 | 		else{
165 | 		   line17.innerHTML = data.line17
166 | 		}
167 |         if(data.line18 == 'f'){
168 |            line18.innerHTML = '' 
169 |         }
170 |         else{
171 |            line18.innerHTML = data.line18
172 | 		}
173 |         if(data.line19 == 'f'){
174 |            line19.innerHTML = '' 
175 |         }
176 |         else{
177 |            line19.innerHTML = data.line19
178 | 		}
179 |         if(data.line20 == 'f'){
180 |            line20.innerHTML = '' 
181 |         }
182 |         else{
183 |            line20.innerHTML = data.line20
184 | 		}
185 |         if(data.line21 == 'f'){
186 |            line21.innerHTML = '' 
187 |         }
188 |         else{
189 |            line21.innerHTML = data.line21
190 |         }
191 | 		if(data.line22 == 'f'){
192 |            line22.innerHTML = '' 
193 |         }
194 |         else{
195 |            line22.innerHTML = data.line22
196 |         }
197 | 		if(data.line23 == 'f'){
198 |            line23.innerHTML = '' 
199 |         }
200 | 		else{
201 |            line23.innerHTML = data.line23
202 |         }
203 | 		if(data.line24 == 'f'){
204 |            line24.innerHTML = ''
205 |         }
206 |         else{
207 |            line24.innerHTML = data.line24
208 |         }
209 | 		if(data.line25 == 'f'){
210 |            line25.innerHTML = '' 
211 | 		}
212 | 		else{
213 | 		   line25.innerHTML = data.line25
214 | 		}
215 |       });
216 |     </script>
217 | 
218 |   </body>
219 | </html>
220 | 


--------------------------------------------------------------------------------
/baiwan/question.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sukersuker/python--spider/0e743c1503212cff9d2a800b6c5df344d81362b7/baiwan/question.txt


--------------------------------------------------------------------------------
/bilibili/README.md:
--------------------------------------------------------------------------------
 1 | ## 功能
 2 | 
 3 | 下载B站视频和弹幕，将xml原生弹幕转换为ass弹幕文件，支持plotplayer等播放器的弹幕播放。
 4 | 
 5 | ## 作者
 6 | 
 7 | * Website: [http://cuijiahua.com](http://cuijiahua.com "悬停显示")
 8 | * Author: Jack Cui
 9 | * Date: 2018.6.12
10 | 
11 | ## 使用说明
12 | 
13 | 	python bilibili.py -d 猫 -k 猫 -p 10
14 | 
15 | 	三个参数：
16 | 	-d	保存视频的文件夹名
17 | 	-k	B站搜索的关键字
18 | 	-p	下载搜索结果前多少页
19 | 


--------------------------------------------------------------------------------
/bilibili/bilibili.py:
--------------------------------------------------------------------------------
  1 | # -*-coding:utf-8 -*-
  2 | # Website: http://cuijiahua.com
  3 | # Author: Jack Cui
  4 | # Date: 2018.6.9
  5 | 
  6 | import requests, json, re, sys, os, urllib, argparse, time
  7 | from urllib.request import urlretrieve
  8 | from contextlib import closing
  9 | from urllib import parse
 10 | import xml2ass
 11 | 
 12 | class BiliBili:
 13 |     def __init__(self, dirname, keyword):
 14 | 		self.dn_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36',
 15 | 			'Accept': '*/*',
 16 | 			'Accept-Encoding': 'gzip, deflate, br',
 17 | 			'Accept-Language': 'zh-CN,zh;q=0.9',
 18 | 			'Referer': 'https://search.bilibili.com/all?keyword=%s' % parse.quote(keyword)}
 19 | 
 20 | 		self.search_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36',
 21 | 			'Accept-Language': 'zh-CN,zh;q=0.9',
 22 | 			'Accept-Encoding': 'gzip, deflate, br',
 23 | 			'Accept': 'application/json, text/plain, */*'}
 24 | 
 25 | 		self.video_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36',
 26 | 			'Accept-Language': 'zh-CN,zh;q=0.9',
 27 | 			'Accept-Encoding': 'gzip, deflate, br',
 28 | 			'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8'}
 29 | 
 30 | 		self.danmu_header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36',
 31 | 			'Accept': '*/*',
 32 | 			'Accept-Encoding': 'gzip, deflate, br',
 33 | 			'Accept-Language': 'zh-CN,zh;q=0.9'}
 34 | 
 35 | 		self.sess = requests.Session()
 36 | 
 37 | 		self.dir = dirname
 38 | 
 39 | 	def video_downloader(self, video_url, video_name):
 40 | 		"""
 41 | 		视频下载
 42 | 		Parameters:
 43 | 			video_url: 带水印的视频地址
 44 | 			video_name: 视频名
 45 | 		Returns:
 46 | 			无
 47 | 		"""
 48 | 		size = 0
 49 | 		with closing(self.sess.get(video_url, headers=self.dn_headers, stream=True, verify=False)) as response:
 50 | 			chunk_size = 1024
 51 | 			content_size = int(response.headers['content-length'])
 52 | 			if response.status_code == 200:
 53 | 				sys.stdout.write('  [文件大小]:%0.2f MB\n' % (content_size / chunk_size / 1024))
 54 | 				video_name = os.path.join(self.dir, video_name)
 55 | 				with open(video_name, 'wb') as file:
 56 | 					for data in response.iter_content(chunk_size = chunk_size):
 57 | 						file.write(data)
 58 | 						size += len(data)
 59 | 						file.flush()
 60 | 
 61 | 						sys.stdout.write('  [下载进度]:%.2f%%' % float(size / content_size * 100) + '\r')
 62 | 						# sys.stdout.flush()
 63 | 						if size / content_size == 1:
 64 | 							print('\n')
 65 | 			else:
 66 | 				print('链接异常')
 67 | 
 68 | 	def search_video(self, search_url):
 69 | 		"""
 70 | 		搜索接口
 71 | 		Parameters:
 72 | 			search_url: 带水印的视频地址
 73 | 		Returns:
 74 | 			titles：视频名列表
 75 | 			arcurls: 视频播放地址列表
 76 | 		"""
 77 | 		req = self.sess.get(url=search_url, headers=self.search_headers, verify=False)
 78 | 		html = json.loads(req.text)
 79 | 		videos = html["data"]['result']
 80 | 		titles = []
 81 | 		arcurls = []
 82 | 		for video in videos:
 83 | 			titles.append(video['title'].replace('<em class="keyword">','').replace('</em>',''))
 84 | 			arcurls.append(video['arcurl'])
 85 | 		return titles, arcurls
 86 | 
 87 | 	def get_download_url(self, arcurl):
 88 | 		"""
 89 | 		获取视频下载地址
 90 | 		Parameters:
 91 | 			arcurl: 视频播放地址
 92 | 			oid：弹幕地址参数
 93 | 		Returns:
 94 | 			download_url：视频下载地址
 95 | 		"""
 96 | 		req = self.sess.get(url=arcurl, headers=self.video_headers, verify=False)
 97 | 		pattern = '.__playinfo__=(.*)</script><script>window.__INITIAL_STATE__='
 98 | 		try:
 99 | 			infos = re.findall(pattern, req.text)[0]
100 | 		except:
101 | 			return '',''
102 | 		html = json.loads(infos)
103 | 		durl = html['durl']
104 | 		print(durl)
105 | 		download_url = durl[0]['url']
106 | 		if 'mirrork' in download_url:
107 | 			oid = download_url.split('/')[6]
108 | 		else:
109 | 			oid = download_url.split('/')[7]
110 | 			if len(oid) >= 10:
111 | 				oid = download_url.split('/')[6]
112 | 		return download_url, oid
113 | 
114 | 
115 | 	def download_xml(self, danmu_url, danmu_name):
116 | 		"""
117 | 		获取视频XML原生弹幕
118 | 		Parameters:
119 | 			danmu_url: 弹幕地址
120 | 			danmu_name：弹幕xml文件保存名
121 | 		Returns:
122 | 			无
123 | 		"""
124 | 		with closing(self.sess.get(danmu_url, headers=self.danmu_header, stream=True, verify=False)) as response:  
125 | 			if response.status_code == 200:
126 | 				with open(danmu_name, 'wb') as file:
127 | 					for data in response.iter_content():
128 | 						file.write(data)
129 | 						file.flush()
130 | 			else:
131 | 				print('链接异常')
132 | 
133 | 	def get_danmu(self, oid, filename):
134 | 		"""
135 | 		下载弹幕
136 | 		Parameters:
137 | 			oid: 弹幕oid
138 | 			filename: 弹幕保存前缀名
139 | 		Returns:
140 | 			无
141 | 		"""
142 | 		danmu_url = 'https://api.bilibili.com/x/v1/dm/list.so?oid={}'.format(oid)
143 | 		danmu_name = os.path.join(self.dir, filename + '.xml')
144 | 		danmu_ass = os.path.join(self.dir, filename + '.ass')
145 | 		self.download_xml(danmu_url, danmu_name)
146 | 		time.sleep(0.5)
147 | 		xml2ass.Danmaku2ASS(danmu_name, danmu_ass, 1280, 720)
148 | 		# os.remove(danmu_name)
149 | 
150 | 	def search_videos(self, keyword, pages):
151 | 		"""
152 | 		搜索视频
153 | 		Parameters:
154 | 			keyword: 搜索关键字
155 | 			pages：下载页数
156 | 		Returns:
157 | 			无
158 | 		"""
159 | 		if self.dir not in os.listdir():
160 | 			os.mkdir(self.dir)
161 | 		for page in range(1, pages+1):
162 | 			search_url = 'https://api.bilibili.com/x/web-interface/search/type?jsonp=jsonp&search_type=video&keyword={}&page={}'.format(keyword, page)
163 | 			titles, arcurls = self.search_video(search_url)
164 | 			for index, arcurl in enumerate(arcurls):
165 | 				title = titles[index]
166 | 				for c in u'´☆❤◦\/:*?"<>|':
167 | 					title = title.replace(c, '')
168 | 				if title + '.flv' not in os.listdir(self.dir):
169 | 					download_url, oid = self.get_download_url(arcurl)
170 | 					if download_url != '' and oid != '':
171 | 						print('第[ %d ]页:视频[ %s ]下载中:' % (page, title))
172 | 						self.video_downloader(download_url, title + '.flv')
173 | 						print('视频下载完成!')
174 | 						self.get_danmu(oid, title)
175 | 						print('弹幕下载完成!')
176 | 
177 | if __name__ == '__main__':
178 | 
179 | 	if len(sys.argv) == 1:
180 | 		sys.argv.append('--help')
181 | 
182 | 	parser = argparse.ArgumentParser()
183 | 	parser.add_argument('-d', '--dir', required=True, help=_('download path'))
184 | 	parser.add_argument('-k', '--keyword', required=True, help=_('search content'))
185 | 	parser.add_argument('-p', '--pages', required=True, help=_('the number of pages for downloading'), type=int, default=1)
186 | 	
187 | 	args = parser.parse_args()
188 | 	B = BiliBili(args.dir,args.keyword)
189 | 	B.search_videos(args.keyword, args.pages)
190 | 
191 | 	print('全部下载完成!')
192 | 
193 | 


--------------------------------------------------------------------------------
/bilibili/xml2ass.py:
--------------------------------------------------------------------------------
  1 | # The original author of this program, Danmaku2ASS, is StarBrilliant.
  2 | # This file is released under General Public License version 3.
  3 | # You should have received a copy of General Public License text alongside with
  4 | # this program. If not, you can obtain it at http://gnu.org/copyleft/gpl.html .
  5 | # This program comes with no warranty, the author will not be resopnsible for
  6 | # any damage or problems caused by this program.
  7 | 
  8 | import argparse
  9 | import calendar
 10 | import gettext
 11 | import io
 12 | import json
 13 | import logging
 14 | import math
 15 | import os
 16 | import random
 17 | import re
 18 | import sys
 19 | import time
 20 | import xml.dom.minidom
 21 | 
 22 | 
 23 | if sys.version_info < (3,):
 24 |     raise RuntimeError('at least Python 3.0 is required')
 25 | 
 26 | gettext.install('danmaku2ass', os.path.join(os.path.dirname(os.path.abspath(os.path.realpath(sys.argv[0] or 'locale'))), 'locale'))
 27 | 
 28 | def SeekZero(function):
 29 |     def decorated_function(file_):
 30 |         file_.seek(0)
 31 |         try:
 32 |             return function(file_)
 33 |         finally:
 34 |             file_.seek(0)
 35 |     return decorated_function
 36 | 
 37 | 
 38 | def EOFAsNone(function):
 39 |     def decorated_function(*args, **kwargs):
 40 |         try:
 41 |             return function(*args, **kwargs)
 42 |         except EOFError:
 43 |             return None
 44 |     return decorated_function
 45 | 
 46 | 
 47 | @SeekZero
 48 | @EOFAsNone
 49 | def ProbeCommentFormat(f):
 50 |     tmp = f.read(1)
 51 |     if tmp == '[':
 52 |         return 'Acfun'
 53 |         # It is unwise to wrap a JSON object in an array!
 54 |         # See this: http://haacked.com/archive/2008/11/20/anatomy-of-a-subtle-json-vulnerability.aspx/
 55 |         # Do never follow what Acfun developers did!
 56 |     elif tmp == '{':
 57 |         tmp = f.read(14)
 58 |         if tmp == '"status_code":':
 59 |             return 'Tudou'
 60 |         elif tmp == '"root":{"total':
 61 |             return 'sH5V'
 62 |     elif tmp == '<':
 63 |         tmp = f.read(1)
 64 |         if tmp == '?':
 65 |             tmp = f.read(38)
 66 |             if tmp == 'xml version="1.0" encoding="UTF-8"?><p':
 67 |                 return 'Niconico'
 68 |             elif tmp == 'xml version="1.0" encoding="UTF-8"?><i':
 69 |                 return 'Bilibili'
 70 |             elif tmp == 'xml version="1.0" encoding="utf-8"?><i':
 71 |                 return 'Bilibili'  # tucao.cc, with the same file format as Bilibili
 72 |             elif tmp == 'xml version="1.0" encoding="Utf-8"?>\n<':
 73 |                 return 'Bilibili'  # Komica, with the same file format as Bilibili
 74 |             elif tmp == 'xml version="1.0" encoding="UTF-8"?>\n<':
 75 |                 return 'MioMio'
 76 |         elif tmp == 'p':
 77 |             return 'Niconico'  # Himawari Douga, with the same file format as Niconico Douga
 78 | 
 79 | 
 80 | #
 81 | # ReadComments**** protocol
 82 | #
 83 | # Input:
 84 | #     f:         Input file
 85 | #     fontsize:  Default font size
 86 | #
 87 | # Output:
 88 | #     yield a tuple:
 89 | #         (timeline, timestamp, no, comment, pos, color, size, height, width)
 90 | #     timeline:  The position when the comment is replayed
 91 | #     timestamp: The UNIX timestamp when the comment is submitted
 92 | #     no:        A sequence of 1, 2, 3, ..., used for sorting
 93 | #     comment:   The content of the comment
 94 | #     pos:       0 for regular moving comment,
 95 | #                1 for bottom centered comment,
 96 | #                2 for top centered comment,
 97 | #                3 for reversed moving comment
 98 | #     color:     Font color represented in 0xRRGGBB,
 99 | #                e.g. 0xffffff for white
100 | #     size:      Font size
101 | #     height:    The estimated height in pixels
102 | #                i.e. (comment.count('\n')+1)*size
103 | #     width:     The estimated width in pixels
104 | #                i.e. CalculateLength(comment)*size
105 | #
106 | # After implementing ReadComments****, make sure to update ProbeCommentFormat
107 | # and CommentFormatMap.
108 | #
109 | 
110 | 
111 | def ReadCommentsNiconico(f, fontsize):
112 |     NiconicoColorMap = {'red': 0xff0000, 'pink': 0xff8080, 'orange': 0xffcc00, 'yellow': 0xffff00, 'green': 0x00ff00, 'cyan': 0x00ffff, 'blue': 0x0000ff, 'purple': 0xc000ff, 'black': 0x000000, 'niconicowhite': 0xcccc99, 'white2': 0xcccc99, 'truered': 0xcc0033, 'red2': 0xcc0033, 'passionorange': 0xff6600, 'orange2': 0xff6600, 'madyellow': 0x999900, 'yellow2': 0x999900, 'elementalgreen': 0x00cc66, 'green2': 0x00cc66, 'marineblue': 0x33ffcc, 'blue2': 0x33ffcc, 'nobleviolet': 0x6633cc, 'purple2': 0x6633cc}
113 |     dom = xml.dom.minidom.parse(f)
114 |     comment_element = dom.getElementsByTagName('chat')
115 |     for comment in comment_element:
116 |         try:
117 |             c = str(comment.childNodes[0].wholeText)
118 |             if c.startswith('/'):
119 |                 continue  # ignore advanced comments
120 |             pos = 0
121 |             color = 0xffffff
122 |             size = fontsize
123 |             for mailstyle in str(comment.getAttribute('mail')).split():
124 |                 if mailstyle == 'ue':
125 |                     pos = 1
126 |                 elif mailstyle == 'shita':
127 |                     pos = 2
128 |                 elif mailstyle == 'big':
129 |                     size = fontsize*1.44
130 |                 elif mailstyle == 'small':
131 |                     size = fontsize*0.64
132 |                 elif mailstyle in NiconicoColorMap:
133 |                     color = NiconicoColorMap[mailstyle]
134 |             yield (max(int(comment.getAttribute('vpos')), 0)*0.01, int(comment.getAttribute('date')), int(comment.getAttribute('no')), c, pos, color, size, (c.count('\n')+1)*size, CalculateLength(c)*size)
135 |         except (AssertionError, AttributeError, IndexError, TypeError, ValueError):
136 |             logging.warning(_('Invalid comment: %s') % comment.toxml())
137 |             continue
138 | 
139 | 
140 | def ReadCommentsAcfun(f, fontsize):
141 |     comment_element = json.load(f)
142 |     for i, comment in enumerate(comment_element):
143 |         try:
144 |             p = str(comment['c']).split(',')
145 |             assert len(p) >= 6
146 |             assert p[2] in ('1', '2', '4', '5', '7')
147 |             size = int(p[3])*fontsize/25.0
148 |             if p[2] != '7':
149 |                 c = str(comment['m']).replace('\\r', '\n').replace('\r', '\n')
150 |                 yield (float(p[0]), int(p[5]), i, c, {'1': 0, '2': 0, '4': 2, '5': 1}[p[2]], int(p[1]), size, (c.count('\n')+1)*size, CalculateLength(c)*size)
151 |             else:
152 |                 c = dict(json.loads(comment['m']))
153 |                 yield (float(p[0]), int(p[5]), i, c, 'acfunpos', int(p[1]), size, 0, 0)
154 |         except (AssertionError, AttributeError, IndexError, TypeError, ValueError):
155 |             logging.warning(_('Invalid comment: %r') % comment)
156 |             continue
157 | 
158 | 
159 | def ReadCommentsBilibili(f, fontsize):
160 |     dom = xml.dom.minidom.parse(f)
161 |     comment_element = dom.getElementsByTagName('d')
162 |     for i, comment in enumerate(comment_element):
163 |         try:
164 |             p = str(comment.getAttribute('p')).split(',')
165 |             assert len(p) >= 5
166 |             assert p[1] in ('1', '4', '5', '6', '7')
167 |             if p[1] != '7':
168 |                 c = str(comment.childNodes[0].wholeText).replace('/n', '\n')
169 |                 size = int(p[2])*fontsize/25.0
170 |                 yield (float(p[0]), int(p[4]), i, c, {'1': 0, '4': 2, '5': 1, '6': 3}[p[1]], int(p[3]), size, (c.count('\n')+1)*size, CalculateLength(c)*size)
171 |             else:  # positioned comment
172 |                 c = str(comment.childNodes[0].wholeText)
173 |                 yield (float(p[0]), int(p[4]), i, c, 'bilipos', int(p[3]), int(p[2]), 0, 0)
174 |         except (AssertionError, AttributeError, IndexError, TypeError, ValueError):
175 |             logging.warning(_('Invalid comment: %s') % comment.toxml())
176 |             continue
177 | 
178 | 
179 | def ReadCommentsTudou(f, fontsize):
180 |     comment_element = json.load(f)
181 |     for i, comment in enumerate(comment_element['comment_list']):
182 |         try:
183 |             assert comment['pos'] in (3, 4, 6)
184 |             c = str(comment['data'])
185 |             assert comment['size'] in (0, 1, 2)
186 |             size = {0: 0.64, 1: 1, 2: 1.44}[comment['size']]*fontsize
187 |             yield (int(comment['replay_time']*0.001), int(comment['commit_time']), i, c, {3: 0, 4: 2, 6: 1}[comment['pos']], int(comment['color']), size, (c.count('\n')+1)*size, CalculateLength(c)*size)
188 |         except (AssertionError, AttributeError, IndexError, TypeError, ValueError):
189 |             logging.warning(_('Invalid comment: %r') % comment)
190 |             continue
191 | 
192 | 
193 | def ReadCommentsMioMio(f, fontsize):
194 |     NiconicoColorMap = {'red': 0xff0000, 'pink': 0xff8080, 'orange': 0xffc000, 'yellow': 0xffff00, 'green': 0x00ff00, 'cyan': 0x00ffff, 'blue': 0x0000ff, 'purple': 0xc000ff, 'black': 0x000000}
195 |     dom = xml.dom.minidom.parse(f)
196 |     comment_element = dom.getElementsByTagName('data')
197 |     for i, comment in enumerate(comment_element):
198 |         try:
199 |             message = comment.getElementsByTagName('message')[0]
200 |             c = str(message.childNodes[0].wholeText)
201 |             pos = 0
202 |             size = int(message.getAttribute('fontsize'))*fontsize/25.0
203 |             yield (float(comment.getElementsByTagName('playTime')[0].childNodes[0].wholeText), int(calendar.timegm(time.strptime(comment.getElementsByTagName('times')[0].childNodes[0].wholeText, '%Y-%m-%d %H:%M:%S')))-28800, i, c, {'1': 0, '4': 2, '5': 1}[message.getAttribute('mode')], int(message.getAttribute('color')), size, (c.count('\n')+1)*size, CalculateLength(c)*size)
204 |         except (AssertionError, AttributeError, IndexError, TypeError, ValueError):
205 |             logging.warning(_('Invalid comment: %s') % comment.toxml())
206 |             continue
207 | 
208 | 
209 | def ReadCommentsSH5V(f, fontsize):
210 |     comment_element = json.load(f)
211 |     for i, comment in enumerate(comment_element["root"]["bgs"]):
212 |         try:
213 |             c_at = str(comment['at'])
214 |             c_type = str(comment['type'])
215 |             c_date = str(comment['timestamp'])
216 |             c_color = str(comment['color'])
217 |             c = str(comment['text'])
218 |             size = fontsize
219 |             if c_type != '7':
220 |                 yield (float(c_at), int(c_date), i, c, {'0': 0, '1': 0, '4': 2, '5': 1}[c_type], int(c_color[1:], 16), size, (c.count('\n')+1)*size, CalculateLength(c)*size)
221 |             else:
222 |                 c_x = float(comment['x'])
223 |                 c_y = float(comment['y'])
224 |                 size = int(comment['size'])
225 |                 dur = int(comment['dur'])
226 |                 data1 = float(comment['data1'])
227 |                 data2 = float(comment['data2'])
228 |                 data3 = int(comment['data3'])
229 |                 data4 = int(comment['data4'])
230 |                 yield (float(c_at), int(c_date), i, c, 'sH5Vpos', int(c_color[1:], 16), size, 0, 0, c_x, c_y, dur, data1, data2, data3, data4)
231 |         except (AssertionError, AttributeError, IndexError, TypeError, ValueError):
232 |             logging.warning(_('Invalid comment: %r') % comment)
233 |             continue
234 | 
235 | 
236 | CommentFormatMap = {None: None, 'Niconico': ReadCommentsNiconico, 'Acfun': ReadCommentsAcfun, 'Bilibili': ReadCommentsBilibili, 'Tudou': ReadCommentsTudou, 'MioMio': ReadCommentsMioMio, 'sH5V': ReadCommentsSH5V}
237 | 
238 | 
239 | def WriteCommentBilibiliPositioned(f, c, width, height, styleid):
240 |     #BiliPlayerSize = (512, 384)  # Bilibili player version 2010
241 |     #BiliPlayerSize = (540, 384)  # Bilibili player version 2012
242 |     BiliPlayerSize = (672, 438)  # Bilibili player version 2014
243 |     ZoomFactor = GetZoomFactor(BiliPlayerSize, (width, height))
244 | 
245 |     def GetPosition(InputPos, isHeight):
246 |         isHeight = int(isHeight)  # True -> 1
247 |         if isinstance(InputPos, int):
248 |             return ZoomFactor[0]*InputPos+ZoomFactor[isHeight+1]
249 |         elif isinstance(InputPos, float):
250 |             if InputPos > 1:
251 |                 return ZoomFactor[0]*InputPos+ZoomFactor[isHeight+1]
252 |             else:
253 |                 return BiliPlayerSize[isHeight]*ZoomFactor[0]*InputPos+ZoomFactor[isHeight+1]
254 |         else:
255 |             try:
256 |                 InputPos = int(InputPos)
257 |             except ValueError:
258 |                 InputPos = float(InputPos)
259 |             return GetPosition(InputPos, isHeight)
260 | 
261 |     try:
262 |         comment_args = safe_list(json.loads(c[3]))
263 |         text = ASSEscape(str(comment_args[4]).replace('/n', '\n'))
264 |         from_x = comment_args.get(0, 0)
265 |         from_y = comment_args.get(1, 0)
266 |         to_x = comment_args.get(7, from_x)
267 |         to_y = comment_args.get(8, from_y)
268 |         from_x = round(GetPosition(from_x, False))
269 |         from_y = round(GetPosition(from_y, True))
270 |         to_x = round(GetPosition(to_x, False))
271 |         to_y = round(GetPosition(to_y, True))
272 |         alpha = safe_list(str(comment_args.get(2, '1')).split('-'))
273 |         from_alpha = float(alpha.get(0, 1))
274 |         to_alpha = float(alpha.get(1, from_alpha))
275 |         from_alpha = 255-round(from_alpha*255)
276 |         to_alpha = 255-round(to_alpha*255)
277 |         rotate_z = int(comment_args.get(5, 0))
278 |         rotate_y = int(comment_args.get(6, 0))
279 |         lifetime = float(comment_args.get(3, 4500))
280 |         duration = int(comment_args.get(9, lifetime*1000))
281 |         delay = int(comment_args.get(10, 0))
282 |         fontface = comment_args.get(12)
283 |         isborder = comment_args.get(11, 'true')
284 |         styles = []
285 |         if (from_x, from_y) == (to_x, to_y):
286 |             styles.append('\\pos(%s, %s)' % (from_x, from_y))
287 |         else:
288 |             styles.append('\\move(%s, %s, %s, %s, %s, %s)' % (from_x, from_y, to_x, to_y, delay, delay+duration))
289 |         styles.append('\\frx%s\\fry%s\\frz%s\\fax%s\\fay%s' % ConvertFlashRotation(rotate_y, rotate_z, (from_x-ZoomFactor[1])/(width-ZoomFactor[1]*2), (from_y-ZoomFactor[2])/(height-ZoomFactor[2]*2)))
290 |         if (from_x, from_y) != (to_x, to_y):
291 |             styles.append('\\t(%s, %s, ' % (delay, delay+duration))
292 |             styles.append('\\frx%s\\fry%s\\frz%s\\fax%s\\fay%s' % ConvertFlashRotation(rotate_y, rotate_z, (to_x-ZoomFactor[1])/(width-ZoomFactor[1]*2), (to_y-ZoomFactor[2])/(height-ZoomFactor[2]*2)))
293 |             styles.append(')')
294 |         if fontface:
295 |             styles.append('\\fn%s' % ASSEscape(fontface))
296 |         styles.append('\\fs%s' % round(c[6]*ZoomFactor[0]))
297 |         if c[5] != 0xffffff:
298 |             styles.append('\\c&H%02X%02X%02X&' % (c[5] & 0xff, (c[5] >> 8) & 0xff, (c[5] >> 16) & 0xff))
299 |             if c[5] == 0x000000:
300 |                 styles.append('\\3c&HFFFFFF&')
301 |         if from_alpha == to_alpha:
302 |             styles.append('\\alpha&H%02X' % from_alpha)
303 |         elif (from_alpha, to_alpha) == (255, 0):
304 |             styles.append('\\fad(%s,0)' % (lifetime*1000))
305 |         elif (from_alpha, to_alpha) == (0, 255):
306 |             styles.append('\\fad(0, %s)' % (lifetime*1000))
307 |         else:
308 |             styles.append('\\fade(%(from_alpha)s, %(to_alpha)s, %(to_alpha)s, 0, %(end_time)s, %(end_time)s, %(end_time)s)' % {'from_alpha': from_alpha, 'to_alpha': to_alpha, 'end_time': lifetime*1000})
309 |         if isborder == 'false':
310 |             styles.append('\\bord0')
311 |         f.write('Dialogue: -1,%(start)s,%(end)s,%(styleid)s,,0,0,0,,{%(styles)s}%(text)s\n' % {'start': ConvertTimestamp(c[0]), 'end': ConvertTimestamp(c[0]+lifetime), 'styles': ''.join(styles), 'text': text, 'styleid': styleid})
312 |     except (IndexError, ValueError) as e:
313 |         try:
314 |             logging.warning(_('Invalid comment: %r') % c[3])
315 |         except IndexError:
316 |             logging.warning(_('Invalid comment: %r') % c)
317 | 
318 | 
319 | def WriteCommentAcfunPositioned(f, c, width, height, styleid):
320 |     AcfunPlayerSize = (560, 400)
321 |     ZoomFactor = GetZoomFactor(AcfunPlayerSize, (width, height))
322 | 
323 |     def GetPosition(InputPos, isHeight):
324 |         isHeight = int(isHeight)  # True -> 1
325 |         return AcfunPlayerSize[isHeight]*ZoomFactor[0]*InputPos*0.001+ZoomFactor[isHeight+1]
326 | 
327 |     def GetTransformStyles(x=None, y=None, scale_x=None, scale_y=None, rotate_z=None, rotate_y=None, color=None, alpha=None):
328 |         styles = []
329 |         if x is not None and y is not None:
330 |             styles.append('\\pos(%s, %s)' % (x, y))
331 |         if scale_x is not None:
332 |             styles.append('\\fscx%s' % scale_x)
333 |         if scale_y is not None:
334 |             styles.append('\\fscy%s' % scale_y)
335 |         if rotate_z is not None and rotate_y is not None:
336 |             assert x is not None
337 |             assert y is not None
338 |             styles.append('\\frx%s\\fry%s\\frz%s\\fax%s\\fay%s' % ConvertFlashRotation(rotate_y, rotate_z, (x-ZoomFactor[1])/(width-ZoomFactor[1]*2), (y-ZoomFactor[2])/(height-ZoomFactor[2]*2)))
339 |         if color is not None:
340 |             styles.append('\\c&H%02X%02X%02X&' % (color & 0xff, (color >> 8) & 0xff, (color >> 16) & 0xff))
341 |             if color == 0x000000:
342 |                 styles.append('\\3c&HFFFFFF&')
343 |         if alpha is not None:
344 |             alpha = 255-round(alpha*255)
345 |             styles.append('\\alpha&H%02X' % alpha)
346 |         return styles
347 | 
348 |     def FlushCommentLine(f, text, styles, start_time, end_time, styleid):
349 |         if end_time > start_time:
350 |             f.write('Dialogue: -1,%(start)s,%(end)s,%(styleid)s,,0,0,0,,{%(styles)s}%(text)s\n' % {'start': ConvertTimestamp(start_time), 'end': ConvertTimestamp(end_time), 'styles': ''.join(styles), 'text': text, 'styleid': styleid})
351 | 
352 |     try:
353 |         comment_args = c[3]
354 |         text = ASSEscape(str(comment_args['n']).replace('\r', '\n').replace('\r', '\n'))
355 |         common_styles = []
356 |         anchor = {0: 7, 1: 8, 2: 9, 3: 4, 4: 5, 5: 6, 6: 1, 7: 2, 8: 3}.get(comment_args.get('c', 0), 7)
357 |         if anchor != 7:
358 |             common_styles.append('\\an%s' % anchor)
359 |         font = comment_args.get('w')
360 |         if font:
361 |             font = dict(font)
362 |             fontface = font.get('f')
363 |             if fontface:
364 |                 common_styles.append('\\fn%s' % ASSEscape(str(fontface)))
365 |             fontbold = bool(font.get('b'))
366 |             if fontbold:
367 |                 common_styles.append('\\b1')
368 |         common_styles.append('\\fs%s' % round(c[6]*ZoomFactor[0]))
369 |         isborder = bool(comment_args.get('b', True))
370 |         if not isborder:
371 |             common_styles.append('\\bord0')
372 |         to_pos = dict(comment_args.get('p', {'x': 0, 'y': 0}))
373 |         to_x = round(GetPosition(int(to_pos.get('x', 0)), False))
374 |         to_y = round(GetPosition(int(to_pos.get('y', 0)), True))
375 |         to_scale_x = round(float(comment_args.get('e', 1.0))*100)
376 |         to_scale_y = round(float(comment_args.get('f', 1.0))*100)
377 |         to_rotate_z = float(comment_args.get('r', 0.0))
378 |         to_rotate_y = float(comment_args.get('k', 0.0))
379 |         to_color = c[5]
380 |         to_alpha = float(comment_args.get('a', 1.0))
381 |         from_time = float(comment_args.get('t', 0.0))
382 |         action_time = float(comment_args.get('l', 3.0))
383 |         actions = list(comment_args.get('z', []))
384 |         transform_styles = GetTransformStyles(to_x, to_y, to_scale_x, to_scale_y, to_rotate_z, to_rotate_y, to_color, to_alpha)
385 |         FlushCommentLine(f, text, common_styles+transform_styles, c[0]+from_time, c[0]+from_time+action_time, styleid)
386 |         for action in actions:
387 |             action = dict(action)
388 |             from_x, from_y = to_x, to_y
389 |             from_scale_x, from_scale_y = to_scale_x, to_scale_y
390 |             from_rotate_z, from_rotate_y = to_rotate_z, to_rotate_y
391 |             from_color, from_alpha = to_color, to_alpha
392 |             from_time += action_time
393 |             action_time = float(action.get('l', 0.0))
394 |             action_styles = []
395 |             if 'x' in action:
396 |                 to_x = round(GetPosition(int(action['x']), False))
397 |             if 'y' in action:
398 |                 to_y = round(GetPosition(int(action['y']), True))
399 |             if 'f' in action:
400 |                 to_scale_x = round(float(action['f'])*100)
401 |                 action_styles.append('\\fscx%s' % to_scale_x)
402 |             if 'g' in action:
403 |                 to_scale_y = round(float(action['g'])*100)
404 |                 action_styles.append('\\fscy%s' % to_scale_y)
405 |             if 'c' in action:
406 |                 to_color = int(action['c'])
407 |                 action_styles.append('\\c&H%02X%02X%02X&' % (to_color & 0xff, (to_color >> 8) & 0xff, (to_color >> 16) & 0xff))
408 |             if 't' in action:
409 |                 to_alpha = float(action['t'])
410 |                 action_styles.append('\\alpha&H%02X' % (255-round(to_alpha*255)))
411 |             if 'd' in action:
412 |                 to_rotate_z = float(action['d'])
413 |             if 'e' in action:
414 |                 to_rotate_y = float(action['e'])
415 |             if ('x' in action) or ('y' in action):
416 |                 transform_styles = GetTransformStyles(None, None, from_scale_x, from_scale_y, None, None, from_color, from_alpha)
417 |                 transform_styles.append('\\move(%s, %s, %s, %s)' % (from_x, from_y, to_x, to_y))
418 |                 action_styles.append('\\frx%s\\fry%s\\frz%s\\fax%s\\fay%s' % ConvertFlashRotation(to_rotate_y, to_rotate_z, (to_x-ZoomFactor[1])/(width-ZoomFactor[1]*2), (to_y-ZoomFactor[2])/(width-ZoomFactor[2]*2)))
419 |             elif ('d' in action) or ('e' in action):
420 |                 action_styles.append('\\frx%s\\fry%s\\frz%s\\fax%s\\fay%s' % ConvertFlashRotation(to_rotate_y, to_rotate_z, (to_x-ZoomFactor[1])/(width-ZoomFactor[1]*2), (to_y-ZoomFactor[2])/(width-ZoomFactor[2]*2)))
421 |             else:
422 |                 transform_styles = GetTransformStyles(from_x, from_y, from_scale_x, from_scale_y, from_rotate_z, from_rotate_y, from_color, from_alpha)
423 |             if action_styles:
424 |                 transform_styles.append('\\t(%s)' % (''.join(action_styles)))
425 |             FlushCommentLine(f, text, common_styles+transform_styles, c[0]+from_time, c[0]+from_time+action_time, styleid)
426 |     except (IndexError, ValueError) as e:
427 |         logging.warning(_('Invalid comment: %r') % c[3])
428 | 
429 | 
430 | def WriteCommentSH5VPositioned(f, c, width, height, styleid):
431 | 
432 |     def GetTransformStyles(x=None, y=None, fsize=None, rotate_z=None, rotate_y=None, color=None, alpha=None):
433 |         styles = []
434 |         if x is not None and y is not None:
435 |             styles.append('\\pos(%s, %s)' % (x, y))
436 |         if fsize is not None:
437 |             styles.append('\\fs%s' % fsize)
438 |         if rotate_y is not None and rotate_z is not None:
439 |             styles.append('\\frz%s' % round(rotate_z))
440 |             styles.append('\\fry%s' % round(rotate_y))
441 |         if color is not None:
442 |             styles.append('\\c&H%02X%02X%02X&' % (color & 0xff, (color >> 8) & 0xff, (color >> 16) & 0xff))
443 |             if color == 0x000000:
444 |                 styles.append('\\3c&HFFFFFF&')
445 |         if alpha is not None:
446 |             alpha = 255-round(alpha*255)
447 |             styles.append('\\alpha&H%02X' % alpha)
448 |         return styles
449 | 
450 |     def FlushCommentLine(f, text, styles, start_time, end_time, styleid):
451 |         if end_time > start_time:
452 |             f.write('Dialogue: -1,%(start)s,%(end)s,%(styleid)s,,0,0,0,,{%(styles)s}%(text)s\n' % {'start': ConvertTimestamp(start_time), 'end': ConvertTimestamp(end_time), 'styles': ''.join(styles), 'text': text, 'styleid': styleid})
453 | 
454 |     try:
455 |         text = ASSEscape(str(c[3]))
456 |         to_x = round(float(c[9])*width)
457 |         to_y = round(float(c[10])*height)
458 |         to_rotate_z = -int(c[14])
459 |         to_rotate_y = -int(c[15])
460 |         to_color = c[5]
461 |         to_alpha = float(c[12])
462 |         #Note: Alpha transition hasn't been worked out yet.
463 |         to_size = round(int(c[6])*math.sqrt(width*height/307200))
464 |         #Note: Because sH5V's data is the absolute size of font,temporarily solve by it at present.[*math.sqrt(width/640*height/480)]
465 |         #But it seems to be working fine...
466 |         from_time = float(c[0])
467 |         action_time = float(c[11])/1000
468 |         transform_styles = GetTransformStyles(to_x, to_y, to_size, to_rotate_z, to_rotate_y, to_color, to_alpha)
469 |         FlushCommentLine(f, text, transform_styles, from_time, from_time+action_time, styleid)
470 |     except (IndexError, ValueError) as e:
471 |         logging.warning(_('Invalid comment: %r') % c[3])
472 | 
473 | 
474 | # Result: (f, dx, dy)
475 | # To convert: NewX = f*x+dx, NewY = f*y+dy
476 | def GetZoomFactor(SourceSize, TargetSize):
477 |     try:
478 |         if (SourceSize, TargetSize) == GetZoomFactor.Cached_Size:
479 |             return GetZoomFactor.Cached_Result
480 |     except AttributeError:
481 |         pass
482 |     GetZoomFactor.Cached_Size = (SourceSize, TargetSize)
483 |     try:
484 |         SourceAspect = SourceSize[0]/SourceSize[1]
485 |         TargetAspect = TargetSize[0]/TargetSize[1]
486 |         if TargetAspect < SourceAspect:  # narrower
487 |             ScaleFactor = TargetSize[0]/SourceSize[0]
488 |             GetZoomFactor.Cached_Result = (ScaleFactor, 0, (TargetSize[1]-TargetSize[0]/SourceAspect)/2)
489 |         elif TargetAspect > SourceAspect:  # wider
490 |             ScaleFactor = TargetSize[1]/SourceSize[1]
491 |             GetZoomFactor.Cached_Result = (ScaleFactor, (TargetSize[0]-TargetSize[1]*SourceAspect)/2, 0)
492 |         else:
493 |             GetZoomFactor.Cached_Result = (TargetSize[0]/SourceSize[0], 0, 0)
494 |         return GetZoomFactor.Cached_Result
495 |     except ZeroDivisionError:
496 |         GetZoomFactor.Cached_Result = (1, 0, 0)
497 |         return GetZoomFactor.Cached_Result
498 | 
499 | 
500 | # Calculation is based on https://github.com/jabbany/CommentCoreLibrary/issues/5#issuecomment-40087282
501 | #                     and https://github.com/m13253/danmaku2ass/issues/7#issuecomment-41489422
502 | # Input: X relative horizonal coordinate: 0 for left edge, 1 for right edge.
503 | #        Y relative vertical coordinate: 0 for top edge, 1 for bottom edge.
504 | # FOV = 1.0/math.tan(100*math.pi/360.0)
505 | # Result: (rotX, rotY, rotZ, shearX, shearY)
506 | def ConvertFlashRotation(rotY, rotZ, X, Y, FOV=math.tan(2*math.pi/9.0)):
507 |     def WrapAngle(deg):
508 |         return 180-((180-deg)%360)
509 |     def CalcPerspectiveCorrection(alpha, X, FOV=FOV):
510 |         alpha = WrapAngle(alpha)
511 |         if FOV is None:
512 |             return alpha
513 |         if 0 <= alpha <= 180:
514 |             costheta = (FOV*math.cos(alpha*math.pi/180.0)-X*math.sin(alpha*math.pi/180.0))/(FOV+max(2, abs(X)+1)*math.sin(alpha*math.pi/180.0))
515 |             try:
516 |                 if costheta > 1:
517 |                     costheta = 1
518 |                     raise ValueError
519 |                 elif costheta < -1:
520 |                     costheta = -1
521 |                     raise ValueError
522 |             except ValueError:
523 |                 logging.error('Clipped rotation angle: (alpha=%s, X=%s), it is a bug!' % (alpha, X))
524 |             theta = math.acos(costheta)*180/math.pi
525 |         else:
526 |             costheta = (FOV*math.cos(alpha*math.pi/180.0)-X*math.sin(alpha*math.pi/180.0))/(FOV-max(2, abs(X)+1)*math.sin(alpha*math.pi/180.0))
527 |             try:
528 |                 if costheta > 1:
529 |                     costheta = 1
530 |                     raise ValueError
531 |                 elif costheta < -1:
532 |                     costheta = -1
533 |                     raise ValueError
534 |             except ValueError:
535 |                 logging.error('Clipped rotation angle: (alpha=%s, X=%s), it is a bug!' % (alpha, X))
536 |             theta = -math.acos(costheta)*180/math.pi
537 |         return WrapAngle(theta)
538 |     X = 2*X-1
539 |     Y = 2*Y-1
540 |     rotY = WrapAngle(rotY)
541 |     rotZ = WrapAngle(rotZ)
542 |     if rotY == 0 or rotZ == 0:
543 |         outX = 0
544 |         outY = -rotY  # Positive value means clockwise in Flash
545 |         outZ = -rotZ
546 |     else:
547 |         rotY = rotY*math.pi/180.0
548 |         rotZ = rotZ*math.pi/180.0
549 |         outY = math.atan2(-math.sin(rotY)*math.cos(rotZ), math.cos(rotY))*180/math.pi
550 |         outZ = math.atan2(-math.cos(rotY)*math.sin(rotZ), math.cos(rotZ))*180/math.pi
551 |         outX = math.asin(math.sin(rotY)*math.sin(rotZ))*180/math.pi
552 |     if FOV is not None:
553 |         #outX = CalcPerspectiveCorrection(outX, -Y, FOV*0.75)
554 |         outY = CalcPerspectiveCorrection(outY, X, FOV)
555 |     return (WrapAngle(round(outX)), WrapAngle(round(outY)), WrapAngle(round(outZ)), 0, round(-0.75*Y*math.sin(outY*math.pi/180.0), 3))
556 | 
557 | 
558 | def ProcessComments(comments, f, width, height, bottomReserved, fontface, fontsize, alpha, lifetime, reduced, progress_callback):
559 |     styleid = 'Danmaku2ASS_%04x' % random.randint(0, 0xffff)
560 |     WriteASSHead(f, width, height, fontface, fontsize, alpha, styleid)
561 |     rows = [[None]*(height-bottomReserved+1) for i in range(4)]
562 |     for idx, i in enumerate(comments):
563 |         if progress_callback and idx % 1000 == 0:
564 |             progress_callback(idx, len(comments))
565 |         if isinstance(i[4], int):
566 |             row = 0
567 |             rowmax = height-bottomReserved-i[7]
568 |             while row <= rowmax:
569 |                 freerows = TestFreeRows(rows, i, row, width, height, bottomReserved, lifetime)
570 |                 if freerows >= i[7]:
571 |                     MarkCommentRow(rows, i, row)
572 |                     WriteComment(f, i, row, width, height, bottomReserved, fontsize, lifetime, styleid)
573 |                     break
574 |                 else:
575 |                     row += freerows or 1
576 |             else:
577 |                 if not reduced:
578 |                     row = FindAlternativeRow(rows, i, height, bottomReserved)
579 |                     MarkCommentRow(rows, i, row)
580 |                     WriteComment(f, i, row, width, height, bottomReserved, fontsize, lifetime, styleid)
581 |         elif i[4] == 'bilipos':
582 |             WriteCommentBilibiliPositioned(f, i, width, height, styleid)
583 |         elif i[4] == 'acfunpos':
584 |             WriteCommentAcfunPositioned(f, i, width, height, styleid)
585 |         elif i[4] == 'sH5Vpos':
586 |             WriteCommentSH5VPositioned(f, i, width, height, styleid)
587 |         else:
588 |             logging.warning(_('Invalid comment: %r') % i[3])
589 |     if progress_callback:
590 |         progress_callback(len(comments), len(comments))
591 | 
592 | 
593 | def TestFreeRows(rows, c, row, width, height, bottomReserved, lifetime):
594 |     res = 0
595 |     rowmax = height-bottomReserved
596 |     targetRow = None
597 |     if c[4] in (1, 2):
598 |         while row < rowmax and res < c[7]:
599 |             if targetRow != rows[c[4]][row]:
600 |                 targetRow = rows[c[4]][row]
601 |                 if targetRow and targetRow[0]+lifetime > c[0]:
602 |                     break
603 |             row += 1
604 |             res += 1
605 |     else:
606 |         try:
607 |             thresholdTime = c[0]-lifetime*(1-width/(c[8]+width))
608 |         except ZeroDivisionError:
609 |             thresholdTime = c[0]-lifetime
610 |         while row < rowmax and res < c[7]:
611 |             if targetRow != rows[c[4]][row]:
612 |                 targetRow = rows[c[4]][row]
613 |                 try:
614 |                     if targetRow and (targetRow[0] > thresholdTime or targetRow[0]+targetRow[8]*lifetime/(targetRow[8]+width) > c[0]):
615 |                         break
616 |                 except ZeroDivisionError:
617 |                     pass
618 |             row += 1
619 |             res += 1
620 |     return res
621 | 
622 | 
623 | def FindAlternativeRow(rows, c, height, bottomReserved):
624 |     res = 0
625 |     for row in range(height-bottomReserved-math.ceil(c[7])):
626 |         if not rows[c[4]][row]:
627 |             return row
628 |         elif rows[c[4]][row][0] < rows[c[4]][res][0]:
629 |             res = row
630 |     return res
631 | 
632 | 
633 | def MarkCommentRow(rows, c, row):
634 |     try:
635 |         for i in range(row, row+math.ceil(c[7])):
636 |             rows[c[4]][i] = c
637 |     except IndexError:
638 |         pass
639 | 
640 | 
641 | def WriteASSHead(f, width, height, fontface, fontsize, alpha, styleid):
642 |     f.write(
643 | '''
644 | [Script Info]
645 | ; Script generated by Danmaku2ASS
646 | ; https://github.com/m13253/danmaku2ass
647 | Script Updated By: Danmaku2ASS (https://github.com/m13253/danmaku2ass)
648 | ScriptType: v4.00+
649 | WrapStyle: 2
650 | Collisions: Normal
651 | PlayResX: %(width)s
652 | PlayResY: %(height)s
653 | ScaledBorderAndShadow: yes
654 | [V4+ Styles]
655 | Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
656 | Style: %(styleid)s, %(fontface)s, %(fontsize)s, &H%(alpha)02XFFFFFF, &H%(alpha)02XFFFFFF, &H%(alpha)02X000000, &H%(alpha)02X000000, 0, 0, 0, 0, 100, 100, 0.00, 0.00, 1, %(outline)s, 0, 7, 0, 0, 0, 0
657 | [Events]
658 | Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
659 | ''' % {'width': width, 'height': height, 'fontface': fontface, 'fontsize': round(fontsize), 'alpha': 255-round(alpha*255), 'outline': round(fontsize/25), 'styleid': styleid}
660 |     )
661 | 
662 | 
663 | def WriteComment(f, c, row, width, height, bottomReserved, fontsize, lifetime, styleid):
664 |     text = ASSEscape(c[3])
665 |     styles = []
666 |     if c[4] == 1:
667 |         styles.append('\\an8\\pos(%(halfwidth)s, %(row)s)' % {'halfwidth': round(width/2), 'row': row})
668 |     elif c[4] == 2:
669 |         styles.append('\\an2\\pos(%(halfwidth)s, %(row)s)' % {'halfwidth': round(width/2), 'row': ConvertType2(row, height, bottomReserved)})
670 |     elif c[4] == 3:
671 |         styles.append('\\move(%(neglen)s, %(row)s, %(width)s, %(row)s)' % {'width': width, 'row': row, 'neglen': -math.ceil(c[8])})
672 |     else:
673 |         styles.append('\\move(%(width)s, %(row)s, %(neglen)s, %(row)s)' % {'width': width, 'row': row, 'neglen': -math.ceil(c[8])})
674 |     if not (-1 < c[6]-fontsize < 1):
675 |         styles.append('\\fs%s' % round(c[6]))
676 |     if c[5] != 0xffffff:
677 |         styles.append('\\c&H%02X%02X%02X&' % (c[5] & 0xff, (c[5] >> 8) & 0xff, (c[5] >> 16) & 0xff))
678 |         if c[5] == 0x000000:
679 |             styles.append('\\3c&HFFFFFF&')
680 |     f.write('Dialogue: 2,%(start)s,%(end)s,%(styleid)s,,0000,0000,0000,,{%(styles)s}%(text)s\n' % {'start': ConvertTimestamp(c[0]), 'end': ConvertTimestamp(c[0]+lifetime), 'styles': ''.join(styles), 'text': text, 'styleid': styleid})
681 | 
682 | 
683 | def ASSEscape(s):
684 |     return '\\N'.join((i or ' ' for i in str(s).replace('\\', '\\\\').replace('{', '\\{').replace('}', '\\}').split('\n')))
685 | 
686 | 
687 | def CalculateLength(s):
688 |     return max(map(len, s.split('\n')))  # May not be accurate
689 | 
690 | 
691 | def ConvertTimestamp(timestamp):
692 |     timestamp = round(timestamp*100.0)
693 |     hour, minute = divmod(timestamp, 360000)
694 |     minute, second = divmod(minute, 6000)
695 |     second, centsecond = divmod(second, 100)
696 |     return '%d:%02d:%02d.%02d' % (int(hour), int(minute), int(second), int(centsecond))
697 | 
698 | 
699 | def ConvertType2(row, height, bottomReserved):
700 |     return height-bottomReserved-row
701 | 
702 | 
703 | def ConvertToFile(filename_or_file, *args, **kwargs):
704 |     if isinstance(filename_or_file, bytes):
705 |         filename_or_file = str(bytes(filename_or_file).decode('utf-8', 'replace'))
706 |     if isinstance(filename_or_file, str):
707 |         return open(filename_or_file, *args, **kwargs)
708 |     else:
709 |         return filename_or_file
710 | 
711 | 
712 | def FilterBadChars(f):
713 |     s = f.read()
714 |     s = re.sub('[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f]', '\ufffd', s)
715 |     return io.StringIO(s)
716 | 
717 | 
718 | class safe_list(list):
719 |     def get(self, index, default=None):
720 |         try:
721 |             return self[index]
722 |         except IndexError:
723 |             return default
724 | 
725 | 
726 | def export(func):
727 |     global __all__
728 |     try:
729 |         __all__.append(func.__name__)
730 |     except NameError:
731 |         __all__ = [func.__name__]
732 |     return func
733 | 
734 | 
735 | @export
736 | def Danmaku2ASS(input_files, output_file, stage_width, stage_height, reserve_blank=0, font_face=_('(FONT) sans-serif')[7:], font_size=25.0, text_opacity=1.0, comment_duration=5.0, is_reduce_comments=False, progress_callback=None):
737 |     fo = None
738 |     comments = ReadComments(input_files, font_size)
739 |     try:
740 |         if output_file:
741 |             fo = ConvertToFile(output_file, 'w', encoding='utf-8-sig', errors='replace', newline='\r\n')
742 |         else:
743 |             fo = sys.stdout
744 |         ProcessComments(comments, fo, stage_width, stage_height, reserve_blank, font_face, font_size, text_opacity, comment_duration, is_reduce_comments, progress_callback)
745 |     finally:
746 |         if output_file and fo != output_file:
747 |             fo.close()
748 | 
749 | 
750 | @export
751 | def ReadComments(input_files, font_size=25.0, progress_callback=None):
752 |     if isinstance(input_files, bytes):
753 |         input_files = str(bytes(input_files).decode('utf-8', 'replace'))
754 |     if isinstance(input_files, str):
755 |         input_files = [input_files]
756 |     else:
757 |         input_files = list(input_files)
758 |     comments = []
759 |     for idx, i in enumerate(input_files):
760 |         if progress_callback:
761 |             progress_callback(idx, len(input_files))
762 |         with ConvertToFile(i, 'r', encoding='utf-8', errors='replace') as f:
763 |             CommentProcessor = GetCommentProcessor(f)
764 |             if not CommentProcessor:
765 |                 raise ValueError(_('Unknown comment file format: %s') % i)
766 |             comments.extend(CommentProcessor(FilterBadChars(f), font_size))
767 |     if progress_callback:
768 |         progress_callback(len(input_files), len(input_files))
769 |     comments.sort()
770 |     return comments
771 | 
772 | 
773 | @export
774 | def GetCommentProcessor(input_file):
775 |     return CommentFormatMap[ProbeCommentFormat(input_file)]
776 | 
777 | 
778 | def main():
779 |     if len(sys.argv) == 1:
780 |         sys.argv.append('--help')
781 |     parser = argparse.ArgumentParser()
782 |     parser.add_argument('-o', '--output', metavar=_('OUTPUT'), help=_('Output file'))
783 |     parser.add_argument('-s', '--size', metavar=_('WIDTHxHEIGHT'), required=True, help=_('Stage size in pixels'))
784 |     parser.add_argument('-fn', '--font', metavar=_('FONT'), help=_('Specify font face [default: %s]') % _('(FONT) sans-serif')[7:], default=_('(FONT) sans-serif')[7:])
785 |     parser.add_argument('-fs', '--fontsize', metavar=_('SIZE'), help=(_('Default font size [default: %s]') % 25), type=float, default=25.0)
786 |     parser.add_argument('-a', '--alpha', metavar=_('ALPHA'), help=_('Text opacity'), type=float, default=1.0)
787 |     parser.add_argument('-l', '--lifetime', metavar=_('SECONDS'), help=_('Duration of comment display [default: %s]') % 5, type=float, default=5.0)
788 |     parser.add_argument('-p', '--protect', metavar=_('HEIGHT'), help=_('Reserve blank on the bottom of the stage'), type=int, default=0)
789 |     parser.add_argument('-r', '--reduce', action='store_true', help=_('Reduce the amount of comments if stage is full'))
790 |     parser.add_argument('file', metavar=_('FILE'), nargs='+', help=_('Comment file to be processed'))
791 |     args = parser.parse_args()
792 |     try:
793 |         width, height = str(args.size).split('x', 1)
794 |         width = int(width)
795 |         height = int(height)
796 |     except ValueError:
797 |         raise ValueError(_('Invalid stage size: %r') % args.size)
798 |     Danmaku2ASS(args.file, args.output, width, height, args.protect, args.font, args.fontsize, args.alpha, args.lifetime, args.reduce)
799 | 
800 | 
801 | if __name__ == '__main__':
802 |     main()
803 | 


--------------------------------------------------------------------------------
/biqukan.py:
--------------------------------------------------------------------------------
  1 | # -*- coding:UTF-8 -*-
  2 | from urllib import request
  3 | from bs4 import BeautifulSoup
  4 | import collections
  5 | import re
  6 | import os
  7 | import time
  8 | import sys
  9 | import types
 10 | 
 11 | """
 12 | 类说明:下载《笔趣看》网小说: url:http://www.biqukan.com/
 13 | 
 14 | Parameters:
 15 | 	target - 《笔趣看》网指定的小说目录地址(string)
 16 | 
 17 | Returns:
 18 | 	无
 19 | 
 20 | Modify:
 21 | 	2017-05-06
 22 | """
 23 | class download(object):
 24 | 	def __init__(self, target):
 25 | 		self.__target_url = target
 26 | 		self.__head = {'User-Agent':'Mozilla/5.0 (Linux; Android 4.1.1; Nexus 7 Build/JRO03D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166  Safari/535.19',}
 27 | 
 28 | 	"""
 29 | 	函数说明:获取下载链接
 30 | 
 31 | 	Parameters:
 32 | 		无
 33 | 
 34 | 	Returns:
 35 | 		novel_name + '.txt' - 保存的小说名(string)
 36 | 		numbers - 章节数(int)
 37 | 		download_dict - 保存章节名称和下载链接的字典(dict)
 38 | 
 39 | 	Modify:
 40 | 		2017-05-06
 41 | 	"""
 42 | 	def get_download_url(self):
 43 | 		charter = re.compile(u'[第弟](.+)章', re.IGNORECASE)
 44 | 		target_req = request.Request(url = self.__target_url, headers = self.__head)
 45 | 		target_response = request.urlopen(target_req)
 46 | 		target_html = target_response.read().decode('gbk','ignore')
 47 | 		listmain_soup = BeautifulSoup(target_html,'lxml')
 48 | 		chapters = listmain_soup.find_all('div',class_ = 'listmain')
 49 | 		download_soup = BeautifulSoup(str(chapters), 'lxml')
 50 | 		novel_name = str(download_soup.dl.dt).split("》")[0][5:]
 51 | 		flag_name = "《" + novel_name + "》" + "正文卷"
 52 | 		numbers = (len(download_soup.dl.contents) - 1) / 2 - 8
 53 | 		download_dict = collections.OrderedDict()
 54 | 		begin_flag = False
 55 | 		numbers = 1
 56 | 		for child in download_soup.dl.children:
 57 | 			if child != '\n':
 58 | 				if child.string == u"%s" % flag_name:
 59 | 					begin_flag = True
 60 | 				if begin_flag == True and child.a != None:
 61 | 					download_url = "http://www.biqukan.com" + child.a.get('href')
 62 | 					download_name = child.string
 63 | 					names = str(download_name).split('章')
 64 | 					name = charter.findall(names[0] + '章')
 65 | 					if name:
 66 | 							download_dict['第' + str(numbers) + '章 ' + names[1]] = download_url
 67 | 							numbers += 1
 68 | 		return novel_name + '.txt', numbers, download_dict
 69 | 	
 70 | 	"""
 71 | 	函数说明:爬取文章内容
 72 | 
 73 | 	Parameters:
 74 | 		url - 下载连接(string)
 75 | 
 76 | 	Returns:
 77 | 		soup_text - 章节内容(string)
 78 | 
 79 | 	Modify:
 80 | 		2017-05-06
 81 | 	"""
 82 | 	def Downloader(self, url):
 83 | 		download_req = request.Request(url = url, headers = self.__head)
 84 | 		download_response = request.urlopen(download_req)
 85 | 		download_html = download_response.read().decode('gbk','ignore')
 86 | 		soup_texts = BeautifulSoup(download_html, 'lxml')
 87 | 		texts = soup_texts.find_all(id = 'content', class_ = 'showtxt')
 88 | 		soup_text = BeautifulSoup(str(texts), 'lxml').div.text.replace('\xa0','')
 89 | 		return soup_text
 90 | 
 91 | 	"""
 92 | 	函数说明:将爬取的文章内容写入文件
 93 | 
 94 | 	Parameters:
 95 | 		name - 章节名称(string)
 96 | 		path - 当前路径下,小说保存名称(string)
 97 | 		text - 章节内容(string)
 98 | 
 99 | 	Returns:
100 | 		无
101 | 
102 | 	Modify:
103 | 		2017-05-06
104 | 	"""
105 | 	def Writer(self, name, path, text):
106 | 		write_flag = True
107 | 		with open(path, 'a', encoding='utf-8') as f:
108 | 			f.write(name + '\n\n')
109 | 			for each in text:
110 | 				if each == 'h':
111 | 					write_flag = False
112 | 				if write_flag == True and each != ' ':
113 | 					f.write(each)
114 | 				if write_flag == True and each == '\r':
115 | 					f.write('\n')			
116 | 			f.write('\n\n')
117 | 
118 | if __name__ == "__main__":
119 | 	print("\n\t\t欢迎使用《笔趣看》小说下载小工具\n\n\t\t作者:Jack-Cui\t时间:2017-05-06\n")
120 | 	print("*************************************************************************")
121 | 	
122 | 	#小说地址
123 | 	target_url = str(input("请输入小说目录下载地址:\n"))
124 | 
125 | 	#实例化下载类
126 | 	d = download(target = target_url)
127 | 	name, numbers, url_dict = d.get_download_url()
128 | 	if name in os.listdir():
129 | 		os.remove(name)
130 | 	index = 1
131 | 
132 | 	#下载中
133 | 	print("《%s》下载中:" % name[:-4])
134 | 	for key, value in url_dict.items():
135 | 		d.Writer(key, name, d.Downloader(value))
136 | 		sys.stdout.write("已下载:%.3f%%" %  float(index/numbers) + '\r')
137 | 		sys.stdout.flush()
138 | 		index += 1	
139 | 
140 | 	print("《%s》下载完成！" % name[:-4])
141 | 
142 | 	
143 | 


--------------------------------------------------------------------------------
/cartoon/cartoon/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sukersuker/python--spider/0e743c1503212cff9d2a800b6c5df344d81362b7/cartoon/cartoon/__init__.py


--------------------------------------------------------------------------------
/cartoon/cartoon/__pycache__/__init__.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sukersuker/python--spider/0e743c1503212cff9d2a800b6c5df344d81362b7/cartoon/cartoon/__pycache__/__init__.cpython-34.pyc


--------------------------------------------------------------------------------
/cartoon/cartoon/__pycache__/items.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sukersuker/python--spider/0e743c1503212cff9d2a800b6c5df344d81362b7/cartoon/cartoon/__pycache__/items.cpython-34.pyc


--------------------------------------------------------------------------------
/cartoon/cartoon/__pycache__/pipelines.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sukersuker/python--spider/0e743c1503212cff9d2a800b6c5df344d81362b7/cartoon/cartoon/__pycache__/pipelines.cpython-34.pyc


--------------------------------------------------------------------------------
/cartoon/cartoon/__pycache__/settings.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sukersuker/python--spider/0e743c1503212cff9d2a800b6c5df344d81362b7/cartoon/cartoon/__pycache__/settings.cpython-34.pyc


--------------------------------------------------------------------------------
/cartoon/cartoon/items.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Define here the models for your scraped items
 4 | #
 5 | # See documentation in:
 6 | # http://doc.scrapy.org/en/latest/topics/items.html
 7 | 
 8 | import scrapy
 9 | 
10 | class ComicItem(scrapy.Item):
11 | 	dir_name = scrapy.Field()
12 | 	link_url = scrapy.Field()
13 | 	img_url = scrapy.Field()
14 | 	image_paths = scrapy.Field()


--------------------------------------------------------------------------------
/cartoon/cartoon/middlewares.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Define here the models for your spider middleware
 4 | #
 5 | # See documentation in:
 6 | # http://doc.scrapy.org/en/latest/topics/spider-middleware.html
 7 | 
 8 | from scrapy import signals
 9 | 
10 | 
11 | class CartoonSpiderMiddleware(object):
12 |     # Not all methods need to be defined. If a method is not defined,
13 |     # scrapy acts as if the spider middleware does not modify the
14 |     # passed objects.
15 | 
16 |     @classmethod
17 |     def from_crawler(cls, crawler):
18 |         # This method is used by Scrapy to create your spiders.
19 |         s = cls()
20 |         crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
21 |         return s
22 | 
23 |     def process_spider_input(response, spider):
24 |         # Called for each response that goes through the spider
25 |         # middleware and into the spider.
26 | 
27 |         # Should return None or raise an exception.
28 |         return None
29 | 
30 |     def process_spider_output(response, result, spider):
31 |         # Called with the results returned from the Spider, after
32 |         # it has processed the response.
33 | 
34 |         # Must return an iterable of Request, dict or Item objects.
35 |         for i in result:
36 |             yield i
37 | 
38 |     def process_spider_exception(response, exception, spider):
39 |         # Called when a spider or process_spider_input() method
40 |         # (from other spider middleware) raises an exception.
41 | 
42 |         # Should return either None or an iterable of Response, dict
43 |         # or Item objects.
44 |         pass
45 | 
46 |     def process_start_requests(start_requests, spider):
47 |         # Called with the start requests of the spider, and works
48 |         # similarly to the process_spider_output() method, except
49 |         # that it doesn’t have a response associated.
50 | 
51 |         # Must return only requests (not items).
52 |         for r in start_requests:
53 |             yield r
54 | 
55 |     def spider_opened(self, spider):
56 |         spider.logger.info('Spider opened: %s' % spider.name)
57 | 


--------------------------------------------------------------------------------
/cartoon/cartoon/pipelines.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Define your item pipelines here
 4 | #
 5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting
 6 | # See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
 7 | from cartoon import settings
 8 | from scrapy import Request
 9 | import requests
10 | import os
11 | 
12 | 
13 | class ComicImgDownloadPipeline(object):
14 | 
15 | 	def process_item(self, item, spider):
16 | 		#如果获取了图片链接，进行如下操作
17 | 		if 'img_url' in item:
18 | 			images = []
19 | 			#文件夹名字
20 | 			dir_path = '%s/%s' % (settings.IMAGES_STORE, item['dir_name'])
21 | 			#文件夹不存在则创建文件夹
22 | 			if not os.path.exists(dir_path):
23 | 				os.makedirs(dir_path)
24 | 			#获取每一个图片链接
25 | 			for image_url in item['img_url']:
26 | 				#解析链接，根据链接为图片命名
27 | 				houzhui = image_url.split('/')[-1].split('.')[-1]
28 | 				qianzhui = item['link_url'].split('/')[-1].split('.')[0]
29 | 				#图片名
30 | 				image_file_name = '第' + qianzhui + '页.' + houzhui
31 | 				#图片保存路径
32 | 				file_path = '%s/%s' % (dir_path, image_file_name)
33 | 				images.append(file_path)
34 | 				if os.path.exists(file_path):
35 | 					continue
36 | 				#保存图片
37 | 				with open(file_path, 'wb') as handle:
38 | 					response = requests.get(url = image_url)
39 | 					for block in response.iter_content(1024):
40 | 						if not block:
41 | 							break
42 | 						handle.write(block)
43 | 			#返回图片保存路径
44 | 			item['image_paths'] = images
45 | 		return item


--------------------------------------------------------------------------------
/cartoon/cartoon/settings.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | # Scrapy settings for cartoon project
  4 | #
  5 | # For simplicity, this file contains only settings considered important or
  6 | # commonly used. You can find more settings consulting the documentation:
  7 | #
  8 | #     http://doc.scrapy.org/en/latest/topics/settings.html
  9 | #     http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
 10 | #     http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
 11 | 
 12 | BOT_NAME = 'cartoon'
 13 | 
 14 | SPIDER_MODULES = ['cartoon.spiders']
 15 | NEWSPIDER_MODULE = 'cartoon.spiders'
 16 | 
 17 | 
 18 | # Crawl responsibly by identifying yourself (and your website) on the user-agent
 19 | #USER_AGENT = 'cartoon (+http://www.yourdomain.com)'
 20 | 
 21 | # Obey robots.txt rules
 22 | ROBOTSTXT_OBEY = False
 23 | 
 24 | ITEM_PIPELINES = {
 25 | 	'cartoon.pipelines.ComicImgDownloadPipeline': 1,
 26 | }
 27 | 
 28 | IMAGES_STORE = 'H:/火影忍者'
 29 | 
 30 | COOKIES_ENABLED = False
 31 | 
 32 | DOWNLOAD_DELAY = 0.25    # 250 ms of delay
 33 | 
 34 | # Configure maximum concurrent requests performed by Scrapy (default: 16)
 35 | #CONCURRENT_REQUESTS = 32
 36 | 
 37 | # Configure a delay for requests for the same website (default: 0)
 38 | # See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay
 39 | # See also autothrottle settings and docs
 40 | #DOWNLOAD_DELAY = 3
 41 | # The download delay setting will honor only one of:
 42 | #CONCURRENT_REQUESTS_PER_DOMAIN = 16
 43 | #CONCURRENT_REQUESTS_PER_IP = 16
 44 | 
 45 | # Disable cookies (enabled by default)
 46 | #COOKIES_ENABLED = False
 47 | 
 48 | # Disable Telnet Console (enabled by default)
 49 | #TELNETCONSOLE_ENABLED = False
 50 | 
 51 | # Override the default request headers:
 52 | #DEFAULT_REQUEST_HEADERS = {
 53 | #   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
 54 | #   'Accept-Language': 'en',
 55 | #}
 56 | 
 57 | # Enable or disable spider middlewares
 58 | # See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
 59 | #SPIDER_MIDDLEWARES = {
 60 | #    'cartoon.middlewares.CartoonSpiderMiddleware': 543,
 61 | #}
 62 | 
 63 | # Enable or disable downloader middlewares
 64 | # See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
 65 | #DOWNLOADER_MIDDLEWARES = {
 66 | #    'cartoon.middlewares.MyCustomDownloaderMiddleware': 543,
 67 | #}
 68 | 
 69 | # Enable or disable extensions
 70 | # See http://scrapy.readthedocs.org/en/latest/topics/extensions.html
 71 | #EXTENSIONS = {
 72 | #    'scrapy.extensions.telnet.TelnetConsole': None,
 73 | #}
 74 | 
 75 | # Configure item pipelines
 76 | # See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html
 77 | #ITEM_PIPELINES = {
 78 | #    'cartoon.pipelines.CartoonPipeline': 300,
 79 | #}
 80 | 
 81 | # Enable and configure the AutoThrottle extension (disabled by default)
 82 | # See http://doc.scrapy.org/en/latest/topics/autothrottle.html
 83 | #AUTOTHROTTLE_ENABLED = True
 84 | # The initial download delay
 85 | #AUTOTHROTTLE_START_DELAY = 5
 86 | # The maximum download delay to be set in case of high latencies
 87 | #AUTOTHROTTLE_MAX_DELAY = 60
 88 | # The average number of requests Scrapy should be sending in parallel to
 89 | # each remote server
 90 | #AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
 91 | # Enable showing throttling stats for every response received:
 92 | #AUTOTHROTTLE_DEBUG = False
 93 | 
 94 | # Enable and configure HTTP caching (disabled by default)
 95 | # See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
 96 | #HTTPCACHE_ENABLED = True
 97 | #HTTPCACHE_EXPIRATION_SECS = 0
 98 | #HTTPCACHE_DIR = 'httpcache'
 99 | #HTTPCACHE_IGNORE_HTTP_CODES = []
100 | #HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
101 | 


--------------------------------------------------------------------------------
/cartoon/cartoon/spiders/__init__.py:
--------------------------------------------------------------------------------
1 | # This package will contain the spiders of your Scrapy project
2 | #
3 | # Please refer to the documentation for information on how to create and manage
4 | # your spiders.
5 | 


--------------------------------------------------------------------------------
/cartoon/cartoon/spiders/__pycache__/__init__.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sukersuker/python--spider/0e743c1503212cff9d2a800b6c5df344d81362b7/cartoon/cartoon/spiders/__pycache__/__init__.cpython-34.pyc


--------------------------------------------------------------------------------
/cartoon/cartoon/spiders/__pycache__/comic_spider.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sukersuker/python--spider/0e743c1503212cff9d2a800b6c5df344d81362b7/cartoon/cartoon/spiders/__pycache__/comic_spider.cpython-34.pyc


--------------------------------------------------------------------------------
/cartoon/cartoon/spiders/comic_spider.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | import re
 4 | import scrapy
 5 | from scrapy import Selector
 6 | from cartoon.items import ComicItem
 7 | 
 8 | class ComicSpider(scrapy.Spider):
 9 | 	name = 'comic'
10 | 
11 | 	def __init__(self):
12 | 		#图片链接server域名
13 | 		self.server_img = 'http://n.1whour.com/'
14 | 		#章节链接server域名
15 | 		self.server_link = 'http://comic.kukudm.com'
16 | 		self.allowed_domains = ['comic.kukudm.com']
17 | 		self.start_urls = ['http://comic.kukudm.com/comiclist/3/']
18 | 		#匹配图片地址的正则表达式
19 | 		self.pattern_img = re.compile(r'\+"(.+)\'><span')
20 | 
21 | 	#从start_requests发送请求
22 | 	def start_requests(self):
23 | 		yield scrapy.Request(url = self.start_urls[0], callback = self.parse1)
24 | 
25 | 	#解析response,获得章节图片链接地址
26 | 	def parse1(self, response):
27 | 		hxs = Selector(response)
28 | 		items = []
29 | 		#章节链接地址
30 | 		urls = hxs.xpath('//dd/a[1]/@href').extract()
31 | 		#章节名
32 | 		dir_names = hxs.xpath('//dd/a[1]/text()').extract()
33 | 		#保存章节链接和章节名
34 | 		for index in range(len(urls)):
35 | 			item = ComicItem()
36 | 			item['link_url'] = self.server_link + urls[index]
37 | 			item['dir_name'] = dir_names[index]
38 | 			items.append(item)
39 | 
40 | 		#根据每个章节的链接，发送Request请求，并传递item参数
41 | 		for item in items:
42 | 			yield scrapy.Request(url = item['link_url'], meta = {'item':item}, callback = self.parse2)
43 | 		
44 | 	#解析获得章节第一页的页码数和图片链接	
45 | 	def parse2(self, response):
46 | 		#接收传递的item
47 | 		item = response.meta['item']
48 | 		#获取章节的第一页的链接
49 | 		item['link_url'] = response.url
50 | 		hxs = Selector(response)
51 | 		#获取章节的第一页的图片链接
52 | 		pre_img_url = hxs.xpath('//script/text()').extract()
53 | 		#注意这里返回的图片地址,应该为列表,否则会报错
54 | 		img_url = [self.server_img + re.findall(self.pattern_img, pre_img_url[0])[0]]
55 | 		#将获取的章节的第一页的图片链接保存到img_url中
56 | 		item['img_url'] = img_url
57 | 		#返回item，交给item pipeline下载图片
58 | 		yield item
59 | 		#获取章节的页数
60 | 		page_num = hxs.xpath('//td[@valign="top"]/text()').re(u'共(\d+)页')[0]
61 | 		#根据页数，整理出本章节其他页码的链接
62 | 		pre_link = item['link_url'][:-5]
63 | 		for each_link in range(2, int(page_num) + 1):
64 | 			new_link = pre_link + str(each_link) + '.htm'
65 | 			#根据本章节其他页码的链接发送Request请求，用于解析其他页码的图片链接，并传递item
66 | 			yield scrapy.Request(url = new_link, meta = {'item':item}, callback = self.parse3)
67 | 
68 | 	#解析获得本章节其他页面的图片链接
69 | 	def parse3(self, response):
70 | 		#接收传递的item
71 | 		item = response.meta['item']
72 | 		#获取该页面的链接
73 | 		item['link_url'] = response.url
74 | 		hxs = Selector(response)
75 | 		pre_img_url = hxs.xpath('//script/text()').extract()
76 | 		#注意这里返回的图片地址,应该为列表,否则会报错
77 | 		img_url = [self.server_img + re.findall(self.pattern_img, pre_img_url[0])[0]]
78 | 		#将获取的图片链接保存到img_url中
79 | 		item['img_url'] = img_url
80 | 		#返回item，交给item pipeline下载图片
81 | 		yield item
82 | 		


--------------------------------------------------------------------------------
/cartoon/scrapy.cfg:
--------------------------------------------------------------------------------
 1 | # Automatically created by: scrapy startproject
 2 | #
 3 | # For more information about the [deploy] section see:
 4 | # https://scrapyd.readthedocs.org/en/latest/deploy.html
 5 | 
 6 | [settings]
 7 | default = cartoon.settings
 8 | 
 9 | [deploy]
10 | #url = http://localhost:6800/
11 | project = cartoon
12 | 


--------------------------------------------------------------------------------
/daili.py:
--------------------------------------------------------------------------------
  1 | # -*- coding:UTF-8 -*-
  2 | from bs4 import BeautifulSoup
  3 | from selenium import webdriver
  4 | import subprocess as sp
  5 | from lxml import etree
  6 | import requests
  7 | import random
  8 | import re
  9 | 
 10 | """
 11 | 函数说明:获取IP代理
 12 | Parameters:
 13 | 	page - 高匿代理页数,默认获取第一页
 14 | Returns:
 15 | 	proxys_list - 代理列表
 16 | Modify:
 17 | 	2017-05-27
 18 | """
 19 | def get_proxys(page = 1):
 20 | 	#requests的Session可以自动保持cookie,不需要自己维护cookie内容
 21 | 	S = requests.Session()
 22 | 	#西祠代理高匿IP地址
 23 | 	target_url = 'http://www.xicidaili.com/nn/%d' % page
 24 | 	#完善的headers
 25 | 	target_headers = {'Upgrade-Insecure-Requests':'1',
 26 | 		'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
 27 | 		'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
 28 | 		'Referer':'http://www.xicidaili.com/nn/',
 29 | 		'Accept-Encoding':'gzip, deflate, sdch',
 30 | 		'Accept-Language':'zh-CN,zh;q=0.8',
 31 | 	}
 32 | 	#get请求
 33 | 	target_response = S.get(url = target_url, headers = target_headers)
 34 | 	#utf-8编码
 35 | 	target_response.encoding = 'utf-8'
 36 | 	#获取网页信息
 37 | 	target_html = target_response.text
 38 | 	#获取id为ip_list的table
 39 | 	bf1_ip_list = BeautifulSoup(target_html, 'lxml')
 40 | 	bf2_ip_list = BeautifulSoup(str(bf1_ip_list.find_all(id = 'ip_list')), 'lxml')
 41 | 	ip_list_info = bf2_ip_list.table.contents
 42 | 	#存储代理的列表
 43 | 	proxys_list = []
 44 | 	#爬取每个代理信息
 45 | 	for index in range(len(ip_list_info)):
 46 | 		if index % 2 == 1 and index != 1:
 47 | 			dom = etree.HTML(str(ip_list_info[index]))
 48 | 			ip = dom.xpath('//td[2]')
 49 | 			port = dom.xpath('//td[3]')
 50 | 			protocol = dom.xpath('//td[6]')
 51 | 			proxys_list.append(protocol[0].text.lower() + '#' + ip[0].text + '#' + port[0].text)
 52 | 	#返回代理列表
 53 | 	return proxys_list
 54 | 
 55 | """
 56 | 函数说明:检查代理IP的连通性
 57 | Parameters:
 58 | 	ip - 代理的ip地址
 59 | 	lose_time - 匹配丢包数
 60 | 	waste_time - 匹配平均时间
 61 | Returns:
 62 | 	average_time - 代理ip平均耗时
 63 | Modify:
 64 | 	2017-05-27
 65 | """
 66 | def check_ip(ip, lose_time, waste_time):
 67 | 	#命令 -n 要发送的回显请求数 -w 等待每次回复的超时时间(毫秒)
 68 | 	cmd = "ping -n 3 -w 3 %s"
 69 | 	#执行命令
 70 | 	p = sp.Popen(cmd % ip, stdin=sp.PIPE, stdout=sp.PIPE, stderr=sp.PIPE, shell=True) 
 71 | 	#获得返回结果并解码
 72 | 	out = p.stdout.read().decode("gbk")
 73 | 	#丢包数
 74 | 	lose_time = lose_time.findall(out)
 75 | 	#当匹配到丢失包信息失败,默认为三次请求全部丢包,丢包数lose赋值为3
 76 | 	if len(lose_time) == 0:
 77 | 		lose = 3
 78 | 	else:
 79 | 		lose = int(lose_time[0])
 80 | 	#如果丢包数目大于2个,则认为连接超时,返回平均耗时1000ms
 81 | 	if lose > 2:
 82 | 		#返回False
 83 | 		return 1000
 84 | 	#如果丢包数目小于等于2个,获取平均耗时的时间
 85 | 	else:
 86 | 		#平均时间
 87 | 		average = waste_time.findall(out)
 88 | 		#当匹配耗时时间信息失败,默认三次请求严重超时,返回平均好使1000ms
 89 | 		if len(average) == 0:
 90 | 			return 1000
 91 | 		else:
 92 | 			#
 93 | 			average_time = int(average[0])
 94 | 			#返回平均耗时
 95 | 			return average_time
 96 | 
 97 | """
 98 | 函数说明:初始化正则表达式
 99 | Parameters:
100 | 	无
101 | Returns:
102 | 	lose_time - 匹配丢包数
103 | 	waste_time - 匹配平均时间
104 | Modify:
105 | 	2017-05-27
106 | """
107 | def initpattern():
108 | 	#匹配丢包数
109 | 	lose_time = re.compile(u"丢失 = (\d+)", re.IGNORECASE)
110 | 	#匹配平均时间
111 | 	waste_time = re.compile(u"平均 = (\d+)ms", re.IGNORECASE)
112 | 	return lose_time, waste_time
113 | 
114 | if __name__ == '__main__':
115 | 	#初始化正则表达式
116 | 	lose_time, waste_time = initpattern()
117 | 	#获取IP代理
118 | 	proxys_list = get_proxys(1)
119 | 
120 | 	#如果平均时间超过200ms重新选取ip
121 | 	while True:
122 | 		#从100个IP中随机选取一个IP作为代理进行访问
123 | 		proxy = random.choice(proxys_list)
124 | 		split_proxy = proxy.split('#')
125 | 		#获取IP
126 | 		ip = split_proxy[1]
127 | 		#检查ip
128 | 		average_time = check_ip(ip, lose_time, waste_time)
129 | 		if average_time > 200:
130 | 			#去掉不能使用的IP
131 | 			proxys_list.remove(proxy)
132 | 			print("ip连接超时, 重新获取中!")
133 | 		if average_time < 200:
134 | 			break
135 | 
136 | 	#去掉已经使用的IP
137 | 	proxys_list.remove(proxy)
138 | 	proxy_dict = {split_proxy[0]:split_proxy[1] + ':' + split_proxy[2]}
139 | 	print("使用代理:", proxy_dict)
140 | 


--------------------------------------------------------------------------------
/dingdong/README.md:
--------------------------------------------------------------------------------
 1 | ## 功能
 2 | 
 3 | 下载京东商品的晒单图。
 4 | 
 5 | ## 作者
 6 | 
 7 | * Website: [http://cuijiahua.com](http://cuijiahua.com "悬停显示")
 8 | * Author: Jack Cui
 9 | * Date: 2018.7.7
10 | 
11 | ## 效果图：
12 | 
13 | ![image](https://github.com/Jack-Cherish/Pictures/blob/master/jd.gif)
14 | 
15 | ## 使用说明
16 | 
17 | 	python jd.py -k 芒果
18 | 
19 | 	三个参数：
20 | 	-d	保存图片的路径，默认为fd.py文件所在文件夹
21 | 	-k	搜索关键词
22 | 	-n  	下载商品的晒单图个数，即n个商店的晒单图
23 | 


--------------------------------------------------------------------------------
/dingdong/jd.py:
--------------------------------------------------------------------------------
  1 | # -*-coding:utf-8 -*-
  2 | # Author:Jack Cui
  3 | # Website:http://cuijiahua.com
  4 | # Date:2018-7-7
  5 | import os
  6 | import re
  7 | import sys
  8 | import bs4
  9 | import json
 10 | import math
 11 | import time
 12 | import math
 13 | import argparse
 14 | import requests
 15 | from contextlib import closing
 16 | 
 17 | def search_goods(keyword, pages):
 18 | 	"""
 19 | 	搜索商品
 20 | 	Parameters:
 21 | 		keyword - str 搜索关键词
 22 | 		pages - int 搜索页数
 23 | 	Returns:
 24 | 		goods_urls - list 商品链接
 25 | 	"""
 26 | 	# 创建session
 27 | 	sess = requests.Session()
 28 | 	goods_urls = []
 29 | 	for page in range(pages):
 30 | 		# 第一次加载
 31 | 		search_headers = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
 32 | 			'Accept-Encoding': 'gzip, deflate, br',
 33 | 			'Accept-Language': 'zh-CN,zh;q=0.9',
 34 | 			'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36',
 35 | 			'Host': 'search.jd.com'}
 36 | 		s = page*28
 37 | 		if s == 0:
 38 | 			s = 1
 39 | 		# 搜索url
 40 | 		search_url = 'https://search.jd.com/Search'
 41 | 		search_params = {'keyword':keyword,
 42 | 			'enc':'utf-8',
 43 | 			'qrst':'1',
 44 | 			'rt':'1',
 45 | 			'stop':'1',
 46 | 			'vt':'2',
 47 | 			'wq':keyword,
 48 | 			'stock':'1',
 49 | 			'page':page*2+1,
 50 | 			's':s,
 51 | 			'click':'0'}
 52 | 		search_req = sess.get(url=search_url, params=search_params, headers=search_headers, verify=False)
 53 | 		search_req.encoding = 'utf-8'
 54 | 		# 匹配商品链接
 55 | 		search_req_bf = bs4.BeautifulSoup(search_req.text, 'lxml')
 56 | 		for item in search_req_bf.find_all('li', class_='gl-item'):
 57 | 			item_url = item.div.div.a.get('href')
 58 | 			# 滤除广告
 59 | 			if 'ccc-x.jd.com' not in item_url:
 60 | 				goods_urls.append(item_url)
 61 | 		# 继续加载log_id
 62 | 		log_id = re.findall("log_id:'(.*)',", search_req.text)[0]
 63 | 		
 64 | 		# 第二次加载
 65 | 		# 继续加载url
 66 | 		search_more_url = 'https://search.jd.com/s_new.php'
 67 | 		search_more_headers = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
 68 | 			'Accept-Encoding': 'gzip, deflate, br',
 69 | 			'Accept-Language': 'zh-CN,zh;q=0.9',
 70 | 			'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36',
 71 | 			'Host': 'search.jd.com',
 72 | 			'Referer':search_req.url}
 73 | 		s = (1+page)*25
 74 | 		search_more_params = {'keyword':keyword,
 75 | 			'enc':'utf-8',
 76 | 			'qrst':'1',
 77 | 			'rt':'1',
 78 | 			'stop':'1',
 79 | 			'vt':'2',
 80 | 			'wq':keyword,
 81 | 			'stock':'1',
 82 | 			'page':(1+page)*2,
 83 | 			's':s,
 84 | 			'log_id':log_id,
 85 | 			'scrolling':'y',
 86 | 			'tpl':'1_M'}
 87 | 		search_more_req = sess.get(url=search_more_url, params=search_more_params, headers=search_more_headers, verify=False)
 88 | 		search_more_req.encoding = 'utf-8'
 89 | 		# 匹配商品链接
 90 | 		search_more_req_bf = bs4.BeautifulSoup(search_more_req.text, 'lxml')
 91 | 		for item in search_more_req_bf.find_all('li', class_='gl-item'):
 92 | 			item_url = item.div.div.a.get('href')
 93 | 			# 滤除广告
 94 | 			if 'ccc-x.jd.com' not in item_url:
 95 | 				goods_urls.append(item_url)
 96 | 	# 去重
 97 | 	goods_urls = list(set(goods_urls))
 98 | 	# 链接合成
 99 | 	goods_urls = list(map(lambda x: 'http:'+x, goods_urls))
100 | 	return goods_urls
101 | 
102 | def goods_images(goods_url):
103 | 	"""
104 | 	获得商品晒图
105 | 	Parameters:
106 | 		goods_url - str 商品链接
107 | 	Returns:
108 | 		image_urls - list 图片链接
109 | 	"""
110 | 	image_urls = []
111 | 	productId = goods_url.split('/')[-1].split('.')[0]
112 | 
113 | 	# 评论url
114 | 	comment_url = 'https://sclub.jd.com/comment/productPageComments.action'
115 | 	comment_params = {'productId':productId,
116 | 		'score':'0',
117 | 		'sortType':'5',
118 | 		'page':'0',
119 | 		'pageSize':'10',
120 | 		'isShadowSku':'0',
121 | 		'fold':'1'}
122 | 	comment_headers = {'Accept': '*/*',
123 | 		'Accept-Encoding': 'gzip, deflate, br',
124 | 		'Accept-Language': 'zh-CN,zh;q=0.9',
125 | 		'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36',
126 | 		'Referer':goods_url,
127 | 		'Host': 'sclub.jd.com'}
128 | 
129 | 	comment_req = requests.get(url=comment_url, params=comment_params, headers=comment_headers, verify=False)
130 | 	html = json.loads(comment_req.text)
131 | 	# 获得晒图个数
132 | 	imageListCount = html['imageListCount']
133 | 	# 计算晒图页数,向上取整
134 | 	pages = math.ceil(imageListCount / 10)
135 | 	for page in range(1, pages+1):
136 | 		# 获取晒图图片url
137 | 		club_url = 'https://club.jd.com/discussion/getProductPageImageCommentList.action'
138 | 		now = time.time()
139 | 		now_str = str(now).split('.')
140 | 		now = now_str[0] + now_str[-1][:3]
141 | 		club_params = {'productId':productId,
142 | 			'isShadowSku':'0',
143 | 			'page':page,
144 | 			'pageSize':'10',
145 | 			'_':now}
146 | 		club_headers = comment_headers
147 | 		club_req = requests.get(url=club_url, params=club_params, headers=club_headers, verify=False)
148 | 		html = json.loads(club_req.text)
149 | 		for img in html['imgComments']['imgList']:
150 | 			image_urls.append(img['imageUrl'])
151 | 	# 去重
152 | 	image_urls = list(set(image_urls))
153 | 	# 链接合成
154 | 	image_urls = list(map(lambda x: 'http:'+x, image_urls))
155 | 
156 | 	return image_urls
157 | 
158 | def download_image(path, image_url):
159 | 	"""
160 | 	图片下载
161 | 	Parameters:
162 | 		path - str 图片保存地址
163 | 		image_url - str 图片下载地址
164 | 	Returns:
165 | 		None
166 | 	"""
167 | 	print(image_url)
168 | 	filename = image_url.split('/')[-1]
169 | 	image_path = os.path.join(path, filename)
170 | 	download_headers = {'Accept': '*/*',
171 | 		'Accept-Encoding': 'gzip, deflate, br',
172 | 		'Accept-Language': 'zh-CN,zh;q=0.9',
173 | 		'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36'}
174 | 	size = 0
175 | 	with closing(requests.get(image_url, headers=download_headers, stream=True)) as response:
176 | 		chunk_size = 1024
177 | 		content_size = int(response.headers['content-length'])
178 | 		if response.status_code == 200:
179 | 			sys.stdout.write(filename+'下载中:\n')
180 | 			sys.stdout.write('    [文件大小]:%0.2f MB\n' % (content_size / chunk_size / 1024))
181 | 
182 | 			with open(image_path, 'wb') as file:
183 | 				for data in response.iter_content(chunk_size = chunk_size):
184 | 					file.write(data)
185 | 					size += len(data)
186 | 					file.flush()
187 | 					sys.stdout.write('    [下载进度]:%.2f%%' % float(size / content_size * 100) + '\r')
188 | 					sys.stdout.flush()
189 | 
190 | def run(path, keyword, num):
191 | 	"""
192 | 	运行函数
193 | 	Parameters:
194 | 		path - str 图片保存目录
195 | 		keyword - str 关键词
196 | 		num - int 下载的商店个数
197 | 	Returns:
198 | 		None
199 | 	"""
200 | 	flag = False
201 | 	pages = 1
202 | 	while flag == False:
203 | 		goods_urls = search_goods(keyword, pages)
204 | 		if len(goods_urls) > num:
205 | 			flag = True
206 | 		else:
207 | 			pages += 1
208 | 
209 | 	if keyword not in os.listdir():
210 | 		os.mkdir(keyword)
211 | 	path = os.path.join(path, keyword)
212 | 	for goods_url in goods_urls[:num]:
213 | 		image_urls = goods_images(goods_url)
214 | 		for image_url in image_urls:
215 | 			download_image(path, image_url)
216 | 
217 | if __name__ == '__main__':
218 | 	if len(sys.argv) == 1:
219 | 		sys.argv.append('--help')
220 | 	parser = argparse.ArgumentParser()
221 | 	parser.add_argument('-d', '--dir', help=('store path'), type=str, default=os.path.dirname(__file__))
222 | 	parser.add_argument('-k', '--keyword', required=True, help=('search content'))
223 | 	parser.add_argument('-n', '--num', help=('the number of goods to download images'), type=int, default=1)
224 | 	args = parser.parse_args()
225 | 	run(args.dir, args.keyword, args.num)
226 | 


--------------------------------------------------------------------------------
/douyin.py:
--------------------------------------------------------------------------------
  1 | # -*- coding:utf-8 -*-
  2 | from bs4 import BeautifulSoup
  3 | from contextlib import closing
  4 | import requests, json, time, re, os, sys, time
  5 | 
  6 | class DouYin(object):
  7 | 	def __init__(self):
  8 | 		"""
  9 | 		抖音App视频下载
 10 | 		"""
 11 | 		#SSL认证
 12 | 		pass
 13 | 
 14 | 	def get_video_urls(self, user_id):
 15 | 		"""
 16 | 		获得视频播放地址
 17 | 		Parameters:
 18 | 			nickname：查询的用户名
 19 | 		Returns:
 20 | 			video_names: 视频名字列表
 21 | 			video_urls: 视频链接列表
 22 | 			aweme_count: 视频数量
 23 | 		"""
 24 | 		video_names = []
 25 | 		video_urls = []
 26 | 		unique_id = ''
 27 | 		while unique_id != user_id:
 28 | 			search_url = 'https://api.amemv.com/aweme/v1/discover/search/?cursor=0&keyword=%s&count=10&type=1&retry_type=no_retry&iid=17900846586&device_id=34692364855&ac=wifi&channel=xiaomi&aid=1128&app_name=aweme&version_code=162&version_name=1.6.2&device_platform=android&ssmix=a&device_type=MI+5&device_brand=Xiaomi&os_api=24&os_version=7.0&uuid=861945034132187&openudid=dc451556fc0eeadb&manifest_version_code=162&resolution=1080*1920&dpi=480&update_version_code=1622' % user_id
 29 | 			req = requests.get(url = search_url, verify = False)
 30 | 			html = json.loads(req.text)
 31 | 			aweme_count = html['user_list'][0]['user_info']['aweme_count']
 32 | 			uid = html['user_list'][0]['user_info']['uid']
 33 | 			nickname = html['user_list'][0]['user_info']['nickname']
 34 | 			unique_id = html['user_list'][0]['user_info']['unique_id']
 35 | 		user_url = 'https://www.douyin.com/aweme/v1/aweme/post/?user_id=%s&max_cursor=0&count=%s' % (uid, aweme_count)
 36 | 		req = requests.get(url = user_url, verify = False)
 37 | 		html = json.loads(req.text)
 38 | 		i = 1
 39 | 		for each in html['aweme_list']:
 40 | 			share_desc = each['share_info']['share_desc']
 41 | 			if '抖音-原创音乐短视频社区' == share_desc:
 42 | 				video_names.append(str(i) + '.mp4')
 43 | 				i += 1
 44 | 			else:
 45 | 				video_names.append(share_desc + '.mp4')
 46 | 			video_urls.append(each['share_info']['share_url'])
 47 | 
 48 | 		return video_names, video_urls, nickname
 49 | 
 50 | 	def get_download_url(self, video_url):
 51 | 		"""
 52 | 		获得视频播放地址
 53 | 		Parameters:
 54 | 			video_url：视频播放地址
 55 | 		Returns:
 56 | 			download_url: 视频下载地址
 57 | 		"""
 58 | 		req = requests.get(url = video_url, verify = False)
 59 | 		bf = BeautifulSoup(req.text, 'lxml')
 60 | 		script = bf.find_all('script')[-1]
 61 | 		video_url_js = re.findall('var data = \[(.+)\];', str(script))[0]
 62 | 		video_html = json.loads(video_url_js)
 63 | 		download_url = video_html['video']['play_addr']['url_list'][0]
 64 | 		return download_url
 65 | 
 66 | 	def video_downloader(self, video_url, video_name):
 67 | 		"""
 68 | 		视频下载
 69 | 		Parameters:
 70 | 			None
 71 | 		Returns:
 72 | 			None
 73 | 		"""
 74 | 		size = 0
 75 | 		with closing(requests.get(video_url, stream=True, verify = False)) as response:
 76 | 			chunk_size = 1024
 77 | 			content_size = int(response.headers['content-length']) 
 78 | 			if response.status_code == 200:
 79 | 				sys.stdout.write('  [文件大小]:%0.2f MB\n' % (content_size / chunk_size / 1024))
 80 | 
 81 | 				with open(video_name, "wb") as file:  
 82 | 					for data in response.iter_content(chunk_size = chunk_size):
 83 | 						file.write(data)
 84 | 						size += len(data)
 85 | 						file.flush()
 86 | 
 87 | 					sys.stdout.write('    [下载进度]:%.2f%%' % float(size / content_size * 100))
 88 | 					sys.stdout.flush()
 89 | 		time.sleep(1)
 90 | 
 91 | 
 92 | 	def run(self):
 93 | 		"""
 94 | 		运行函数
 95 | 		Parameters:
 96 | 			None
 97 | 		Returns:
 98 | 			None
 99 | 		"""
100 | 		self.hello()
101 | 		# user_id = input('请输入ID(例如13978338):')
102 | 		user_id = 'sm666888'
103 | 		video_names, video_urls, nickname = self.get_video_urls(user_id)
104 | 		if nickname not in os.listdir():
105 | 			os.mkdir(nickname)
106 | 		sys.stdout.write('视频下载中:\n')
107 | 		for num in range(len(video_urls)):
108 | 			print('  %s\n' % video_urls[num])
109 | 			video_url = self.get_download_url(video_urls[num])
110 | 			if '\\' in video_names[num]:
111 | 				video_name = video_names[num].replace('\\', '')
112 | 			elif '/' in video_names[num]:
113 | 				video_name = video_names[num].replace('/', '')
114 | 			else:
115 | 				video_name = video_names[num]
116 | 			self.video_downloader(video_url, os.path.join(nickname, video_name))
117 | 			print('')
118 | 
119 | 	def hello(self):
120 | 		"""
121 | 		打印欢迎界面
122 | 		Parameters:
123 | 			None
124 | 		Returns:
125 | 			None
126 | 		"""
127 | 		print('*' * 100)
128 | 		print('\t\t\t\t抖音App视频下载小助手')
129 | 		print('*' * 100)
130 | 
131 | 		
132 | if __name__ == '__main__':
133 | 	douyin = DouYin()
134 | 	douyin.run()


--------------------------------------------------------------------------------
/douyin_pro.py:
--------------------------------------------------------------------------------
  1 | # -*- coding:utf-8 -*-
  2 | from splinter.driver.webdriver.chrome import Options, Chrome
  3 | from splinter.browser import Browser
  4 | from contextlib import closing
  5 | import requests, json, time, re, os, sys, time
  6 | from bs4 import BeautifulSoup
  7 | 
  8 | class DouYin(object):
  9 | 	def __init__(self, width = 500, height = 300):
 10 | 		"""
 11 | 		抖音App视频下载
 12 | 		"""
 13 | 		# 无头浏览器
 14 | 		chrome_options = Options()
 15 | 		chrome_options.add_argument('user-agent="Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"')
 16 | 		self.driver = Browser(driver_name='chrome', executable_path='D:/chromedriver', options=chrome_options, headless=True)
 17 | 
 18 | 	def get_video_urls(self, user_id):
 19 | 		"""
 20 | 		获得视频播放地址
 21 | 		Parameters:
 22 | 			user_id：查询的用户ID
 23 | 		Returns:
 24 | 			video_names: 视频名字列表
 25 | 			video_urls: 视频链接列表
 26 | 			nickname: 用户昵称
 27 | 		"""
 28 | 		video_names = []
 29 | 		video_urls = []
 30 | 		unique_id = ''
 31 | 		while unique_id != user_id:
 32 | 			search_url = 'https://api.amemv.com/aweme/v1/discover/search/?cursor=0&keyword=%s&count=10&type=1&retry_type=no_retry&iid=17900846586&device_id=34692364855&ac=wifi&channel=xiaomi&aid=1128&app_name=aweme&version_code=162&version_name=1.6.2&device_platform=android&ssmix=a&device_type=MI+5&device_brand=Xiaomi&os_api=24&os_version=7.0&uuid=861945034132187&openudid=dc451556fc0eeadb&manifest_version_code=162&resolution=1080*1920&dpi=480&update_version_code=1622' % user_id
 33 | 			req = requests.get(url = search_url, verify = False)
 34 | 			html = json.loads(req.text)
 35 | 			aweme_count = html['user_list'][0]['user_info']['aweme_count']
 36 | 			uid = html['user_list'][0]['user_info']['uid']
 37 | 			nickname = html['user_list'][0]['user_info']['nickname']
 38 | 			unique_id = html['user_list'][0]['user_info']['unique_id']
 39 | 		user_url = 'https://www.douyin.com/aweme/v1/aweme/post/?user_id=%s&max_cursor=0&count=%s' % (uid, aweme_count)
 40 | 		req = requests.get(url = user_url, verify = False)
 41 | 		html = json.loads(req.text)
 42 | 		i = 1
 43 | 		for each in html['aweme_list']:
 44 | 			share_desc = each['share_info']['share_desc']
 45 | 			if '抖音-原创音乐短视频社区' == share_desc:
 46 | 				video_names.append(str(i) + '.mp4')
 47 | 				i += 1
 48 | 			else:
 49 | 				video_names.append(share_desc + '.mp4')
 50 | 			video_urls.append(each['share_info']['share_url'])
 51 | 
 52 | 		return video_names, video_urls, nickname
 53 | 
 54 | 	def get_download_url(self, video_url):
 55 | 		"""
 56 | 		获得带水印的视频播放地址
 57 | 		Parameters:
 58 | 			video_url：带水印的视频播放地址
 59 | 		Returns:
 60 | 			download_url: 带水印的视频下载地址
 61 | 		"""
 62 | 		req = requests.get(url = video_url, verify = False)
 63 | 		bf = BeautifulSoup(req.text, 'lxml')
 64 | 		script = bf.find_all('script')[-1]
 65 | 		video_url_js = re.findall('var data = \[(.+)\];', str(script))[0]
 66 | 		video_html = json.loads(video_url_js)
 67 | 		download_url = video_html['video']['play_addr']['url_list'][0]
 68 | 		return download_url
 69 | 
 70 | 	def video_downloader(self, video_url, video_name, watermark_flag=True):
 71 | 		"""
 72 | 		视频下载
 73 | 		Parameters:
 74 | 			video_url: 带水印的视频地址
 75 | 			video_name: 视频名
 76 | 			watermark_flag: 是否下载不带水印的视频
 77 | 		Returns:
 78 | 			无
 79 | 		"""
 80 | 		size = 0
 81 | 		if watermark_flag == True:
 82 | 			video_url = self.remove_watermark(video_url)
 83 | 		else:
 84 | 			video_url = self.get_download_url(video_url)
 85 | 		with closing(requests.get(video_url, stream=True, verify = False)) as response:
 86 | 			chunk_size = 1024
 87 | 			content_size = int(response.headers['content-length']) 
 88 | 			if response.status_code == 200:
 89 | 				sys.stdout.write('  [文件大小]:%0.2f MB\n' % (content_size / chunk_size / 1024))
 90 | 
 91 | 				with open(video_name, "wb") as file:  
 92 | 					for data in response.iter_content(chunk_size = chunk_size):
 93 | 						file.write(data)
 94 | 						size += len(data)
 95 | 						file.flush()
 96 | 
 97 | 						sys.stdout.write('  [下载进度]:%.2f%%' % float(size / content_size * 100) + '\r')
 98 | 						sys.stdout.flush()
 99 | 
100 | 
101 | 	def remove_watermark(self, video_url):
102 | 		"""
103 | 		获得无水印的视频播放地址
104 | 		Parameters:
105 | 			video_url: 带水印的视频地址
106 | 		Returns:
107 | 			无水印的视频下载地址
108 | 		"""
109 | 		self.driver.visit('http://douyin.iiilab.com/')
110 | 		self.driver.find_by_tag('input').fill(video_url)
111 | 		self.driver.find_by_xpath('//button[@class="btn btn-default"]').click()
112 | 		html = self.driver.find_by_xpath('//div[@class="thumbnail"]/div/p')[0].html
113 | 		bf = BeautifulSoup(html, 'lxml')
114 | 		return bf.find('a').get('href')
115 | 
116 | 	def run(self):
117 | 		"""
118 | 		运行函数
119 | 		Parameters:
120 | 			None
121 | 		Returns:
122 | 			None
123 | 		"""
124 | 		self.hello()
125 | 		user_id = input('请输入ID(例如40103580):')
126 | 		video_names, video_urls, nickname = self.get_video_urls(user_id)
127 | 		if nickname not in os.listdir():
128 | 			os.mkdir(nickname)
129 | 		print('视频下载中:共有%d个作品!\n' % len(video_urls))
130 | 		for num in range(len(video_urls)):
131 | 			print('  解析第%d个视频链接 [%s] 中，请稍后!\n' % (num+1, video_urls[num]))
132 | 			if '\\' in video_names[num]:
133 | 				video_name = video_names[num].replace('\\', '')
134 | 			elif '/' in video_names[num]:
135 | 				video_name = video_names[num].replace('/', '')
136 | 			else:
137 | 				video_name = video_names[num]
138 | 			self.video_downloader(video_urls[num], os.path.join(nickname, video_name))
139 | 			print('\n')
140 | 
141 | 		print('下载完成!')
142 | 
143 | 	def hello(self):
144 | 		"""
145 | 		打印欢迎界面
146 | 		Parameters:
147 | 			None
148 | 		Returns:
149 | 			None
150 | 		"""
151 | 		print('*' * 100)
152 | 		print('\t\t\t\t抖音App视频下载小助手')
153 | 		print('\t\t作者:Jack Cui')
154 | 		print('*' * 100)
155 | 
156 | 
157 | if __name__ == '__main__':
158 | 	douyin = DouYin()
159 | 	douyin.run()
160 | 


--------------------------------------------------------------------------------
/douyin_pro_2.py:
--------------------------------------------------------------------------------
  1 | # -*- coding:utf-8 -*-
  2 | from contextlib import closing
  3 | import requests, json, re, os, sys
  4 | from datetime import datetime, timezone
  5 | 
  6 | class DouYin(object):
  7 | 	def __init__(self, width = 500, height = 300):
  8 | 		"""
  9 | 		抖音App视频下载
 10 | 		"""
 11 | 		self.headers = {
 12 | 			'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
 13 | 			'accept-encoding': 'gzip, deflate, br',
 14 | 			'accept-language': 'zh-CN,zh;q=0.9',
 15 | 			'cache-control': 'max-age=0',
 16 | 			'upgrade-insecure-requests': '1',
 17 | 			'user-agent': 'Mozilla/5.0 (Linux; U; Android 5.1.1; zh-cn; MI 4S Build/LMY47V) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/53.0.2785.146 Mobile Safari/537.36 XiaoMi/MiuiBrowser/9.1.3',
 18 | 		}
 19 | 
 20 | 	def get_video_urls(self, user_id):
 21 | 		"""
 22 | 		获得视频播放地址
 23 | 		Parameters:
 24 | 			user_id：查询的用户ID
 25 | 		Returns:
 26 | 			video_names: 视频名字列表
 27 | 			video_urls: 视频链接列表
 28 | 			nickname: 用户昵称
 29 | 		"""
 30 | 		video_names = []
 31 | 		video_urls = []
 32 | 		share_urls = []
 33 | 		unique_id = ''
 34 | 		while unique_id != user_id:
 35 | 			search_url = 'https://api.amemv.com/aweme/v1/discover/search/?cursor=0&keyword=%s&count=10&type=1&retry_type=no_retry&iid=17900846586&device_id=34692364855&ac=wifi&channel=xiaomi&aid=1128&app_name=aweme&version_code=162&version_name=1.6.2&device_platform=android&ssmix=a&device_type=MI+5&device_brand=Xiaomi&os_api=24&os_version=7.0&uuid=861945034132187&openudid=dc451556fc0eeadb&manifest_version_code=162&resolution=1080*1920&dpi=480&update_version_code=1622' % user_id
 36 | 			req = requests.get(search_url, headers=self.headers)
 37 | 			html = json.loads(req.text)
 38 | 			aweme_count = 32767 # html['user_list'][0]['user_info']['aweme_count']
 39 | 			uid = html['user_list'][0]['user_info']['uid']
 40 | 			nickname = html['user_list'][0]['user_info']['nickname']
 41 | 			unique_id = html['user_list'][0]['user_info']['unique_id']
 42 | 		user_url = 'https://www.amemv.com/aweme/v1/aweme/post/?user_id=%s&max_cursor=0&count=%s' % (uid, aweme_count)
 43 | 		req = requests.get(user_url, headers=self.headers)
 44 | 		html = json.loads(req.text)
 45 | 		for each in html['aweme_list']:
 46 | 			share_desc = each['share_info']['share_desc']
 47 | 			if os.name == 'nt':
 48 | 				for c in r'\/:*?"<>|':
 49 | 					share_desc = share_desc.replace(c, '')
 50 | 			unix_timestamp = each['create_time']
 51 | 			utc_time = datetime.fromtimestamp(unix_timestamp, timezone.utc)
 52 | 			local_time = utc_time.astimezone()
 53 | 			tc = local_time.strftime('%Y-%m-%d-%H-%M-%S')
 54 | 			if share_desc in ['抖音-原创音乐短视频社区', 'TikTok']:
 55 | 				video_names.append(tc + '.mp4')
 56 | 			else:
 57 | 				video_names.append(tc + '-' + share_desc + '.mp4')
 58 | 			share_urls.append(each['share_info']['share_url'])
 59 | 			video_urls.append(each['video']['play_addr']['url_list'][0])
 60 | 
 61 | 		return video_names, video_urls, share_urls, nickname
 62 | 
 63 | 	def get_download_url(self, video_url, watermark_flag):
 64 | 		"""
 65 | 		获得带水印的视频播放地址
 66 | 		Parameters:
 67 | 			video_url：带水印的视频播放地址
 68 | 		Returns:
 69 | 			download_url: 带水印的视频下载地址
 70 | 		"""
 71 | 		# 带水印视频
 72 | 		if watermark_flag == True:
 73 | 			download_url = video_url
 74 | 		# 无水印视频
 75 | 		else:
 76 | 			download_url = video_url.replace('playwm', 'play')
 77 | 
 78 | 		return download_url
 79 | 
 80 | 	def video_downloader(self, video_url, video_name, watermark_flag=False):
 81 | 		"""
 82 | 		视频下载
 83 | 		Parameters:
 84 | 			video_url: 带水印的视频地址
 85 | 			video_name: 视频名
 86 | 			watermark_flag: 是否下载带水印的视频
 87 | 		Returns:
 88 | 			无
 89 | 		"""
 90 | 		size = 0
 91 | 		video_url = self.get_download_url(video_url, watermark_flag=watermark_flag)
 92 | 		with closing(requests.get(video_url, headers=self.headers, stream=True)) as response:
 93 | 			chunk_size = 1024
 94 | 			content_size = int(response.headers['content-length'])
 95 | 			if response.status_code == 200:
 96 | 				sys.stdout.write('  [文件大小]:%0.2f MB\n' % (content_size / chunk_size / 1024))
 97 | 
 98 | 				with open(video_name, 'wb') as file:
 99 | 					for data in response.iter_content(chunk_size = chunk_size):
100 | 						file.write(data)
101 | 						size += len(data)
102 | 						file.flush()
103 | 
104 | 						sys.stdout.write('  [下载进度]:%.2f%%' % float(size / content_size * 100) + '\r')
105 | 						sys.stdout.flush()
106 | 
107 | 	def run(self):
108 | 		"""
109 | 		运行函数
110 | 		Parameters:
111 | 			None
112 | 		Returns:
113 | 			None
114 | 		"""
115 | 		self.hello()
116 | 		user_id = input('请输入ID(例如145651081):')
117 | 		watermark_flag = int(input('是否下载带水印的视频(0-否,1-是):'))
118 | 		video_names, video_urls, share_urls, nickname = self.get_video_urls(user_id)
119 | 		if nickname not in os.listdir():
120 | 			os.mkdir(nickname)
121 | 		print('视频下载中:共有%d个作品!\n' % len(video_urls))
122 | 		for num in range(len(video_urls)):
123 | 			print('  解析第%d个视频链接 [%s] 中，请稍后!\n' % (num + 1, share_urls[num]))
124 | 			if '\\' in video_names[num]:
125 | 				video_name = video_names[num].replace('\\', '')
126 | 			elif '/' in video_names[num]:
127 | 				video_name = video_names[num].replace('/', '')
128 | 			else:
129 | 				video_name = video_names[num]
130 | 			if os.path.isfile(os.path.join(nickname, video_name)):
131 | 				print('视频已存在')
132 | 			else:
133 | 				self.video_downloader(video_urls[num], os.path.join(nickname, video_name), watermark_flag)
134 | 			print('\n')
135 | 		print('下载完成!')
136 | 
137 | 	def hello(self):
138 | 		"""
139 | 		打印欢迎界面
140 | 		Parameters:
141 | 			None
142 | 		Returns:
143 | 			None
144 | 		"""
145 | 		print('*' * 100)
146 | 		print('\t\t\t\t抖音App视频下载小助手')
147 | 		print('\t\t作者:Jack Cui')
148 | 		print('*' * 100)
149 | 
150 | 
151 | if __name__ == '__main__':
152 | 	douyin = DouYin()
153 | 	douyin.run()
154 | 


--------------------------------------------------------------------------------
/downloader.py:
--------------------------------------------------------------------------------
 1 | #-*- coding: UTF-8 -*-
 2 | import requests  
 3 | from contextlib import closing
 4 | 
 5 | class ProgressBar(object):  
 6 |     def __init__(self, title, count=0.0, run_status=None, fin_status=None, total=100.0, unit='', sep='/', chunk_size=1.0):  
 7 |         super(ProgressBar, self).__init__()  
 8 |         self.info = "[%s] %s %.2f %s %s %.2f %s"  
 9 |         self.title = title  
10 |         self.total = total  
11 |         self.count = count  
12 |         self.chunk_size = chunk_size  
13 |         self.status = run_status or ""  
14 |         self.fin_status = fin_status or " " * len(self.status)  
15 |         self.unit = unit  
16 |         self.seq = sep  
17 |   
18 |     def __get_info(self):  
19 |         #[名称] 状态 进度 单位 分割线 总数 单位  
20 |         _info = self.info % (self.title, self.status, self.count/self.chunk_size, self.unit, self.seq, self.total/self.chunk_size, self.unit)  
21 |         return _info  
22 |   
23 |     def refresh(self, count = 1, status = None):  
24 |         self.count += count  
25 |         self.status = status or self.status  
26 |         end_str = "\r"  
27 |         if self.count >= self.total:  
28 |             end_str = '\n'  
29 |             self.status = status or self.fin_status  
30 |         print(self.__get_info(), end=end_str, )  
31 | 
32 | 
33 | if __name__ == '__main__':
34 | 	#url = 'http://www.demongan.com/source/game/二十四点.zip'
35 | 	#filename = '二十四点.zip'
36 | 	print('*' * 100)
37 | 	print('\t\t\t\t欢迎使用文件下载小助手')
38 | 	print('作者:Jack-Cui\n博客:http://blog.csdn.net/c406495762')
39 | 	print('*' * 100)
40 | 	url  = input('请输入需要下载的文件链接:\n')
41 | 	filename = url.split('/')[-1]
42 | 	with closing(requests.get(url, stream=True)) as response:  
43 | 		chunk_size = 1024  
44 | 		content_size = int(response.headers['content-length'])  
45 | 		if response.status_code == 200:
46 | 			print('文件大小:%0.2f KB' % (content_size / chunk_size))
47 | 			progress = ProgressBar("%s下载进度" % filename
48 | 			            , total = content_size  
49 | 			            , unit = "KB"  
50 | 			            , chunk_size = chunk_size  
51 | 			            , run_status = "正在下载"  
52 | 			            , fin_status = "下载完成")  
53 | 
54 | 			with open(filename, "wb") as file:  
55 | 			        for data in response.iter_content(chunk_size=chunk_size):  
56 | 			            file.write(data)  
57 | 			            progress.refresh(count=len(data))  
58 | 		else:
59 | 			print('链接异常')


--------------------------------------------------------------------------------
/financical.py:
--------------------------------------------------------------------------------
  1 | #-*- coding:UTF-8 -*-
  2 | import sys
  3 | import pymysql
  4 | import requests
  5 | import json
  6 | import re
  7 | from bs4 import BeautifulSoup
  8 | 
  9 | """
 10 | 类说明:获取财务数据
 11 | 
 12 | Author:
 13 | 	Jack Cui
 14 | Blog:
 15 | 	http://blog.csdn.net/c406495762
 16 | Zhihu:
 17 | 	https://www.zhihu.com/people/Jack--Cui/
 18 | Modify:
 19 | 	2017-08-31
 20 | """
 21 | class FinancialData():
 22 | 
 23 | 	def __init__(self):
 24 | 		#服务器域名
 25 | 		self.server = 'http://quotes.money.163.com/'
 26 | 		self.cwnb = 'http://quotes.money.163.com/hkstock/cwsj_'
 27 | 		#主要财务指标
 28 | 		self.cwzb_dict = {'EPS':'基本每股收益','EPS_DILUTED':'摊薄每股收益','GROSS_MARGIN':'毛利率',
 29 | 		'CAPITAL_ADEQUACY':'资本充足率','LOANS_DEPOSITS':'贷款回报率','ROTA':'总资产收益率',
 30 | 		'ROEQUITY':'净资产收益率','CURRENT_RATIO':'流动比率','QUICK_RATIO':'速动比率',
 31 | 		'ROLOANS':'存贷比','INVENTORY_TURNOVER':'存货周转率','GENERAL_ADMIN_RATIO':'管理费用比率',
 32 | 		'TOTAL_ASSET2TURNOVER':'资产周转率','FINCOSTS_GROSSPROFIT':'财务费用比率','TURNOVER_CASH':'销售现金比率','YEAREND_DATE':'报表日期'}
 33 | 		#利润表
 34 | 		self.lrb_dict = {'TURNOVER':'总营收','OPER_PROFIT':'经营利润','PBT':'除税前利润',
 35 | 		'NET_PROF':'净利润','EPS':'每股基本盈利','DPS':'每股派息',
 36 | 		'INCOME_INTEREST':'利息收益','INCOME_NETTRADING':'交易收益','INCOME_NETFEE':'费用收益','YEAREND_DATE':'报表日期'}
 37 | 		#资产负债表
 38 | 		self.fzb_dict = {
 39 | 			'FIX_ASS':'固定资产','CURR_ASS':'流动资产','CURR_LIAB':'流动负债',
 40 | 			'INVENTORY':'存款','CASH':'现金及银行存结','OTHER_ASS':'其他资产',
 41 | 			'TOTAL_ASS':'总资产','TOTAL_LIAB':'总负债','EQUITY':'股东权益',
 42 | 			'CASH_SHORTTERMFUND':'库存现金及短期资金','DEPOSITS_FROM_CUSTOMER':'客户存款',
 43 | 			'FINANCIALASSET_SALE':'可供出售之证券','LOAN_TO_BANK':'银行同业存款及贷款',
 44 | 			'DERIVATIVES_LIABILITIES':'金融负债','DERIVATIVES_ASSET':'金融资产','YEAREND_DATE':'报表日期'}
 45 | 		#现金流表
 46 | 		self.llb_dict = {
 47 | 			'CF_NCF_OPERACT':'经营活动产生的现金流','CF_INT_REC':'已收利息','CF_INT_PAID':'已付利息',
 48 | 			'CF_INT_REC':'已收股息','CF_DIV_PAID':'已派股息','CF_INV':'投资活动产生现金流',
 49 | 			'CF_FIN_ACT':'融资活动产生现金流','CF_BEG':'期初现金及现金等价物','CF_CHANGE_CSH':'现金及现金等价物净增加额',
 50 | 			'CF_END':'期末现金及现金等价物','CF_EXCH':'汇率变动影响','YEAREND_DATE':'报表日期'}
 51 | 		#总表
 52 | 		self.table_dict = {'cwzb':self.cwzb_dict,'lrb':self.lrb_dict,'fzb':self.fzb_dict,'llb':self.llb_dict}
 53 | 		#请求头
 54 | 		self.headers = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
 55 | 			'Accept-Encoding': 'gzip, deflate',
 56 | 			'Accept-Language': 'zh-CN,zh;q=0.8',
 57 | 			'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.109 Safari/537.36'}
 58 | 	
 59 | 	"""
 60 | 	函数说明:获取股票页面信息
 61 | 
 62 | 	Author:
 63 | 		Jack Cui
 64 | 	Parameters:
 65 | 	    url - 股票财务数据界面地址
 66 | 	Returns:
 67 | 	    name - 股票名
 68 | 	    table_name_list - 财务报表名称
 69 | 	    table_date_list - 财务报表年限
 70 | 	    url_list - 财务报表查询连接
 71 | 	Blog:
 72 | 		http://blog.csdn.net/c406495762
 73 | 	Zhihu:
 74 | 		https://www.zhihu.com/people/Jack--Cui/
 75 | 	Modify:
 76 | 		2017-08-31
 77 | 	"""
 78 | 	def get_informations(self, url):
 79 | 		req = requests.get(url = url, headers = self.headers)
 80 | 		req.encoding = 'utf-8'
 81 | 		html = req.text
 82 | 		page_bf = BeautifulSoup(html, 'lxml')
 83 | 		#股票名称，股票代码
 84 | 		name = page_bf.find_all('span', class_ = 'name')[0].string
 85 | 		# code = page_bf.find_all('span', class_ = 'code')[0].string
 86 | 		# code = re.findall('\d+',code)[0]
 87 | 
 88 | 		#存储各个表名的列表
 89 | 		table_name_list = []
 90 | 		table_date_list = []
 91 | 		each_date_list = []
 92 | 		url_list = []
 93 | 		#表名和表时间
 94 | 		table_name = page_bf.find_all('div', class_ = 'titlebar3')
 95 | 		for each_table_name in table_name:
 96 | 			#表名
 97 | 			table_name_list.append(each_table_name.span.string)
 98 | 			#表时间
 99 | 			for each_table_date in each_table_name.div.find_all('select', id = re.compile('.+1$')):
100 | 				url_list.append(re.findall('(\w+)1',each_table_date.get('id'))[0])
101 | 				for each_date in each_table_date.find_all('option'):
102 | 					each_date_list.append(each_date.string)
103 | 				table_date_list.append(each_date_list)
104 | 				each_date_list = []
105 | 		return name,table_name_list,table_date_list,url_list
106 | 
107 | 	"""
108 | 	函数说明:财务报表入库
109 | 
110 | 	Author:
111 | 		Jack Cui
112 | 	Parameters:
113 | 	    name - 股票名
114 | 	    table_name_list - 财务报表名称
115 | 	    table_date_list - 财务报表年限
116 | 	    url_list - 财务报表查询连接
117 | 	Returns:
118 | 		无
119 | 	Blog:
120 | 		http://blog.csdn.net/c406495762
121 | 	Zhihu:
122 | 		https://www.zhihu.com/people/Jack--Cui/
123 | 	Modify:
124 | 		2017-08-31
125 | 	"""
126 | 	def insert_tables(self, name, table_name_list,table_date_list, url_list):
127 | 		#打开数据库连接:host-连接主机地址,port-端口号,user-用户名,passwd-用户密码,db-数据库名,charset-编码
128 | 		conn = pymysql.connect(host='127.0.0.1', port=3306, user='root', passwd='yourpasswd',db='financialdata',charset='utf8')
129 | 		#使用cursor()方法获取操作游标
130 | 		cursor = conn.cursor()  
131 | 		#插入信息
132 | 		for i in range(len(table_name_list)):
133 | 			sys.stdout.write('    [正在下载       ]    %s' % table_name_list[i] + '\r')
134 | 			#获取数据地址
135 | 			url = self.server + 'hk/service/cwsj_service.php?symbol={}&start={}&end={}&type={}&unit=yuan'.format(code,table_date_list[i][-1],table_date_list[i][0],url_list[i])
136 | 			req_table = requests.get(url = url, headers = self.headers)
137 | 			table = req_table.json()
138 | 			nums = len(table)
139 | 			value_dict = {}
140 | 			for num in range(nums):
141 | 				sys.stdout.write('    [正在下载 %.2f%%]   ' % (((num+1) / nums)*100) + '\r')
142 | 				sys.stdout.flush()
143 | 				value_dict['股票名'] = name
144 | 				value_dict['股票代码'] = code
145 | 				for key, value in table[i].items():
146 | 					if key in self.table_dict[url_list[i]]:
147 | 						value_dict[self.table_dict[url_list[i]][key]] = value
148 | 
149 | 				sql1 = """
150 | 				INSERT INTO %s (`股票名`,`股票代码`,`报表日期`) VALUES ('%s','%s','%s')""" % (url_list[i],value_dict['股票名'],value_dict['股票代码'],value_dict['报表日期'])
151 | 				try:
152 | 					cursor.execute(sql1)
153 | 					# 执行sql语句
154 | 					conn.commit()
155 | 				except:
156 | 					# 发生错误时回滚
157 | 					conn.rollback()
158 | 
159 | 				for key, value in value_dict.items():
160 | 					if key not in ['股票名','股票代码','报表日期']:
161 | 						sql2 = """
162 | 						UPDATE %s SET %s='%s' WHERE `股票名`='%s' AND `报表日期`='%s'""" % (url_list[i],key,value,value_dict['股票名'],value_dict['报表日期'])
163 | 						try:
164 | 							cursor.execute(sql2)
165 | 							# 执行sql语句
166 | 							conn.commit()
167 | 						except:
168 | 							# 发生错误时回滚
169 | 							conn.rollback()
170 | 				value_dict = {}
171 | 			print('    [下载完成 ')
172 | 
173 | 		# 关闭数据库连接
174 | 		cursor.close()  
175 | 		conn.close()
176 | 
177 | if __name__ == '__main__':
178 | 	print('*' * 100)
179 | 	print('\t\t\t\t\t财务数据下载助手\n')
180 | 	print('作者:Jack-Cui\n')
181 | 	print('About Me:\n')
182 | 	print('  知乎:https://www.zhihu.com/people/Jack--Cui')
183 | 	print('  Blog:http://blog.csdn.net/c406495762')
184 | 	print('  Gihub:https://github.com/Jack-Cherish\n')
185 | 	print('*' * 100)
186 | 	fd = FinancialData()
187 | 	#上市股票地址
188 | 	code = input('请输入股票代码:')
189 | 
190 | 	name,table_name_list,table_date_list,url_list = fd.get_informations(fd.cwnb + code + '.html')
191 | 	print('\n  %s:(%s)财务数据下载中！\n' % (name,code))
192 | 	fd.insert_tables(name,table_name_list,table_date_list,url_list)
193 | 	print('\n  %s:(%s)财务数据下载完成！' % (name,code))


--------------------------------------------------------------------------------
/geetest.py:
--------------------------------------------------------------------------------
  1 | # -*-coding:utf-8 -*-
  2 | import random
  3 | import re
  4 | import time
  5 | from urllib.request import urlretrieve
  6 | 
  7 | from bs4 import BeautifulSoup
  8 | 
  9 | import PIL.Image as image
 10 | from selenium import webdriver
 11 | from selenium.webdriver import ActionChains
 12 | from selenium.webdriver.common.by import By
 13 | from selenium.webdriver.support import expected_conditions as EC
 14 | from selenium.webdriver.support.ui import WebDriverWait
 15 | 
 16 | 
 17 | class Crack():
 18 | 	def __init__(self,keyword):
 19 | 		self.url = 'http://bj.gsxt.gov.cn/sydq/loginSydqAction!sydq.dhtml'
 20 | 		self.browser = webdriver.Chrome('D:\\chromedriver.exe')
 21 | 		self.wait = WebDriverWait(self.browser, 100)
 22 | 		self.keyword = keyword
 23 | 		self.BORDER = 6
 24 | 
 25 | 	def open(self):
 26 | 		"""
 27 | 		打开浏览器,并输入查询内容
 28 | 		"""
 29 | 		self.browser.get(self.url)
 30 | 		keyword = self.wait.until(EC.presence_of_element_located((By.ID, 'keyword_qycx')))
 31 | 		bowton = self.wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'btn')))
 32 | 		keyword.send_keys(self.keyword)
 33 | 		bowton.click()
 34 | 
 35 | 	def get_images(self, bg_filename = 'bg.jpg', fullbg_filename = 'fullbg.jpg'):
 36 | 		"""
 37 | 		获取验证码图片
 38 | 		:return: 图片的location信息
 39 | 		"""
 40 | 		bg = []
 41 | 		fullgb = []
 42 | 		while bg == [] and fullgb == []:
 43 | 			bf = BeautifulSoup(self.browser.page_source, 'lxml')
 44 | 			bg = bf.find_all('div', class_ = 'gt_cut_bg_slice')
 45 | 			fullgb = bf.find_all('div', class_ = 'gt_cut_fullbg_slice')
 46 | 		bg_url = re.findall('url\(\"(.*)\"\);', bg[0].get('style'))[0].replace('webp', 'jpg')
 47 | 		fullgb_url = re.findall('url\(\"(.*)\"\);', fullgb[0].get('style'))[0].replace('webp', 'jpg')
 48 | 		bg_location_list = []
 49 | 		fullbg_location_list = []
 50 | 		for each_bg in bg:
 51 | 			location = {}
 52 | 			location['x'] = int(re.findall('background-position: (.*)px (.*)px;',each_bg.get('style'))[0][0])
 53 | 			location['y'] = int(re.findall('background-position: (.*)px (.*)px;',each_bg.get('style'))[0][1])
 54 | 			bg_location_list.append(location)
 55 | 		for each_fullgb in fullgb:
 56 | 			location = {}
 57 | 			location['x'] = int(re.findall('background-position: (.*)px (.*)px;',each_fullgb.get('style'))[0][0])
 58 | 			location['y'] = int(re.findall('background-position: (.*)px (.*)px;',each_fullgb.get('style'))[0][1])
 59 | 			fullbg_location_list.append(location)
 60 | 
 61 | 		urlretrieve(url = bg_url, filename = bg_filename)
 62 | 		print('缺口图片下载完成')
 63 | 		urlretrieve(url = fullgb_url, filename = fullbg_filename)
 64 | 		print('背景图片下载完成')
 65 | 		return bg_location_list, fullbg_location_list
 66 | 
 67 | 	def get_merge_image(self, filename, location_list):
 68 | 		"""
 69 | 		根据位置对图片进行合并还原
 70 | 		:filename:图片
 71 | 		:location_list:图片位置
 72 | 		"""
 73 | 		im = image.open(filename)
 74 | 		new_im = image.new('RGB', (260,116))
 75 | 		im_list_upper=[]
 76 | 		im_list_down=[]
 77 | 
 78 | 		for location in location_list:
 79 | 			if location['y'] == -58:
 80 | 				im_list_upper.append(im.crop((abs(location['x']),58,abs(location['x']) + 10, 166)))
 81 | 			if location['y'] == 0:
 82 | 				im_list_down.append(im.crop((abs(location['x']),0,abs(location['x']) + 10, 58)))
 83 | 
 84 | 		new_im = image.new('RGB', (260,116))
 85 | 
 86 | 		x_offset = 0
 87 | 		for im in im_list_upper:
 88 | 			new_im.paste(im, (x_offset,0))
 89 | 			x_offset += im.size[0]
 90 | 
 91 | 		x_offset = 0
 92 | 		for im in im_list_down:
 93 | 			new_im.paste(im, (x_offset,58))
 94 | 			x_offset += im.size[0]
 95 | 
 96 | 		new_im.save(filename)
 97 | 
 98 | 		return new_im
 99 | 
100 | 	def get_merge_image(self, filename, location_list):
101 | 		"""
102 | 		根据位置对图片进行合并还原
103 | 		:filename:图片
104 | 		:location_list:图片位置
105 | 		"""
106 | 		im = image.open(filename)
107 | 		new_im = image.new('RGB', (260,116))
108 | 		im_list_upper=[]
109 | 		im_list_down=[]
110 | 
111 | 		for location in location_list:
112 | 			if location['y']==-58:
113 | 				im_list_upper.append(im.crop((abs(location['x']),58,abs(location['x'])+10,166)))
114 | 			if location['y']==0:
115 | 				im_list_down.append(im.crop((abs(location['x']),0,abs(location['x'])+10,58)))
116 | 
117 | 		new_im = image.new('RGB', (260,116))
118 | 
119 | 		x_offset = 0
120 | 		for im in im_list_upper:
121 | 			new_im.paste(im, (x_offset,0))
122 | 			x_offset += im.size[0]
123 | 
124 | 		x_offset = 0
125 | 		for im in im_list_down:
126 | 			new_im.paste(im, (x_offset,58))
127 | 			x_offset += im.size[0]
128 | 
129 | 		new_im.save(filename)
130 | 
131 | 		return new_im
132 | 
133 | 	def is_pixel_equal(self, img1, img2, x, y):
134 | 		"""
135 | 		判断两个像素是否相同
136 | 		:param image1: 图片1
137 | 		:param image2: 图片2
138 | 		:param x: 位置x
139 | 		:param y: 位置y
140 | 		:return: 像素是否相同
141 | 		"""
142 | 		# 取两个图片的像素点
143 | 		pix1 = img1.load()[x, y]
144 | 		pix2 = img2.load()[x, y]
145 | 		threshold = 60
146 | 		if (abs(pix1[0] - pix2[0] < threshold) and abs(pix1[1] - pix2[1] < threshold) and abs(pix1[2] - pix2[2] < threshold)):
147 | 			return True
148 | 		else:
149 | 			return False
150 | 
151 | 	def get_gap(self, img1, img2):
152 | 		"""
153 | 		获取缺口偏移量
154 | 		:param img1: 不带缺口图片
155 | 		:param img2: 带缺口图片
156 | 		:return:
157 | 		"""
158 | 		left = 43
159 | 		for i in range(left, img1.size[0]):
160 | 			for j in range(img1.size[1]):
161 | 				if not self.is_pixel_equal(img1, img2, i, j):
162 | 					left = i
163 | 					return left
164 | 		return left	
165 | 
166 | 	def get_track(self, distance):
167 | 		"""
168 | 		根据偏移量获取移动轨迹
169 | 		:param distance: 偏移量
170 | 		:return: 移动轨迹
171 | 		"""
172 | 		# 移动轨迹
173 | 		track = []
174 | 		# 当前位移
175 | 		current = 0
176 | 		# 减速阈值
177 | 		mid = distance * 4 / 5
178 | 		# 计算间隔
179 | 		t = 0.2
180 | 		# 初速度
181 | 		v = 0
182 |         
183 | 		while current < distance:
184 | 			if current < mid:
185 | 				# 加速度为正2
186 | 				a = 2
187 | 			else:	
188 | 				# 加速度为负3
189 | 				a = -3
190 | 			# 初速度v0
191 | 			v0 = v
192 | 			# 当前速度v = v0 + at
193 | 			v = v0 + a * t
194 | 			# 移动距离x = v0t + 1/2 * a * t^2
195 | 			move = v0 * t + 1 / 2 * a * t * t
196 | 			# 当前位移
197 | 			current += move
198 | 			# 加入轨迹
199 | 			track.append(round(move))
200 | 		return track
201 | 
202 | 	def get_slider(self):
203 | 		"""
204 | 		获取滑块
205 | 		:return: 滑块对象
206 | 		"""
207 | 		while True:
208 | 			try:
209 | 				slider = self.browser.find_element_by_xpath("//div[@class='gt_slider_knob gt_show']")
210 | 				break
211 | 			except:
212 | 				time.sleep(0.5)
213 | 		return slider
214 | 
215 | 	def move_to_gap(self, slider, track):
216 | 		"""
217 | 		拖动滑块到缺口处
218 | 		:param slider: 滑块
219 | 		:param track: 轨迹
220 | 		:return:
221 | 		"""
222 | 		ActionChains(self.browser).click_and_hold(slider).perform()
223 | 		while track:
224 | 			x = random.choice(track)
225 | 			ActionChains(self.browser).move_by_offset(xoffset=x, yoffset=0).perform()
226 | 			track.remove(x)
227 | 		time.sleep(0.5)
228 | 		ActionChains(self.browser).release().perform()
229 | 
230 | 	def crack(self):
231 | 		# 打开浏览器
232 | 		self.open()
233 | 
234 | 		# 保存的图片名字
235 | 		bg_filename = 'bg.jpg'
236 | 		fullbg_filename = 'fullbg.jpg'
237 | 
238 | 		# 获取图片
239 | 		bg_location_list, fullbg_location_list = self.get_images(bg_filename, fullbg_filename)
240 | 
241 | 		# 根据位置对图片进行合并还原
242 | 		bg_img = self.get_merge_image(bg_filename, bg_location_list)
243 | 		fullbg_img = self.get_merge_image(fullbg_filename, fullbg_location_list)
244 | 
245 |         # 获取缺口位置
246 | 		gap = self.get_gap(fullbg_img, bg_img)
247 | 		print('缺口位置', gap)
248 | 
249 | 		track = self.get_track(gap-self.BORDER)
250 | 		print('滑动滑块')
251 | 		print(track)
252 | 
253 | 		# # 点按呼出缺口
254 | 		# slider = self.get_slider()
255 | 		# # 拖动滑块到缺口处
256 | 		# self.move_to_gap(slider, track)
257 | 
258 | if __name__ == '__main__':
259 | 	print('开始验证')
260 | 	crack = Crack(u'中国移动')
261 | 	crack.crack()
262 | 	print('验证成功')
263 | 


--------------------------------------------------------------------------------
/hero.py:
--------------------------------------------------------------------------------
  1 | #-*- coding: UTF-8 -*-
  2 | from urllib.request import urlretrieve
  3 | import requests
  4 | import os
  5 | 
  6 | """
  7 | 函数说明:下载《英雄联盟盒子》中的英雄图片
  8 | 
  9 | Parameters:
 10 |     url - GET请求地址，通过Fiddler抓包获取
 11 |     header - headers信息
 12 | Returns:
 13 |     无
 14 | Author:
 15 |     Jack Cui
 16 | Blog:
 17 |     http://blog.csdn.net/c406495762
 18 | Modify:
 19 |     2017-08-07
 20 | """
 21 | def hero_imgs_download(url, header):
 22 |     req = requests.get(url = url, headers = header).json()
 23 |     hero_num = len(req['list'])
 24 |     print('一共有%d个英雄' % hero_num)
 25 |     hero_images_path = 'hero_images'
 26 |     for each_hero in req['list']:
 27 |         hero_photo_url = each_hero['cover']
 28 |         hero_name = each_hero['name'] + '.jpg'
 29 |         filename = hero_images_path + '/' + hero_name
 30 |         if hero_images_path not in os.listdir():
 31 |             os.makedirs(hero_images_path)
 32 |         urlretrieve(url = hero_photo_url, filename = filename)
 33 | 
 34 | """
 35 | 函数说明:打印所有英雄的名字和ID
 36 | 
 37 | Parameters:
 38 |     url - GET请求地址，通过Fiddler抓包获取
 39 |     header - headers信息
 40 | Returns:
 41 |     无
 42 | Author:
 43 |     Jack Cui
 44 | Blog:
 45 |     http://blog.csdn.net/c406495762
 46 | Modify:
 47 |     2017-08-07
 48 | """
 49 | def hero_list(url, header):
 50 | 	print('*' * 100)
 51 | 	print('\t\t\t\t欢迎使用《王者荣耀》出装下助手！')
 52 | 	print('*' * 100)
 53 | 	req = requests.get(url = url, headers = header).json()
 54 | 	flag = 0
 55 | 	for each_hero in req['list']:
 56 | 		flag += 1
 57 | 		print('%s的ID为:%-7s' % (each_hero['name'], each_hero['hero_id']), end = '\t\t')
 58 | 		if flag == 3:
 59 | 			print('\n', end = '')
 60 | 			flag = 0
 61 | 
 62 | """
 63 | 函数说明:根据equip_id查询武器名字和价格
 64 | 
 65 | Parameters:
 66 |     equip_id - 武器的ID
 67 |     weapon_info - 存储所有武器的字典
 68 | Returns:
 69 |     weapon_name - 武器的名字
 70 |     weapon_price - 武器的价格
 71 | Author:
 72 |     Jack Cui
 73 | Blog:
 74 |     http://blog.csdn.net/c406495762
 75 | Modify:
 76 |     2017-08-07
 77 | """
 78 | def seek_weapon(equip_id, weapon_info):
 79 | 	for each_weapon in weapon_info:
 80 | 		if each_weapon['equip_id'] == str(equip_id):
 81 | 			weapon_name = each_weapon['name']
 82 | 			weapon_price = each_weapon['price']
 83 | 			return weapon_name, weapon_price
 84 | 
 85 | 
 86 | """
 87 | 函数说明:获取并打印出装信息
 88 | 
 89 | Parameters:
 90 |     url - GET请求地址，通过Fiddler抓包获取
 91 |     header - headers信息
 92 |     weapon_info - 存储所有武器的字典
 93 | Returns:
 94 | 	无
 95 | Author:
 96 |     Jack Cui
 97 | Blog:
 98 |     http://blog.csdn.net/c406495762
 99 | Modify:
100 |     2017-08-07
101 | """
102 | def hero_info(url, header, weapon_info):
103 | 	req = requests.get(url = url, headers = header).json()
104 | 	print('\n历史上的%s:\n    %s' % (req['info']['name'], req['info']['history_intro']))
105 | 	for each_equip_choice in req['info']['equip_choice']:
106 | 		print('\n%s:\n   %s' % (each_equip_choice['title'], each_equip_choice['description']))
107 | 		total_price = 0
108 | 		flag = 0
109 | 		for each_weapon in each_equip_choice['list']:
110 | 			flag += 1
111 | 			weapon_name, weapon_price = seek_weapon(each_weapon['equip_id'], weapon_info)
112 | 			print('%s:%s' % (weapon_name, weapon_price), end = '\t')
113 | 			if flag == 3:
114 | 				print('\n', end = '')
115 | 				flag = 0
116 | 			total_price += int(weapon_price)
117 | 		print('神装套件价格共计:%d' % total_price)
118 | 
119 | 
120 | """
121 | 函数说明:获取武器信息
122 | 
123 | Parameters:
124 |     url - GET请求地址，通过Fiddler抓包获取
125 |     header - headers信息
126 | Returns:
127 |     weapon_info_dict - 武器信息
128 | Author:
129 |     Jack Cui
130 | Blog:
131 |     http://blog.csdn.net/c406495762
132 | Modify:
133 |     2017-08-07
134 | """
135 | def hero_weapon(url, header):
136 |     req = requests.get(url = url, headers = header).json()
137 |     weapon_info_dict = req['list']
138 |     return weapon_info_dict
139 | 
140 | 
141 | if __name__ == '__main__':
142 |     headers = {'Accept-Charset': 'UTF-8',
143 |             'Accept-Encoding': 'gzip,deflate',
144 |             'User-Agent': 'Dalvik/2.1.0 (Linux; U; Android 6.0.1; MI 5 MIUI/V8.1.6.0.MAACNDI)',
145 |             'X-Requested-With': 'XMLHttpRequest',
146 |             'Content-type': 'application/x-www-form-urlencoded',
147 |             'Connection': 'Keep-Alive',
148 |             'Host': 'gamehelper.gm825.com'}
149 |     weapon_url = "http://gamehelper.gm825.com/wzry/equip/list?channel_id=90009a&app_id=h9044j&game_id=7622&game_name=%E7%8E%8B%E8%80%85%E8%8D%A3%E8%80%80&vcode=12.0.3&version_code=1203&cuid=2654CC14D2D3894DBF5808264AE2DAD7&ovr=6.0.1&device=Xiaomi_MI+5&net_type=1&client_id=1Yfyt44QSqu7PcVdDduBYQ%3D%3D&info_ms=fBzJ%2BCu4ZDAtl4CyHuZ%2FJQ%3D%3D&info_ma=XshbgIgi0V1HxXTqixI%2BKbgXtNtOP0%2Fn1WZtMWRWj5o%3D&mno=0&info_la=9AChHTMC3uW%2BfY8%2BCFhcFw%3D%3D&info_ci=9AChHTMC3uW%2BfY8%2BCFhcFw%3D%3D&mcc=0&clientversion=&bssid=VY%2BeiuZRJ%2FwaXmoLLVUrMODX1ZTf%2F2dzsWn2AOEM0I4%3D&os_level=23&os_id=dc451556fc0eeadb&resolution=1080_1920&dpi=480&client_ip=192.168.0.198&pdunid=a83d20d8"
150 |     heros_url = "http://gamehelper.gm825.com/wzry/hero/list?channel_id=90009a&app_id=h9044j&game_id=7622&game_name=%E7%8E%8B%E8%80%85%E8%8D%A3%E8%80%80&vcode=12.0.3&version_code=1203&cuid=2654CC14D2D3894DBF5808264AE2DAD7&ovr=6.0.1&device=Xiaomi_MI+5&net_type=1&client_id=1Yfyt44QSqu7PcVdDduBYQ%3D%3D&info_ms=fBzJ%2BCu4ZDAtl4CyHuZ%2FJQ%3D%3D&info_ma=XshbgIgi0V1HxXTqixI%2BKbgXtNtOP0%2Fn1WZtMWRWj5o%3D&mno=0&info_la=9AChHTMC3uW%2BfY8%2BCFhcFw%3D%3D&info_ci=9AChHTMC3uW%2BfY8%2BCFhcFw%3D%3D&mcc=0&clientversion=&bssid=VY%2BeiuZRJ%2FwaXmoLLVUrMODX1ZTf%2F2dzsWn2AOEM0I4%3D&os_level=23&os_id=dc451556fc0eeadb&resolution=1080_1920&dpi=480&client_ip=192.168.0.198&pdunid=a83d20d8"
151 |     hero_list(heros_url, headers)
152 |     hero_id = input("请输入要查询的英雄ID:")
153 |     hero_url = "http://gamehelper.gm825.com/wzry/hero/detail?hero_id={}&channel_id=90009a&app_id=h9044j&game_id=7622&game_name=%E7%8E%8B%E8%80%85%E8%8D%A3%E8%80%80&vcode=12.0.3&version_code=1203&cuid=2654CC14D2D3894DBF5808264AE2DAD7&ovr=6.0.1&device=Xiaomi_MI+5&net_type=1&client_id=1Yfyt44QSqu7PcVdDduBYQ%3D%3D&info_ms=fBzJ%2BCu4ZDAtl4CyHuZ%2FJQ%3D%3D&info_ma=XshbgIgi0V1HxXTqixI%2BKbgXtNtOP0%2Fn1WZtMWRWj5o%3D&mno=0&info_la=9AChHTMC3uW%2BfY8%2BCFhcFw%3D%3D&info_ci=9AChHTMC3uW%2BfY8%2BCFhcFw%3D%3D&mcc=0&clientversion=&bssid=VY%2BeiuZRJ%2FwaXmoLLVUrMODX1ZTf%2F2dzsWn2AOEM0I4%3D&os_level=23&os_id=dc451556fc0eeadb&resolution=1080_1920&dpi=480&client_ip=192.168.0.198&pdunid=a83d20d8".format(hero_id)
154 |     weapon_info_dict = hero_weapon(weapon_url, headers)
155 |     hero_info(hero_url, headers, weapon_info_dict)


--------------------------------------------------------------------------------
/one_hour_spider/biqukan.py:
--------------------------------------------------------------------------------
 1 | # -*- coding:UTF-8 -*-
 2 | from bs4 import BeautifulSoup
 3 | import requests, sys
 4 | 
 5 | """
 6 | 类说明:下载《笔趣看》网小说《一念永恒》
 7 | Parameters:
 8 | 	无
 9 | Returns:
10 | 	无
11 | Modify:
12 | 	2017-09-13
13 | """
14 | class downloader(object):
15 | 
16 | 	def __init__(self):
17 | 		self.server = 'http://www.biqukan.com/'
18 | 		self.target = 'http://www.biqukan.com/1_1094/'
19 | 		self.names = []			#存放章节名
20 | 		self.urls = []			#存放章节链接
21 | 		self.nums = 0			#章节数
22 | 
23 | 	"""
24 | 	函数说明:获取下载链接
25 | 	Parameters:
26 | 		无
27 | 	Returns:
28 | 		无
29 | 	Modify:
30 | 		2017-09-13
31 | 	"""
32 | 	def get_download_url(self):
33 | 	    req = requests.get(url = self.target)
34 | 	    html = req.text
35 | 	    div_bf = BeautifulSoup(html)
36 | 	    div = div_bf.find_all('div', class_ = 'listmain')
37 | 	    a_bf = BeautifulSoup(str(div[0]))
38 | 	    a = a_bf.find_all('a')
39 | 	    self.nums = len(a[15:])								#剔除不必要的章节，并统计章节数
40 | 	    for each in a[15:]:
41 | 	    	self.names.append(each.string)
42 | 	    	self.urls.append(self.server + each.get('href'))
43 | 
44 | 	"""
45 | 	函数说明:获取章节内容
46 | 	Parameters:
47 | 		target - 下载连接(string)
48 | 	Returns:
49 | 		texts - 章节内容(string)
50 | 	Modify:
51 | 		2017-09-13
52 | 	"""
53 | 	def get_contents(self, target):
54 | 		req = requests.get(url = target)
55 | 		html = req.text
56 | 		bf = BeautifulSoup(html)
57 | 		texts = bf.find_all('div', class_ = 'showtxt')
58 | 		texts = texts[0].text.replace('\xa0'*8,'\n\n')
59 | 		return texts
60 | 
61 | 	"""
62 | 	函数说明:将爬取的文章内容写入文件
63 | 	Parameters:
64 | 		name - 章节名称(string)
65 | 		path - 当前路径下,小说保存名称(string)
66 | 		text - 章节内容(string)
67 | 	Returns:
68 | 		无
69 | 	Modify:
70 | 		2017-09-13
71 | 	"""
72 | 	def writer(self, name, path, text):
73 | 		write_flag = True
74 | 		with open(path, 'a', encoding='utf-8') as f:
75 | 			f.write(name + '\n')
76 | 			f.writelines(text)
77 | 			f.write('\n\n')
78 | 
79 | if __name__ == "__main__":
80 | 	dl = downloader()
81 | 	dl.get_download_url()
82 | 	print('《一年永恒》开始下载：')
83 | 	for i in range(dl.nums):
84 | 		dl.writer(dl.names[i], '一念永恒.txt', dl.get_contents(dl.urls[i]))
85 | 		sys.stdout.write("  已下载:%.3f%%" %  float(i/dl.nums*100) + '\r')
86 | 		sys.stdout.flush()
87 | 	print('《一年永恒》下载完成')
88 | 


--------------------------------------------------------------------------------
/one_hour_spider/unsplash.py:
--------------------------------------------------------------------------------
 1 | # -*- coding:UTF-8 -*-
 2 | import requests, json, time, sys
 3 | from contextlib import closing
 4 | 
 5 | class get_photos(object):
 6 | 
 7 | 	def __init__(self):
 8 | 		self.photos_id = []
 9 | 		self.download_server = 'https://unsplash.com/photos/xxx/download?force=trues'
10 | 		self.target = 'http://unsplash.com/napi/feeds/home'
11 | 		self.headers = {'authorization':'Client-ID c94869b36aa272dd62dfaeefed769d4115fb3189a9d1ec88ed457207747be626'}
12 | 
13 | 	"""
14 | 	函数说明:获取图片ID
15 | 	Parameters:
16 | 		无
17 | 	Returns:
18 | 		无
19 | 	Modify:
20 | 		2017-09-13
21 | 	"""	
22 | 	def get_ids(self):
23 | 		req = requests.get(url=self.target, headers=self.headers, verify=False)
24 | 		html = json.loads(req.text)
25 | 		next_page = html['next_page']
26 | 		for each in html['photos']:
27 | 			self.photos_id.append(each['id'])
28 | 		time.sleep(1)
29 | 		for i in range(5):
30 | 			req = requests.get(url=next_page, headers=self.headers, verify=False)
31 | 			html = json.loads(req.text)
32 | 			next_page = html['next_page']
33 | 			for each in html['photos']:
34 | 				self.photos_id.append(each['id'])
35 | 			time.sleep(1)
36 | 
37 | 
38 | 	"""
39 | 	函数说明:图片下载
40 | 	Parameters:
41 | 		无
42 | 	Returns:
43 | 		无
44 | 	Modify:
45 | 		2017-09-13
46 | 	"""	
47 | 	def download(self, photo_id, filename):
48 | 		headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36'}
49 | 		target = self.download_server.replace('xxx', photo_id)
50 | 		with closing(requests.get(url=target, stream=True, verify = False, headers = self.headers)) as r:
51 | 			with open('%d.jpg' % filename, 'ab+') as f:
52 | 				for chunk in r.iter_content(chunk_size = 1024):
53 | 					if chunk:
54 | 						f.write(chunk)
55 | 						f.flush()
56 | 
57 | if __name__ == '__main__':
58 | 	gp = get_photos()
59 | 	print('获取图片连接中:')
60 | 	gp.get_ids()
61 | 	print('图片下载中:')
62 | 	for i in range(len(gp.photos_id)):
63 | 		print('  正在下载第%d张图片' % (i+1))
64 | 		gp.download(gp.photos_id[i], (i+1))


--------------------------------------------------------------------------------
/one_hour_spider/vidoe_downloader.py:
--------------------------------------------------------------------------------
 1 | #-*- coding:UTF-8 -*-
 2 | import requests,re, json, sys
 3 | from bs4 import BeautifulSoup
 4 | from urllib import request
 5 | 
 6 | class video_downloader():
 7 | 	def __init__(self, url):
 8 | 		self.server = 'http://api.xfsub.com'
 9 | 		self.api = 'http://api.xfsub.com/xfsub_api/?url='
10 | 		self.get_url_api = 'http://api.xfsub.com/xfsub_api/url.php'
11 | 		self.url = url.split('#')[0]
12 | 		self.headers = {'Referer': 'http://api.xfsub.com/xfsub_api/?url=%s?qqdrsign=055a4' % self.url}
13 | 		self.target = self.api + self.url
14 | 		self.s = requests.session()
15 | 
16 | 	"""
17 | 	函数说明:获取key、time、url等参数
18 | 	Parameters:
19 | 		无
20 | 	Returns:
21 | 		无
22 | 	Modify:
23 | 		2017-09-18
24 | 	"""
25 | 	def get_key(self):
26 | 		req = self.s.get(url=self.target)
27 | 		req.encoding = 'utf-8'
28 | 		self.info = json.loads(re.findall('"url.php",\ (.+),', req.text)[0])	#使用正则表达式匹配结果，将匹配的结果存入info变量中
29 | 
30 | 	"""
31 | 	函数说明:获取视频地址
32 | 	Parameters:
33 | 		无
34 | 	Returns:
35 | 		video_url - 视频存放地址
36 | 	Modify:
37 | 		2017-09-18
38 | 	"""
39 | 	def get_url(self):
40 | 		data = {'time':self.info['time'],
41 | 			'key':self.info['key'],
42 | 			'url':self.info['url'],
43 | 			'type':''}
44 | 		req = self.s.post(url=self.get_url_api,data=data, headers=self.headers)
45 | 		url = self.server + json.loads(req.text)['url']
46 | 		req = self.s.get(url=url, headers=self.headers)
47 | 		bf = BeautifulSoup(req.text,'xml')										#因为文件是xml格式的，所以要进行xml解析。
48 | 		video_url = bf.find('file').string										#匹配到视频地址
49 | 		return video_url
50 | 
51 | 	"""
52 | 	函数说明:回调函数，打印下载进度
53 | 	Parameters:
54 | 		a b c - 返回信息
55 | 	Returns:
56 | 		无
57 | 	Modify:
58 | 		2017-09-18
59 | 	"""
60 | 	def Schedule(self, a, b, c):
61 | 		per = 100.0*a*b/c
62 | 		if per > 100 :
63 | 			per = 1
64 | 		sys.stdout.write("  " + "%.2f%% 已经下载的大小:%ld 文件大小:%ld" % (per,a*b,c) + '\r')
65 | 		sys.stdout.flush()
66 | 
67 | 	"""
68 | 	函数说明:视频下载
69 | 	Parameters:
70 | 		url - 视频地址
71 | 		filename - 视频名字
72 | 	Returns:
73 | 		无
74 | 	Modify:
75 | 		2017-09-18
76 | 	"""
77 | 	def video_download(self, url, filename):
78 | 		request.urlretrieve(url=url,filename=filename,reporthook=self.Schedule)
79 | 
80 | 
81 | if __name__ == '__main__':
82 | 	url = 'http://www.iqiyi.com/v_19rr7qhfg0.html#vfrm=19-9-0-1'
83 | 	vd = video_downloader(url)
84 | 	filename = '加勒比海盗5'
85 | 	print('%s下载中:' % filename)
86 | 	vd.get_key()
87 | 	video_url = vd.get_url()
88 | 	print('  获取地址成功:%s' % video_url)
89 | 	vd.video_download(video_url, filename+'.mp4')
90 | 	print('\n下载完成！')
91 | 


--------------------------------------------------------------------------------
/shuaia.py:
--------------------------------------------------------------------------------
 1 | # -*- coding:UTF-8 -*-
 2 | from bs4 import BeautifulSoup
 3 | from urllib.request import urlretrieve
 4 | import requests
 5 | import os
 6 | import time
 7 | 
 8 | if __name__ == '__main__':
 9 | 	list_url = []
10 | 	for num in range(1,3):
11 | 		if num == 1:
12 | 			url = 'http://www.shuaia.net/index.html'
13 | 		else:
14 | 			url = 'http://www.shuaia.net/index_%d.html' % num
15 | 		headers = {
16 | 				"User-Agent":"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
17 | 		}
18 | 		req = requests.get(url = url,headers = headers)
19 | 		req.encoding = 'utf-8'
20 | 		html = req.text
21 | 		bf = BeautifulSoup(html, 'lxml')
22 | 		targets_url = bf.find_all(class_='item-img')
23 | 		
24 | 		for each in targets_url:
25 | 			list_url.append(each.img.get('alt') + '=' + each.get('href'))
26 | 
27 | 	print('连接采集完成')
28 | 
29 | 	for each_img in list_url:
30 | 		img_info = each_img.split('=')
31 | 		target_url = img_info[1]
32 | 		filename = img_info[0] + '.jpg'
33 | 		print('下载：' + filename)
34 | 		headers = {
35 | 			"User-Agent":"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
36 | 		}
37 | 		img_req = requests.get(url = target_url,headers = headers)
38 | 		img_req.encoding = 'utf-8'
39 | 		img_html = img_req.text
40 | 		img_bf_1 = BeautifulSoup(img_html, 'lxml')
41 | 		img_url = img_bf_1.find_all('div', class_='wr-single-content-list')
42 | 		img_bf_2 = BeautifulSoup(str(img_url), 'lxml')
43 | 		img_url = 'http://www.shuaia.net' + img_bf_2.div.img.get('src')
44 | 		if 'images' not in os.listdir():
45 | 			os.makedirs('images')
46 | 		urlretrieve(url = img_url,filename = 'images/' + filename)
47 | 		time.sleep(1)
48 | 
49 | 	print('下载完成！')


--------------------------------------------------------------------------------
/video_downloader/Images/bg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sukersuker/python--spider/0e743c1503212cff9d2a800b6c5df344d81362b7/video_downloader/Images/bg.png


--------------------------------------------------------------------------------
/video_downloader/Images/qrcode.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sukersuker/python--spider/0e743c1503212cff9d2a800b6c5df344d81362b7/video_downloader/Images/qrcode.png


--------------------------------------------------------------------------------
/video_downloader/MyQR/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sukersuker/python--spider/0e743c1503212cff9d2a800b6c5df344d81362b7/video_downloader/MyQR/__init__.py


--------------------------------------------------------------------------------
/video_downloader/MyQR/__pycache__/__init__.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sukersuker/python--spider/0e743c1503212cff9d2a800b6c5df344d81362b7/video_downloader/MyQR/__pycache__/__init__.cpython-34.pyc


--------------------------------------------------------------------------------
/video_downloader/MyQR/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sukersuker/python--spider/0e743c1503212cff9d2a800b6c5df344d81362b7/video_downloader/MyQR/__pycache__/__init__.cpython-35.pyc


--------------------------------------------------------------------------------
/video_downloader/MyQR/__pycache__/myqr.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sukersuker/python--spider/0e743c1503212cff9d2a800b6c5df344d81362b7/video_downloader/MyQR/__pycache__/myqr.cpython-34.pyc


--------------------------------------------------------------------------------
/video_downloader/MyQR/__pycache__/myqr.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sukersuker/python--spider/0e743c1503212cff9d2a800b6c5df344d81362b7/video_downloader/MyQR/__pycache__/myqr.cpython-35.pyc


--------------------------------------------------------------------------------
/video_downloader/MyQR/__pycache__/terminal.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sukersuker/python--spider/0e743c1503212cff9d2a800b6c5df344d81362b7/video_downloader/MyQR/__pycache__/terminal.cpython-35.pyc


--------------------------------------------------------------------------------
/video_downloader/MyQR/mylibs/ECC.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from MyQR.mylibs.constant import GP_list, ecc_num_per_block, lindex, po2, log
 4 |  
 5 | #ecc: Error Correction Codewords
 6 | def encode(ver, ecl, data_codewords):
 7 |     en = ecc_num_per_block[ver-1][lindex[ecl]]
 8 |     ecc = []
 9 |     for dc in data_codewords:
10 |         ecc.append(get_ecc(dc, en))
11 |     return ecc
12 | 
13 | def get_ecc(dc, ecc_num):
14 |     gp = GP_list[ecc_num]
15 |     remainder = dc
16 |     for i in range(len(dc)):
17 |         remainder = divide(remainder, *gp)
18 |     return remainder
19 |     
20 | def divide(MP, *GP):
21 |     if MP[0]:
22 |         GP = list(GP)
23 |         for i in range(len(GP)):
24 |             GP[i] += log[MP[0]]
25 |             if GP[i] > 255:
26 |                 GP[i] %= 255
27 |             GP[i] = po2[GP[i]]
28 |         return XOR(GP, *MP)
29 |     else:
30 |         return XOR([0]*len(GP), *MP)
31 |     
32 |     
33 | def XOR(GP, *MP):
34 |     MP = list(MP)
35 |     a = len(MP) - len(GP)
36 |     if a < 0:
37 |         MP += [0] * (-a)
38 |     elif a > 0:
39 |         GP += [0] * a
40 |     
41 |     remainder = []
42 |     for i in range(1, len(MP)):
43 |         remainder.append(MP[i]^GP[i])
44 |     return remainder


--------------------------------------------------------------------------------
/video_downloader/MyQR/mylibs/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 


--------------------------------------------------------------------------------
/video_downloader/MyQR/mylibs/__pycache__/ECC.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sukersuker/python--spider/0e743c1503212cff9d2a800b6c5df344d81362b7/video_downloader/MyQR/mylibs/__pycache__/ECC.cpython-34.pyc


--------------------------------------------------------------------------------
/video_downloader/MyQR/mylibs/__pycache__/ECC.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sukersuker/python--spider/0e743c1503212cff9d2a800b6c5df344d81362b7/video_downloader/MyQR/mylibs/__pycache__/ECC.cpython-35.pyc


--------------------------------------------------------------------------------
/video_downloader/MyQR/mylibs/__pycache__/__init__.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sukersuker/python--spider/0e743c1503212cff9d2a800b6c5df344d81362b7/video_downloader/MyQR/mylibs/__pycache__/__init__.cpython-34.pyc


--------------------------------------------------------------------------------
/video_downloader/MyQR/mylibs/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sukersuker/python--spider/0e743c1503212cff9d2a800b6c5df344d81362b7/video_downloader/MyQR/mylibs/__pycache__/__init__.cpython-35.pyc


--------------------------------------------------------------------------------
/video_downloader/MyQR/mylibs/__pycache__/constant.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sukersuker/python--spider/0e743c1503212cff9d2a800b6c5df344d81362b7/video_downloader/MyQR/mylibs/__pycache__/constant.cpython-34.pyc


--------------------------------------------------------------------------------
/video_downloader/MyQR/mylibs/__pycache__/constant.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sukersuker/python--spider/0e743c1503212cff9d2a800b6c5df344d81362b7/video_downloader/MyQR/mylibs/__pycache__/constant.cpython-35.pyc


--------------------------------------------------------------------------------
/video_downloader/MyQR/mylibs/__pycache__/data.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sukersuker/python--spider/0e743c1503212cff9d2a800b6c5df344d81362b7/video_downloader/MyQR/mylibs/__pycache__/data.cpython-34.pyc


--------------------------------------------------------------------------------
/video_downloader/MyQR/mylibs/__pycache__/data.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sukersuker/python--spider/0e743c1503212cff9d2a800b6c5df344d81362b7/video_downloader/MyQR/mylibs/__pycache__/data.cpython-35.pyc


--------------------------------------------------------------------------------
/video_downloader/MyQR/mylibs/__pycache__/draw.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sukersuker/python--spider/0e743c1503212cff9d2a800b6c5df344d81362b7/video_downloader/MyQR/mylibs/__pycache__/draw.cpython-34.pyc


--------------------------------------------------------------------------------
/video_downloader/MyQR/mylibs/__pycache__/draw.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sukersuker/python--spider/0e743c1503212cff9d2a800b6c5df344d81362b7/video_downloader/MyQR/mylibs/__pycache__/draw.cpython-35.pyc


--------------------------------------------------------------------------------
/video_downloader/MyQR/mylibs/__pycache__/matrix.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sukersuker/python--spider/0e743c1503212cff9d2a800b6c5df344d81362b7/video_downloader/MyQR/mylibs/__pycache__/matrix.cpython-34.pyc


--------------------------------------------------------------------------------
/video_downloader/MyQR/mylibs/__pycache__/matrix.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sukersuker/python--spider/0e743c1503212cff9d2a800b6c5df344d81362b7/video_downloader/MyQR/mylibs/__pycache__/matrix.cpython-35.pyc


--------------------------------------------------------------------------------
/video_downloader/MyQR/mylibs/__pycache__/structure.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sukersuker/python--spider/0e743c1503212cff9d2a800b6c5df344d81362b7/video_downloader/MyQR/mylibs/__pycache__/structure.cpython-34.pyc


--------------------------------------------------------------------------------
/video_downloader/MyQR/mylibs/__pycache__/structure.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sukersuker/python--spider/0e743c1503212cff9d2a800b6c5df344d81362b7/video_downloader/MyQR/mylibs/__pycache__/structure.cpython-35.pyc


--------------------------------------------------------------------------------
/video_downloader/MyQR/mylibs/__pycache__/theqrmodule.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sukersuker/python--spider/0e743c1503212cff9d2a800b6c5df344d81362b7/video_downloader/MyQR/mylibs/__pycache__/theqrmodule.cpython-34.pyc


--------------------------------------------------------------------------------
/video_downloader/MyQR/mylibs/__pycache__/theqrmodule.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sukersuker/python--spider/0e743c1503212cff9d2a800b6c5df344d81362b7/video_downloader/MyQR/mylibs/__pycache__/theqrmodule.cpython-35.pyc


--------------------------------------------------------------------------------
/video_downloader/MyQR/mylibs/constant.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | ***** for data.py *******
  4 | """
  5 | # character capacities
  6 | # {level1: [version1(mode1,mode2,mode3,mode4), version2(..,..,..,..), ...],
  7 | #   level2: [version1(mode1,mode2,mode3,mode4), version2(..,..,..,..),...],
  8 | #   ...}
  9 | char_cap = {
 10 |         'L': [(41, 25, 17, 10), (77, 47, 32, 20), (127, 77, 53, 32), (187, 114, 78, 48), (255, 154, 106, 65), (322, 195, 134, 82), (370, 224, 154, 95), (461, 279, 192, 118), (552, 335, 230, 141), (652, 395, 271, 167), (772, 468, 321, 198), (883, 535, 367, 226), (1022, 619, 425, 262), (1101, 667, 458, 282), (1250, 758, 520, 320), (1408, 854, 586, 361), (1548, 938, 644, 397), (1725, 1046, 718, 442), (1903, 1153, 792, 488), (2061, 1249, 858, 528), (2232, 1352, 929, 572), (2409, 1460, 1003, 618), (2620, 1588, 1091, 672), (2812, 1704, 1171, 721), (3057, 1853, 1273, 784), (3283, 1990, 1367, 842), (3517, 2132, 1465, 902), (3669, 2223, 1528, 940), (3909, 2369, 1628, 1002), (4158, 2520, 1732, 1066), (4417, 2677, 1840, 1132), (4686, 2840, 1952, 1201), (4965, 3009, 2068, 1273), (5253, 3183, 2188, 1347), (5529, 3351, 2303, 1417), (5836, 3537, 2431, 1496), (6153, 3729, 2563, 1577), (6479, 3927, 2699, 1661), (6743, 4087, 2809, 1729), (7089, 4296, 2953, 1817)],
 11 |         'M': [(34, 20, 14, 8), (63, 38, 26, 16), (101, 61, 42, 26), (149, 90, 62, 38), (202, 122, 84, 52), (255, 154, 106, 65), (293, 178, 122, 75), (365, 221, 152, 93), (432, 262, 180, 111), (513, 311, 213, 131), (604, 366, 251, 155), (691, 419, 287, 177), (796, 483, 331, 204), (871, 528, 362, 223), (991, 600, 412, 254), (1082, 656, 450, 277), (1212, 734, 504, 310), (1346, 816, 560, 345), (1500, 909, 624, 384), (1600, 970, 666, 410), (1708, 1035, 711, 438), (1872, 1134, 779, 480), (2059, 1248, 857, 528), (2188, 1326, 911, 561), (2395, 1451, 997, 614), (2544, 1542, 1059, 652), (2701, 1637, 1125, 692), (2857, 1732, 1190, 732), (3035, 1839, 1264, 778), (3289, 1994, 1370, 843), (3486, 2113, 1452, 894), (3693, 2238, 1538, 947), (3909, 2369, 1628, 1002), (4134, 2506, 1722, 1060), (4343, 2632, 1809, 1113), (4588, 2780, 1911, 1176), (4775, 2894, 1989, 1224), (5039, 3054, 2099, 1292), (5313, 3220, 2213, 1362), (5596, 3391, 2331, 1435)],
 12 |         'Q': [(27, 16, 11, 7), (48, 29, 20, 12), (77, 47, 32, 20), (111, 67, 46, 28), (144, 87, 60, 37), (178, 108, 74, 45), (207, 125, 86, 53), (259, 157, 108, 66), (312, 189, 130, 80), (364, 221, 151, 93), (427, 259, 177, 109), (489, 296, 203, 125), (580, 352, 241, 149), (621, 376, 258, 159), (703, 426, 292, 180), (775, 470, 322, 198), (876, 531, 364, 224), (948, 574, 394, 243), (1063, 644, 442, 272), (1159, 702, 482, 297), (1224, 742, 509, 314), (1358, 823, 565, 348), (1468, 890, 611, 376), (1588, 963, 661, 407), (1718, 1041, 715, 440), (1804, 1094, 751, 462), (1933, 1172, 805, 496), (2085, 1263, 868, 534), (2181, 1322, 908, 559), (2358, 1429, 982, 604), (2473, 1499, 1030, 634), (2670, 1618, 1112, 684), (2805, 1700, 1168, 719), (2949, 1787, 1228, 756), (3081, 1867, 1283, 790), (3244, 1966, 1351, 832), (3417, 2071, 1423, 876), (3599, 2181, 1499, 923), (3791, 2298, 1579, 972), (3993, 2420, 1663, 1024)],
 13 |         'H': [(17, 10, 7, 4), (34, 20, 14, 8), (58, 35, 24, 15), (82, 50, 34, 21), (106, 64, 44, 27), (139, 84, 58, 36), (154, 93, 64, 39), (202, 122, 84, 52), (235, 143, 98, 60), (288, 174, 119, 74), (331, 200, 137, 85), (374, 227, 155, 96), (427, 259, 177, 109), (468, 283, 194, 120), (530, 321, 220, 136), (602, 365, 250, 154), (674, 408, 280, 173), (746, 452, 310, 191), (813, 493, 338, 208), (919, 557, 382, 235), (969, 587, 403, 248), (1056, 640, 439, 270), (1108, 672, 461, 284), (1228, 744, 511, 315), (1286, 779, 535, 330), (1425, 864, 593, 365), (1501, 910, 625, 385), (1581, 958, 658, 405), (1677, 1016, 698, 430), (1782, 1080, 742, 457), (1897, 1150, 790, 486), (2022, 1226, 842, 518), (2157, 1307, 898, 553), (2301, 1394, 958, 590), (2361, 1431, 983, 605), (2524, 1530, 1051, 647), (2625, 1591, 1093, 673), (2735, 1658, 1139, 701), (2927, 1774, 1219, 750), (3057, 1852, 1273, 784)]
 14 |         }
 15 | 
 16 | mindex = {'numeric':0, 'alphanumeric':1, 'byte':2, 'kanji':3}
 17 | 
 18 | # [
 19 | # version1[level1,level2,level3,level4], 
 20 | # version2[..,..,..,..],
 21 | # ...
 22 | #   ]
 23 | required_bytes = [
 24 |         [19, 16, 13, 9], [34, 28, 22, 16], [55, 44, 34, 26], [80, 64, 48, 36], [108, 86, 62, 46], [136, 108, 76, 60], [156, 124, 88, 66], [194, 154, 110, 86], [232, 182, 132, 100], [274, 216, 154, 122], [324, 254, 180, 140], [370, 290, 206, 158], [428, 334, 244, 180], [461, 365, 261, 197], [523, 415, 295, 223], [589, 453, 325, 253], [647, 507, 367, 283], [721, 563, 397, 313], [795, 627, 445, 341], [861, 669, 485, 385], [932, 714, 512, 406], [1006, 782, 568, 442], [1094, 860, 614, 464], [1174, 914, 664, 514], [1276, 1000, 718, 538], [1370, 1062, 754, 596], [1468, 1128, 808, 628], [1531, 1193, 871, 661], [1631, 1267, 911, 701], [1735, 1373, 985, 745], [1843, 1455, 1033, 793], [1955, 1541, 1115, 845], [2071, 1631, 1171, 901], [2191, 1725, 1231, 961], [2306, 1812, 1286, 986], [2434, 1914, 1354, 1054], [2566, 1992, 1426, 1096], [2702, 2102, 1502, 1142], [2812, 2216, 1582, 1222], [2956, 2334, 1666, 1276]
 25 |         ]
 26 | 
 27 | num_list = '0123456789'
 28 | alphanum_list = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ $%*+-./:'
 29 | 
 30 | # [
 31 | # version1[
 32 | #       level1(num_of_group_1_blocks, DC_per_group_1_block, num_of_group_2_blocks, DC_per_group_2_block),
 33 | #       level2(..,..,..,..),
 34 | #       level3(..,..,..,..),
 35 | #       level4(..,..,..,..)
 36 | #       ], 
 37 | # version2[level1(..), level2(..), level3(..), level4(..)],
 38 | # ...
 39 | #   ]
 40 | grouping_list = [
 41 |     [(1, 19, 0, 0), (1, 16, 0, 0), (1, 13, 0, 0), (1, 9, 0, 0)], [(1, 34, 0, 0), (1, 28, 0, 0), (1, 22, 0, 0), (1, 16, 0, 0)], [(1, 55, 0, 0), (1, 44, 0, 0), (2, 17, 0, 0), (2, 13, 0, 0)], [(1, 80, 0, 0), (2, 32, 0, 0), (2, 24, 0, 0), (4, 9, 0, 0)], [(1, 108, 0, 0), (2, 43, 0, 0), (2, 15, 2, 16), (2, 11, 2, 12)], [(2, 68, 0, 0), (4, 27, 0, 0), (4, 19, 0, 0), (4, 15, 0, 0)], [(2, 78, 0, 0), (4, 31, 0, 0), (2, 14, 4, 15), (4, 13, 1, 14)], [(2, 97, 0, 0), (2, 38, 2, 39), (4, 18, 2, 19), (4, 14, 2, 15)], [(2, 116, 0, 0), (3, 36, 2, 37), (4, 16, 4, 17), (4, 12, 4, 13)], [(2, 68, 2, 69), (4, 43, 1, 44), (6, 19, 2, 20), (6, 15, 2, 16)], [(4, 81, 0, 0), (1, 50, 4, 51), (4, 22, 4, 23), (3, 12, 8, 13)], [(2, 92, 2, 93), (6, 36, 2, 37), (4, 20, 6, 21), (7, 14, 4, 15)], [(4, 107, 0, 0), (8, 37, 1, 38), (8, 20, 4, 21), (12, 11, 4, 12)], [(3, 115, 1, 116), (4, 40, 5, 41), (11, 16, 5, 17), (11, 12, 5, 13)], [(5, 87, 1, 88), (5, 41, 5, 42), (5, 24, 7, 25), (11, 12, 7, 13)], [(5, 98, 1, 99), (7, 45, 3, 46), (15, 19, 2, 20), (3, 15, 13, 16)], [(1, 107, 5, 108), (10, 46, 1, 47), (1, 22, 15, 23), (2, 14, 17, 15)], [(5, 120, 1, 121), (9, 43, 4, 44), (17, 22, 1, 23), (2, 14, 19, 15)], [(3, 113, 4, 114), (3, 44, 11, 45), (17, 21, 4, 22), (9, 13, 16, 14)], [(3, 107, 5, 108), (3, 41, 13, 42), (15, 24, 5, 25), (15, 15, 10, 16)], [(4, 116, 4, 117), (17, 42, 0, 0), (17, 22, 6, 23), (19, 16, 6, 17)], [(2, 111, 7, 112), (17, 46, 0, 0), (7, 24, 16, 25), (34, 13, 0, 0)], [(4, 121, 5, 122), (4, 47, 14, 48), (11, 24, 14, 25), (16, 15, 14, 16)], [(6, 117, 4, 118), (6, 45, 14, 46), (11, 24, 16, 25), (30, 16, 2, 17)], [(8, 106, 4, 107), (8, 47, 13, 48), (7, 24, 22, 25), (22, 15, 13, 16)], [(10, 114, 2, 115), (19, 46, 4, 47), (28, 22, 6, 23), (33, 16, 4, 17)], [(8, 122, 4, 123), (22, 45, 3, 46), (8, 23, 26, 24), (12, 15, 28, 16)], [(3, 117, 10, 118), (3, 45, 23, 46), (4, 24, 31, 25), (11, 15, 31, 16)], [(7, 116, 7, 117), (21, 45, 7, 46), (1, 23, 37, 24), (19, 15, 26, 16)], [(5, 115, 10, 116), (19, 47, 10, 48), (15, 24, 25, 25), (23, 15, 25, 16)], [(13, 115, 3, 116), (2, 46, 29, 47), (42, 24, 1, 25), (23, 15, 28, 16)], [(17, 115, 0, 0), (10, 46, 23, 47), (10, 24, 35, 25), (19, 15, 35, 16)], [(17, 115, 1, 116), (14, 46, 21, 47), (29, 24, 19, 25), (11, 15, 46, 16)], [(13, 115, 6, 116), (14, 46, 23, 47), (44, 24, 7, 25), (59, 16, 1, 17)], [(12, 121, 7, 122), (12, 47, 26, 48), (39, 24, 14, 25), (22, 15, 41, 16)], [(6, 121, 14, 122), (6, 47, 34, 48), (46, 24, 10, 25), (2, 15, 64, 16)], [(17, 122, 4, 123), (29, 46, 14, 47), (49, 24, 10, 25), (24, 15, 46, 16)], [(4, 122, 18, 123), (13, 46, 32, 47), (48, 24, 14, 25), (42, 15, 32, 16)], [(20, 117, 4, 118), (40, 47, 7, 48), (43, 24, 22, 25), (10, 15, 67, 16)], [(19, 118, 6, 119), (18, 47, 31, 48), (34, 24, 34, 25), (20, 15, 61, 16)]
 42 |     ]
 43 | 
 44 | mode_indicator = {'numeric': '0001', 'alphanumeric': '0010', 'byte': '0100', 'kanji': '1000'}
 45 | 
 46 | 
 47 | 
 48 | """
 49 | ******  for ECC.py  *******
 50 | """
 51 | #GP: Generator Polynomial, MP: Message Polynomial
 52 | GP_list = {
 53 |         7: [0, 87, 229, 146, 149, 238, 102, 21],
 54 |         10: [0, 251, 67, 46, 61, 118, 70, 64, 94, 32, 45],
 55 |         13: [0, 74, 152, 176, 100, 86, 100, 106, 104, 130, 218, 206, 140, 78],
 56 |         15: [0, 8, 183, 61, 91, 202, 37, 51, 58, 58, 237, 140, 124, 5, 99, 105],
 57 |         16: [0, 120, 104, 107, 109, 102, 161, 76, 3, 91, 191, 147, 169, 182, 194, 225, 120],
 58 |         17: [0, 43, 139, 206, 78, 43, 239, 123, 206, 214, 147, 24, 99, 150, 39, 243, 163, 136],
 59 |         18: [0, 215, 234, 158, 94, 184, 97, 118, 170, 79, 187, 152, 148, 252, 179, 5, 98, 96, 153],
 60 |         20: [0, 17, 60, 79, 50, 61, 163, 26, 187, 202, 180, 221, 225, 83, 239, 156, 164, 212, 212, 188, 190],
 61 |         22: [0, 210, 171, 247, 242, 93, 230, 14, 109, 221, 53, 200, 74, 8, 172, 98, 80, 219, 134, 160, 105, 165, 231],
 62 |         24: [0, 229, 121, 135, 48, 211, 117, 251, 126, 159, 180, 169, 152, 192, 226, 228, 218, 111, 0, 117, 232, 87, 96, 227, 21],
 63 |         26: [0, 173, 125, 158, 2, 103, 182, 118, 17, 145, 201, 111, 28, 165, 53, 161, 21, 245, 142, 13, 102, 48, 227, 153, 145, 218, 70],
 64 |         28: [0, 168, 223, 200, 104, 224, 234, 108, 180, 110, 190, 195, 147, 205, 27, 232, 201, 21, 43, 245, 87, 42, 195, 212, 119, 242, 37, 9, 123],
 65 |         30: [0, 41, 173, 145, 152, 216, 31, 179, 182, 50, 48, 110, 86, 239, 96, 222, 125, 42, 173, 226, 193, 224, 130, 156, 37, 251, 216, 238, 40, 192, 180]
 66 |         }
 67 | 
 68 | # Error Correction Codewords per block
 69 | # [version1(level1,level2,level3,level4),
 70 | #  version2(..,..,..,..),
 71 | #   ....]
 72 | ecc_num_per_block = [
 73 |     (7, 10, 13, 17), (10, 16, 22, 28), (15, 26, 18, 22), (20, 18, 26, 16), (26, 24, 18, 22), (18, 16, 24, 28), (20, 18, 18, 26), (24, 22, 22, 26), (30, 22, 20, 24), (18, 26, 24, 28), (20, 30, 28, 24), (24, 22, 26, 28), (26, 22, 24, 22), (30, 24, 20, 24), (22, 24, 30, 24), (24, 28, 24, 30), (28, 28, 28, 28), (30, 26, 28, 28), (28, 26, 26, 26), (28, 26, 30, 28), (28, 26, 28, 30), (28, 28, 30, 24), (30, 28, 30, 30), (30, 28, 30, 30), (26, 28, 30, 30), (28, 28, 28, 30), (30, 28, 30, 30), (30, 28, 30, 30), (30, 28, 30, 30), (30, 28, 30, 30), (30, 28, 30, 30), (30, 28, 30, 30), (30, 28, 30, 30), (30, 28, 30, 30), (30, 28, 30, 30), (30, 28, 30, 30), (30, 28, 30, 30), (30, 28, 30, 30), (30, 28, 30, 30), (30, 28, 30, 30)
 74 |     ]
 75 |         
 76 | 
 77 | # powers of 2 list  
 78 | po2 = [
 79 |     1, 2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38, 76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192, 157, 39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159, 35, 70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111, 222, 161, 95, 190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15, 30, 60, 120, 240, 253, 231, 211, 187, 107, 214, 177, 127, 254, 225, 223, 163, 91, 182, 113, 226, 217, 175, 67, 134, 17, 34, 68, 136, 13, 26, 52, 104, 208, 189, 103, 206, 129, 31, 62, 124, 248, 237, 199, 147, 59, 118, 236, 197, 151, 51, 102, 204, 133, 23, 46, 92, 184, 109, 218, 169, 79, 158, 33, 66, 132, 21, 42, 84, 168, 77, 154, 41, 82, 164, 85, 170, 73, 146, 57, 114, 228, 213, 183, 115, 230, 209, 191, 99, 198, 145, 63, 126, 252, 229, 215, 179, 123, 246, 241, 255, 227, 219, 171, 75, 150, 49, 98, 196, 149, 55, 110, 220, 165, 87, 174, 65, 130, 25, 50, 100, 200, 141, 7, 14, 28, 56, 112, 224, 221, 167, 83, 166, 81, 162, 89, 178, 121, 242, 249, 239, 195, 155, 43, 86, 172, 69, 138, 9, 18, 36, 72, 144, 61, 122, 244, 245, 247, 243, 251, 235, 203, 139, 11, 22, 44, 88, 176, 125, 250, 233, 207, 131, 27, 54, 108, 216, 173, 71, 142, 1
 80 |     ]
 81 |     
 82 | # log list
 83 | log = [
 84 |     None, 0, 1, 25, 2, 50, 26, 198, 3, 223, 51, 238, 27, 104, 199, 75, 4, 100, 224, 14, 52, 141, 239, 129, 28, 193, 105, 248, 200, 8, 76, 113, 5, 138, 101, 47, 225, 36, 15, 33, 53, 147, 142, 218, 240, 18, 130, 69, 29, 181, 194, 125, 106, 39, 249, 185, 201, 154, 9, 120, 77, 228, 114, 166, 6, 191, 139, 98, 102, 221, 48, 253, 226, 152, 37, 179, 16, 145, 34, 136, 54, 208, 148, 206, 143, 150, 219, 189, 241, 210, 19, 92, 131, 56, 70, 64, 30, 66, 182, 163, 195, 72, 126, 110, 107, 58, 40, 84, 250, 133, 186, 61, 202, 94, 155, 159, 10, 21, 121, 43, 78, 212, 229, 172, 115, 243, 167, 87, 7, 112, 192, 247, 140, 128, 99, 13, 103, 74, 222, 237, 49, 197, 254, 24, 227, 165, 153, 119, 38, 184, 180, 124, 17, 68, 146, 217, 35, 32, 137, 46, 55, 63, 209, 91, 149, 188, 207, 205, 144, 135, 151, 178, 220, 252, 190, 97, 242, 86, 211, 171, 20, 42, 93, 158, 132, 60, 57, 83, 71, 109, 65, 162, 31, 45, 67, 216, 183, 123, 164, 118, 196, 23, 73, 236, 127, 12, 111, 246, 108, 161, 59, 82, 41, 157, 85, 170, 251, 96, 134, 177, 187, 204, 62, 90, 203, 89, 95, 176, 156, 169, 160, 81, 11, 245, 22, 235, 122, 117, 44, 215, 79, 174, 213, 233, 230, 231, 173, 232, 116, 214, 244, 234, 168, 80, 88, 175
 85 |     ]
 86 | 
 87 | """
 88 | ******  for data.py + ECC.py + structure.py + matrix.py  *******
 89 | """
 90 | lindex = {'L':0, 'M':1, 'Q':2, 'H':3}
 91 |     
 92 | """
 93 | ******  for structure.py  *******
 94 | """    
 95 | required_remainder_bits = (0,7,7,7,7,7,0,0,0,0,0,0,0,3,3,3,3,3,3,3,4,4,4,4,4,4,4,3,3,3,3,3,3,3,0,0,0,0,0,0)
 96 | 
 97 | # [
 98 | # version1[
 99 | #       level1(num_of_group_1_blocks, DC_per_group_1_block, num_of_group_2_blocks, DC_per_group_2_block),
100 | #       level2(..,..,..,..),
101 | #       level3(..,..,..,..),
102 | #       level4(..,..,..,..)
103 | #       ], 
104 | # version2[level1(..), level2(..), level3(..), level4(..)],
105 | # ...
106 | #   ]
107 | grouping_list = [
108 |     [(1, 19, 0, 0), (1, 16, 0, 0), (1, 13, 0, 0), (1, 9, 0, 0)], [(1, 34, 0, 0), (1, 28, 0, 0), (1, 22, 0, 0), (1, 16, 0, 0)], [(1, 55, 0, 0), (1, 44, 0, 0), (2, 17, 0, 0), (2, 13, 0, 0)], [(1, 80, 0, 0), (2, 32, 0, 0), (2, 24, 0, 0), (4, 9, 0, 0)], [(1, 108, 0, 0), (2, 43, 0, 0), (2, 15, 2, 16), (2, 11, 2, 12)], [(2, 68, 0, 0), (4, 27, 0, 0), (4, 19, 0, 0), (4, 15, 0, 0)], [(2, 78, 0, 0), (4, 31, 0, 0), (2, 14, 4, 15), (4, 13, 1, 14)], [(2, 97, 0, 0), (2, 38, 2, 39), (4, 18, 2, 19), (4, 14, 2, 15)], [(2, 116, 0, 0), (3, 36, 2, 37), (4, 16, 4, 17), (4, 12, 4, 13)], [(2, 68, 2, 69), (4, 43, 1, 44), (6, 19, 2, 20), (6, 15, 2, 16)], [(4, 81, 0, 0), (1, 50, 4, 51), (4, 22, 4, 23), (3, 12, 8, 13)], [(2, 92, 2, 93), (6, 36, 2, 37), (4, 20, 6, 21), (7, 14, 4, 15)], [(4, 107, 0, 0), (8, 37, 1, 38), (8, 20, 4, 21), (12, 11, 4, 12)], [(3, 115, 1, 116), (4, 40, 5, 41), (11, 16, 5, 17), (11, 12, 5, 13)], [(5, 87, 1, 88), (5, 41, 5, 42), (5, 24, 7, 25), (11, 12, 7, 13)], [(5, 98, 1, 99), (7, 45, 3, 46), (15, 19, 2, 20), (3, 15, 13, 16)], [(1, 107, 5, 108), (10, 46, 1, 47), (1, 22, 15, 23), (2, 14, 17, 15)], [(5, 120, 1, 121), (9, 43, 4, 44), (17, 22, 1, 23), (2, 14, 19, 15)], [(3, 113, 4, 114), (3, 44, 11, 45), (17, 21, 4, 22), (9, 13, 16, 14)], [(3, 107, 5, 108), (3, 41, 13, 42), (15, 24, 5, 25), (15, 15, 10, 16)], [(4, 116, 4, 117), (17, 42, 0, 0), (17, 22, 6, 23), (19, 16, 6, 17)], [(2, 111, 7, 112), (17, 46, 0, 0), (7, 24, 16, 25), (34, 13, 0, 0)], [(4, 121, 5, 122), (4, 47, 14, 48), (11, 24, 14, 25), (16, 15, 14, 16)], [(6, 117, 4, 118), (6, 45, 14, 46), (11, 24, 16, 25), (30, 16, 2, 17)], [(8, 106, 4, 107), (8, 47, 13, 48), (7, 24, 22, 25), (22, 15, 13, 16)], [(10, 114, 2, 115), (19, 46, 4, 47), (28, 22, 6, 23), (33, 16, 4, 17)], [(8, 122, 4, 123), (22, 45, 3, 46), (8, 23, 26, 24), (12, 15, 28, 16)], [(3, 117, 10, 118), (3, 45, 23, 46), (4, 24, 31, 25), (11, 15, 31, 16)], [(7, 116, 7, 117), (21, 45, 7, 46), (1, 23, 37, 24), (19, 15, 26, 16)], [(5, 115, 10, 116), (19, 47, 10, 48), (15, 24, 25, 25), (23, 15, 25, 16)], [(13, 115, 3, 116), (2, 46, 29, 47), (42, 24, 1, 25), (23, 15, 28, 16)], [(17, 115, 0, 0), (10, 46, 23, 47), (10, 24, 35, 25), (19, 15, 35, 16)], [(17, 115, 1, 116), (14, 46, 21, 47), (29, 24, 19, 25), (11, 15, 46, 16)], [(13, 115, 6, 116), (14, 46, 23, 47), (44, 24, 7, 25), (59, 16, 1, 17)], [(12, 121, 7, 122), (12, 47, 26, 48), (39, 24, 14, 25), (22, 15, 41, 16)], [(6, 121, 14, 122), (6, 47, 34, 48), (46, 24, 10, 25), (2, 15, 64, 16)], [(17, 122, 4, 123), (29, 46, 14, 47), (49, 24, 10, 25), (24, 15, 46, 16)], [(4, 122, 18, 123), (13, 46, 32, 47), (48, 24, 14, 25), (42, 15, 32, 16)], [(20, 117, 4, 118), (40, 47, 7, 48), (43, 24, 22, 25), (10, 15, 67, 16)], [(19, 118, 6, 119), (18, 47, 31, 48), (34, 24, 34, 25), (20, 15, 61, 16)]
109 |     ]
110 | 
111 | 
112 | 
113 | """
114 | ******  for matrix.py  *******
115 | """
116 | # Alignment Pattern Locations
117 | alig_location = [
118 |     (6, 18), (6, 22), (6, 26), (6, 30), (6, 34), (6, 22, 38), (6, 24, 42), (6, 26, 46), (6, 28, 50), (6, 30, 54), (6, 32, 58), (6, 34, 62), (6, 26, 46, 66), (6, 26, 48, 70), (6, 26, 50, 74), (6, 30, 54, 78), (6, 30, 56, 82), (6, 30, 58, 86), (6, 34, 62, 90), (6, 28, 50, 72, 94), (6, 26, 50, 74, 98), (6, 30, 54, 78, 102), (6, 28, 54, 80, 106), (6, 32, 58, 84, 110), (6, 30, 58, 86, 114), (6, 34, 62, 90, 118), (6, 26, 50, 74, 98, 122), (6, 30, 54, 78, 102, 126), (6, 26, 52, 78, 104, 130), (6, 30, 56, 82, 108, 134), (6, 34, 60, 86, 112, 138), (6, 30, 58, 86, 114, 142), (6, 34, 62, 90, 118, 146), (6, 30, 54, 78, 102, 126, 150), (6, 24, 50, 76, 102, 128, 154), (6, 28, 54, 80, 106, 132, 158), (6, 32, 58, 84, 110, 136, 162), (6, 26, 54, 82, 110, 138, 166), (6, 30, 58, 86, 114, 142, 170)
119 |     ]
120 | 
121 | # List of all Format Information Strings
122 | # [
123 | #   level1[mask_pattern0, mask_pattern1, mask_...3,...], 
124 | #   level2[...], 
125 | #   level3[...], 
126 | #   level4[...]
127 | #   ]    
128 | format_info_str = [
129 |     ['111011111000100', '111001011110011', '111110110101010', '111100010011101', '110011000101111', '110001100011000', '110110001000001', '110100101110110'], ['101010000010010', '101000100100101', '101111001111100', '101101101001011', '100010111111001', '100000011001110', '100111110010111', '100101010100000'], ['011010101011111', '011000001101000', '011111100110001', '011101000000110', '010010010110100', '010000110000011', '010111011011010', '010101111101101'], ['001011010001001', '001001110111110', '001110011100111', '001100111010000', '000011101100010', '000001001010101', '000110100001100', '000100000111011']
130 |     ]
131 | 
132 | # Version Information Strings
133 | version_info_str = [
134 |     '000111110010010100', '001000010110111100', '001001101010011001', '001010010011010011', '001011101111110110', '001100011101100010', '001101100001000111', '001110011000001101', '001111100100101000', '010000101101111000', '010001010001011101', '010010101000010111', '010011010100110010', '010100100110100110', '010101011010000011', '010110100011001001', '010111011111101100', '011000111011000100', '011001000111100001', '011010111110101011', '011011000010001110', '011100110000011010', '011101001100111111', '011110110101110101', '011111001001010000', '100000100111010101', '100001011011110000', '100010100010111010', '100011011110011111', '100100101100001011', '100101010000101110', '100110101001100100', '100111010101000001', '101000110001101001'
135 |     ]
136 | 


--------------------------------------------------------------------------------
/video_downloader/MyQR/mylibs/data.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | from MyQR.mylibs.constant import char_cap, required_bytes, mindex, lindex, num_list, alphanum_list, grouping_list, mode_indicator
  4 |        
  5 | # ecl: Error Correction Level(L,M,Q,H)
  6 | def encode(ver, ecl, str):
  7 |     mode_encoding = {
  8 |             'numeric': numeric_encoding,
  9 |             'alphanumeric': alphanumeric_encoding,
 10 |             'byte': byte_encoding,
 11 |             'kanji': kanji_encoding
 12 |             }
 13 |           
 14 |     ver, mode = analyse(ver, ecl, str)
 15 |     
 16 |     # print('line 16: mode:', mode)
 17 |     
 18 |     code = mode_indicator[mode] + get_cci(ver, mode, str) + mode_encoding[mode](str)
 19 |     
 20 |     # Add a Terminator
 21 |     rqbits = 8 * required_bytes[ver-1][lindex[ecl]]
 22 |     b = rqbits - len(code)
 23 |     code += '0000' if b >= 4 else '0' * b
 24 |     
 25 |     # Make the Length a Multiple of 8
 26 |     while len(code) % 8 != 0:
 27 |         code += '0'
 28 |     
 29 |     # Add Pad Bytes if the String is Still too Short
 30 |     while len(code) < rqbits:    
 31 |         code += '1110110000010001' if rqbits - len(code) >= 16 else '11101100'
 32 |         
 33 |     data_code = [code[i:i+8] for i in range(len(code)) if i%8 == 0]
 34 |     data_code = [int(i,2) for i in data_code]
 35 | 
 36 |     g = grouping_list[ver-1][lindex[ecl]]
 37 |     data_codewords, i = [], 0
 38 |     for n in range(g[0]):
 39 |         data_codewords.append(data_code[i:i+g[1]])
 40 |         i += g[1]
 41 |     for n in range(g[2]):
 42 |         data_codewords.append(data_code[i:i+g[3]])
 43 |         i += g[3]
 44 |     
 45 |     return ver, data_codewords
 46 |     
 47 | def analyse(ver, ecl, str):
 48 |     if all(i in num_list for i in str):
 49 |         mode = 'numeric'
 50 |     elif all(i in alphanum_list for i in str):
 51 |         mode = 'alphanumeric'
 52 |     else:
 53 |         mode = 'byte'
 54 |     
 55 |     m = mindex[mode]
 56 |     l = len(str)
 57 |     for i in range(40):
 58 |         if char_cap[ecl][i][m] > l:
 59 |             ver = i + 1 if i+1 > ver else ver
 60 |             break
 61 |  
 62 |     return ver, mode
 63 | 
 64 | def numeric_encoding(str):   
 65 |     str_list = [str[i:i+3] for i in range(0,len(str),3)]
 66 |     code = ''
 67 |     for i in str_list:
 68 |         rqbin_len = 10
 69 |         if len(i) == 1: 
 70 |             rqbin_len = 4
 71 |         elif len(i) == 2:
 72 |             rqbin_len = 7
 73 |         code_temp = bin(int(i))[2:]
 74 |         code += ('0'*(rqbin_len - len(code_temp)) + code_temp)
 75 |     return code
 76 |     
 77 | def alphanumeric_encoding(str):
 78 |     code_list = [alphanum_list.index(i) for i in str]
 79 |     code = ''
 80 |     for i in range(1, len(code_list), 2):
 81 |         c = bin(code_list[i-1] * 45 + code_list[i])[2:]
 82 |         c = '0'*(11-len(c)) + c
 83 |         code += c
 84 |     if i != len(code_list) - 1:
 85 |         c = bin(code_list[-1])[2:]
 86 |         c = '0'*(6-len(c)) + c
 87 |         code += c
 88 |     
 89 |     return code
 90 |     
 91 | def byte_encoding(str):
 92 |     code = ''
 93 |     for i in str:
 94 |         c = bin(ord(i.encode('iso-8859-1')))[2:]
 95 |         c = '0'*(8-len(c)) + c
 96 |         code += c
 97 |     return code
 98 |     
 99 | def kanji_encoding(str):
100 |     pass
101 |     
102 | # cci: character count indicator  
103 | def get_cci(ver, mode, str):
104 |     if 1 <= ver <= 9:
105 |         cci_len = (10, 9, 8, 8)[mindex[mode]]
106 |     elif 10 <= ver <= 26:
107 |         cci_len = (12, 11, 16, 10)[mindex[mode]]
108 |     else:
109 |         cci_len = (14, 13, 16, 12)[mindex[mode]]
110 |         
111 |     cci = bin(len(str))[2:]
112 |     cci = '0' * (cci_len - len(cci)) + cci
113 |     return cci
114 |     
115 | if __name__ == '__main__':
116 |     s = '123456789'
117 |     v, datacode = encode(1, 'H', s)
118 |     print(v, datacode)


--------------------------------------------------------------------------------
/video_downloader/MyQR/mylibs/draw.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from PIL import Image
 4 | import os
 5 | 
 6 | def draw_qrcode(abspath, qrmatrix):
 7 |     unit_len = 3
 8 |     x = y = 4*unit_len
 9 |     pic = Image.new('1', [(len(qrmatrix)+8)*unit_len]*2, 'white')
10 |     
11 |     for line in qrmatrix:
12 |         for module in line:
13 |             if module:
14 |                 draw_a_black_unit(pic, x, y, unit_len)
15 |             x += unit_len
16 |         x, y = 4*unit_len, y+unit_len
17 | 
18 |     saving = os.path.join(abspath, 'qrcode.png')
19 |     pic.save(saving)
20 |     return saving
21 |     
22 | def draw_a_black_unit(p, x, y, ul):
23 |     for i in range(ul):
24 |         for j in range(ul):
25 |             p.putpixel((x+i, y+j), 0)


--------------------------------------------------------------------------------
/video_downloader/MyQR/mylibs/matrix.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 |      
  3 | from MyQR.mylibs.constant import alig_location, format_info_str, version_info_str, lindex
  4 |     
  5 | def get_qrmatrix(ver, ecl, bits):
  6 |     num = (ver - 1) * 4 + 21
  7 |     qrmatrix = [[None] * num for i in range(num)]
  8 |     #  [([None] * num * num)[i:i+num] for i in range(num * num) if i % num == 0] 
  9 | 
 10 |     # Add the Finder Patterns & Add the Separators
 11 |     add_finder_and_separator(qrmatrix)
 12 |     
 13 |     # Add the Alignment Patterns
 14 |     add_alignment(ver, qrmatrix)
 15 |     
 16 |     # Add the Timing Patterns
 17 |     add_timing(qrmatrix)
 18 |     
 19 |     # Add the Dark Module and Reserved Areas
 20 |     add_dark_and_reserving(ver, qrmatrix)
 21 |     
 22 |     maskmatrix = [i[:] for i in qrmatrix]
 23 |     
 24 |     # Place the Data Bits
 25 |     place_bits(bits, qrmatrix)
 26 |     
 27 |     # Data Masking
 28 |     mask_num, qrmatrix = mask(maskmatrix, qrmatrix)
 29 |     
 30 |     # Format Information
 31 |     add_format_and_version_string(ver, ecl, mask_num, qrmatrix)
 32 | 
 33 |     return qrmatrix
 34 | 
 35 | def add_finder_and_separator(m):             
 36 |     for i in range(8):
 37 |         for j in range(8):
 38 |             if i in (0, 6):
 39 |                 m[i][j] = m[-i-1][j] = m[i][-j-1] = 0 if j == 7 else 1
 40 |             elif i in (1, 5):
 41 |                 m[i][j] = m[-i-1][j] = m[i][-j-1] = 1 if j in (0, 6) else 0  
 42 |             elif i == 7:
 43 |                 m[i][j] = m[-i-1][j] = m[i][-j-1] = 0
 44 |             else:
 45 |                 m[i][j] = m[-i-1][j] = m[i][-j-1] = 0 if j in (1, 5, 7) else 1
 46 |     
 47 | def add_alignment(ver, m):
 48 |     if ver > 1:
 49 |         coordinates = alig_location[ver-2]
 50 |         for i in coordinates:
 51 |             for j in coordinates:
 52 |                 if m[i][j] is None:
 53 |                     add_an_alignment(i, j, m)
 54 |             
 55 | def add_an_alignment(row, column, m):
 56 |     for i in range(row-2, row+3):
 57 |         for j in range(column-2, column+3):
 58 |             m[i][j] = 1 if i in (row-2, row+2) or j in (column-2, column+2) else 0
 59 |     m[row][column] = 1
 60 |     
 61 | def add_timing(m):
 62 |     for i in range(8, len(m)-8):
 63 |         m[i][6] = m[6][i] = 1 if i % 2 ==0 else 0
 64 |     
 65 | def add_dark_and_reserving(ver, m):
 66 |     for j in range(8):
 67 |         m[8][j] = m[8][-j-1] = m[j][8] = m[-j-1][8] = 0
 68 |     m[8][8] = 0
 69 |     m[8][6] = m[6][8] = m[-8][8] = 1
 70 |     
 71 |     if ver > 6:
 72 |         for i in range(6):
 73 |             for j in (-9, -10, -11):
 74 |                 m[i][j] = m[j][i] = 0
 75 |                 
 76 | def place_bits(bits, m):
 77 |     bit = (int(i) for i in bits)
 78 | 
 79 |     up = True
 80 |     for a in range(len(m)-1, 0, -2):
 81 |         a = a-1 if a <= 6 else a
 82 |         irange = range(len(m)-1, -1, -1) if up else range(len(m))
 83 |         for i in irange:
 84 |             for j in (a, a-1):
 85 |                 if m[i][j] is None:
 86 |                     m[i][j] = next(bit)
 87 |         up = not up
 88 |   
 89 | def mask(mm, m):
 90 |     mps = get_mask_patterns(mm)
 91 |     scores = []
 92 |     for mp in mps:
 93 |         for i in range(len(mp)):
 94 |             for j in range(len(mp)):
 95 |                 mp[i][j] = mp[i][j] ^ m[i][j]
 96 |         scores.append(compute_score(mp))
 97 |     best = scores.index(min(scores))
 98 |     return best, mps[best]
 99 |     
100 | def get_mask_patterns(mm):
101 |     def formula(i, row, column):
102 |         if i == 0:
103 |             return (row + column) % 2 == 0
104 |         elif i == 1:
105 |             return row % 2 == 0
106 |         elif i == 2:
107 |             return column % 3 == 0
108 |         elif i == 3:
109 |             return (row + column) % 3 == 0
110 |         elif i == 4:
111 |             return (row // 2 + column // 3) % 2 == 0
112 |         elif i == 5:
113 |             return ((row * column) % 2) + ((row * column) % 3) == 0
114 |         elif i == 6:
115 |             return (((row * column) % 2) + ((row * column) % 3)) % 2 == 0
116 |         elif i == 7:
117 |             return 	(((row + column) % 2) + ((row * column) % 3)) % 2 == 0
118 | 
119 |     mm[-8][8] = None
120 |     for i in range(len(mm)):
121 |         for j in range(len(mm)):
122 |             mm[i][j] = 0 if mm[i][j] is not None else mm[i][j]
123 |     mps = []
124 |     for i in range(8):
125 |         mp = [ii[:] for ii in mm]
126 |         for row in range(len(mp)):
127 |             for column in range(len(mp)):
128 |                 mp[row][column] = 1 if mp[row][column] is None and formula(i, row, column) else 0
129 |         mps.append(mp)
130 |         
131 |     return mps
132 |             
133 | def compute_score(m):
134 |     def evaluation1(m):
135 |         def ev1(ma):
136 |             sc = 0
137 |             for mi in ma:
138 |                 j = 0
139 |                 while j < len(mi)-4:
140 |                     n = 4
141 |                     while mi[j:j+n+1] in [[1]*(n+1), [0]*(n+1)]:
142 |                         n += 1
143 |                     (sc, j) = (sc+n-2, j+n) if n > 4 else (sc, j+1)
144 |             return sc
145 |         return ev1(m) + ev1(list(map(list, zip(*m))))
146 |         
147 |     def evaluation2(m):
148 |         sc = 0
149 |         for i in range(len(m)-1):
150 |             for j in range(len(m)-1):
151 |                 sc += 3 if m[i][j] == m[i+1][j] == m[i][j+1] == m[i+1][j+1] else 0
152 |         return sc
153 |         
154 |     def evaluation3(m):
155 |         def ev3(ma):
156 |             sc = 0
157 |             for mi in ma:
158 |                 j = 0
159 |                 while j < len(mi)-10:
160 |                     if mi[j:j+11] == [1,0,1,1,1,0,1,0,0,0,0]:
161 |                         sc += 40
162 |                         j += 7
163 |                     elif mi[j:j+11] == [0,0,0,0,1,0,1,1,1,0,1]:
164 |                         sc += 40
165 |                         j += 4
166 |                     else:
167 |                         j += 1
168 |             return sc
169 |         return ev3(m) + ev3(list(map(list, zip(*m))))
170 |         
171 |     def evaluation4(m):
172 |         darknum = 0
173 |         for i in m:
174 |             darknum += sum(i)
175 |         percent = darknum  / (len(m)**2) * 100
176 |         s = int((50 - percent) / 5) * 5
177 |         return 2*s if s >=0 else -2*s
178 | 
179 |     score = evaluation1(m) + evaluation2(m)+ evaluation3(m) + evaluation4(m)
180 |     return score
181 |     
182 | def add_format_and_version_string(ver, ecl, mask_num, m):
183 |     fs = [int(i) for i in format_info_str[lindex[ecl]][mask_num]]
184 |     for j in range(6):
185 |         m[8][j] = m[-j-1][8] = fs[j]
186 |         m[8][-j-1] = m[j][8] = fs[-j-1]
187 |     m[8][7] = m[-7][8] = fs[6]
188 |     m[8][8] = m[8][-8] = fs[7]
189 |     m[7][8] = m[8][-7] = fs[8]
190 |     
191 |     if ver > 6:
192 |         vs = (int(i) for i in version_info_str[ver-7])
193 |         for j in range(5, -1, -1):
194 |             for i in (-9, -10, -11):
195 |                 m[i][j] = m[j][i] = next(vs)


--------------------------------------------------------------------------------
/video_downloader/MyQR/mylibs/structure.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from MyQR.mylibs.constant import required_remainder_bits, lindex, grouping_list
 4 | 
 5 | def structure_final_bits(ver, ecl, data_codewords, ecc):
 6 |     final_message = interleave_dc(ver, ecl, data_codewords) + interleave_ecc(ecc)
 7 |     
 8 |     # convert to binary & Add Remainder Bits if Necessary
 9 |     final_bits = ''.join(['0'*(8-len(i))+i for i in [bin(i)[2:] for i in final_message]]) + '0' * required_remainder_bits[ver-1]
10 |     
11 |     return final_bits
12 | 
13 | def interleave_dc(ver, ecl, data_codewords):
14 |     id = []
15 |     for t in zip(*data_codewords):
16 |         id += list(t)
17 |     g = grouping_list[ver-1][lindex[ecl]]
18 |     if g[3]:
19 |         for i in range(g[2]):
20 |             id.append(data_codewords[i-g[2]][-1])
21 |     return id
22 |     
23 | def interleave_ecc(ecc):
24 |     ie = []
25 |     for t in zip(*ecc):
26 |         ie += list(t)
27 |     return ie


--------------------------------------------------------------------------------
/video_downloader/MyQR/mylibs/theqrmodule.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from MyQR.mylibs import data, ECC, structure, matrix, draw
 4 | 
 5 | # ver: Version from 1 to 40
 6 | # ecl: Error Correction Level (L,M,Q,H)
 7 | # get a qrcode picture of 3*3 pixels per module
 8 | def get_qrcode(ver, ecl, str, save_place):
 9 |     # Data Coding
10 |     ver, data_codewords = data.encode(ver, ecl, str)
11 | 
12 |     # Error Correction Coding
13 |     ecc = ECC.encode(ver, ecl, data_codewords)
14 |     
15 |     # Structure final bits
16 |     final_bits = structure.structure_final_bits(ver, ecl, data_codewords, ecc)
17 |     
18 |     # Get the QR Matrix
19 |     qrmatrix = matrix.get_qrmatrix(ver, ecl, final_bits)
20 |         
21 |     # Draw the picture and Save it, then return the real ver and the absolute name
22 |     return ver, draw.draw_qrcode(save_place, qrmatrix)
23 | 


--------------------------------------------------------------------------------
/video_downloader/MyQR/myqr.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | import os
  5 | from MyQR.mylibs import theqrmodule
  6 | from PIL import Image
  7 |    
  8 | # Positional parameters
  9 | #   words: str
 10 | #
 11 | # Optional parameters
 12 | #   version: int, from 1 to 40
 13 | #   level: str, just one of ('L','M','Q','H')
 14 | #   picutre: str, a filename of a image
 15 | #   colorized: bool
 16 | #   constrast: float
 17 | #   brightness: float
 18 | #   save_name: str, the output filename like 'example.png'
 19 | #   save_dir: str, the output directory
 20 | #
 21 | # See [https://github.com/sylnsfar/qrcode] for more details!
 22 | def run(words, version=1, level='H', picture=None, colorized=False, contrast=1.0, brightness=1.0, save_name=None, save_dir=os.getcwd()):
 23 | 
 24 |     supported_chars = r"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz ··,.:;+-*/\~!@#$%^&`'=<>[]()?_{}|"
 25 | 
 26 | 
 27 |     # check every parameter
 28 |     if not isinstance(words, str) or any(i not in supported_chars for i in words):
 29 |         raise ValueError('Wrong words! Make sure the characters are supported!')
 30 |     if not isinstance(version, int) or version not in range(1, 41):
 31 |         raise ValueError('Wrong version! Please choose a int-type value from 1 to 40!')
 32 |     if not isinstance(level, str) or len(level)>1 or level not in 'LMQH':
 33 |         raise ValueError("Wrong level! Please choose a str-type level from {'L','M','Q','H'}!")
 34 |     if picture:
 35 |         if not isinstance(picture, str) or not os.path.isfile(picture) or picture[-4:] not in ('.jpg','.png','.bmp','.gif'):
 36 |             raise ValueError("Wrong picture! Input a filename that exists and be tailed with one of {'.jpg', '.png', '.bmp', '.gif'}!")
 37 |         if picture[-4:] == '.gif' and save_name and save_name[-4:] != '.gif':
 38 |             raise ValueError('Wrong save_name! If the picuter is .gif format, the output filename should be .gif format, too!')
 39 |         if not isinstance(colorized, bool):
 40 |             raise ValueError('Wrong colorized! Input a bool-type value!')
 41 |         if not isinstance(contrast, float):
 42 |             raise ValueError('Wrong contrast! Input a float-type value!')
 43 |         if not isinstance(brightness, float):
 44 |             raise ValueError('Wrong brightness! Input a float-type value!')
 45 |     if save_name and (not isinstance(save_name, str) or save_name[-4:] not in ('.jpg','.png','.bmp','.gif')):
 46 |         raise ValueError("Wrong save_name! Input a filename tailed with one of {'.jpg', '.png', '.bmp', '.gif'}!")
 47 |     if not os.path.isdir(save_dir):
 48 |         raise ValueError('Wrong save_dir! Input a existing-directory!')
 49 |     
 50 |         
 51 |     def combine(ver, qr_name, bg_name, colorized, contrast, brightness, save_dir, save_name=None):
 52 |         from MyQR.mylibs.constant import alig_location
 53 |         from PIL import ImageEnhance, ImageFilter
 54 |         
 55 |         qr = Image.open(qr_name)
 56 |         qr = qr.convert('RGBA') if colorized else qr
 57 |         
 58 |         bg0 = Image.open(bg_name).convert('RGBA')
 59 |         bg0 = ImageEnhance.Contrast(bg0).enhance(contrast)
 60 |         bg0 = ImageEnhance.Brightness(bg0).enhance(brightness)
 61 | 
 62 |         if bg0.size[0] < bg0.size[1]:
 63 |             bg0 = bg0.resize((qr.size[0]-24, (qr.size[0]-24)*int(bg0.size[1]/bg0.size[0])))
 64 |         else:
 65 |             bg0 = bg0.resize(((qr.size[1]-24)*int(bg0.size[0]/bg0.size[1]), qr.size[1]-24))    
 66 |             
 67 |         bg = bg0 if colorized else bg0.convert('1')
 68 |         
 69 |         aligs = []
 70 |         if ver > 1:
 71 |             aloc = alig_location[ver-2]
 72 |             for a in range(len(aloc)):
 73 |                 for b in range(len(aloc)):
 74 |                     if not ((a==b==0) or (a==len(aloc)-1 and b==0) or (a==0 and b==len(aloc)-1)):
 75 |                         for i in range(3*(aloc[a]-2), 3*(aloc[a]+3)):
 76 |                             for j in range(3*(aloc[b]-2), 3*(aloc[b]+3)):
 77 |                                 aligs.append((i,j))
 78 | 
 79 |         for i in range(qr.size[0]-24):
 80 |             for j in range(qr.size[1]-24):
 81 |                 if not ((i in (18,19,20)) or (j in (18,19,20)) or (i<24 and j<24) or (i<24 and j>qr.size[1]-49) or (i>qr.size[0]-49 and j<24) or ((i,j) in aligs) or (i%3==1 and j%3==1) or (bg0.getpixel((i,j))[3]==0)):
 82 |                     qr.putpixel((i+12,j+12), bg.getpixel((i,j)))
 83 |         
 84 |         qr_name = os.path.join(save_dir, os.path.splitext(os.path.basename(bg_name))[0] + '_qrcode.png') if not save_name else os.path.join(save_dir, save_name)
 85 |         qr.resize((qr.size[0]*3, qr.size[1]*3)).save(qr_name)
 86 |         return qr_name
 87 | 
 88 |     tempdir = os.path.join(os.path.expanduser('~'), '.myqr')
 89 |     
 90 |     try:
 91 |         if not os.path.exists(tempdir):
 92 |             os.makedirs(tempdir)
 93 | 
 94 |         ver, qr_name = theqrmodule.get_qrcode(version, level, words, tempdir)
 95 | 
 96 |         if picture and picture[-4:]=='.gif':
 97 |             import imageio
 98 |              
 99 |             im = Image.open(picture)
100 |             duration = im.info.get('duration', 0)
101 |             im.save(os.path.join(tempdir, '0.png'))
102 |             while True:
103 |                 try:
104 |                     seq = im.tell()
105 |                     im.seek(seq + 1)
106 |                     im.save(os.path.join(tempdir, '%s.png' %(seq+1)))
107 |                 except EOFError:
108 |                     break
109 |             
110 |             imsname = []
111 |             for s in range(seq+1):
112 |                 bg_name = os.path.join(tempdir, '%s.png' % s)
113 |                 imsname.append(combine(ver, qr_name, bg_name, colorized, contrast, brightness, tempdir))
114 |             
115 |             ims = [imageio.imread(pic) for pic in imsname]
116 |             qr_name = os.path.join(save_dir, os.path.splitext(os.path.basename(picture))[0] + '_qrcode.gif') if not save_name else os.path.join(save_dir, save_name)
117 |             imageio.mimwrite(qr_name, ims, '.gif', **{ 'duration': duration/1000 })
118 |         elif picture:
119 |             qr_name = combine(ver, qr_name, picture, colorized, contrast, brightness, save_dir, save_name)
120 |         elif qr_name:
121 |             qr = Image.open(qr_name)
122 |             qr_name = os.path.join(save_dir, os.path.basename(qr_name)) if not save_name else os.path.join(save_dir, save_name)
123 |             qr.resize((qr.size[0]*3, qr.size[1]*3)).save(qr_name)
124 |           
125 |         return ver, level, qr_name
126 |         
127 |     except:
128 |         raise
129 |     finally:
130 |         import shutil
131 |         if os.path.exists(tempdir):
132 |             shutil.rmtree(tempdir) 


--------------------------------------------------------------------------------
/video_downloader/MyQR/terminal.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | from MyQR.myqr import run
 5 | import os
 6 | 
 7 | def main():
 8 |     import argparse
 9 |     argparser = argparse.ArgumentParser()
10 |     argparser.add_argument('Words', help = 'The words to produce you QR-code picture, like a URL or a sentence. Please read the README file for the supported characters.')
11 |     argparser.add_argument('-v', '--version', type = int, choices = range(1,41), default = 1, help = 'The version means the length of a side of the QR-Code picture. From little size to large is 1 to 40.')
12 |     argparser.add_argument('-l', '--level', choices = list('LMQH'), default = 'H', help = 'Use this argument to choose an Error-Correction-Level: L(Low), M(Medium) or Q(Quartile), H(High). Otherwise, just use the default one: H')
13 |     argparser.add_argument('-p', '--picture', help = 'the picture  e.g. example.jpg')
14 |     argparser.add_argument('-c', '--colorized', action = 'store_true', help = "Produce a colorized QR-Code with your picture. Just works when there is a correct '-p' or '--picture'.")
15 |     argparser.add_argument('-con', '--contrast', type = float, default = 1.0, help = 'A floating point value controlling the enhancement of contrast. Factor 1.0 always returns a copy of the original image, lower factors mean less color (brightness, contrast, etc), and higher values more. There are no restrictions on this value. Default: 1.0')
16 |     argparser.add_argument('-bri', '--brightness', type = float, default = 1.0, help = 'A floating point value controlling the enhancement of brightness. Factor 1.0 always returns a copy of the original image, lower factors mean less color (brightness, contrast, etc), and higher values more. There are no restrictions on this value. Default: 1.0')
17 |     argparser.add_argument('-n', '--name', help = "The filename of output tailed with one of {'.jpg', '.png', '.bmp', '.gif'}. eg. exampl.png")
18 |     argparser.add_argument('-d', '--directory', default = os.getcwd(), help = 'The directory of output.')
19 |     args = argparser.parse_args()
20 |     
21 |     if args.picture and args.picture[-4:]=='.gif':
22 |         print('It may take a while, please wait for minutes...')
23 |     
24 |     try:
25 |         ver, ecl, qr_name = run(
26 |             args.Words,
27 |             args.version,
28 |             args.level,
29 |             args.picture,
30 |             args.colorized,
31 |             args.contrast,
32 |             args.brightness,
33 |             args.name,
34 |             args.directory
35 |             )   
36 |         print('Succeed! \nCheck out your', str(ver) + '-' + str(ecl), 'QR-code:', qr_name)
37 |     except:
38 |         raise


--------------------------------------------------------------------------------
/video_downloader/requirements.txt:
--------------------------------------------------------------------------------
1 | imageio==1.5
2 | numpy==1.11.1
3 | Pillow==3.3.1
4 | beautifulsoup4==4.3.2


--------------------------------------------------------------------------------
/video_downloader/video_downloader.py:
--------------------------------------------------------------------------------
  1 | # -*- coding:utf-8 -*-
  2 | from tkinter.filedialog import askdirectory
  3 | from MyQR.myqr import run
  4 | from urllib import request, parse
  5 | from bs4 import BeautifulSoup
  6 | 
  7 | import tkinter.messagebox as msgbox
  8 | import tkinter as tk
  9 | import webbrowser
 10 | import re
 11 | import json
 12 | import os
 13 | import types
 14 | import requests
 15 | import time
 16 | 
 17 | 
 18 | """
 19 | 类说明:爱奇艺、优酷等实现在线观看以及视频下载的类
 20 | 
 21 | Parameters:
 22 | 	width - tkinter主界面宽
 23 | 	height - tkinter主界面高
 24 | 
 25 | Returns:
 26 | 	无
 27 | 
 28 | Modify:
 29 | 	2017-05-09
 30 | """
 31 | class APP:
 32 | 	def __init__(self, width = 500, height = 300):
 33 | 		self.w = width
 34 | 		self.h = height
 35 | 		self.title = ' VIP视频破解助手'
 36 | 		self.root = tk.Tk(className=self.title)
 37 | 		self.url = tk.StringVar()
 38 | 		self.v = tk.IntVar()
 39 | 		self.v.set(1)
 40 | 
 41 | 
 42 | 		#Frame空间
 43 | 		frame_1 = tk.Frame(self.root)
 44 | 		frame_2 = tk.Frame(self.root)
 45 | 		frame_3 = tk.Frame(self.root)
 46 | 		
 47 | 		#Menu菜单
 48 | 		menu = tk.Menu(self.root)
 49 | 		self.root.config(menu = menu)
 50 | 		filemenu = tk.Menu(menu,tearoff=0)
 51 | 		moviemenu = tk.Menu(menu,tearoff = 0)
 52 | 		menu.add_cascade(label = '菜单', menu = filemenu)
 53 | 		menu.add_cascade(label = '友情链接', menu = moviemenu)
 54 | 		filemenu.add_command(label = '使用说明',command = lambda :webbrowser.open('http://blog.csdn.net/c406495762/article/details/71334633'))
 55 | 		filemenu.add_command(label = '关于作者',command = lambda :webbrowser.open('http://blog.csdn.net/c406495762'))
 56 | 		filemenu.add_command(label = '退出',command = self.root.quit)
 57 | 
 58 | 		#各个网站链接
 59 | 		moviemenu.add_command(label = '网易公开课',command = lambda :webbrowser.open('http://open.163.com/'))
 60 | 		moviemenu.add_command(label = '腾讯视频',command = lambda :webbrowser.open('http://v.qq.com/'))
 61 | 		moviemenu.add_command(label = '搜狐视频',command = lambda :webbrowser.open('http://tv.sohu.com/'))
 62 | 		moviemenu.add_command(label = '芒果TV',command = lambda :webbrowser.open('http://www.mgtv.com/'))
 63 | 		moviemenu.add_command(label = '爱奇艺',command = lambda :webbrowser.open('http://www.iqiyi.com/'))
 64 | 		moviemenu.add_command(label = 'PPTV',command = lambda :webbrowser.open('http://www.bilibili.com/'))
 65 | 		moviemenu.add_command(label = '优酷',command = lambda :webbrowser.open('http://www.youku.com/'))
 66 | 		moviemenu.add_command(label = '乐视',command = lambda :webbrowser.open('http://www.le.com/'))
 67 | 		moviemenu.add_command(label = '土豆',command = lambda :webbrowser.open('http://www.tudou.com/'))
 68 | 		moviemenu.add_command(label = 'A站',command = lambda :webbrowser.open('http://www.acfun.tv/'))
 69 | 		moviemenu.add_command(label = 'B站',command = lambda :webbrowser.open('http://www.bilibili.com/'))
 70 | 
 71 | 		#控件内容设置
 72 | 		group = tk.Label(frame_1,text = '请选择一个视频播放通道：', padx = 10, pady = 10)
 73 | 		tb1 = tk.Radiobutton(frame_1,text = '通道一', variable = self.v, value = 1, width = 10, height = 3)
 74 | 		tb2 = tk.Radiobutton(frame_1,text = '通道二', variable = self.v, value = 2, width = 10, height = 3)
 75 | 		label1 = tk.Label(frame_2, text = "请输入视频链接：")
 76 | 		entry = tk.Entry(frame_2, textvariable = self.url, highlightcolor = 'Fuchsia', highlightthickness = 1,width = 35)
 77 | 		label2 = tk.Label(frame_2, text = " ")
 78 | 		play = tk.Button(frame_2, text = "播放", font = ('楷体',12), fg = 'Purple', width = 2, height = 1, command = self.video_play)
 79 | 		label3 = tk.Label(frame_2, text = " ")
 80 | 		# download = tk.Button(frame_2, text = "下载", font = ('楷体',12), fg = 'Purple', width = 2, height = 1, command = self.download_wmxz)
 81 | 		QR_Code = tk.Button(frame_3, text = "手机观看", font = ('楷体',12), fg = 'Purple', width = 10, height = 2, command = self.QR_Code)
 82 | 		label_explain = tk.Label(frame_3, fg = 'red', font = ('楷体',12), text = '\n注意：支持大部分主流视频网站的视频播放！\n此软件仅用于交流学习，请勿用于任何商业用途！')
 83 | 		label_warning = tk.Label(frame_3, fg = 'blue', font = ('楷体',12),text = '\n建议：将Chrome内核浏览器设置为默认浏览器\n作者:Jack_Cui')
 84 | 
 85 | 
 86 | 
 87 | 		#控件布局
 88 | 		frame_1.pack()
 89 | 		frame_2.pack()
 90 | 		frame_3.pack()
 91 | 		group.grid(row = 0, column = 0)
 92 | 		tb1.grid(row = 0, column = 1)
 93 | 		tb2.grid(row = 0, column = 2)
 94 | 		label1.grid(row = 0, column = 0)
 95 | 		entry.grid(row = 0, column = 1)
 96 | 		label2.grid(row = 0, column = 2)
 97 | 		play.grid(row = 0, column = 3,ipadx = 10, ipady = 10)
 98 | 		label3.grid(row = 0, column = 4)
 99 | 		# download.grid(row = 0, column = 5,ipadx = 10, ipady = 10)
100 | 		QR_Code.grid(row = 0, column = 0)
101 | 		label_explain.grid(row = 1, column = 0)
102 | 		label_warning.grid(row = 2, column = 0)
103 | 
104 | 	"""
105 | 	函数说明:jsonp解析
106 | 
107 | 	Parameters:
108 | 		_jsonp - jsonp字符串
109 | 
110 | 	Returns:
111 | 		_json - json格式数据
112 | 
113 | 	Modify:
114 | 		2017-05-11
115 | 	"""
116 | 	def loads_jsonp(self, _jsonp):
117 | 		try:
118 | 			_json = json.loads(re.match(".*?({.*}).*",_jsonp,re.S).group(1))
119 | 			return _json
120 | 		except:
121 | 			raise ValueError('Invalid Input')
122 | 
123 | 	"""
124 | 	函数说明:视频播放
125 | 
126 | 	Parameters:
127 | 		self
128 | 
129 | 	Returns:
130 | 		无
131 | 
132 | 	Modify:
133 | 		2017-05-09
134 | 	"""
135 | 	def video_play(self):
136 | 		#视频解析网站地址
137 | 		port_1 = 'http://www.wmxz.wang/video.php?url='
138 | 		port_2 = 'http://www.vipjiexi.com/tong.php?url='
139 | 
140 | 		#正则表达是判定是否为合法链接
141 | 		if re.match(r'^https?:/{2}\w.+$', self.url.get()):
142 | 			if self.v.get() == 1:
143 | 				#视频链接获取
144 | 				ip = self.url.get()
145 | 				#视频链接加密
146 | 				ip = parse.quote_plus(ip)
147 | 				#浏览器打开
148 | 				webbrowser.open(port_1 + self.url.get())
149 | 			elif self.v.get() == 2:
150 | 				#链接获取
151 | 				ip = self.url.get()
152 | 				#链接加密
153 | 				ip = parse.quote_plus(ip)
154 | 
155 | 				#获取time、key、url
156 | 				get_url = 'http://www.vipjiexi.com/x2/tong.php?url=%s' % ip 
157 | 				# get_url_head = {
158 | 				# 	'User-Agent':'Mozilla/5.0 (Linux; Android 4.1.1; Nexus 7 Build/JRO03D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166  Safari/535.19',
159 | 				# 	'Referer':'http://www.vipjiexi.com/',
160 | 				# }
161 | 				# get_url_req = request.Request(url = get_url, headers = get_url_head)
162 | 				# get_url_response = request.urlopen(get_url_req)
163 | 				# get_url_html = get_url_response.read().decode('utf-8')
164 | 				# bf = BeautifulSoup(get_url_html, 'lxml')
165 | 				# a = str(bf.find_all('script'))
166 | 				# pattern = re.compile('"api.php", {"time":"(\d+)", "key": "(.+)", "url": "(.+)","type"', re.IGNORECASE)
167 | 				# string = pattern.findall(a)
168 | 				# now_time = string[0][0]
169 | 				# now_key = string[0][1]
170 | 				# now_url = string[0][2] 
171 | 
172 | 				# #请求播放,获取Success = 1
173 | 				# get_movie_url = 'http://www.vipjiexi.com/x2/api.php'
174 | 				# get_movie_data = {
175 | 				# 	'key':'%s' % now_key,
176 | 				# 	'time':'%s' % now_time,
177 | 				# 	'type':'',
178 | 				# 	'url':'%s' % now_url
179 | 				# }
180 | 				# get_movie_head = {
181 | 				# 	'User-Agent':'Mozilla/5.0 (Linux; Android 4.1.1; Nexus 7 Build/JRO03D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166  Safari/535.19',
182 | 				# 	'Referer':'http://www.vipjiexi.com/x2/tong.php?',
183 | 				# 	'url':'%s' % ip,
184 | 				# }
185 | 				# get_movie_req = request.Request(url = get_movie_url, headers = get_movie_head)
186 | 				# get_movie_data = parse.urlencode(get_movie_data).encode('utf-8')
187 | 				# get_movie_response = request.urlopen(get_movie_req, get_movie_data)
188 | 				#请求之后立刻打开
189 | 				webbrowser.open(get_url)
190 | 
191 | 		else:
192 | 			msgbox.showerror(title='错误',message='视频链接地址无效，请重新输入！')
193 | 
194 | 	"""
195 | 	函数说明:视频下载，通过无名小站抓包(已经无法使用)
196 | 
197 | 	Parameters:
198 | 		self
199 | 
200 | 	Returns:
201 | 		无
202 | 
203 | 	Modify:
204 | 		2017-06-15
205 | 	"""
206 | 	def download_wmxz(self):	
207 | 		if re.match(r'^https?:/{2}\w.+$', self.url.get()):
208 | 			#视频链接获取
209 | 			ip = self.url.get()
210 | 			#视频链接加密
211 | 			ip = parse.quote_plus(ip)
212 | 
213 | 			#获取保存视频的url
214 | 			get_url = 'http://www.sfsft.com/index.php?url=%s' % ip 
215 | 			head = {
216 | 				'User-Agent':'Mozilla/5.0 (Linux; Android 4.1.1; Nexus 7 Build/JRO03D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166  Safari/535.19',
217 | 				'Referer':'http://www.sfsft.com/index.php?url=%s' % ip
218 | 			}
219 | 			get_url_req = request.Request(url = get_url, headers = head)
220 | 			get_url_response = request.urlopen(get_url_req)
221 | 			get_url_html = get_url_response.read().decode('utf-8')
222 | 			bf = BeautifulSoup(get_url_html, 'lxml')
223 | 			a = str(bf.find_all('script'))
224 | 			pattern = re.compile("url : '(.+)',", re.IGNORECASE)
225 | 			url = pattern.findall(a)[0]
226 | 
227 | 			#获取视频地址
228 | 			get_movie_url = 'http://www.sfsft.com/api.php'
229 | 			get_movie_data = {
230 | 				'up':'0',
231 | 				'url':'%s' % url,
232 | 			}
233 | 			get_movie_req = request.Request(url = get_movie_url, headers = head)
234 | 			get_movie_data = parse.urlencode(get_movie_data).encode('utf-8')
235 | 			get_movie_response = request.urlopen(get_movie_req, get_movie_data)
236 | 			get_movie_html = get_movie_response.read().decode('utf-8')
237 | 			get_movie_data = json.loads(get_movie_html)
238 | 			webbrowser.open(get_movie_data['url'])
239 | 		else:
240 | 			msgbox.showerror(title='错误',message='视频链接地址无效，请重新输入！')
241 | 
242 | 
243 | 	"""
244 | 	函数说明:生成二维码,手机观看
245 | 
246 | 	Parameters:
247 | 		self
248 | 
249 | 	Returns:
250 | 		无
251 | 
252 | 	Modify:
253 | 		2017-05-12
254 | 	"""
255 | 	def QR_Code(self):	
256 | 		if re.match(r'^https?:/{2}\w.+$', self.url.get()):
257 | 			#视频链接获取
258 | 			ip = self.url.get()
259 | 			#视频链接加密
260 | 			ip = parse.quote_plus(ip)
261 | 
262 | 			url = 'http://www.wmxz.wang/video.php?url=%s' % ip
263 | 			words = url
264 | 			images_pwd = os.getcwd() + '\Images\\'
265 | 			png_path = images_pwd + 'bg.png'
266 | 			qr_name = 'qrcode.png'
267 | 			qr_path = images_pwd + 'qrcode.png'
268 | 
269 | 			run(words = words, picture = png_path, save_name = qr_name, save_dir = images_pwd)
270 | 
271 | 			top = tk.Toplevel(self.root)
272 | 			img = tk.PhotoImage(file = qr_path)
273 | 			text_label = tk.Label(top, fg = 'red', font = ('楷体',15), text = "手机浏览器扫描二维码，在线观看视频！")
274 | 			img_label = tk.Label(top, image = img)
275 | 			text_label.pack()
276 | 			img_label.pack()
277 | 			top.mainloop()
278 | 
279 | 		else:
280 | 			msgbox.showerror(title='错误',message='视频链接地址无效，请重新输入！')
281 | 
282 | 	"""
283 | 	函数说明:tkinter窗口居中
284 | 
285 | 	Parameters:
286 | 		self
287 | 
288 | 	Returns:
289 | 		无
290 | 
291 | 	Modify:
292 | 		2017-05-09
293 | 	"""
294 | 	def center(self):
295 | 		ws = self.root.winfo_screenwidth()
296 | 		hs = self.root.winfo_screenheight()
297 | 		x = int( (ws/2) - (self.w/2) )
298 | 		y = int( (hs/2) - (self.h/2) )
299 | 		self.root.geometry('{}x{}+{}+{}'.format(self.w, self.h, x, y))
300 | 
301 | 	"""
302 | 	函数说明:loop等待用户事件
303 | 
304 | 	Parameters:
305 | 		self
306 | 
307 | 	Returns:
308 | 		无
309 | 
310 | 	Modify:
311 | 		2017-05-09
312 | 	"""
313 | 	def loop(self):
314 | 		self.root.resizable(False, False)	#禁止修改窗口大小
315 | 		self.center()						#窗口居中
316 | 		self.root.mainloop()
317 | 
318 | if __name__ == '__main__':
319 | 	app = APP()			#实例化APP对象
320 | 	app.loop()			#loop等待用户事件
321 | 
322 | 
323 | 
324 | 
325 | 


--------------------------------------------------------------------------------