├── .gitignore ├── 5tpsMp3 ├── 5tpsMp3.py └── 5tpsMp3_py2.py ├── Ken777 ├── Ken777.py ├── Ken777制作书籍汇总.TXT └── downloaded.txt ├── README.md ├── _test.py ├── doc └── weiphone.md ├── flvcd.py ├── ifengVideo └── ifengVideo.py ├── itpub.py ├── lib ├── __init__.py ├── common.py ├── config.ini └── config.py ├── opencourse.py ├── opencourse_old.py ├── verycd ├── simplecd.py └── verycd.py ├── weiphone.py └── youku └── youku_join.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[co] 2 | __pycache__/ 3 | 4 | _test.py -------------------------------------------------------------------------------- /5tpsMp3/5tpsMp3.py: -------------------------------------------------------------------------------- 1 | 2 | import threading 3 | import signal 4 | import json 5 | import queue 6 | 7 | from lib.common import * 8 | 9 | progress_file = '5tps.json' # 保存进度的文件名 10 | o5tps = {} # 要解析的对象 11 | cur_pos = 0 12 | 13 | 14 | class Parser(): 15 | BASE_URL = 'http://www.5tps.com' 16 | 17 | def __init__(self): 18 | # 每个子页面的下载链接、fileName的正则 19 | self.rDownUrl = re.compile( 20 | '点此下载.*') 21 | self.rFileName = re.compile(r'[^/]+\.mp3') # 从上面的DownUrl得到 22 | 23 | #得到 out_dict{'start_url':'', 'title':'', 'urls':[], 'content':'', 'total_size':10} 24 | def parseStartUrl(self, start_url): 25 | """ 26 | parse out download page from start url. 27 | eg. we can get 'http://www.5tps.com/down/8297_52_1_1.html' from 'http://www.5tps.com/html/8297.html' 28 | """ 29 | out_dict = {'start_url': start_url} 30 | html = getHtml(start_url, 'gbk') 31 | 32 | #标题 33 | titleLine = r1(r'