├── README.md ├── config.py ├── drama.py ├── dramas.py ├── logger.py ├── scan.py ├── thunder.py ├── urlread.py ├── verification_code.py └── yc_api.py /README.md: -------------------------------------------------------------------------------- 1 | # thunder_usdrama 2 | 自动抓取字幕组热门美剧并加入迅雷远程下载 3 | 4 | ### 系统需要 5 | 6 | 1、python2、python3共存 7 | 8 | 2、python3安装lxml 9 | 10 | ### 使用说明 11 | 12 | 1、在config.py中修改username、password为迅雷账号密码,修改mydir为py文件所在目录 13 | 14 | 2、运行python3 scan.py即可 15 | 16 | ### 原理说明 17 | 18 | 1、脚本抓取字幕组一周排行榜( http://www.zimuzu.tv/html/top/week.html )内容,将本周收藏和浏览最多的剧两榜求交集,即为下载的美剧清单,获取下载链接后调取迅雷远程api加入下载队列; 19 | 20 | 2、若有关注的美剧,可在dramas.py中将剧的id加入到self.watchlist; 21 | 22 | 3、网页分析部分用python3实现,迅雷远程部分用python2实现; 23 | 24 | 4、yc_api、verification_code.py从其他项目中获得(具体项目追溯不到了),水平不够未成功转为python3版本(这也是本脚本要双版本python的原因),但其中的登录部分已经失效,进行了重写; 25 | 26 | 5、如果你的python2启动命令不是默认的"python",请修改scan.py中call_thunder函数的cmd -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | thunder = { 2 | 'username': '...', 3 | 'password': '...', 4 | 'cookie_path': 'cookie.txt', 5 | 'verification_image_path': './vcode.jpg' 6 | } 7 | mydir = '/volume1/homes/admin/python/thunder' 8 | 9 | -------------------------------------------------------------------------------- /drama.py: -------------------------------------------------------------------------------- 1 | #coding:utf-8 2 | 3 | from lxml import etree 4 | from urlread import urlread 5 | from logger import logger 6 | import re 7 | import json 8 | 9 | class DramaDetail: 10 | def __init__(self, id): 11 | if not id: 12 | raise Exception('Missing drama id.') 13 | self.id = str(id) 14 | self.url = 'http://www.zimuzu.tv/resource/{}'.format(self.id) 15 | self.results = {} 16 | self.name = 'unknown' 17 | 18 | def get_downlad_page(self): 19 | html = urlread().urlread(self.url) 20 | page = etree.HTML(html) 21 | div = 
page.xpath(u"//div[@class='view-res-list']")[0] 22 | a = div.find(".//h3") 23 | a.getparent().remove(a) 24 | href = div.find(".//a[@class='f3']") 25 | pagelink = href.attrib['href'] 26 | logger.info('download page: {}'.format(pagelink)) 27 | return pagelink 28 | 29 | def get_download_html(self): 30 | html = urlread().urlread(self.get_downlad_page()) 31 | file_list = None 32 | page = etree.HTML(html) 33 | scripts = page.xpath("//script") 34 | for sc in scripts: 35 | if sc.text is not None and sc.text != '': 36 | t = re.search(r"var share_prefix = '(.*)'", sc.text) 37 | if t is not None: 38 | self.name = t.group(1) 39 | m = re.search(r'var file_list=(\{.*\})', sc.text) 40 | if m is not None: 41 | file_list = json.loads(m.group(1)) 42 | dls = page.xpath("//dl") 43 | for dl in dls: 44 | title = dl.find(".//dt") 45 | fmt = title.find("span").text 46 | if fmt.find("中文字幕") != -1: 47 | s = re.match(r'第(\d+)季', title.find("strong").text) 48 | if s is None: 49 | continue 50 | season = str(int(s.group(1))) 51 | if season not in self.results.keys(): 52 | self.results[season] = {} 53 | ddd = dl.findall(".//dd") 54 | for dd in ddd: 55 | if 'itemid' in dd.attrib: 56 | episode = str(int(re.match(r'第(\d+)集', dd.find("b").text).group(1))) 57 | itemid = dd.attrib['itemid'] 58 | link = self.filterLink(file_list[itemid]) 59 | if episode not in self.results[season].keys(): 60 | self.results[season][episode] = link 61 | return {"id":self.id,"name":self.name,"results":self.results} 62 | 63 | def filterLink(self, file_list): 64 | for i in range(3): 65 | try: 66 | return file_list[str(i)] 67 | except: 68 | continue 69 | return None 70 | 71 | if __name__ == "__main__": 72 | a = DramaDetail(30675) 73 | print(a.get_download_html()) 74 | 75 | 76 | -------------------------------------------------------------------------------- /dramas.py: -------------------------------------------------------------------------------- 1 | #coding:utf-8 2 | 3 | from lxml import etree 4 | from urlread import 
urlread 5 | from logger import logger 6 | 7 | class USDramaScan: 8 | def __init__(self): 9 | self.url = "http://www.zimuzu.tv/html/top/week.html" 10 | self.watchlist = ['11029'] 11 | 12 | def get_dramas(self): 13 | html = urlread().urlread(self.url) 14 | rankings = [] 15 | page = etree.HTML(html) 16 | divs = page.xpath("//div[starts-with(@class,'box xy-list')]") 17 | for div in divs: 18 | ranking = [] 19 | title = div.find(".//div[@class='a0']") 20 | if title.text.find("的剧")!=-1: 21 | lis = div.findall(".//li") 22 | for li in lis: 23 | ttt = li.findall(".//div[@class='a0']") 24 | for t in ttt: 25 | id = t.find("div[@class='fl info']/a").attrib['href'].split("/")[2] 26 | # title = t.find(".//strong").text 27 | # d = {"title":title,"id":id} 28 | ranking.append(id) 29 | if len(ranking) == 10: 30 | rankings.append(ranking) 31 | if len(rankings)>=2: 32 | prev = rankings[0] 33 | dramas = [] 34 | for ranking in rankings: 35 | dramas = list(set(prev).intersection(set(ranking))) 36 | prev = ranking 37 | else: 38 | dramas = rankings[0] 39 | dramas = list(set(dramas).union(set(self.watchlist))) 40 | dramas = list(set(dramas)) 41 | logger.info('current dramas found: {}'.format(len(dramas))) 42 | return dramas 43 | 44 | if __name__ == "__main__": 45 | a = USDramaScan() 46 | b = a.get_dramas() 47 | print(b) 48 | -------------------------------------------------------------------------------- /logger.py: -------------------------------------------------------------------------------- 1 | #coding:utf-8 2 | 3 | import logging 4 | 5 | LOG_LEVEL = logging.INFO 6 | 7 | logger = logging.getLogger() 8 | if not logger.handlers: 9 | logger.setLevel(LOG_LEVEL) 10 | sh = logging.StreamHandler() 11 | sh.setLevel(LOG_LEVEL) 12 | sh.setFormatter(logging.Formatter('[%(asctime)s %(levelname)s]: %(message)s')) 13 | logger.addHandler(sh) 14 | 15 | -------------------------------------------------------------------------------- /scan.py: 
#coding:utf-8
"""Periodic driver: find new episodes and hand them to the Thunder uploader.

Each cycle asks ``USDramaScan`` for drama ids, resolves their download links
through ``DramaDetail``, compares them with the per-drama JSON records under
``<mydir>/record/`` and writes every episode that is not recorded yet to
``<mydir>/task.txt``.  ``thunder.py`` is then run as a child process.
"""

from dramas import USDramaScan
from drama import DramaDetail
import subprocess, os, json
from logger import logger
import time
import config


class scan:
    """One scan cycle bound to the working directory ``config.mydir``."""

    def __init__(self):
        self.mydir = config.mydir
        self.task_file = self.mydir + '/task.txt'

    def dramas_on_page(self):
        """Return the drama ids produced by ``USDramaScan().get_dramas()``."""
        return USDramaScan().get_dramas()

    def drama_links(self, id):
        """Return what ``DramaDetail(id).get_download_html()`` produces.

        ``run`` reads the keys ``"id"``, ``"name"`` and ``"results"`` from it.
        """
        return DramaDetail(id).get_download_html()

    def get_record(self, id):
        """Load the download record of drama *id*, creating it when missing.

        Returns:
            The decoded JSON object stored in ``<mydir>/record/<id>.json``;
            an empty dict when the file had to be created.
        """
        record_dir = '{}/record'.format(self.mydir)
        record_path = '{}/{}.json'.format(record_dir, id)
        if not os.path.exists(record_path):
            # The record directory is not created anywhere else in this
            # script, so a fresh checkout would otherwise fail right here.
            os.makedirs(record_dir, exist_ok=True)
            with open(record_path, 'w') as f:
                f.write('{}')
            return {}
        with open(record_path, 'r') as f:
            return json.loads(f.read())

    def write_links(self, tasks):
        """Overwrite the task file with *tasks* serialised as JSON."""
        with open(self.task_file, 'w') as f:
            f.write(json.dumps(tasks))

    def run(self):
        """Collect every unrecorded episode and write the task file."""
        tasks = []
        for drama in self.dramas_on_page():
            data = self.drama_links(drama)
            drama_id = data['id']
            name = data['name']
            record = self.get_record(drama_id)
            for season, episodes in data['results'].items():
                # Records map a season to the list of episode numbers (ints)
                # that were already queued.
                done = record.get(season, [])
                for ep, link in episodes.items():
                    if int(ep) in done:
                        continue
                    if not link:
                        # DramaDetail.filterLink returns None when no usable
                        # link exists; such a task could never be submitted.
                        logger.warning("no download link: {} - season {} - episode {}".format(name, season, ep))
                        continue
                    logger.info("new drama found: {} - season {} - episode {}".format(name, season, ep))
                    tasks.append({"id": drama_id, "season": season, "link": link, "ep": ep, "name": name})
            # Pause between dramas before fetching the next one.
            time.sleep(5)
        self.write_links(tasks)

    def call_thunder(self):
        """Run ``thunder.py`` in ``mydir`` and log what it printed to stdout."""
        cmd = ['python', 'thunder.py']
        proc = subprocess.Popen(cmd, cwd=self.mydir, stdout=subprocess.PIPE)
        # communicate() drains the pipe while waiting.  wait() followed by
        # read() can deadlock once the child fills the OS pipe buffer.
        output, _ = proc.communicate()
        logger.info(output.decode('utf-8', errors='replace'))


if __name__ == "__main__":
    while True:
        a = scan()
        a.run()
        a.call_thunder()
        time.sleep(3600*6)

# Archive delimiter kept from the original dump (the next file follows):
# --------------------------------------------------------------------------------
# /thunder.py:
#coding:utf-8
#this is a py27 file!
"""Submit the tasks listed in ``<mydir>/task.txt`` to Thunder remote download.

For every task the download URL is handed to
``ThunderRemoteDownload.create_task``.  On success the episode is added to
``<mydir>/record/<id>.json`` and the task is removed from ``task.txt``; on any
error a warning is logged and the task stays queued.
"""

from yc_api import *
import verification_code, config
import json, time, os
import logging

mydir = config.mydir
LOG_LEVEL = logging.INFO

# Configure the root logger only when nothing else has installed a handler.
logger = logging.getLogger()
if not logger.handlers:
    logger.setLevel(LOG_LEVEL)
    sh = logging.StreamHandler()
    sh.setLevel(LOG_LEVEL)
    sh.setFormatter(logging.Formatter('[%(asctime)s %(levelname)s]: %(message)s'))
    logger.addHandler(sh)

with open('{}/task.txt'.format(mydir), 'r') as f:
    tasks = json.loads(f.read())

verification_code_reader = verification_code.default_verification_code_reader('file', config.thunder.get('verification_image_path'))
auto_login = True
dl = ThunderRemoteDownload(config.thunder.get('username'), config.thunder.get('password'), config.thunder.get('cookie_path'), auto_login, verification_code_reader)

# Tasks still pending; rewritten to task.txt after every successful submit.
copytasks = list(tasks)
for t in tasks:
    season = t['season']
    drama_id = t['id']
    url = t['link']
    ep = t['ep']
    name = t['name']
    logger.info('adding task: ' + name + ' - season ' + season + ' - episode ' + str(ep))
    record_path = '{}/record/{}.json'.format(mydir, drama_id)
    try:
        url = url.rstrip('>')
        # create_task returns a success flag.  Treat a falsy result as an
        # error so the episode is not recorded as done and stays queued.
        if not dl.create_task(url):
            raise RuntimeError('create_task reported failure')
        with open(record_path, 'r') as f:
            record = json.loads(f.read())
        if season not in record:
            record[season] = []
        record[season].append(int(ep))
        record[season] = list(set(record[season]))
        record['name'] = name
        with open(record_path, 'w') as f:
            f.write(json.dumps(record))

        copytasks.remove(t)
        with open('{}/task.txt'.format(mydir), 'w') as f:
            # The list must be serialised; writing it directly raised
            # TypeError, so task.txt was never updated.
            f.write(json.dumps(copytasks))
    except Exception as e:
        logger.warning(e)

    # NOTE(review): indentation was lost in the dump; this pause is assumed
    # to sit inside the loop (one pause per task) - confirm.
    time.sleep(10)

# Archive delimiter kept from the original dump (the next file follows):
# --------------------------------------------------------------------------------
# /urlread.py:
#coding:utf-8
"""Small cookie-aware HTTP helper built on ``urllib.request``."""

from logger import logger
import urllib.parse, urllib.request
import http.cookiejar
from socket import timeout

USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'

# Seconds before a request is abandoned; applied to GET and POST alike.
REQUEST_TIMEOUT = 60


class urlread:
    """HTTP client that keeps cookies and sends browser-like headers.

    Args:
        ucookiejar: optional cookie jar to reuse; a fresh
            ``http.cookiejar.CookieJar`` is created when omitted.
    """

    def __init__(self, ucookiejar=None):
        self.header = {
            'User-Agent': USER_AGENT,
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language':'zh-CN,zh;q=0.8,en;q=0.6,zh-TW;q=0.4'
        }
        if ucookiejar is None:
            self.cookiejar = http.cookiejar.CookieJar()
        else:
            self.cookiejar = ucookiejar
        pro = urllib.request.HTTPCookieProcessor(self.cookiejar)
        self.opener = urllib.request.build_opener(pro)
        self.opener.addheaders = list(self.header.items())

    def urlopen(self, url, postData=None):
        """Open *url* and return the response object.

        Args:
            url: address to request.
            postData: ``None`` for a GET; a dict (form-encoded here) or
                ready-made bytes for a POST.
        """
        logger.info('urlopen: {}'.format(url))
        if isinstance(postData, dict):
            postData = urllib.parse.urlencode(postData).encode()
        # Previously only POST requests carried a timeout, so a stalled GET
        # could block the caller forever.
        return self.opener.open(url, postData, timeout=REQUEST_TIMEOUT)

    def __urlread(self, url, postData=None):
        """Return the response body as bytes, undoing gzip/deflate encoding."""
        # The with-block closes the connection even when read() fails.
        with self.urlopen(url, postData) as response:
            data = response.read()
            encoding = response.info().get('Content-Encoding')
        if encoding == 'gzip':
            data = ungzip(data)
        elif encoding == 'deflate':
            data = undeflate(data)
        return data

    def urlread(self, url, postData=None):
        """Return the response body decoded with the default codec (UTF-8)."""
        logger.info('urlread: {}'.format(url))
        data = self.__urlread(url, postData)
        return data.decode()


def ungzip(s):
    """Return the gzip-compressed bytes *s* decompressed."""
    import gzip
    return gzip.decompress(s)


def undeflate(s):
    """Return the deflate-compressed bytes *s* decompressed.

    The HTTP ``deflate`` coding is defined as a zlib-wrapped stream, but some
    servers send a raw deflate stream instead, so both forms are accepted.
    """
    import zlib
    try:
        return zlib.decompress(s)
    except zlib.error:
        # No zlib header/trailer present: fall back to a raw deflate stream.
        return zlib.decompress(s, -zlib.MAX_WBITS)
#coding:utf-8
#this is a py27 file!


def file_path_verification_code_reader(path):
    """Build a reader that saves the captcha image to *path* and asks the user.

    The returned callable takes the raw image bytes, writes them to *path*,
    prints a hint telling the user where the picture is, and returns the text
    typed at the ``Verification code: `` prompt.
    """
    def prompt_for_code(image):
        with open(path, 'wb') as fh:
            fh.write(image)
        # Single-argument print(...) behaves the same as the print statement
        # under Python 2.
        print('Verification code picture is saved to %s, please open it manually and enter what you see.' % path)
        return raw_input('Verification code: ')
    return prompt_for_code


# An ASCII-art based reader (selected by reader_type == 'ascii') is disabled
# in this file, so only the file-based reader is available.

def default_verification_code_reader(reader_type, vcode_image_path):
    """Return the file-based reader, defaulting the image path.

    *reader_type* is accepted but not used.  A falsy *vcode_image_path* is
    replaced by ``'./vcode.jpg'``.
    """
    return file_path_verification_code_reader(vcode_image_path or './vcode.jpg')

# Archive delimiter kept from the original dump (the next file follows):
# --------------------------------------------------------------------------------
# /yc_api.py:
# --------------------------------------------------------------------------------
# 1 | #this is a py27 file!
2 | __all__ = ['ThunderRemoteDownload', 'str_filesize'] 3 | 4 | import json 5 | import logging 6 | import os.path 7 | import re 8 | import time 9 | import urllib 10 | import urllib2 11 | import cookielib 12 | import hashlib 13 | import base64 14 | 15 | USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36' 16 | LOG_LEVEL = logging.INFO 17 | 18 | logger = logging.getLogger() 19 | logger.setLevel(LOG_LEVEL) 20 | 21 | sh = logging.StreamHandler() 22 | sh.setLevel(LOG_LEVEL) 23 | sh.setFormatter(logging.Formatter('[%(asctime)s %(levelname)s]: %(message)s')) 24 | logger.addHandler(sh) 25 | 26 | 27 | def retry(f_or_arg, *args): 28 | # retry_sleeps = [1, 1, 1] 29 | retry_sleeps = [1, 2, 3, 5, 10, 20, 30, 60] + [60] * 60 30 | 31 | def decorator(f): 32 | def withretry(*args, **kwargs): 33 | for second in retry_sleeps: 34 | try: 35 | return f(*args, **kwargs) 36 | except (urllib2.URLError, urllib2.HTTPError): 37 | import traceback 38 | logger.debug("Exception happened. 
Retrying...") 39 | logger.debug(traceback.format_exc()) 40 | time.sleep(second) 41 | raise 42 | 43 | return withretry 44 | 45 | if callable(f_or_arg) and not args: 46 | return decorator(f_or_arg) 47 | else: 48 | a = f_or_arg 49 | assert type(a) == int 50 | assert not args 51 | retry_sleeps = [1] * a 52 | return decorator 53 | 54 | 55 | class ThunderRemoteDownload(object): 56 | 57 | def __init__(self, username=None, password=None, cookie_path=None, login=True, verification_code_reader=None): 58 | self.username = username 59 | self.password = password 60 | self.cookie_path = cookie_path 61 | if cookie_path: 62 | self.cookiejar = cookielib.LWPCookieJar() 63 | if os.path.exists(cookie_path): 64 | self.load_cookies() 65 | else: 66 | self.cookiejar = cookielib.CookieJar() 67 | 68 | self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookiejar)) 69 | self.verification_code_reader = verification_code_reader 70 | self.login_time = None 71 | if login: 72 | self.id = self.get_userid_or_none() 73 | if not self.id: 74 | self.login() 75 | self.id = self.get_userid() 76 | 77 | self.selected_peer_id = None 78 | self.selected_peer_name = "" 79 | self.cached_peer_list = [] 80 | self.default_target_dir = "" 81 | self.user_define_target_dirs = [] 82 | self.peer_drives = [] 83 | 84 | self.__load_last_configs() 85 | self.__init_default_peer() 86 | 87 | def __load_last_configs(self): 88 | tmp = self.get_cookie('config.com', 'selected_peer_id') 89 | if tmp: 90 | self.selected_peer_id = tmp 91 | 92 | tmp = self.get_cookie('config.com', 'user_define_target_dirs') 93 | if tmp: 94 | self.user_define_target_dirs = json.loads(tmp) 95 | 96 | @retry 97 | def urlopen(self, url, **args): 98 | logger.info('urlopen: {}'.format(url)) 99 | # import traceback 100 | # for line in traceback.format_stack(): 101 | # print line.strip() 102 | if 'data' in args and type(args['data']) == dict: 103 | args['data'] = urlencode(args['data']) 104 | logger.debug(args['data']) 105 | resp = 
self.opener.open(urllib2.Request(url, **args), timeout=60) 106 | ### 107 | cookies_headers = resp.headers.getheaders('Set-Cookie') 108 | logger.debug('cookie: {!s}'.format(cookies_headers)) 109 | ### 110 | return resp 111 | 112 | def __urlread(self, url, **args): 113 | args.setdefault('headers', {}) 114 | headers = args['headers'] 115 | headers.setdefault('Accept-Encoding', 'gzip, deflate') 116 | # headers.setdefault('Referer', 'http://lixian.vip.xunlei.com/task.html') 117 | headers.setdefault('User-Agent', USER_AGENT) 118 | # headers.setdefault('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8') 119 | # headers.setdefault('Accept-Language', 'zh-cn,zh;q=0.7,en-us;q=0.3') 120 | response = self.urlopen(url, **args) 121 | data = response.read() 122 | if response.info().get('Content-Encoding') == 'gzip': 123 | data = ungzip(data) 124 | elif response.info().get('Content-Encoding') == 'deflate': 125 | data = undeflate(data) 126 | return data 127 | 128 | def urlread(self, url, **args): 129 | logger.info('urlread') 130 | logger.info(' V') 131 | data = self.__urlread(url, **args) 132 | if self.is_session_timeout(data): 133 | logger.debug('session timed out') 134 | self.login() 135 | data = self.__urlread(url, **args) 136 | return data 137 | 138 | def load_cookies(self): 139 | self.cookiejar.load(self.cookie_path, ignore_discard=True, ignore_expires=True) 140 | 141 | def save_cookies(self): 142 | if self.cookie_path: 143 | self.cookiejar.save(self.cookie_path, ignore_discard=True) 144 | 145 | def get_cookie(self, domain, k): 146 | if self.has_cookie(domain, k): 147 | return self.cookiejar._cookies[domain]['/'][k].value 148 | else: 149 | return None 150 | 151 | def has_cookie(self, domain, k): 152 | return domain in self.cookiejar._cookies and k in self.cookiejar._cookies[domain]['/'] 153 | 154 | def set_cookie(self, domain, k, v): 155 | c = cookielib.Cookie(version=0, name=k, value=v, port=None, port_specified=False, domain=domain, 
domain_specified=True, 156 | domain_initial_dot=False, path='/', path_specified=True, secure=False, expires=None, discard=True, 157 | comment=None, comment_url=None, rest={}, rfc2109=False) 158 | self.cookiejar.set_cookie(c) 159 | 160 | def del_cookie(self, domain, k): 161 | if self.has_cookie(domain, k): 162 | self.cookiejar.clear(domain=domain, path="/", name=k) 163 | 164 | def get_cookie_header(self): 165 | def domain_header(domain): 166 | root = self.cookiejar._cookies[domain]['/'] 167 | return '; '.join(k + '=' + root[k].value for k in root) 168 | 169 | return domain_header('.xunlei.com') + '; ' + domain_header('.vip.xunlei.com') 170 | 171 | def get_userid(self): 172 | if self.has_cookie('.xunlei.com', 'userid'): 173 | return self.get_cookie('.xunlei.com', 'userid') 174 | else: 175 | raise Exception('Probably login failed') 176 | 177 | def get_userid_or_none(self): 178 | return self.get_cookie('.xunlei.com', 'userid') 179 | 180 | def get_username(self): 181 | return self.get_cookie('.xunlei.com', 'usernewno') 182 | 183 | def get_referer(self): 184 | return 'http://dynamic.cloud.vip.xunlei.com/user_task?userid=%s' % self.id 185 | 186 | def gen_jsonp_function_name(self): 187 | return 'jQuery{}_{}'.format(id(self), current_timestamp()) 188 | 189 | def check_device_id(self): 190 | if not self.has_cookie('.xunlei.com', 'deviceid'): 191 | url1 = 'https://login.xunlei.com/risk?cmd=algorithm&t='+str(current_timestamp()) 192 | sign_fun = self.__urlread(url1).decode() 193 | import js2py 194 | xl_al = js2py.eval_js(sign_fun) 195 | SB = USER_AGENT + "###zh-cn###24###960x1440###-540###true###true###true###undefined###undefined###x86###Win32#########"+md5(str(current_timestamp()).encode()) 196 | xl_fp_raw = base64.b64encode(SB.encode()).decode() 197 | xl_fp = md5(xl_fp_raw.encode()) 198 | xl_fp_sign = xl_al(xl_fp_raw) 199 | device_data = {'xl_fp_raw': xl_fp_raw, 'xl_fp': xl_fp, 'xl_fp_sign':xl_fp_sign} 200 | device_url = 'http://login.xunlei.com/risk?cmd=report' 201 | 
self.urlopen(device_url, data=device_data).read() 202 | if not self.has_cookie('.xunlei.com', '_x_t_'): 203 | self.set_cookie('.xunlei.com', '_x_t_', '0') 204 | 205 | def double_check_login(self): 206 | callback = self.gen_jsonp_function_name() 207 | url = 'http://hub.yuancheng.xunlei.com/check/vipcache?callback={}&_={}'.format(callback, current_timestamp()) 208 | resp = self.urlopen(url, headers={'User-Agent': USER_AGENT}).read() 209 | try: 210 | resp = get_response_info(resp, callback) 211 | except AssertionError as e: 212 | logger.warning('response is not jsonp when double_check_login') 213 | return False 214 | 215 | if resp.get('userid') and resp.get('userid') == self.id: 216 | return True 217 | return False 218 | 219 | def has_logged_in(self): 220 | id = self.get_userid_or_none() 221 | if not id: 222 | return False 223 | 224 | return self.double_check_login() 225 | 226 | 227 | def is_session_timeout(self, html): 228 | logger.info('is_session_timeout?') 229 | logger.debug('html: {}'.format(html)) 230 | # timeout warning 1: 231 | # jQuery4444817808_1480233929775({"msg": "user not login", "rtn": 1004}) 232 | timeout_test = r'(not login)|("rtn": 1004)' 233 | if re.search(timeout_test, html): 234 | return True 235 | 236 | maybe_timeout = html == '''rebuild({"rtcode":-1,"list":[]})''' 237 | if maybe_timeout: 238 | if self.login_time and time.time() - self.login_time > 60 * 10: # 10 minutes 239 | return True 240 | 241 | return False 242 | 243 | def read_verification_code(self): 244 | if not self.verification_code_reader: 245 | raise NotImplementedError('Verification code required') 246 | else: 247 | verification_code_url = 'http://verify1.xunlei.com/image?t=MVA&cachetime=%s' % current_timestamp() 248 | image = self.urlopen(verification_code_url).read() 249 | return self.verification_code_reader(image) 250 | 251 | def login(self): 252 | username = self.username 253 | password = self.password 254 | if not username and self.has_cookie('.xunlei.com', 'usernewno'): 255 | 
username = self.get_username() 256 | if not username: 257 | raise Exception('Missing username') 258 | if not password: 259 | raise Exception('Missing password') 260 | 261 | logger.info('login') 262 | self.check_device_id() 263 | check_url = 'http://login.xunlei.com/check/?u=%s&business_type=113&v=101&cachetime=%d&' % (username, current_timestamp()) 264 | login_page = self.urlopen(check_url).read() 265 | # verification_code = self.get_cookie('.xunlei.com', 'check_result') 266 | # if not verification_code: 267 | # verification_code = self.read_verification_code() 268 | # if verification_code: 269 | # verification_code = verification_code.upper() 270 | # else: 271 | # verification_code = verification_code[2:].upper() 272 | # assert verification_code 273 | print self.get_cookie('.xunlei.com', 'deviceid') 274 | login_page = self.urlopen('https://login.xunlei.com/sec2login/?csrf_token={}'.format(hashlib.md5(self.get_cookie('.xunlei.com', 'deviceid')[:32]).hexdigest()), headers={'User-Agent': USER_AGENT}, 275 | data={'u': username, 'p': password, 'verifycode': '', 'login_enable': '0', 276 | 'business_type': '113', 'v': '101', 'cachetime': current_timestamp()}) 277 | print self.cookiejar._cookies 278 | self.id = self.get_userid() 279 | 280 | if not self.double_check_login(): 281 | raise RuntimeError('login failed') 282 | 283 | self.save_cookies() 284 | self.login_time = time.time() 285 | 286 | def logout(self): 287 | logger.info('logout') 288 | session_id = self.get_cookie('.xunlei.com', 'sessionid') 289 | if not session_id: 290 | return 291 | url = 'http://login.xunlei.com/unregister?sessionid={}'.format(session_id) 292 | self.urlopen(url) 293 | ckeys = ["sessionid", "usrname", "nickname", "usernewno", "userid"] 294 | for k in ckeys: 295 | self.set_cookie('.xunlei.com', k, '') 296 | self.save_cookies() 297 | self.login_time = None 298 | 299 | def select_peer(self, pid): 300 | logger.info('select peer: {}'.format(pid)) 301 | self.selected_peer_id = pid 302 | 
self.set_cookie('config.com', 'selected_peer_id', pid) 303 | self.save_cookies() 304 | 305 | self.__init_default_peer() 306 | 307 | def get_selected_peer_name(self): 308 | return self.selected_peer_name 309 | 310 | def __init_default_peer(self): 311 | if not self.cached_peer_list: 312 | peers = self.list_peer() 313 | if not peers: 314 | raise Exception('No peer downloader') 315 | 316 | if not self.selected_peer_id: 317 | peers = self.cached_peer_list 318 | self.selected_peer_id = peers[0].get('pid') 319 | self.set_cookie('config.com', 'selected_peer_id', self.selected_peer_id) 320 | self.save_cookies() 321 | 322 | #check the peer still online 323 | the_peer = None 324 | for p in self.cached_peer_list: 325 | if p.get('pid') == self.selected_peer_id: 326 | the_peer = p 327 | break 328 | 329 | if not the_peer: 330 | raise Exception('It seems the selected downloader is unbound') 331 | 332 | if not the_peer.get('online') in [1, '1']: 333 | raise Exception('The selected downloader is offline') 334 | 335 | self.selected_peer_name = the_peer.get('name') 336 | 337 | #login the peer 338 | drive_list = self.login_peer(self.selected_peer_id) 339 | if not drive_list: 340 | raise Exception('Error when login the downloader') 341 | self.peer_drives = drive_list 342 | logger.debug('peer drives: {!s}'.format(drive_list)) 343 | 344 | #get the peer's settings and save its default target dir 345 | setting = self.get_peer_setting(self.selected_peer_id) 346 | if not setting: 347 | raise Exception('Error when retrieving the setting of the downloader') 348 | 349 | self.default_target_dir = setting.get('defaultPath') 350 | 351 | @retry(3) 352 | def list_peer(self): 353 | logger.info('list_peer') 354 | 355 | callback = self.gen_jsonp_function_name() 356 | url = 'http://homecloud.yuancheng.xunlei.com/listPeer?type=0&v=2&ct=0&callback={}&_={}'.format(callback, current_timestamp()) 357 | resp = self.urlread(url) 358 | try: 359 | resp = get_response_info(resp, callback) 360 | except 
AssertionError as e: 361 | msg = 'response is not jsonp when list_peer' 362 | logger.warning(msg) 363 | logger.debug(resp) 364 | raise Exception(msg) 365 | 366 | result = [] 367 | if resp.get('rtn') == 0: 368 | result = resp.get('peerList') 369 | self.cached_peer_list = result 370 | 371 | return result 372 | 373 | @retry(3) 374 | def login_peer(self, pid): 375 | """ 376 | :param pid: 377 | :return: drive list of this peer - ["C", "D", ...] 378 | """ 379 | logger.info('login_peer') 380 | 381 | callback = self.gen_jsonp_function_name() 382 | url = 'http://homecloud.yuancheng.xunlei.com/login?pid={}&clientType=&v=2&ct=0&callback={}&_={}'.format(pid, callback, current_timestamp()) 383 | resp = self.urlread(url) 384 | try: 385 | resp = get_response_info(resp, callback) 386 | except AssertionError as e: 387 | msg = 'response is not jsonp when login_peer' 388 | logger.warning(msg) 389 | logger.debug(resp) 390 | raise Exception(msg) 391 | 392 | result = [] 393 | if resp.get('rtn') == 0: 394 | result = [x[0] for x in resp.get('pathList')] 395 | 396 | return result 397 | 398 | @retry(3) 399 | def get_peer_setting(self, pid): 400 | logger.info('get_peer_setting: {}'.format(pid)) 401 | 402 | callback = self.gen_jsonp_function_name() 403 | url = 'http://homecloud.yuancheng.xunlei.com/settings?pid={}&v=2&ct=0&callback={}&_={}'.format( 404 | pid, callback, current_timestamp() 405 | ) 406 | resp = self.urlread(url) 407 | try: 408 | resp = get_response_info(resp, callback) 409 | except AssertionError as e: 410 | msg = 'response is not jsonp when get_peer_setting' 411 | logger.warning(msg) 412 | logger.debug(resp) 413 | raise Exception(msg) 414 | 415 | result = {} 416 | if resp.get('rtn') == 0: 417 | result = resp 418 | 419 | return result 420 | 421 | @retry(3) 422 | def list_downloading(self, start=0, len=100): 423 | logger.info('list_downloading') 424 | 425 | callback = self.gen_jsonp_function_name() 426 | url = 
'http://homecloud.yuancheng.xunlei.com/list?pid={}&type=0&pos={}&number={}&needUrl=1&v=2&ct=0&callback={}&_={}'.format( 427 | self.selected_peer_id, start, len, callback, current_timestamp() 428 | ) 429 | resp = self.urlread(url) 430 | try: 431 | resp = get_response_info(resp, callback) 432 | except AssertionError as e: 433 | msg = 'response is not jsonp when list_downloading' 434 | logger.warning(msg) 435 | logger.debug(resp) 436 | raise Exception(msg) 437 | 438 | result = [] 439 | if resp.get('rtn') == 0: 440 | result = resp.get('tasks') 441 | 442 | return result 443 | 444 | @retry(3) 445 | def list_finished(self, start=0, len=100): 446 | logger.info('list_finished') 447 | 448 | callback = self.gen_jsonp_function_name() 449 | url = 'http://homecloud.yuancheng.xunlei.com/list?pid={}&type=1&pos={}&number={}&needUrl=1&v=2&ct=0&callback={}&_={}'.format( 450 | self.selected_peer_id, start, len, callback, current_timestamp() 451 | ) 452 | resp = self.urlread(url) 453 | try: 454 | resp = get_response_info(resp, callback) 455 | except AssertionError as e: 456 | msg = 'response is not jsonp when list_finished' 457 | logger.warning(msg) 458 | logger.debug(resp) 459 | raise Exception(msg) 460 | 461 | result = [] 462 | if resp.get('rtn') == 0: 463 | result = resp.get('tasks') 464 | 465 | return result 466 | 467 | @retry(3) 468 | def list_trash(self, start=0, len=100): 469 | logger.info('list_trash') 470 | 471 | callback = self.gen_jsonp_function_name() 472 | url = 'http://homecloud.yuancheng.xunlei.com/list?pid={}&type=2&pos={}&number={}&needUrl=1&v=2&ct=0&callback={}&_={}'.format( 473 | self.selected_peer_id, start, len, callback, current_timestamp() 474 | ) 475 | resp = self.urlread(url) 476 | try: 477 | resp = get_response_info(resp, callback) 478 | except AssertionError as e: 479 | msg = 'response is not jsonp when list_trash' 480 | logger.warning(msg) 481 | logger.debug(resp) 482 | raise Exception(msg) 483 | 484 | result = [] 485 | if resp.get('rtn') == 0: 486 | result 
= resp.get('tasks') 487 | 488 | return result 489 | 490 | @retry(3) 491 | def get_free_space_of_downloader(self, pid=None): 492 | logger.info('get_free_space_of_downloader') 493 | 494 | if not pid: 495 | pid = self.selected_peer_id 496 | callback = self.gen_jsonp_function_name() 497 | url = 'http://homecloud.yuancheng.xunlei.com/boxSpace?pid={}&v=2&ct=0&callback={}&_={}'.format( 498 | pid, callback, current_timestamp() 499 | ) 500 | resp = self.urlread(url) 501 | try: 502 | resp = get_response_info(resp, callback) 503 | except AssertionError as e: 504 | msg = 'response is not jsonp when get_free_space_of_downloader' 505 | logger.warning(msg) 506 | logger.debug(resp) 507 | raise Exception(msg) 508 | 509 | result = [] 510 | if resp.get('rtn') == 0: 511 | result = resp.get('space') 512 | def filter(x): 513 | x['remain'] = str_filesize(int(x['remain'])) 514 | return x 515 | result = [filter(x) for x in result] 516 | 517 | return result 518 | 519 | def resolve_url(self, url): 520 | logger.info('resolve_url') 521 | 522 | callback = self.gen_jsonp_function_name() 523 | payload = dict(url=url) 524 | payload = dict(json=json.dumps(payload)) 525 | url = 'http://homecloud.yuancheng.xunlei.com/urlResolve?pid={}&v=2&ct=0&callback={}'.format( 526 | self.selected_peer_id, callback 527 | ) 528 | resp = self.urlread(url, data=payload) 529 | try: 530 | resp = get_response_info(resp, callback) 531 | except AssertionError as e: 532 | msg = 'response is not jsonp when resolve_url' 533 | logger.warning(msg) 534 | logger.debug(resp) 535 | raise Exception(msg) 536 | 537 | result = dict(url="", infohash="", size=0, name="") 538 | if resp.get('rtn') == 0 and resp.has_key('taskInfo'): 539 | result['infohash'] = resp.get('infohash', '') 540 | result['url'] = resp.get('taskInfo').get('url') 541 | result['size'] = resp.get('taskInfo').get('size') 542 | result['name'] = resp.get('taskInfo').get('name') 543 | 544 | return result 545 | 546 | def add_target_dir(self, dir): 547 | if not 
@retry(3)
def create_task(self, url, path_index=None):
    """Resolve *url* and submit it to the selected peer as a new task.

    When the resolved info carries an ``infohash`` the task is posted to
    ``createBtTask``; otherwise it is posted to ``createTask``.

    :param url: the download link; it is first passed through
        ``resolve_url`` and replaced by the resolved URL.
    :param path_index: optional index into ``user_define_target_dirs``;
        when ``None`` the ``default_target_dir`` is used.
    :return: ``True`` when the service reports success (``rtn`` equal to
        0 and, for non-BT tasks, a first task ``result`` equal to 0),
        ``False`` otherwise -- including a reply that carries no
        ``tasks`` entry.
    :raises Exception: when the resolved size is 0, when *path_index* is
        beyond the configured directories, when the reply is not JSONP,
        or when the service answers 202 (already downloading/downloaded).

    NOTE(review): ``retry`` is defined outside this chunk; presumably it
    re-invokes the method when it raises -- confirm.
    """
    logger.info('create_task')

    # resolve the url first
    url_info = self.resolve_url(url)
    size = url_info.get('size')
    if size == 0:
        raise Exception('Invalid URL provided')
    infohash = url_info.get('infohash')
    name = url_info.get('name')
    url = url_info.get('url')

    # get the target dir
    target_path = self.default_target_dir
    if path_index is not None:
        if path_index >= len(self.user_define_target_dirs):
            raise Exception('path_index out of range')
        target_path = self.user_define_target_dirs[path_index]

    callback = self.gen_jsonp_function_name()
    is_bt = bool(infohash)
    if is_bt:
        endpoint = 'createBtTask'
        payload = dict(path=target_path, infohash=infohash, name=name, btSub=[1])
    else:
        endpoint = 'createTask'
        task = dict(url=url, name=name, gcid="", cid="", filesize=size, ext_json={"autoname": 1})
        payload = dict(path=target_path, tasks=[task])

    request_url = 'http://homecloud.yuancheng.xunlei.com/{}?pid={}&v=2&ct=0&callback={}'.format(
        endpoint, self.selected_peer_id, callback
    )
    raw_reply = self.urlread(request_url, data=dict(json=json.dumps(payload)))
    try:
        resp = get_response_info(raw_reply, callback)
    except AssertionError:
        msg = 'response is not jsonp when create_task'
        logger.warning(msg)
        # Log the undecoded body so the malformed reply can be inspected.
        logger.debug(raw_reply)
        raise Exception(msg)

    if is_bt:
        if resp.get('rtn') == 202:
            raise Exception('Already downloading/downloaded')
        return resp.get('rtn') == 0

    # A reply may lack the per-task list; indexing it blindly would raise
    # TypeError/IndexError instead of reporting a plain failure.
    tasks = resp.get('tasks')
    task_result = tasks[0].get('result') if tasks else None
    if task_result == 202:
        raise Exception('Already downloading/downloaded')

    return resp.get('rtn') == 0 and task_result == 0
@retry(3)
def pause_task(self, task_id, task_state):
    """Send a ``pause`` request for one task on the selected peer.

    :param task_id: task identifier; joined with *task_state* as
        ``<task_id>_<task_state>`` in the ``tasks`` query parameter.
    :param task_state: state value placed after the identifier.
    :return: ``True`` only when the reply's ``rtn`` field is 0 and the
        first entry of its ``tasks`` list has ``result`` equal to 0.
    :raises Exception: when ``get_response_info`` rejects the reply with
        an ``AssertionError`` (i.e. it could not be read as JSONP).

    NOTE(review): ``retry`` is defined outside this chunk; presumably it
    re-invokes the method when it raises -- confirm.
    """
    logger.info('pause_task')

    task_ref = '{}_{}'.format(task_id, task_state)
    jsonp_name = self.gen_jsonp_function_name()
    request_url = 'http://homecloud.yuancheng.xunlei.com/pause?pid={}&tasks={}&v=2&ct=0&callback={}&_={}'.format(
        self.selected_peer_id, task_ref, jsonp_name, current_timestamp()
    )
    raw_reply = self.urlread(request_url)
    try:
        info = get_response_info(raw_reply, jsonp_name)
    except AssertionError:
        msg = 'response is not jsonp when pause_task'
        logger.warning(msg)
        # Log the undecoded body so the malformed reply can be inspected.
        logger.debug(raw_reply)
        raise Exception(msg)

    # The per-task result is only consulted once the overall call succeeded.
    if info.get('rtn') != 0:
        return False
    first_task = info.get('tasks')[0]
    return first_task.get('result') == 0
def parse_json_response(html):
    """Decode the JSON object wrapped in a ``rebuild(...)`` call.

    The whole text must have the form ``rebuild({...})``, optionally
    preceded by a single U+FEFF byte-order mark.

    :param html: the response text to parse.
    :return: the object produced by ``json.loads`` on the ``{...}`` part.
    :raises RuntimeError: when *html* does not have the expected form; the
        offending text is written to the debug log first.
    """
    # Spelled as u'...' with doubled backslashes rather than ur'...': the
    # `ur` prefix is a syntax error on Python 3, while this literal yields
    # the same pattern (including the real U+FEFF character) on Python 2
    # and Python 3.
    m = re.match(u'^\ufeff?rebuild\\((\\{.*\\})\\)$', html)
    if not m:
        logger.debug(html)
        raise RuntimeError('Invalid response')
    return json.loads(m.group(1))
int(mini_info['d_status']), 751 | 'status_text': {'0': 'waiting', '1': 'downloading', '2': 'completed', '3': 'failed', '5': 'pending'}[mini_info['d_status']], 752 | 'size': int(mini_info.get('ysfilesize', 0)), 753 | 'original_url': mini_info['f_url'], 754 | 'xunlei_url': mini_info.get('dl_url', None), 755 | 'bt_hash': mini_info['dcid'], 756 | 'dcid': mini_info['dcid'], 757 | 'gcid': parse_gcid(mini_info.get('dl_url', None)), 758 | } 759 | 760 | m = re.search(r']*>([^<>]*)', html) 761 | task['progress'] = m and m.group(1) or '' 762 | m = re.search(r']*id="speed\d+">([^<>]*)', html) 763 | task['speed'] = m and m.group(1).replace(' ', '') or '' 764 | m = re.search(r'([^<>]*)', html) 765 | task['date'] = m and m.group(1) or '' 766 | 767 | return task 768 | 769 | 770 | def parse_history(html): 771 | rwbox = re.search(r'