├── PHP ├── FileDownloader.php ├── HtmlDownloader.php ├── HtmlParser.php ├── Spider.php ├── composer.json └── index.php ├── README.md └── python ├── conf.py ├── conf.pyc ├── entity ├── __init__.py ├── __init__.pyc ├── fileinfor.py └── fileinfor.pyc ├── filedeal ├── __init__.py ├── __init__.pyc ├── file_downloader.py └── file_downloader.pyc ├── index.py ├── spider ├── __init__.py ├── __init__.pyc ├── html_downloader.py ├── html_downloader.pyc ├── html_parser.py ├── html_parser.pyc ├── spiderman.py └── spiderman.pyc └── test ├── __init__.py ├── test_bs4.py ├── test_httplib.py ├── test_json.py ├── test_progress.py └── test_urllib.py /PHP/FileDownloader.php: -------------------------------------------------------------------------------- 1 | array( 15 | 'header' => 16 | "Host: www.imooc.com\r\n" . 17 | "Referer: http://m.120ask.com/health/show?page=2&id=84882&type=17\r\n" . 18 | "User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.93 Safari/537.36\r\n" 19 | ), 20 | ); 21 | $context = stream_context_create($header); 22 | $data = file_get_contents($url, 0, $context); 23 | return $data; 24 | } 25 | 26 | } -------------------------------------------------------------------------------- /PHP/HtmlParser.php: -------------------------------------------------------------------------------- 1 | filter('#main > div.course-infos > div.w.pr > div.hd.clearfix > h2')->text(); 24 | 25 | $course = $crawler->filterXPath('//*[@id="main"]/div[2]/div[1]/div[1]/div[3]/div/ul/li/a')->each( 26 | function (Crawler $node, $i) { 27 | $url = explode('/', $node->filter('a')->attr('href')); 28 | $id = $url[2]; 29 | $downloadData = json_decode(file_get_contents(str_replace('{}', $id, self::DOWNLOAD_URL)), true); 30 | return [ 31 | 'id' => $url['2'], 32 | 'title' => str_replace(['\r','\n','开始学习',' ',], ['','','',''], trim($node->text())), 33 | //'title' => str_replace('开始学习', '', trim($node->text())), 34 | 'url' => $downloadData['data']['result']['mpath'][2] 35 | ]; 36 | }); 37 | $result = [ 38 | 'subject' => $subject, 39 | 'data' => $course 40 | ]; 41 | return $result; 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /PHP/Spider.php: -------------------------------------------------------------------------------- 1 | '普清', 'M' => '高清', 'H' => '超清'];//视频品质描述 19 | const PERSUM = 0.0;//用于描述总进度 20 | const PERLIST = [];//记录每个线程的进度 21 | 22 | private $htmlData; 23 | 24 | public function __construct($id) 25 | { 26 | $this->downloader = new HtmlDownloader(); 27 | $this->parser = new HtmlParser(); 28 | $this->id = $id; 29 | } 30 | 31 | public function run() 32 | { 33 | echo "#####################################################################\n"; 34 | echo "#慕课网视频抓取器\n"; 35 | echo "author:igo9go\n"; 36 | echo "github:https://github.com/igo9go/\n"; 37 | echo "#到慕课网官网打开想要下载的课程的章节列表页面,查看当前url链接\n"; 38 | echo "#例如http://www.imooc.com/learn/615,则课程编号为615\n"; 39 | echo "#####################################################################\n"; 40 | $url = self::COURSEURL . $this->id; 41 | 42 | 43 | echo "将要下载的课程连接为" . $url . "\n"; 44 | echo "开始解析视频,请稍后\n"; 45 | $this->crawl($url); 46 | echo "共有" . count($this->htmlData['data']) . "条视频\n"; 47 | echo "课程名称:" . $this->htmlData['subject'] . PHP_EOL; 48 | echo "开始下载,请等待"; 49 | 50 | $fildir = './' . $this->htmlData['subject']; 51 | if (!is_dir($fildir)) { 52 | mkdir($fildir); 53 | } 54 | $i = 1; 55 | foreach ($this->htmlData['data'] as $item) { 56 | $file = file_get_contents($item['url']); 57 | $fileName = $item['title'] . '.mp4'; 58 | file_put_contents($fildir . '/' . $fileName, $file); 59 | echo '第' . $i . '个视频已完成下载' . PHP_EOL; 60 | $i++; 61 | } 62 | return '下载完成'; 63 | } 64 | 65 | public function crawl($url) 66 | { 67 | $htmlContent = $this->downloader->download($url); 68 | $this->htmlData = $this->parser->parse($htmlContent); 69 | } 70 | } -------------------------------------------------------------------------------- /PHP/composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "immoc", 3 | "type": "library", 4 | "license": "mit", 5 | "authors": [ 6 | { 7 | "name": "igo9go", 8 | "email": "1@qq.com" 9 | } 10 | ], 11 | "require": { 12 | "symfony/dom-crawler": "2.8.*|3.0.*|3.1.*", 13 | "symfony/css-selector": "2.8.*|3.0.*|3.1.*" 14 | }, 15 | "minimum-stability": "dev" 16 | } -------------------------------------------------------------------------------- /PHP/index.php: -------------------------------------------------------------------------------- 1 | run(); -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # downloaa_imooc 2 | 下载慕课网视频 3 | 4 | 运行步骤: 5 | 6 | 1. git clone https://github.com/igo9go/downloaa_imooc 7 | 8 | 2. composer install 9 | 10 | 3. php index.php 744(744为课程ID) 11 | 12 | 13 | ![](http://oc9orpe44.bkt.clouddn.com/17-2-7/12718178-file_1486460082478_d782.png) 14 | 15 | ![](http://oc9orpe44.bkt.clouddn.com/17-2-7/57911326-file_1486460108434_9092.png) -------------------------------------------------------------------------------- /python/conf.py: -------------------------------------------------------------------------------- 1 | #coding:utf-8 2 | #全局变量 3 | import threading 4 | 5 | DOWNLOAD_URL = 'http://www.imooc.com/course/ajaxmediainfo/?mid={}&mode=flash'#下载链接 6 | COURSEURL = "http://www.imooc.com/learn/"#课程链接 7 | #COURSEURL = "http://coding.imooc.com/learn/list/74.html" 8 | 9 | CHOOSE=['H','M','L']#视频品质 10 | 11 | STATE='L'#视频默认品质 12 | 13 | LOCK = threading.Lock()#线程锁 14 | 15 | INFOR = {'L':u'普清','M':u'高清','H':u'超清'}#视频品质描述 16 | 17 | PERSUM=0.0#用于描述总进度 18 | 19 | PERLIST=[]#记录每个线程的进度 -------------------------------------------------------------------------------- /python/conf.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igo9go/downloaa_imooc/71e0ca2f1a6bf761158b877d434b49fed83bbe1a/python/conf.pyc -------------------------------------------------------------------------------- /python/entity/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Xaxdus' 2 | -------------------------------------------------------------------------------- /python/entity/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igo9go/downloaa_imooc/71e0ca2f1a6bf761158b877d434b49fed83bbe1a/python/entity/__init__.pyc -------------------------------------------------------------------------------- /python/entity/fileinfor.py: -------------------------------------------------------------------------------- 1 | #coding:utf-8 2 | ''' 3 | 这个类用于描述文件信息的模型,将文件的所有信息封装成一个类,便于管理 4 | ''' 5 | class FileInfor(object): 6 | def __init__(self): 7 | self.__subject=''#教程名称 8 | self.__filename=''#课程名称(也是下载的每个文件名称) 9 | self.__mid= ''#课程的ID号 10 | self.__url={}#下载链接(分高中低,H M L) 11 | @property 12 | def subject(self): 13 | return self.__subject 14 | 15 | @subject.setter 16 | def subject(self,value): 17 | self.__subject = value 18 | 19 | @property 20 | def filename(self): 21 | return self.__filename 22 | 23 | @filename.setter 24 | def filename(self,value): 25 | self.__filename = value 26 | 27 | @property 28 | def mid(self): 29 | return self.__mid 30 | 31 | @mid.setter 32 | def mid(self,value): 33 | self.__mid = value 34 | 35 | @property 36 | def url(self): 37 | return self.__url 38 | 39 | @url.setter 40 | def url(self,value): 41 | self.__url = value -------------------------------------------------------------------------------- /python/entity/fileinfor.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igo9go/downloaa_imooc/71e0ca2f1a6bf761158b877d434b49fed83bbe1a/python/entity/fileinfor.pyc -------------------------------------------------------------------------------- /python/filedeal/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Xaxdus' 2 | -------------------------------------------------------------------------------- /python/filedeal/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igo9go/downloaa_imooc/71e0ca2f1a6bf761158b877d434b49fed83bbe1a/python/filedeal/__init__.pyc -------------------------------------------------------------------------------- /python/filedeal/file_downloader.py: -------------------------------------------------------------------------------- 1 | #coding:utf-8 2 | import os 3 | import threading 4 | import urllib 5 | import sys 6 | import conf 7 | 8 | 9 | class File_Downloader(threading.Thread): 10 | ''' 11 | 这个类主要是用来下载视频文件的,继承了线程类 12 | ''' 13 | def __init__(self,fileInfo,id): 14 | threading.Thread.__init__(self) 15 | #先创建顶层文件夹 16 | self.__fileInfor = fileInfo 17 | self.__id = id 18 | self.createdir() 19 | 20 | 21 | def run(self): 22 | fileurl=self.__fileInfor.url[conf.STATE] 23 | filepath = self.filedir+os.sep+self.__fileInfor.filename+'.mp4' 24 | urllib.urlretrieve(fileurl,filepath, self.Schedule)#下载文件 25 | 26 | 27 | #创建顶层文件夹 28 | def createdir(self): 29 | self.filedir = self.__fileInfor.subject+"("+conf.INFOR[conf.STATE]+")" 30 | if os.path.exists(self.filedir) == False: 31 | os.mkdir(self.filedir) 32 | #下载任务 33 | def Schedule(self,blocknum,blocksize,totalsize): 34 | ''''' 35 | blocknum:已经下载的数据块 36 | blocksize:数据块的大小 37 | totalsize:远程文件的大小 38 | ''' 39 | per = 100.0 * blocknum * blocksize / totalsize 40 | if per > 100 : 41 | per = 100 42 | conf.LOCK.acquire() 43 | conf.PERLIST[self.__id]= per#记录每个线程的下载百分比,用于计算整个的进度状况 44 | nowsum = 0;#当前的进度 45 | for item in conf.PERLIST: 46 | nowsum+=item 47 | str = u'当前下载进度:---------------->>>>>>>> %.2f%%' % (100*nowsum/conf.PERSUM) 48 | sys.stdout.write(str+"\r") 49 | sys.stdout.flush() 50 | conf.LOCK.release() 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /python/filedeal/file_downloader.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igo9go/downloaa_imooc/71e0ca2f1a6bf761158b877d434b49fed83bbe1a/python/filedeal/file_downloader.pyc -------------------------------------------------------------------------------- /python/index.py: -------------------------------------------------------------------------------- 1 | #!D:\Python27\python 2 | # -*- coding: utf-8 -*- 3 | 4 | from spider.spiderman import SpiderMan 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | if __name__=="__main__": 14 | spider = SpiderMan() 15 | spider.cmdshow_gbk() 16 | #spider.crawl("http://www.imooc.com/learn/110") -------------------------------------------------------------------------------- /python/spider/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Xaxdus' 2 | -------------------------------------------------------------------------------- /python/spider/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igo9go/downloaa_imooc/71e0ca2f1a6bf761158b877d434b49fed83bbe1a/python/spider/__init__.pyc -------------------------------------------------------------------------------- /python/spider/html_downloader.py: -------------------------------------------------------------------------------- 1 | #coding:utf-8 2 | import cookielib 3 | import urllib2 4 | class Html_Downloader(object): 5 | ''' 6 | 这个类主要是下载html使用的是urllib2模块 7 | ''' 8 | def download(self, url): 9 | if url is None: 10 | return None 11 | request = urllib2.Request(url) 12 | #下面的两个header是为了模拟手机浏览器,因为慕课网app可以不用注册就可以访问视频,所以把咱们的程序模拟成手机浏览器,就可以直接下载了 13 | request.add_header('user-agent','Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.93 Safari/537.36') 14 | request.add_header('host','www.imooc.com') 15 | response= urllib2.urlopen(request) 16 | if response.getcode()!=200: 17 | return None 18 | return response.read() -------------------------------------------------------------------------------- /python/spider/html_downloader.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igo9go/downloaa_imooc/71e0ca2f1a6bf761158b877d434b49fed83bbe1a/python/spider/html_downloader.pyc -------------------------------------------------------------------------------- /python/spider/html_parser.py: -------------------------------------------------------------------------------- 1 | #coding:utf-8 2 | import re 3 | import urlparse 4 | from bs4 import BeautifulSoup 5 | from conf import DOWNLOAD_URL 6 | from entity.fileinfor import FileInfor 7 | from spider.html_downloader import Html_Downloader 8 | 9 | 10 | class Html_Parser(object): 11 | ''' 12 | html解析器:从中提取出视频信息 13 | ''' 14 | def __init__(self): 15 | self.res_data=[]#用来存放视频信息 16 | 17 | 18 | def parser(self, html_cont): 19 | ''' 20 | 21 | :param html_cont: html内容 22 | :return: 23 | ''' 24 | if html_cont is None: 25 | return 26 | # 使用BeautifulSoup模块对html进行解析 27 | soup = BeautifulSoup(html_cont,'html.parser',from_encoding='utf-8')#str ='

Hibernate注解

' 28 | subject = soup.find('div',class_ = "hd").get_text() 29 | links = soup.find_all('a',class_='J-media-item') 30 | html_down = Html_Downloader()#这个主要是请求视频的真实链接,抓包的时候你就会明白 31 | 32 | #下面的代码是将视频信息封装成对象添加到res_data列表中 33 | for link in links: 34 | fileinfor = FileInfor() 35 | fileinfor.subject = subject.strip() 36 | fileinfor.filename= link.get_text().strip().replace(':','_').replace("\r\n","").replace(u'开始学习',"").replace(' ', '') 37 | fileinfor.mid = link['href'].split('/')[2] 38 | json = html_down.download(DOWNLOAD_URL.replace('{}',fileinfor.mid)).replace('\/','/').encode('utf-8') 39 | # print json 40 | dic_json=eval(json) 41 | # print dic_json['data']['result']['mpath'][0] 42 | fileinfor.url['L']=dic_json['data']['result']['mpath'][0] 43 | fileinfor.url['M']=dic_json['data']['result']['mpath'][1] 44 | fileinfor.url['H']=dic_json['data']['result']['mpath'][2] 45 | self.res_data.append(fileinfor) 46 | print self.res_data 47 | return self.res_data 48 | 49 | 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /python/spider/html_parser.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igo9go/downloaa_imooc/71e0ca2f1a6bf761158b877d434b49fed83bbe1a/python/spider/html_parser.pyc -------------------------------------------------------------------------------- /python/spider/spiderman.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os 4 | from conf import COURSEURL, CHOOSE 5 | import conf 6 | from filedeal import file_downloader 7 | from spider import html_parser 8 | from spider import html_downloader 9 | 10 | ''' 11 | 12 | 这个类是爬虫的主逻辑 13 | ''' 14 | 15 | class SpiderMan(object): 16 | 17 | def __init__(self): 18 | self.downloader = html_downloader.Html_Downloader()#html下载器 19 | self.parser = html_parser.Html_Parser()#html解析器 20 | 21 | 22 | 23 | def crawl(self,url): 24 | ''' 25 | 26 | :param url: 需要爬取的url 27 | :return: 28 | ''' 29 | #下载好的html 30 | html_cont = self.downloader.download(url) 31 | #爬取到的视频数据信息 32 | self.res_datas = self.parser.parser(html_cont) 33 | 34 | 35 | 36 | def download(self,res_datas): 37 | ''' 38 | 39 | :param res_datas: 视频数据信息列表 40 | :return: 41 | ''' 42 | id = 0 #设置线程的id号,只是为了进度条显示的时候进行分类信息 43 | for res_data in res_datas: 44 | downloader = file_downloader.File_Downloader(res_data,id)#视频文件下载线程,给每个文件分配一个线程(有点偷懒了) 45 | id += 1 46 | conf.PERLIST.append(0)#百分比列表 47 | downloader.start() 48 | 49 | def cmdshow_gbk(self): 50 | print u'#####################################################################' 51 | print u"#慕课网视频抓取器" 52 | print u"author:七夜" 53 | print u"博客:http://blog.csdn.net/qiye_/和http://www.cnblogs.com/qiyeboy/ 同步更新 " 54 | print u"微信公众号:qiye_python" 55 | print u"github:https://github.com/qiyeboy/" 56 | print u"#到慕课网官网打开想要下载的课程的章节列表页面,查看当前url链接" 57 | print u"#例如http://www.imooc.com/learn/615,则课程编号为615" 58 | print u"#####################################################################" 59 | try: 60 | ID = raw_input(u'输入要下载的课程编号:'.encode('utf-8')) 61 | url = COURSEURL+str(ID) 62 | print u"将要下载的课程链接为:",url 63 | print u'开始解析视频,请稍后:' 64 | self.crawl(url) 65 | conf.PERSUM = len(self.res_datas)*100.0#总的进度 66 | print u'共有%d条视频'% len(self.res_datas) 67 | print u"课程名称:%s" % self.res_datas[0].subject 68 | for res_data in self.res_datas: 69 | print u"----->%s" % res_data.filename 70 | 71 | state = input(u'选择清晰度(1:超清UHD,2:高清HD,3:普清SD):'.encode('utf-8')) 72 | if state not in [1,2,3]: 73 | print u'输入有误' 74 | return 75 | conf.STATE = CHOOSE[state-1] 76 | self.download(self.res_datas) 77 | 78 | except Exception ,e: 79 | print u'程序炸了',e 80 | return 81 | 82 | def prn_obj(obj): 83 | print '\n'.join(['%s:%s' % item for item in obj.__dict__.items()]) 84 | 85 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /python/spider/spiderman.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igo9go/downloaa_imooc/71e0ca2f1a6bf761158b877d434b49fed83bbe1a/python/spider/spiderman.pyc -------------------------------------------------------------------------------- /python/test/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Xaxdus' 2 | -------------------------------------------------------------------------------- /python/test/test_bs4.py: -------------------------------------------------------------------------------- 1 | #coding:utf-8 2 | #pip install beautifulsoup 3 | from bs4 import BeautifulSoup 4 | import re 5 | 6 | # soup = BeautifulSoup('123','html.parser',from_encoding='utf-8') 7 | # #查找所有标签为a的节点 8 | # soup.find_all('a') 9 | # #查找所有标签为a,链接符合/view/123.html形式的节点 10 | # soup.find_all('a',href='/view/123.html') 11 | # 12 | # soup.find_all('a',href=re.compile(r'/view/\d+\.htm')) 13 | # 14 | # #查找所有标签为div,class为abc,文字为python的节点 15 | # soup.find_all('div',class_='abc',string='python') 16 | 17 | # #获取查找到的节点的标签名称 18 | # node.name 19 | # #获取查找到的按节点的href属性 20 | # node['href'] 21 | # 22 | # #获取查找到的a节点的链接文字 23 | # node.get_text() 24 | 25 | # html_doc = """ 26 | # The Dormouse's story 27 | # 28 | #

The Dormouse's story

29 | # 30 | #

Once upon a time there were three little sisters; and their names were 31 | # Elsie, 32 | # Lacie and 33 | # Tillie; 34 | # and they lived at the bottom of a well.

35 | # 36 | #

...

37 | # """ 38 | # soup = BeautifulSoup(html_doc) 39 | # links = soup.find_all('a') 40 | # 41 | # for link in links: 42 | # print link.name,link['href'],link.get_text() 43 | # 44 | # p_node = soup.find('p',class_ = "title") 45 | # print p_node.name,p_node.get_text() 46 | 47 | # str = '星女郎林允清纯活泼 当街开吃抬腿跳跃_街拍' 49 | # 50 | # soup = BeautifulSoup(str,'html.parser',from_encoding='utf-8') 51 | # links = soup.find_all('img') 52 | # for link in links: 53 | # print link['src'] 54 | # str = '1-1 项目介绍 (02:14)' 55 | # soup = BeautifulSoup(str,'html.parser',from_encoding='utf-8') 56 | # links = soup.find_all('a',class_='J-media-item studyvideo') 57 | # 58 | # for link in links: 59 | # print link.get_text().strip(),int(link['href'].split('/')[2]) 60 | 61 | str ='

Hibernate注解

' 62 | soup = BeautifulSoup(str,'html.parser',from_encoding='utf-8') 63 | p_node = soup.find('div',class_ = "hd") 64 | 65 | print p_node.get_text() 66 | -------------------------------------------------------------------------------- /python/test/test_httplib.py: -------------------------------------------------------------------------------- 1 | #coding:utf-8 2 | import urlparse 3 | import httplib 4 | parsedurl = urlparse.urlparse('https://ss0.bdstatic.com/94oJfD_bAAcT8t7mm9GUKT-xh_/timg?image&quality=100&size=b4000_4000&sec=1457758644&di=a35a020cb3e5da7c2c179da3f18b6ad5&src=http://img.hb.aicdn.com/d2024a8a998c8d3e4ba842e40223c23dfe1026c8bbf3-OudiPA_fw580') 5 | 6 | print parsedurl[1] 7 | print parsedurl[2] 8 | httpConn = httplib.HTTPConnection(parsedurl[1]) 9 | httpConn.request('GET', parsedurl[2]) 10 | response = httpConn.getresponse() 11 | if response.status == 200: 12 | size = response.getheader('Content-Length') 13 | size = int(size) / 1024 14 | print 'Size: %s KB,Content-Type: %s, Last-Modified: %s'%(size,response.getheader('Content-Type'),response.getheader('Last-Modified')) 15 | else: 16 | print response.status,response.reason 17 | httpConn.close() 18 | -------------------------------------------------------------------------------- /python/test/test_json.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Xaxdus' 2 | str='''{"result":0,"data":{"result":{"mid":11305,"mpath":["http:\/\/v2.mukewang.com\/b0e670ef-7695-4ded-b5d8-3f78d221413d\/L.mp4?auth_key=1457620869-0-0-688a98b2f90af6006b38ddf89b750ad1","http:\/\/v2.mukewang.com\/b0e670ef-7695-4ded-b5d8-3f78d221413d\/M.mp4?auth_key=1457620869-0-0-2c0f5e8ca614ce7fd2a4e2b1ef844074","http:\/\/v2.mukewang.com\/b0e670ef-7695-4ded-b5d8-3f78d221413d\/H.mp4?auth_key=1457620869-0-0-7287a7bc3fd090f504d592117600d4cf"],"cpid":"3019","name":"\u8bbe\u7f6e\u5e03\u5c40","time":"59","practise":[]}},"msg":"\u6210\u529f"}'''.replace('\/','/') 3 | 4 | dict = eval(str) 5 | 6 | print dict['data']['result']['mpath'][1] -------------------------------------------------------------------------------- /python/test/test_progress.py: -------------------------------------------------------------------------------- 1 | import os,sys,string 2 | import time 3 | 4 | def view_bar(num=1, sum=100, bar_word=":"): 5 | rate = float(num) / float(sum) 6 | rate_num = int(rate * 100) 7 | print '\r%d%% :' %(rate_num) 8 | 9 | for i in range(0, num): 10 | os.write(1, bar_word) 11 | sys.stdout.flush() 12 | 13 | if __name__ == '__main__': 14 | for i in range(0, 100): 15 | time.sleep(0.1) 16 | view_bar(i, 99) 17 | -------------------------------------------------------------------------------- /python/test/test_urllib.py: -------------------------------------------------------------------------------- 1 | #coding:utf-8 2 | import urllib2,cookielib 3 | import bs4 4 | 5 | # url = "http://www.imooc.com/learn/615" 6 | # 7 | # 8 | # print '第一种方法' 9 | # 10 | # response1 = urllib2.urlopen(url) 11 | # 12 | # print response1.getcode() 13 | # print response1.read() 14 | # 15 | # print '第二种方法' 16 | # 17 | # DEFAULT_REQUEST_HEADERS = { 18 | # 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 19 | # 'Accept-Language': 'en', 20 | # 'Referer':'http://www.kuaidaili.com/', 21 | # 'Cookie':'Hm_lvt_7ed65b1cc4b810e9fd37959c9bb51b31=1466940405,1466944441; _ga=GA1.2.172144337.1466940406; _gat=1' 22 | # '; Hm_lpvt_7ed65b1cc4b810e9fd37959c9bb51b31=1466944441', 23 | # 'Connection':'keep-alive' 24 | # } 25 | 26 | # request = urllib2.Request("http://www.kuaidaili.com/free/inha/2") 27 | # request.add_header('user-agent','Mozilla-Firefox-Spider(Wenanry)') 28 | # request.add_header('Referer','http://www.kuaidaili.com/') 29 | # request.add_header('Cookie','Hm_lpvt_7ed65b1cc4b810e9fd37959c9bb51b31=1466953979; _ga=GA1.2.555532848.1466953979; _gat=1' 30 | # '; Hm_lpvt_7ed65b1cc4b810e9fd37959c9bb51b31=1466953979',) 31 | # 32 | # try: 33 | # response2= urllib2.urlopen(request) 34 | # except urllib2.HTTPError,e: 35 | # # print response2 36 | # # print response2.getcode() 37 | # # print response2.read() 38 | # print e.getcode() 39 | # print e.read(),e.info() 40 | 41 | 42 | from selenium import webdriver 43 | 44 | browser = webdriver.Chrome() 45 | browser.get('http://www.baidu.com/') 46 | 47 | # import requests 48 | # 49 | # loginUrl = 'http://www.kuaidaili.com/free/' 50 | # s = requests.Session() 51 | # # r = s.get(loginUrl,proxies=proxies,allow_redirects=True) 52 | # r = s.get(loginUrl,allow_redirects=True) 53 | # 54 | # response= r.text 55 | # r = s.get(loginUrl,allow_redirects=False) 56 | # 57 | # response= r.text 58 | # print response 59 | 60 | # 61 | # print '第三种方法' 62 | # cj = cookielib.CookieJar() 63 | # opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) 64 | # 65 | # urllib2.install_opener(opener) 66 | # 67 | # response3 = urllib2.urlopen(url) 68 | # 69 | # print response3.getcode() 70 | # print cj 71 | # print len(response3.read()) 72 | 73 | 74 | # freader = urllib2.urlopen(self.__fileInfor.url[conf.STATE]) 75 | # filepath = self.filedir+os.sep+self.__fileInfor.filename+'.mp4' 76 | # with open(filepath, "wb") as fwriter: 77 | # fwriter.write(freader.read()) 78 | # fwriter.flush() 79 | # fwriter.close() 80 | # import urllib 81 | # #下载文件 82 | # def callbackfunc(blocknum, blocksize, totalsize): 83 | # '''回调函数 84 | # @blocknum: 已经下载的数据块 85 | # @blocksize: 数据块的大小 86 | # @totalsize: 远程文件的大小 87 | # ''' 88 | # percent = 100.0 * blocknum * blocksize / totalsize 89 | # if percent > 100: 90 | # percent = 100 91 | # print "%.2f%%"% percent 92 | # url = 'http://www.sina.com.cn' 93 | # local = 'd:\\sina.html' 94 | # urllib.urlretrieve(url, local, callbackfunc) 95 | --------------------------------------------------------------------------------