├── DoubanMovie ├── movie_crawler.py └── write_to_mysql.py ├── Huaban ├── explain.md ├── huaban_crawler.py └── huaban_travel_places_result.txt ├── IpProxy ├── Ip181FreeProxy │ └── get_ip181.py ├── KuaiFreeProxy │ └── get_kuaifreeproxy.py └── XunFreeProxy │ └── get_xunfreeproxy.py ├── README.md ├── SinaWeibo ├── chromedriver ├── image_result.md ├── weibo_crawler.py └── weibo_hot_topic_crawler.py └── WechatOfficialAccounts └── spider_wechat_official_accounts.py /DoubanMovie/movie_crawler.py: -------------------------------------------------------------------------------- 1 | #encoding:utf-8 2 | 3 | import requests 4 | import json 5 | import os,sys,time 6 | from lxml import etree 7 | from scrapy.selector import Selector 8 | from scrapy.http import HtmlResponse 9 | from bs4 import BeautifulSoup 10 | import re 11 | reload(sys) 12 | sys.setdefaultencoding("utf-8") 13 | 14 | LANGUAGES_RE = re.compile(ur"语言: (.+?)
") 15 | COUNTRIES_RE = re.compile(ur"制片国家/地区: (.+?)
") 16 | ALTERNATE_NAME_RE = re.compile(ur"又名: (.+?)
") 17 | RELEASE_TIME_RE = re.compile(ur"上映日期: (.+?)
") 18 | NUM_RE = re.compile(r"(\d+)") 19 | 20 | data_save_file = "douban_donghua_results.txt" 21 | headers = { 22 | 'Accept':'*/*', 23 | 'Accept-Encoding':'gzip, deflate, br', 24 | 'Accept-Language':'zh-CN,zh;q=0.8,en;q=0.6', 25 | 'Connection':'keep-alive', 26 | 'Host':'movie.douban.com', 27 | 'Referer':'https://movie.douban.com/explore', 28 | 'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36', 29 | 'X-Requested-With':'XMLHttpRequest' 30 | } 31 | 32 | def get_item_list(d_url,d_type,d_tag,d_sort,d_page_limit,d_page_start): 33 | params = {} 34 | params["type"] = d_type 35 | params["tag"] = d_tag 36 | if d_sort != "": 37 | params["sort"] = d_sort 38 | params["page_limit"] = d_page_limit 39 | params["page_start"] = d_page_start 40 | response = requests.get(d_url,headers = headers,params = params,timeout =10) 41 | json_obj = response.json() 42 | json_array = json_obj["subjects"] 43 | return json_array 44 | 45 | def get_item_list_from_newsearch(d_url,d_sort,d_range,d_tag,d_page_start): 46 | params = {} 47 | params["sort"] = d_sort 48 | params["tags"] = d_tag 49 | params["range"] = d_range 50 | params["start"] = d_page_start 51 | response = requests.get(d_url,headers = headers,params = params,timeout =10) 52 | json_obj = response.json() 53 | json_array = json_obj["data"] 54 | return json_array 55 | def get_item_detail(item_detail_url): 56 | result_obj = {} 57 | result_obj["subject_id"] = int(item_detail_url.split("/")[-2]) 58 | celebrities_url = "https://movie.douban.com/subject/"+str(result_obj["subject_id"])+"/celebrities" 59 | (directors_cn_names,directors_en_names,actors_cn_names,actors_en_names)=get_directors_and_actors(celebrities_url) 60 | result_obj["directors_cn_names"] = directors_cn_names 61 | result_obj["directors_en_names"] = directors_en_names 62 | result_obj["actors_cn_names"] = actors_cn_names 63 | result_obj["actors_en_names"] = actors_en_names 64 | response = 
requests.get(item_detail_url,headers = headers,timeout = 10) 65 | selector = etree.HTML(response.text) 66 | s_response = HtmlResponse(url=item_detail_url,body = response.text,encoding='utf-8') 67 | 68 | name = s_response.selector.xpath("//title/text()").extract() 69 | if name: result_obj["movie_name"] = name[0].replace(u" (豆瓣)", "").strip() 70 | 71 | genres = s_response.selector.xpath("//span[@property='v:genre']/text()").extract() 72 | if genres: result_obj["genres"] = genres 73 | 74 | S = "".join(s_response.selector.xpath("//div[@id='info']").extract()) 75 | 76 | M = COUNTRIES_RE.search(S) 77 | if M is not None: 78 | result_obj["countries"] = [country.strip() for country in M.group(1).split("/")] 79 | 80 | L = LANGUAGES_RE.search(S) 81 | if L is not None: 82 | result_obj["languages"] = [ lang.strip() for lang in L.group(1).split("/") ] 83 | 84 | A = ALTERNATE_NAME_RE.search(S) 85 | if A is not None: 86 | result_obj["alternate_name"] =[ alternate.strip() for alternate in A.group(1).split("/")] 87 | 88 | T = [] 89 | tags = s_response.selector.xpath("//div[@class='tags-body']/a") 90 | for tag in tags: 91 | t = tag.xpath("text()").extract() 92 | if t: T.append(t[0]) 93 | if T: result_obj["tags"] = T 94 | 95 | average = s_response.selector.xpath("//strong[@property='v:average']/text()").extract() 96 | if average and average[0] != "": result_obj["average"] = float( average[0] ) + 0.0 97 | 98 | json_value = json.dumps(result_obj,ensure_ascii = False) 99 | print(json_value) 100 | return json_value 101 | 102 | def get_directors_and_actors(celebrities_url): 103 | try: 104 | p = requests.get(celebrities_url,headers = headers) 105 | html = p.text 106 | soup = BeautifulSoup(html,"html.parser") 107 | div_list = soup.find_all("div","list-wrapper") 108 | directors_html = div_list[0] 109 | directors = directors_html.find_all("a") 110 | directors_cn_names = [] 111 | directors_en_names = [] 112 | actors_cn_names = [] 113 | actors_en_names = [] 114 | for x in xrange(len(directors)): 
115 | if directors[x].get("target") != "_blank": 116 | director = directors[x].text 117 | first_tag = director.find(" ") 118 | directors_cn_name = director[:first_tag].strip() 119 | directors_en_name = director[first_tag+1:].strip() 120 | if directors_cn_name != "": 121 | directors_cn_names.append(directors_cn_name) 122 | if directors_en_name != "": 123 | directors_en_names.append(directors_en_name) 124 | print directors_cn_name 125 | print directors_en_name 126 | 127 | actors_html = div_list[1] 128 | actors = actors_html.find_all("a") 129 | for x in xrange(len(actors)): 130 | if actors[x].get("target") != "_blank": 131 | actor = actors[x].text 132 | first_tag = actor.find(" ") 133 | actors_cn_name = actor[:first_tag].strip() 134 | actors_en_name = actor[first_tag+1:].strip() 135 | if actors_cn_name != "": 136 | actors_cn_names.append(actors_cn_name) 137 | print "cn_name: "+actors_cn_name 138 | if actors_en_name != "": 139 | actors_en_names.append(actors_en_name) 140 | print "en_name: "+actors_en_name 141 | except Exception, e: 142 | print e 143 | directors_cn_names = [] 144 | directors_en_names = [] 145 | actors_cn_names = [] 146 | actors_en_names = [] 147 | finally: 148 | return (directors_cn_names,directors_en_names,actors_cn_names,actors_en_names) 149 | 150 | 151 | 152 | def write_json_obj(json_value): 153 | os.system("touch "+data_save_file) 154 | f= open(data_save_file,'a+') 155 | f.write(str(json_value)+",") 156 | f.close() 157 | 158 | search_url = "https://movie.douban.com/j/search_subjects?" 159 | tag_search_url = "https://movie.douban.com/j/new_search_subjects?" 
160 | 161 | #豆瓣电影-选电影下的爬虫,如下例子是经典里的前50页 162 | for x in xrange(0,50): 163 | print x 164 | page_start = 20*x 165 | print page_start 166 | time.sleep(1) 167 | json_array = get_item_list(search_url,"movie","经典","time",20,page_start) 168 | for x in xrange(len(json_array)): 169 | time.sleep(1) 170 | json_value = get_item_detail(json_array[x]["url"]) 171 | write_json_obj(json_value) 172 | 173 | #豆瓣电影-分类下的爬虫,如下例子是动画里的前50页 174 | for x in xrange(0,50): 175 | print x 176 | page_start = 20*x 177 | print page_start 178 | time.sleep(1) 179 | json_array = get_item_list_from_newsearch(tag_search_url,"T","0,10","动画",page_start) 180 | for x in xrange(len(json_array)): 181 | time.sleep(1) 182 | json_value = get_item_detail(json_array[x]["url"]) 183 | write_json_obj(json_value) 184 | 185 | 186 | -------------------------------------------------------------------------------- /DoubanMovie/write_to_mysql.py: -------------------------------------------------------------------------------- 1 | #encoding:utf-8 2 | import MySQLdb as mdb 3 | import sys 4 | import json 5 | con = None 6 | 7 | json_path = "douban_donghua_results.txt" 8 | def get_jsonarray_from_txt(path): 9 | try: 10 | f = open(path,"r") 11 | text = f.read() 12 | array = json.loads(text) 13 | print len(array) 14 | f.close() 15 | return array 16 | except Exception, e: 17 | print e 18 | finally: 19 | pass 20 | 21 | try: 22 | #连接 mysql 的方法: connect('ip','user','password','dbname') 23 | con = mdb.connect('localhost', 'root','root', 'test',charset="utf8"); 24 | 25 | #所有的查询,都在连接 con 的一个模块 cursor 上面运行的 26 | cur = con.cursor() 27 | 28 | data = [] 29 | json_array = get_jsonarray_from_txt(json_path) 30 | for x in xrange(len(json_array)): 31 | item = json_array[x] 32 | print item["directors_cn_names"] 33 | print ','.join(item["directors_cn_names"]) 34 | # values = 
[item["subject_id"],item["movie_name"],item["directors_cn_names"],item["directors_en_names"],item["actors_cn_names"],item["actors_en_names"],item["genres"],item["tags"],item["languages"],item["average"],item["alternate_name"],item["countries"]] 35 | try: 36 | values = [item["subject_id"],item["movie_name"],','.join(item["directors_cn_names"]),','.join(item["directors_en_names"]),','.join(item["actors_cn_names"]),','.join(item["actors_en_names"]),','.join(item["genres"]),','.join(item["tags"]),','.join(item["languages"]),item["average"],','.join(item["alternate_name"]),','.join(item["countries"])] 37 | cur.execute('insert into douban_movie(subject_id,movie_name,directors_cn_names,directors_en_names,actors_cn_names,actors_en_names,genres,tags,languages,average,alternate_name,countries) value(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)',values) 38 | except KeyError, e: 39 | print e 40 | finally: 41 | pass 42 | cur.close() 43 | except Exception, e: 44 | print e 45 | finally: 46 | if con: 47 | #无论如何,连接记得关闭 48 | con.commit() 49 | con.close() -------------------------------------------------------------------------------- /Huaban/explain.md: -------------------------------------------------------------------------------- 1 | #### 花瓣爬虫简单说明 2 | 3 | 首先进入你要爬取的页面 4 | 5 | 我以http://huaban.com/favorite/travel_places/ 为例子 6 | 7 | 右键 “检查”,选择network, 往下滑动页面,这时候就会有网络请求,找到我们需要的请求 8 | 9 | 如 http://huaban.com/favorite/travel_places/?j0xapa21&max=1081611043&limit=20&wfl=1 10 | 11 | ![](http://oic2oders.bkt.clouddn.com/github_hua_ban_chrome_screen_cut.png) 12 | 13 | 然后复制下Response的结果,在 http://json.cn/ 里查看format后的结果,找到对应的数据。 14 | 15 | ```json 16 | { 17 | "filter":"pin:category:travel_places", 18 | "pins":[ 19 | { 20 | "pin_id":1081388818, 21 | "user_id":141402, 22 | "board_id":409091, 23 | "file_id":131759569, 24 | "file":{ 25 | "id":131759569, 26 | "farm":"farm1", 27 | "bucket":"hbimg", 28 | "key":"cad3b3be27c98e222065f6a20bb2285d9c1d872d9e124-R3LxxT", 29 | "type":"image/jpeg", 30 | 
"width":"1024", 31 | "height":"683", 32 | "frames":"1", 33 | "colors":[ 34 | { 35 | "color":14342874, 36 | "ratio":0.1 37 | } 38 | ], 39 | "audit":{ 40 | "porn":{ 41 | "rate":0.9999141809676075, 42 | "label":0, 43 | "review":false 44 | } 45 | }, 46 | "theme":"dadada" 47 | }, 48 | "media_type":0, 49 | "source":"nipic.com", 50 | "link":"http://www.nipic.com/show/16746237.html?v=2", 51 | "raw_text":"新疆喀纳斯湖 喀纳斯景区 旅游观光胜地 峡谷中的湖 内陆淡水湖 山峦起伏 植物树木 阿勒泰地区 人间仙境 高山湖泊 清澈湖水 变换颜色湖水 自然风光", 52 | "text_meta":{ 53 | 54 | }, 55 | "via":1043457819, 56 | "via_user_id":19710125, 57 | "original":1043457819, 58 | "created_at":1490868088, 59 | "like_count":0, 60 | "comment_count":0, 61 | "repin_count":1, 62 | "is_private":0, 63 | "orig_source":null, 64 | "user":{ 65 | "user_id":141402, 66 | "username":"休纱", 67 | "urlname":"wangheady", 68 | "created_at":1332149742, 69 | "avatar":{ 70 | "id":74814335, 71 | "farm":"farm1", 72 | "bucket":"hbimg", 73 | "key":"dee8c814cd883df97eadaf34cc416847ef42b7403fbf-viFpjv", 74 | "type":"image/jpeg", 75 | "width":408, 76 | "height":408, 77 | "frames":1 78 | }, 79 | "extra":null 80 | }, 81 | "board":{ 82 | "board_id":409091, 83 | "user_id":141402, 84 | "title":"旅行", 85 | "description":"", 86 | "category_id":"travel_places", 87 | "seq":1, 88 | "pin_count":972, 89 | "follow_count":42, 90 | "like_count":0, 91 | "created_at":1332149777, 92 | "updated_at":1490868097, 93 | "deleting":0, 94 | "is_private":0, 95 | "extra":null 96 | }, 97 | "via_user":{ 98 | "user_id":19710125, 99 | "username":"六王爷", 100 | "urlname":"znl21", 101 | "created_at":1479094868, 102 | "avatar":{ 103 | "bucket":"hbimg", 104 | "farm":"farm1", 105 | "frames":1, 106 | "height":300, 107 | "id":102890808, 108 | "key":"654953460733026a7ef6e101404055627ad51784a95c-B6OFs4", 109 | "type":"image/jpeg", 110 | "width":300 111 | }, 112 | "extra":null 113 | } 114 | } 115 | ], 116 | "explore":null, 117 | "promotions":null, 118 | "suggests":{ 119 | 120 | }, 121 | "banner_box_promotion":null, 122 | "query":null 
123 | } 124 | ``` 125 | 126 | 这里选取了返回的20个结果的一个作为示例,pins对应的JsonArray的最后一个,找到Key为pin_id作为下一次请求的max对应的值。每个图片的地址为 "http://img.hb.aicdn.com/"+pins里的"file"中的"key"。 -------------------------------------------------------------------------------- /Huaban/huaban_crawler.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python 2 | #encoding:utf-8 3 | ''' 4 | 花瓣网爬虫,以花瓣下的旅行模块为例 http://huaban.com/favorite/travel_places/ 5 | main_page中的max参数对应的是起始的ID,可以点击某个图片进入详情即可看到 6 | 如示例代码中的max参数的值,就是点击这个图片进入详情页后的ID http://huaban.com/pins/1082254826/ 7 | 一页爬取20个,爬取完一页后以接口返回的Json中的最后一个的 pin_id 为下一次请求的max的参数 8 | huaban_travel_places_result.txt 为运行的结果 9 | ''' 10 | 11 | 12 | import json 13 | import os 14 | import requests 15 | 16 | main_page = "http://huaban.com/favorite/travel_places/?j0x9q48g&max=1082254826&limit=20&wfl=1" 17 | save_result_path = "huaban_travel_places_result.txt" 18 | 19 | headers = { 20 | 'Accept':'application/json', 21 | 'Accept-Encoding':'gzip, deflate, sdch', 22 | 'Accept-Language':'zh-CN,zh;q=0.8,en;q=0.6', 23 | 'Cache-Control':'no-cache', 24 | 'Connection':'keep-alive', 25 | 'Cookie':'BDTUJIAID=f6b17872d06259f8a38509c1baf402e9; UM_distinctid=15adb12a1f22b5-0b1ea1d4ebe05e-1d396850-1fa400-15adb12a1f3672; 
_f=iVBORw0KGgoAAAANSUhEUgAAADIAAAAUCAYAAADPym6aAAAFjElEQVRYR7WWeWxUVRTGf2fa6UIrNUIRi0VBqMiq4mtBAp2JBhODAQE1aoJhp%2BDGvgkIQgAx0PlDQIIlxhiwEjBCMEikU9EiMwVFG0ooFGQxgWKRWKTQzhxz%2B96UKXul3GQyL%2B%2Bdd%2B%2F5zvd95zxBVXHWzMWLaXv8OONXrkRF6u6qSB%2FgG%2BABgZpI7O3%2BK8wEkgVm3%2B4714vLrcytz%2FN6zyUayLjVq0k%2FcYLZixbVx6pIV2AHkC4QamwyCuOAlgILG%2FtudHyjgWQcOsSk5cuJra0lFBND2OV6AvgS6Ay8BKQAa4BXgI7ACoEqhRbA20Br4C%2FgI4FKhZHAamAV8BwwRqCwjm0YgP27BPwkkK%2BQ6hQuD3gXWAss8VXm3rSIDRgZkZfHxxMmsGTGDOIvXaI6IYH58%2BZ95oDoDTQHfgMetvNgH9Af%2BAf4FXgGOAscAHoJnHEYyRCYpPA48AvwCPA00A6bKVOgTIEpCu8B8wAL%2BAMoM%2Bf7KnNP34zRBkCMN1qePcuCuXOjpZWBXcF0oBmwxUk47HjHVLwtMFBgjoIx13rDjgNkPNDGeMR5FgSmA3OAFwXOqc32GwLT1T5jW9QZm4CxjQIyfN06Ohw%2BfLVHugCbgceAJKAEaA9EgLzmVHi%2BwEAn2d%2BBvk6SBojxyIIoIJOABUCOQKlCtpGYwNTrADGNZpivMrfythhpX17OqpwcatxuDKCK1NQ6n9S43e8ASwEvYKq5H8gH3NgyMNJYBpjkTSUNQJP8t46PRgBvAi8APYHRwLOONwxz7wOjgA7AEOBf4Csgy7kuAkb4KnPNfjdc9dLqWlLC5bg4YkIhLsXHU96%2BPQnV1VxMTOzhbGgoN2y0AvoBPwP3OZ44BcQDplUfA847ctqvtiQTnUQvGpmKDdaYrJPTHHYD3YFyIA0wcWY%2FE1cLxPoqc43vbg3kxhHOQLmT3tkE7zaq%2FV73PLkWiBbuaYdKPioXENwopeK1Rqk%2FsAXEsJSAsFKyrU8je%2BqPxW2p1fWopoAsFK%2B14erz1B%2FYhJKBS3ZItjXx6jkiCkOPdGNvq1OUN29omQZd63aB1MmiIDgUqZsv28VjPW%2FfK4hFkk4hki%2FZ1lv1IMx9kg%2BDbiQ2Zj2h0E5EPJJtmVZct9QfNP5KQauykeRyVNaI96n6Ifr5n2t0cHkXmtXGse2hg5SlmFF1Zf1vIPbhAaPtLmhSmni7VOmuvT0JhXeQGO4oWVn1J6k%2FYMzvI1Fbm%2FvqDxSB%2FF1fgOLiFKq0AnUNFm%2FPreovng86SjxWm0iq3x%2FeoK6wkHUmHX9aOYfuNeOqqYDYrJgOM0081jL1B74AaSkey0zw%2BqWFwRUoQ8RjmXkTKcD94sk0bRz1F3shvA0NdxRvr5PqDy5FdTJx8WnSp8cZE2M8YqQ19kAWO9scaVogtpwCJxEuiCfzUftaRovHatAq1R%2F80LRf8VhmFhlZfg10Eq9lupYDRLfjjnvQJG4zKCtwx6VHA4kJuxhdat0lIIWBD1CZhcgsNDzCALrWxAaIjkWT2tRJ0B8w86h5JNZh5DvQfuLJ2m0D14kRKUYYubtACo4mIGcrzKc6yiLxWmZINlhasHcAEtoc5ZEgSKl4rGFaWNyNcIsypOIcKsNNN9OC4BJgkGFMd%2B3rIX2f3B8tre3pZRxtqq7VwAMFwY2gg6I1fS0rgd2ohBD5AQ2PIS6%2BMzWXpwCTQXuhvI7wKuKaRlh9xDAScR0jFC4GXenrXjQ%2B63Q65nfinvNsbXuQmpgrH8R31LUiyZqqEQ6Nk2wr52oADQD7g5%2BApuGOH2l7YU9vxPUy4aqp4vXWqr94EhruD7JWvNZGu50nLyVW83ydi0oyT6dT4wqRGHKzL%2FUU1
TFm6NurSYDcLPmmenZXJntTJdeYfW4F5D%2BFfIk3uiHuDgAAAABJRU5ErkJggg%3D%3D%2CMacIntel.1920.1080.24; wft=1; crtg_rta=criteo_250x250crtnative3criteo_200x200_Pins%3Bcriteo_200x200_Search%3B; _cnzz_CV1256903590=is-logon%7Clogged-out%7C1490170334892; _ga=GA1.2.659722060.1489734116; __asc=c44c6e9615af4faa683f624a6bd; __auc=4426e80715adb12a228b727e1b4; sid=LpKUX2dYDSQobYImEL6VqeAkafp.Ye41ttTXInOv21reRwzXRyhebcGzTKkgn%2FlSWI2yYEw; CNZZDATA1256903590=449876967-1489731133-null%7C1490168535', 26 | 'Host':'huaban.com', 27 | 'Pragma':'no-cache', 28 | 'Referer':'http://huaban.com/favorite/pets/', 29 | 'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36', 30 | 'X-Request':'JSON', 31 | 'X-Requested-With':'XMLHttpRequest' 32 | } 33 | 34 | def get_image_urls(url): 35 | response = requests.get(url,headers=headers) 36 | response.encoding= "utf-8" 37 | jsonObj = response.json() 38 | pins = jsonObj["pins"] 39 | for x in xrange(len(pins)): 40 | url = "http://img.hb.aicdn.com/"+pins[x]["file"]["key"] 41 | write_txt(url) 42 | print url 43 | next_url_id=pins[-1]['pin_id'] 44 | print next_url_id 45 | next_page_url = make_next_request_url(next_url_id) 46 | print next_page_url 47 | get_image_urls(next_page_url) 48 | 49 | def make_next_request_url(id_num): 50 | return "http://huaban.com/favorite/travel_places/?j0x9q48g&max=" + str(id_num) + "&limit=20&wfl=1" 51 | 52 | def write_txt(url): 53 | try: 54 | if os.path.isfile(save_result_path)==False: 55 | os.system("touch "+save_result_path) 56 | f= open(save_result_path,'a+') 57 | f.write(url.encode("utf-8")) 58 | f.write('\n') 59 | except Exception, e: 60 | print e 61 | finally: 62 | pass 63 | 64 | 65 | 66 | get_image_urls(main_page) 67 | 68 | 69 | -------------------------------------------------------------------------------- /IpProxy/Ip181FreeProxy/get_ip181.py: -------------------------------------------------------------------------------- 1 | # /usr/bin/python 2 | 
# encoding:utf-8 3 | 4 | import requests 5 | import json 6 | from bs4 import BeautifulSoup 7 | import sys 8 | reload(sys) 9 | sys.setdefaultencoding('utf-8') 10 | 11 | base_url = 'http://www.ip181.com/' 12 | proxy_list = [] 13 | 14 | 15 | def get_181_free_proxies(): 16 | try: 17 | print "--------------------------get_181_freeproxy---------------------------" 18 | global proxy_list 19 | p = requests.get(base_url) 20 | requests.encoding = "gb2312" 21 | html = p.text 22 | soup = BeautifulSoup(html,"html.parser") 23 | content = soup.find("tbody") 24 | tr_list = content.find_all("tr") 25 | for x in xrange(1,len(tr_list)): 26 | one_tr = tr_list[x] 27 | ip = one_tr.find_all("td")[0].text 28 | port = one_tr.find_all("td")[1].text 29 | kuai_proxy = ip+":"+port 30 | print kuai_proxy 31 | proxy_list.append(kuai_proxy) 32 | return proxy_list 33 | except Exception, e: 34 | print e 35 | finally: 36 | pass 37 | 38 | def get_one_from_list(): 39 | try: 40 | print "------------------requests timeout, change a new proxy------------------" 41 | global proxy_list 42 | del proxy_list[0] 43 | if len(proxy_list)<=5: 44 | get_181_free_proxies() 45 | return proxy_list[0] 46 | except Exception, e: 47 | print e 48 | finally: 49 | pass -------------------------------------------------------------------------------- /IpProxy/KuaiFreeProxy/get_kuaifreeproxy.py: -------------------------------------------------------------------------------- 1 | # /usr/bin/python 2 | # encoding:utf-8 3 | 4 | import requests 5 | import json 6 | from bs4 import BeautifulSoup 7 | base_url = 'http://www.kuaidaili.com/free/' 8 | kuai_proxy_list = [] 9 | 10 | def get_kuai_free_proxies(url): 11 | try: 12 | print "--------------------------get_kuai_freeproxy---------------------------" 13 | global kuai_proxy_list 14 | p = requests.get(url) 15 | html = p.text 16 | soup = BeautifulSoup(html,"html.parser") 17 | content = soup.find("div",id="list") 18 | tr_list = content.find_all("tr") 19 | for x in xrange(1,len(tr_list)): 
20 | one_tr = tr_list[x] 21 | ip = one_tr.find_all("td")[0].text 22 | port = one_tr.find_all("td")[1].text 23 | kuai_proxy = ip+":"+port 24 | print kuai_proxy 25 | kuai_proxy_list.append(kuai_proxy) 26 | return kuai_proxy_list 27 | except Exception, e: 28 | print e 29 | finally: 30 | pass 31 | 32 | def get_one_from_list(): 33 | try: 34 | print "------------------requests timeout, change a new proxy------------------" 35 | global kuai_proxy_list 36 | del kuai_proxy_list[0] 37 | if len(kuai_proxy_list)<=5: 38 | get_kuai_free_proxies() 39 | return kuai_proxy_list[0] 40 | except Exception, e: 41 | print e 42 | finally: 43 | pass 44 | get_kuai_free_proxies(base_url) -------------------------------------------------------------------------------- /IpProxy/XunFreeProxy/get_xunfreeproxy.py: -------------------------------------------------------------------------------- 1 | # /usr/bin/python 2 | #encoding:utf-8 3 | 4 | ''' 5 | 获取免费的讯代理 http://www.xdaili.cn/freeproxy.html 6 | ''' 7 | 8 | import requests 9 | import json 10 | 11 | xun_free_url = "http://www.xdaili.cn/ipagent//freeip/getFreeIps?page=1&rows=10" 12 | 13 | xun_proxy_list = [] 14 | proxies = {} 15 | 16 | def get_xun_free_proxy(): 17 | try: 18 | print "--------------------------get_xun_freeproxy---------------------------" 19 | global xun_proxy_list 20 | response = requests.get(xun_free_url) 21 | print json.dumps(response.json(),ensure_ascii = False) 22 | xun_proxy_list_result = [] 23 | for x in xrange(len(response.json()["rows"])): 24 | xun_proxy = response.json()["rows"][x]["ip"]+":"+response.json()["rows"][x]["port"] 25 | print xun_proxy 26 | xun_proxy_list_result.append(xun_proxy) 27 | xun_proxy_list = xun_proxy_list + xun_proxy_list_result 28 | return xun_proxy_list 29 | except Exception, e: 30 | print e 31 | finally: 32 | pass 33 | 34 | def get_one_from_list(): 35 | try: 36 | print "------------------requests timeout, change a new proxy------------------" 37 | global xun_proxy_list 38 | del xun_proxy_list[0] 
39 | if len(xun_proxy_list)<=5: 40 | get_xun_free_proxy() 41 | return xun_proxy_list[0] 42 | except Exception, e: 43 | print e 44 | finally: 45 | pass 46 | 47 | 48 | get_xun_free_proxy() -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### 1.网站的图片爬虫 2 | 3 | * ##### 微信公众号爬虫,因为微信公众号无网页端展示,只能用第三方搜狗平台的微信公众号。 4 | 5 | * ##### 新浪微博爬虫,使用webdriver登录来获取cookie,然后通过cookie调取接口来获取微博数据。详细分析见Python实现微博爬虫 6 | 7 | * ##### 花瓣网爬虫,以花瓣下的旅行模块为例 http://huaban.com/favorite/travel_places/ 8 | 9 | ### 2.爬虫IP代理 10 | 11 | * ##### 免费IP代理,详情见IPProxy文件夹。 12 | 13 | * ##### 包含181代理,快代理,讯代理。 14 | 15 | ### 3.豆瓣电影爬虫 16 | 17 | * ##### 因为懒的再去新建一个Repository,所以也放这了。 18 | 19 | -------------------------------------------------------------------------------- /SinaWeibo/chromedriver: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darrenfantasy/image_crawler/b5c38afd80dae0bb89c948ff2bad614c25c2d8d7/SinaWeibo/chromedriver -------------------------------------------------------------------------------- /SinaWeibo/image_result.md: -------------------------------------------------------------------------------- 1 | ![](http://wx4.sinaimg.cn/orj360/6d860193gy1fdzj1gu7h1j20zk0jzjtc.jpg) 2 | ![](http://wx4.sinaimg.cn/thumb150/6d860193gy1fdy9gjuyhjj20zk0qotic.jpg) 3 | ![](http://wx4.sinaimg.cn/thumb150/6d860193gy1fdy9gmlqsxj20zk0qotdq.jpg) 4 | ![](http://wx4.sinaimg.cn/orj360/6d860193gy1fdvyn0ee4gj20pm0zkagj.jpg) 5 | ![](http://wx1.sinaimg.cn/orj360/6d860193gy1fdtw4929c6j20qo10zn5z.jpg) 6 | ![](http://wx4.sinaimg.cn/orj360/d5d006e1ly1fdr6auyty9j21w01w01l3.jpg) 7 | ![](http://wx1.sinaimg.cn/orj360/6d860193gy1fdsamj8s63j20pm0zk0zp.jpg) 8 | ![](http://wx1.sinaimg.cn/orj360/6d860193gy1fdntml8yv8j20qo0zkwm0.jpg) 9 | ![](http://wx3.sinaimg.cn/thumb150/d5d006e1ly1fdnqr9pzrvj20ku112b29.jpg) 10 | 
![](http://wx3.sinaimg.cn/thumb150/d5d006e1ly1fdnqral3kzj21f01j67pu.jpg) 11 | ![](http://wx1.sinaimg.cn/thumb150/d5d006e1ly1fdnragz111j20zk1hchdt.jpg) 12 | ![](http://wx4.sinaimg.cn/thumb150/d5d006e1ly1fdnrahnjj1j20zk0npwxj.jpg) 13 | ![](http://wx4.sinaimg.cn/orj360/6d860193gy1fdm9umme7jj20ki0m8tbw.jpg) 14 | ![](http://wx1.sinaimg.cn/thumb150/6d860193gy1fdjemaw9f0j20qo0zk41j.jpg) 15 | ![](http://wx4.sinaimg.cn/thumb150/6d860193gy1fdjemgjempj20zk0qon31.jpg) 16 | ![](http://wx1.sinaimg.cn/orj360/6d860193gy1fdi2urk4lwj20pm0zkgo4.jpg) 17 | ![](http://wx2.sinaimg.cn/orj360/d5d006e1ly1fdgviuygobj20qo11cn41.jpg) 18 | ![](http://wx4.sinaimg.cn/orj360/d5d006e1ly1fdfmxlv5g2j24go28ku10.jpg) 19 | ![](http://wx3.sinaimg.cn/orj360/6d860193gy1fdfjonkkf8j20pm0zkdp0.jpg) 20 | ![](http://wx3.sinaimg.cn/orj360/d5d006e1ly1fdei7fyyfvj22qf2qfb2a.jpg) 21 | ![](http://wx4.sinaimg.cn/thumb150/6d860193gy1fdddrnoypaj20qo0zk784.jpg) 22 | ![](http://wx3.sinaimg.cn/thumb150/6d860193gy1fdddroxw8jj20jz0zk77l.jpg) 23 | ![](http://wx2.sinaimg.cn/thumb150/6d860193gy1fdddrmnuifj20zk0qoqbj.jpg) 24 | ![](http://wx4.sinaimg.cn/orj360/6d860193gy1fd9jcwed6tj20do0820tx.jpg) 25 | ![](http://wx3.sinaimg.cn/orj360/6d860193gy1fd8um0awd2j20hs2li7j0.jpg) 26 | ![](http://wx2.sinaimg.cn/orj360/6d860193gy1fd7d5zn0zgj20pm0zk42v.jpg) 27 | ![](http://wx2.sinaimg.cn/orj360/006uu9RLly1fcx1zyjj95j31hc0u01d8.jpg) 28 | ![](http://wx4.sinaimg.cn/thumb150/6d860193gy1fcvqs59djwj20zk0qo0vs.jpg) 29 | ![](http://wx3.sinaimg.cn/thumb150/6d860193gy1fcvqs64qrhj20zk0qogo7.jpg) 30 | ![](http://ww3.sinaimg.cn/orj480/006gQC4sjw1fbcbwbrhqqj30no0dc0sp.jpg) 31 | ![](http://wx2.sinaimg.cn/orj360/6d860193gy1fcp8qmc0sfj20zk0qodl8.jpg) 32 | ![](http://wx3.sinaimg.cn/orj360/6d860193gy1fcgn8t0eebj20go06mtaf.jpg) 33 | ![](http://wx1.sinaimg.cn/thumb150/6d860193gy1fcdk9pscx0j20zk0qote5.jpg) 34 | ![](http://wx4.sinaimg.cn/thumb150/6d860193gy1fcdk9tsw7wj20zk0qoahu.jpg) 35 | ![](http://wx4.sinaimg.cn/thumb150/6d860193gy1fcdk9my8cbj20zk0qogu5.jpg) 36 
| ![](http://wx4.sinaimg.cn/thumb150/6d860193gy1fcdk9wgliij20zk0qon6r.jpg) 37 | ![](http://wx4.sinaimg.cn/thumb150/6d860193gy1fcb5dvdd1vj20dc0dc40a.jpg) 38 | ![](http://wx4.sinaimg.cn/thumb150/6d860193gy1fcb5dve424j20rs0if3zv.jpg) 39 | ![](http://wx4.sinaimg.cn/thumb150/6d860193gy1fc6huefgq7j20qo0zkq7z.jpg) 40 | ![](http://wx3.sinaimg.cn/thumb150/6d860193gy1fc6hufa13hj20qo0zkmzt.jpg) 41 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1fbxb3u1bwhj20zk0qodll.jpg) 42 | ![](http://ww3.sinaimg.cn/thumb150/6d860193jw1fbxb3yk6vzj20zk0qoq8m.jpg) 43 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1fbxb3rul5hj20zk0qote1.jpg) 44 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1fbuxe0vc0zj20zk0notfs.jpg) 45 | ![](http://ww2.sinaimg.cn/thumb150/6d860193jw1fbuxe4ltplj20zk0non3j.jpg) 46 | ![](http://ww3.sinaimg.cn/thumb150/6d860193jw1fbuxecbi82j20no0zk440.jpg) 47 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1fbuxeggb6cj20no0zkjwl.jpg) 48 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1fbuxdwrrerj20zk0non3g.jpg) 49 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1fbuxejw1t6j20no0zkgqm.jpg) 50 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1fbuxem3ceuj20no0zkahb.jpg) 51 | ![](http://wx3.sinaimg.cn/orj360/6d860193ly1fbn63pt6h6j20m80gotav.jpg) 52 | ![](http://wx1.sinaimg.cn/orj360/6d860193ly1fbmwtej4jvj20zk0qoguc.jpg) 53 | ![](http://ww3.sinaimg.cn/thumb150/6d860193jw1fbh31pfbztj20zk0nqadg.jpg) 54 | ![](http://ww3.sinaimg.cn/thumb150/6d860193jw1fbh31qhygxj20zk0nq437.jpg) 55 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1fbclf34l1lj20jz0zk0u6.jpg) 56 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1fbclf4q54qj20jz0zk3zn.jpg) 57 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1fbbg6z8z39j20zk0jzjsq.jpg) 58 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1fbbg6ye5nxj20zk0jz0ty.jpg) 59 | ![](http://wx4.sinaimg.cn/thumb150/d5d006e1ly1fbaa3i2wnsj23lc2cwe83.jpg) 60 | ![](http://wx1.sinaimg.cn/thumb150/d5d006e1ly1fbaa841z9ij22g03o0qv9.jpg) 61 | 
![](http://ww3.sinaimg.cn/thumb150/6d860193jw1fbaf2i1488j20kg0gc78o.jpg) 62 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1fbaf2gybu8j20ay0gotar.jpg) 63 | ![](http://ww1.sinaimg.cn/orj360/6d860193jw1fb8vmsvalzj20jz0zkjx5.jpg) 64 | ![](http://wx1.sinaimg.cn/thumb150/6d860193ly1fb6pjh9s5yj20m80go3zi0.jpg) 65 | ![](http://wx1.sinaimg.cn/thumb150/6d860193ly1fb6pjjro4dj21kw11xwno0.jpg) 66 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1fb59723bp2j20jz0zkjtj.jpg) 67 | ![](http://ww3.sinaimg.cn/thumb150/6d860193jw1fb5972ww9cj20jz0zk0xq.jpg) 68 | ![](http://ww4.sinaimg.cn/orj360/6d860193jw1fb2bcc9wc9j20qo0ystl3.jpg) 69 | ![](http://ww3.sinaimg.cn/thumb150/6d860193jw1fb110y1anzj20ku0rs0zv.jpg) 70 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1fb110ebmjqj20jz0zkmzo.jpg) 71 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1fb10zru2bjj20ku0rsgso.jpg) 72 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1fb10z9t59ij20jz0zkacf.jpg) 73 | ![](http://ww1.sinaimg.cn/orj360/6d860193jw1fays4my5huj20k00k0q45.jpg) 74 | ![](http://ww3.sinaimg.cn/thumb150/6d860193jw1fau9lkwr17j20zk0jz79j.jpg) 75 | ![](http://ww3.sinaimg.cn/thumb150/6d860193jw1fau9lm7t8dj20kg0rm0vj.jpg) 76 | ![](http://ww4.sinaimg.cn/orj360/6d860193jw1fastftne0nj20hs0cqdjg.jpg) 77 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1farucieni8j20rs0kuwjg.jpg) 78 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1farucjabvaj20kw0ku40h.jpg) 79 | ![](http://g4.tdimg.com/94eaab5540f35c621bd7f7b785267209/p_2.jpg) 80 | ![](http://ww4.sinaimg.cn/orj360/6d860193jw1farexntof3j20p80zkwgx.jpg) 81 | ![](http://wsacdn4.miaopai.com/stream/50C-MdI2xKkM~f7y~Xw1sg___tmp_11_318_.jpg) 82 | ![](http://ww3.sinaimg.cn/thumb150/6d860193jw1fadypfydstj20zk0jzjuy.jpg) 83 | ![](http://ww2.sinaimg.cn/thumb150/6d860193jw1fadypn153wj20ox0z1wlg.jpg) 84 | ![](http://ww3.sinaimg.cn/thumb150/006r96Nvjw1fabccg5ckkj30k00qotb8.jpg) 85 | ![](http://ww2.sinaimg.cn/thumb150/006r96Nvjw1fabccq2pwsj30k00qowg6.jpg) 86 | 
![](http://ww1.sinaimg.cn/orj480/736f0c7ejw1fabnucu59vj20dc0nkq45.jpg) 87 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1fa5kl5ljasj20zk0rikjl.jpg) 88 | ![](http://ww3.sinaimg.cn/thumb150/6d860193jw1fa5kkuaqfkj20zk0r2b29.jpg) 89 | ![](http://ww2.sinaimg.cn/thumb150/6d860193jw1fa5kl9p0nsj20zk0rae81.jpg) 90 | ![](http://ww2.sinaimg.cn/orj360/6d860193jw1f9xssglnw6j205k05kglm.jpg) 91 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1f9vazbkb0zj218a1w0npf.jpg) 92 | ![](http://ww3.sinaimg.cn/thumb150/6d860193jw1f9vazlf8ybj21w019ce83.jpg) 93 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1f9vazv5l3uj218j1w0hdv.jpg) 94 | ![](http://ww3.sinaimg.cn/thumb150/6d860193jw1f9vaz2to0hj21w019cu0y.jpg) 95 | ![](http://ww2.sinaimg.cn/thumb150/6d860193jw1f9vb056mvaj21801w0hdv.jpg) 96 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1f9vb0c3kfij21w019c4qr.jpg) 97 | ![](http://ww2.sinaimg.cn/thumb150/d5d006e1jw1f9u28s3ar0j25og3sgqv7.jpg) 98 | ![](http://ww3.sinaimg.cn/thumb150/d5d006e1jw1f9u3rs3agqj21w019ee83.jpg) 99 | ![](http://ww4.sinaimg.cn/thumb150/d5d006e1jw1f9u28oxs7qj218g0tne81.jpg) 100 | ![](http://ww2.sinaimg.cn/thumb150/d5d006e1jw1f9u292vy56j22p81sw7wl.jpg) 101 | ![](http://ww1.sinaimg.cn/thumb150/d5d006e1jw1f9u298fc9ej22p81swhdx.jpg) 102 | ![](http://ww3.sinaimg.cn/thumb150/d5d006e1jw1f9u3rk9m15j22p81sw1l2.jpg) 103 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1f9sztwtpetj21w01917nb.jpg) 104 | ![](http://ww2.sinaimg.cn/thumb150/6d860193jw1f9sztyesw2j21w0191x2s.jpg) 105 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1f9szu0bpvwj21w0191tvh.jpg) 106 | ![](http://ww2.sinaimg.cn/thumb150/6d860193jw1f9szu360laj218x1tv1kx.jpg) 107 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1f9szu3xfg6j20q60zkqfc.jpg) 108 | ![](http://ww2.sinaimg.cn/thumb150/6d860193jw1f9sztw8bekj218x1tv4qp.jpg) 109 | ![](http://ww3.sinaimg.cn/thumb150/6d860193jw1f9szu6v6ifj218x1tvb20.jpg) 110 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1f9szu94rmcj218x1tv4qp.jpg) 111 | 
![](http://ww3.sinaimg.cn/thumb150/6d860193jw1f9szuabt9hj218x1tvh0s.jpg) 112 | ![](http://ww1.sinaimg.cn/orj360/6d860193jw1f9sxq1f7spj206k06dt8r.jpg) 113 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1f9qhsoywhtj218x1tv7wh.jpg) 114 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1f9qhsqpecvj218x1tvqt5.jpg) 115 | ![](http://ww3.sinaimg.cn/thumb150/6d860193jw1f9qhsms96nj218x1tv4qp.jpg) 116 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1f9qhsspyehj218x1tvb29.jpg) 117 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1f9qhstlzhyj21w01917dt.jpg) 118 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1f9qhsvpa7oj218x1tvhdt.jpg) 119 | ![](http://ww4.sinaimg.cn/orj360/6d860193jw1f9pj2i9v57j211q0qogpo.jpg) 120 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1f9pc9bfynlj20rb17g4qp.jpg) 121 | ![](http://ww2.sinaimg.cn/thumb150/6d860193jw1f9pc9c9b40j20ab0dpju8.jpg) 122 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1f9pc9cznjuj20j60srjv2.jpg) 123 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1f9pc99hkbhj20jg0czjzz.jpg) 124 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1f9pc9dinrdj20jj0d1gpe.jpg) 125 | ![](http://ww2.sinaimg.cn/thumb150/6d860193jw1f9pc9ehfdbj20jc0d1k1d.jpg) 126 | ![](http://ww3.sinaimg.cn/thumb150/6d860193jw1f9o58496z6j21w0191e82.jpg) 127 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1f9o586h09rj21kw16oqd9.jpg) 128 | ![](http://ww3.sinaimg.cn/thumb150/6d860193jw1f9o57j9hy9j21w0191e82.jpg) 129 | ![](http://ww3.sinaimg.cn/thumb150/6d860193jw1f9o5934lr5j218x1tv7wj.jpg) 130 | ![](http://ww3.sinaimg.cn/thumb150/6d860193jw1f9o59eof5dj218x1tvkjm.jpg) 131 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1f9o59i2jomj218x1tvu0x.jpg) 132 | ![](http://ww2.sinaimg.cn/thumb150/6d860193jw1f9n6aq6j2xj24c02w0b2c.jpg) 133 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1f9n6auvus8j24c02w07wj.jpg) 134 | ![](http://ww2.sinaimg.cn/thumb150/6d860193jw1f9n6b0sg86j24c02w0npg.jpg) 135 | ![](http://ww3.sinaimg.cn/thumb150/6d860193jw1f9n6b5q683j24c02w0hdw.jpg) 136 | 
![](http://ww3.sinaimg.cn/thumb150/6d860193jw1f9n6b6metuj20ne0fl770.jpg) 137 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1f9n6akkr8rj24c02w0hdv.jpg) 138 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1f9n6b7golej21400qoqbe.jpg) 139 | ![](http://ww3.sinaimg.cn/thumb150/6d860193jw1f9n6bc8pz8j24c02w0npg.jpg) 140 | ![](http://ww2.sinaimg.cn/thumb150/6d860193jw1f9n6birtivj23o02g0e88.jpg) 141 | ![](http://ww3.sinaimg.cn/orj480/736f0c7ejw1f9m33g2zvhj20no0dcmyy.jpg) 142 | ![](http://r1.ykimg.com/0542010853CA36A76A0A4A298C1E9EBA) 143 | ![](http://ww3.sinaimg.cn/orj360/0068Rj1Ljw1f9jjadhe26j30qo0k040y.jpg) 144 | ![](http://ww4.sinaimg.cn/orj360/6d860193jw1f9g4dpmbzyj20zk0qo462.jpg) 145 | ![](http://ww3.sinaimg.cn/orj480/d5d006e1jw1f9bbftda7ij20no0dc3yh.jpg) 146 | ![](http://ww2.sinaimg.cn/orj360/6d860193jw1f9bb4qepakj20zk0jzgq0.jpg) 147 | ![](http://ww1.sinaimg.cn/orj360/006vf51agw1f94v8qzcpgj30hs0dcabt.jpg) 148 | ![](http://ww1.sinaimg.cn/orj480/736f0c7ejw1f92598vottj20nk0dcdi8.jpg) 149 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1f90cjdn876j20jz0zk0w4.jpg) 150 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1f90cj86p29j20jz0zkwfl.jpg) 151 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1f90cjm27a9j20zk0jzgno.jpg) 152 | ![](http://ww2.sinaimg.cn/orj360/6d860193jw1f8zr0lpbvbj20du07vgmx.jpg) 153 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1f8ymfs5v8jj20zk0qodkf.jpg) 154 | ![](http://ww3.sinaimg.cn/thumb150/6d860193jw1f8ymfqn9guj20qo0zkgon.jpg) 155 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1f8xmpm4uj6j20sg0lcdqq.jpg) 156 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1f8xmpdkywhj20kg0f5gqe.jpg) 157 | ![](http://ww3.sinaimg.cn/orj480/736f0c7ejw1f8v5rn891tj20no0dcjt8.jpg) 158 | ![](http://ww1.sinaimg.cn/orj360/6d860193jw1f8t4pz4p1oj20ci0m80vy.jpg) 159 | ![](http://ww4.sinaimg.cn/orj480/736f0c7ejw1f8l3gy4d1sj20g008wjsf.jpg) 160 | ![](http://ww2.sinaimg.cn/orj360/d5d006e1jw1f8kq738amkj21kw11xjzf.jpg) 161 | 
![](http://ww4.sinaimg.cn/orj360/61ecbb3djw1f8ifq682w2j20jz0zj3zm.jpg) 162 | ![](http://ww4.sinaimg.cn/orj360/6d860193jw1f8jo4vsqenj20jg0zkn0q.jpg) 163 | ![](http://ww2.sinaimg.cn/orj360/6d860193jw1f81hscp9luj20zk0jzn17.jpg) 164 | ![](http://ww4.sinaimg.cn/orj360/6d860193jw1f7y274i8gej20nb0jzgpp.jpg) 165 | ![](http://ww2.sinaimg.cn/orj360/6d860193jw1f7ws5ohrgsj20zk0jzdj4.jpg) 166 | ![](http://ww4.sinaimg.cn/orj360/6d860193gw1f7tkes7bqsj20t10qo48i.jpg) 167 | ![](http://ww1.sinaimg.cn/thumb150/6d860193gw1f7pzaryc05j20u01hc4in.jpg) 168 | ![](http://ww2.sinaimg.cn/thumb150/6d860193gw1f7pzaraxqcj20qo0wgjv0.jpg) 169 | ![](http://ww3.sinaimg.cn/thumb150/6d860193gw1f7pzat6vj7j20yq1abqbz.jpg) 170 | ![](http://ww1.sinaimg.cn/orj360/6d860193jw1f7peqwxnr7j20qo1bfdzl.jpg) 171 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1f7ope6ghe7j20go0b5dgs.jpg) 172 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1f7ope6s67qj20dw08wt9z.jpg) 173 | ![](http://ww2.sinaimg.cn/thumb150/6d860193jw1f7ope6za8bj20sg0lcdht.jpg) 174 | ![](http://ww3.sinaimg.cn/thumb150/6d860193jw1f7ope75wksj20ia06iwex.jpg) 175 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1f7ope7jjnbj20jz0zkq4e.jpg) 176 | ![](http://ww2.sinaimg.cn/thumb150/6d860193jw1f7kbyvjfp5j20hs0hsn0m.jpg) 177 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1f7kbyvwm17j20gl0cy75i.jpg) 178 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1f7kbyvrj4pj20rs0e33zl.jpg) 179 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1f7kbyuzqyhj20rd0qo452.jpg) 180 | ![](http://ww3.sinaimg.cn/thumb150/6d860193jw1f7jqmvabxlj20dt0kt404.jpg) 181 | ![](http://ww3.sinaimg.cn/thumb150/6d860193jw1f7jqmwe3ypj20le0zk77c.jpg) 182 | ![](http://ww3.sinaimg.cn/thumb150/6d860193jw1f7iveap32oj20dw099764.jpg) 183 | ![](http://ww3.sinaimg.cn/thumb150/6d860193jw1f7ivebhi33j20g00sgq56.jpg) 184 | ![](http://ww3.sinaimg.cn/thumb150/6d860193jw1f7ive9stapj20sg0ixq9s.jpg) 185 | ![](http://ww2.sinaimg.cn/thumb150/6d860193jw1f7ivec96b8j20sg0d440k.jpg) 186 | 
![](http://ww3.sinaimg.cn/orj360/6d860193jw1f7gtfw6iinj20zk0k0q68.jpg) 187 | ![](http://ww2.sinaimg.cn/orj360/6d860193gw1f7gnnk7e0dj20zk0qoagl.jpg) 188 | ![](http://ww2.sinaimg.cn/orj360/6d860193jw1f7e7067075j20be0fz0td.jpg) 189 | ![](http://ww2.sinaimg.cn/orj360/6d860193jw1f7d0vp1hv2j20qo0zkgon.jpg) 190 | ![](http://ww4.sinaimg.cn/orj360/6d860193jw1f79w976ky9j20jz0zk0xg.jpg) 191 | ![](http://ww4.sinaimg.cn/thumb150/d5d006e1jw1f78dngemdaj20et0m876e.jpg) 192 | ![](http://ww4.sinaimg.cn/thumb150/d5d006e1jw1f78dnzxgwrj21f01w0npg.jpg) 193 | ![](http://ww3.sinaimg.cn/thumb150/d5d006e1jw1f78dok6blpj21f01w0qv8.jpg) 194 | ![](http://ww3.sinaimg.cn/orj360/6d860193jw1f761tlox92j20jz0zkgoj.jpg) 195 | ![](http://ww3.sinaimg.cn/orj360/6d860193jw1f6z4eb6mstj20qo0eyt9r.jpg) 196 | ![](http://ww2.sinaimg.cn/orj360/d5d006e1jw1f6y7f1krslj20k10c1gty.jpg) 197 | ![](http://ww1.sinaimg.cn/orj360/6d860193gw1f6wq5t5s7dj20jz0zkabb.jpg) 198 | ![](http://ww4.sinaimg.cn/orj480/736f0c7ejw1f51xg9xgoej20bu06kgm6.jpg) 199 | ![](http://ww3.sinaimg.cn/thumb150/6d860193jw1f6vrld4k9wj20zk0jz787.jpg) 200 | ![](http://ww2.sinaimg.cn/thumb150/6d860193jw1f6vrlbuuetj20zk0jztb0.jpg) 201 | ![](http://ww3.sinaimg.cn/orj360/6d860193jw1f6uqu19n6sj20zk0jzwjn.jpg) 202 | ![](http://ww4.sinaimg.cn/thumb150/6d860193gw1f6uflc3gzfj20jz0zkdhn.jpg) 203 | ![](http://ww3.sinaimg.cn/thumb150/6d860193gw1f6ufl8fjpzj20zk0jzwgd.jpg) 204 | ![](http://ww1.sinaimg.cn/orj480/736f0c7ejw1f6sd4wsov7j209s0hsjsf.jpg) 205 | ![](http://ww2.sinaimg.cn/thumb150/6d860193gw1f6refhr3exj20qo0zkq8v.jpg) 206 | ![](http://ww3.sinaimg.cn/thumb150/6d860193gw1f6refqvpwmj20zk0nqgq9.jpg) 207 | ![](http://ww1.sinaimg.cn/orj480/736f0c7ejw1f6ow043j54j20bu080wfa.jpg) 208 | ![](http://ww1.sinaimg.cn/orj360/6d860193jw1f6ote9meh5j20qo0zktd6.jpg) 209 | ![](http://ww3.sinaimg.cn/orj360/6d860193jw1f6mv1ix5ldj20ey0qoq3i.jpg) 210 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1f6i2mk4rh9j20hs0notab.jpg) 211 | 
![](http://ww4.sinaimg.cn/thumb150/6d860193jw1f6i2mkxo02j20jz0zk75v.jpg) 212 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1f6i2mmmm9aj20ko0pojx6.jpg) 213 | ![](http://ww3.sinaimg.cn/thumb150/d5d006e1jw1f6gv4asdqaj20fb0hd40t.jpg) 214 | ![](http://ww1.sinaimg.cn/thumb150/d5d006e1jw1f6gv4bx0rbj20fb0hdtc0.jpg) 215 | ![](http://ww1.sinaimg.cn/thumb150/d5d006e1jw1f6gv4cw3b9j20fb0hddhp.jpg) 216 | ![](http://ww4.sinaimg.cn/thumb150/d5d006e1jw1f6gv49lnwtj20zk0qodlc.jpg) 217 | ![](http://ww3.sinaimg.cn/thumb150/d5d006e1jw1f6gv569ibsj232o21sx6p.jpg) 218 | ![](http://ww1.sinaimg.cn/thumb150/d5d006e1gw1f6gv5k3c4dj20zk0qond2.jpg) 219 | ![](http://ww3.sinaimg.cn/thumb150/d5d006e1jw1f66d0li4oij21f014sh2i.jpg) 220 | ![](http://ww1.sinaimg.cn/thumb150/d5d006e1jw1f66d0opz5ej21ez1w0tih.jpg) 221 | ![](http://ww3.sinaimg.cn/thumb150/d5d006e1jw1f66d0mgmqsj20zk0nqgpx.jpg) 222 | ![](http://ww4.sinaimg.cn/thumb150/d5d006e1jw1f63pa7quvtj21w01f0u10.jpg) 223 | ![](http://ww1.sinaimg.cn/thumb150/d5d006e1jw1f63pahjx55j21f01w0x6s.jpg) 224 | ![](http://ww4.sinaimg.cn/thumb150/d5d006e1jw1f63p9q31hbj21f01w0npg.jpg) 225 | ![](http://ww3.sinaimg.cn/thumb150/d5d006e1jw1f63pat2z6vj21f01w0b2c.jpg) 226 | ![](http://ww4.sinaimg.cn/thumb150/d5d006e1jw1f63pbsf7c5j21f01w07wk.jpg) 227 | ![](http://ww1.sinaimg.cn/thumb150/d5d006e1jw1f63pc23ph9j21f01w0e84.jpg) 228 | ![](http://ww3.sinaimg.cn/thumb150/d5d006e1jw1f63pcd5ddyj21f01w04qs.jpg) 229 | ![](http://ww4.sinaimg.cn/thumb150/d5d006e1jw1f63pcn3bpsj21f01w0qv8.jpg) 230 | ![](http://ww1.sinaimg.cn/thumb150/d5d006e1jw1f63pcxofd3j21w01f01l1.jpg) 231 | ![](http://r3.ykimg.com/054203085788ED466A0A4704DCAD2F37) 232 | ![](http://ww4.sinaimg.cn/orj480/736f0c7ejw1f5tnxhw9sxj20g80dcdgy.jpg) 233 | ![](http://ww2.sinaimg.cn/orj360/6d860193gw1f5ov7on9ckj20qo0zk0x9.jpg) 234 | ![](http://ww4.sinaimg.cn/orj360/6d860193jw1f5o05kmvaaj20go0m8gnv.jpg) 235 | ![](http://ww3.sinaimg.cn/thumb150/d5d006e1jw1f5lm18bx9qj21f01w0h7q.jpg) 236 | 
![](http://ww2.sinaimg.cn/thumb150/d5d006e1jw1f5lm1cuz8tj21kw16o14c.jpg) 237 | ![](http://ww2.sinaimg.cn/thumb150/d5d006e1jw1f5lm1h3x01j21f01w0qob.jpg) 238 | ![](http://ww3.sinaimg.cn/thumb150/d5d006e1jw1f5lm1mkvacj21f01w0h72.jpg) 239 | ![](http://ww2.sinaimg.cn/thumb150/6d860193jw1f5hpjlzgjzj20u00gwgpx.jpg) 240 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1f5hpjmves2j20u00gwwj2.jpg) 241 | ![](http://ww2.sinaimg.cn/thumb150/6d860193jw1f5hpjnyzhhj20u00gwn1p.jpg) 242 | ![](http://ww2.sinaimg.cn/thumb150/6d860193jw1f5hpjla8ytj20u00gwdk4.jpg) 243 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1f5hppy5cxnj21w01f04qt.jpg) 244 | ![](http://ww2.sinaimg.cn/thumb150/7f9205bdgw1f5fzt5hvsij20m80gowgt.jpg) 245 | ![](http://ww1.sinaimg.cn/thumb150/7f9205bdgw1f5fzt60js2j20zk0qo78m.jpg) 246 | ![](http://ww4.sinaimg.cn/thumb150/7f9205bdgw1f5fzt6j8pnj20qo0zk0xj.jpg) 247 | ![](http://ww2.sinaimg.cn/thumb150/d5d006e1jw1f5ei35l08uj21f01w01l0.jpg) 248 | ![](http://ww1.sinaimg.cn/thumb150/d5d006e1jw1f5ei3ac8v9j21f01w0x6q.jpg) 249 | ![](http://ww4.sinaimg.cn/thumb150/d5d006e1jw1f5ei30r53vj20k00zkwg9.jpg) 250 | ![](http://ww1.sinaimg.cn/orj360/6d860193jw1f5ek950oxjj20ci0m8gnb.jpg) 251 | ![](http://ww1.sinaimg.cn/orj360/6d860193jw1f5dpjhcatij20zk0qogqt.jpg) 252 | ![](http://ww1.sinaimg.cn/orj480/736f0c7ejw1f5bxg920alj20bu06k3yt.jpg) 253 | ![](http://ww1.sinaimg.cn/orj480/736f0c7ejw1f5a1b7541hj20go0dcjsy.jpg) 254 | ![](http://ww2.sinaimg.cn/orj360/6d860193jw1f55b9gkbdsj20rs0ijad8.jpg) 255 | ![](http://ww4.sinaimg.cn/thumb150/006gQC4sgw1f51p1yld12j30jh0rsn84.jpg) 256 | ![](http://ww1.sinaimg.cn/thumb150/006gQC4sgw1f51p1viri7j316v1odwqw.jpg) 257 | ![](http://ww2.sinaimg.cn/thumb150/006gQC4sgw1f51p1w9y1kj31ar1arjws.jpg) 258 | ![](http://ww2.sinaimg.cn/orj360/6d860193jw1f525tjxrs5j20rs0ijwh2.jpg) 259 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1f51rs0osb0j20zk0ns0wd.jpg) 260 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1f51rrzgglej20h00qomz4.jpg) 261 | 
![](http://ww2.sinaimg.cn/thumb150/6d860193jw1f51rs08fp4j20pz0yogp1.jpg) 262 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1f51rs11ayyj20rs0ku0un.jpg) 263 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1f51rrztze0j20qo0zkmz9.jpg) 264 | ![](http://ww4.sinaimg.cn/orj480/736f0c7ejw1f4wdb08ldzj20bu06ot8s.jpg) 265 | ![](http://ww2.sinaimg.cn/orj480/d5d006e1jw1f4mxbo578kj20no0dc3yh.jpg) 266 | ![](http://img4.yytcdn.com/video/mv/160608/2591243/-M-5b0b72a3e96763cd3752cdb0cdf1ba06_240x135.jpg) 267 | ![](http://ww1.sinaimg.cn/orj360/6d860193jw1f4p4qp67plj20cn0m8wh0.jpg) 268 | ![](http://ww1.sinaimg.cn/orj360/d5d006e1jw1f4o407mnajj20k00l0q6p.jpg) 269 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1f4kpv10nfyj20k00qotbk.jpg) 270 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1f4kpv26gk6j20k00qo770.jpg) 271 | ![](http://ww2.sinaimg.cn/thumb150/6d860193jw1f4jlrv0tpbj20sq0zk42y.jpg) 272 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1f4jlrvjrzvj20qo0zkjxm.jpg) 273 | ![](http://ww3.sinaimg.cn/orj360/d5d006e1jw1f4jfuxttrzj20k00j4dk4.jpg) 274 | ![](http://ww2.sinaimg.cn/orj360/d5d006e1jw1f4h7l5j4rlj20k00bkdj8.jpg) 275 | ![](http://ww2.sinaimg.cn/orj480/736f0c7ejw1f4g078gbcxj20no0dcmzb.jpg) 276 | ![](http://ww2.sinaimg.cn/orj360/d5d006e1jw1f4dob4ick4j20om0omk9k.jpg) 277 | ![](http://ww4.sinaimg.cn/orj360/6d860193jw1f4costesqgj20zk088gn1.jpg) 278 | ![](http://ww3.sinaimg.cn/orj360/d5d006e1jw1f4abtkrf87j20rs0rsk7j.jpg) 279 | ![](http://ww1.sinaimg.cn/orj360/d5d006e1jw1f48p3qic7ej20n20mtn1v.jpg) 280 | ![](http://ww3.sinaimg.cn/orj360/6d860193gw1f48z1xxt53j20gf0i2t9t.jpg) 281 | ![](http://ww1.sinaimg.cn/orj360/6eed7cf4jw1f3w970ldchj20u00u0whn.jpg) 282 | ![](http://ww1.sinaimg.cn/orj480/736f0c7ejw1f3tvwu79p0j20no0dcmzt.jpg) 283 | ![](http://ww4.sinaimg.cn/orj360/d5d006e1jw1f3rtlmtohhj218g0xcnbk.jpg) 284 | ![](http://ww2.sinaimg.cn/thumb150/6d860193jw1f3rfjq5r9wj20qo0zktg0.jpg) 285 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1f3rfjr0b42j20qo0zk7a8.jpg) 286 | 
![](http://ww2.sinaimg.cn/thumb150/6d860193jw1f3jhe0b98qj20zk0qogou.jpg) 287 | ![](http://ww3.sinaimg.cn/thumb150/6d860193jw1f3jhe1bzo9j20qo0zkadh.jpg) 288 | ![](http://ww3.sinaimg.cn/thumb150/6d860193jw1f3jhe2asnyj20qo0zk447.jpg) 289 | ![](http://ww2.sinaimg.cn/thumb150/6d860193jw1f3jhe2xjlwj20qo0zkwja.jpg) 290 | ![](http://ww2.sinaimg.cn/thumb150/6d860193jw1f3jhe43sbpj20qo0zk0we.jpg) 291 | ![](http://ww2.sinaimg.cn/orj360/6d860193jw1f3fsvrgq1gj20hs3vo4l4.jpg) 292 | ![](http://ww1.sinaimg.cn/thumb300/d5d006e1jw9f37uxp6d2mj2050050746.jpg) 293 | ![](http://ww3.sinaimg.cn/orj360/d5d006e1jw1f34kow2vhaj20fk078q3x.jpg) 294 | ![](http://ww2.sinaimg.cn/orj360/d5d006e1jw1f33f4l3ap3j20ho0r9ae3.jpg) 295 | ![](http://ww4.sinaimg.cn/orj360/6d860193jw1f3253r695hj20qo0f00v6.jpg) 296 | ![](http://ww2.sinaimg.cn/orj360/d5d006e1jw1f2ys9pck4yj20qo0x40xf.jpg) 297 | ![](http://ww2.sinaimg.cn/thumb150/d5d006e1jw1f2wj15qh7hj20es0jm7fu.jpg) 298 | ![](http://ww1.sinaimg.cn/thumb150/d5d006e1jw1f2wj16bl8qj20k00i9abp.jpg) 299 | ![](http://ww4.sinaimg.cn/orj360/6d860193jw1f2lzzsbwpij20u215owo3.jpg) 300 | ![](http://ww4.sinaimg.cn/thumb150/006gQC4sjw1f2idf0o212j30zk0qojwa.jpg) 301 | ![](http://ww1.sinaimg.cn/thumb150/006gQC4sjw1f2idf3760aj30qo0zkwlz.jpg) 302 | ![](http://ww4.sinaimg.cn/thumb150/006gQC4sjw1f2idf21f1zj30qo0zk7b1.jpg) 303 | ![](http://ww2.sinaimg.cn/thumb150/006gQC4sjw1f2idezt23hj30qo0zkgrr.jpg) 304 | ![](http://ww1.sinaimg.cn/thumb150/006gQC4sjw1f2idf42purj30qo0zk457.jpg) 305 | ![](http://ww1.sinaimg.cn/orj480/736f0c7ejw1f2h6mx79sgj20bu0bs3z5.jpg) 306 | ![](http://ww2.sinaimg.cn/orj360/6d860193jw1f29d8tl2cxj20zk0qojv3.jpg) 307 | ![](http://ww2.sinaimg.cn/orj360/705eaf5egw1f1kwomeua0j20sg0iz3ze.jpg) 308 | ![](http://ww1.sinaimg.cn/orj360/6d860193jw1f1l6nfs125j20hs0fuwic.jpg) 309 | ![](http://ww3.sinaimg.cn/thumb150/6d860193gw1f1bxtp99rmj20hs0vkabk.jpg) 310 | ![](http://ww2.sinaimg.cn/thumb150/6d860193gw1f1bxtqfo9fj20hs0vkdho.jpg) 311 | 
![](http://ww3.sinaimg.cn/thumb150/6d860193gw1f1bxtqw6lwj20xc18ggq0.jpg) 312 | ![](http://ww2.sinaimg.cn/thumb150/6d860193gw1f1bxtyxr7hj20hs0vkjt8.jpg) 313 | ![](http://ww1.sinaimg.cn/orj360/6d860193jw1f0zv5ignydj218g0vfafz.jpg) 314 | ![](http://ww3.sinaimg.cn/thumb150/6d860193jw1f04ugwjzvzj20hs0hs0vg.jpg) 315 | ![](http://ww2.sinaimg.cn/thumb150/6d860193jw1f04ugxairyj20dc0hsjtp.jpg) 316 | ![](http://ww3.sinaimg.cn/thumb150/6d860193jw1f04ugy4g1zj20qo0hjwig.jpg) 317 | ![](http://ww3.sinaimg.cn/thumb150/6d860193jw1f04ugz0bs6j20cr0hsabj.jpg) 318 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1f04ugzu2d4j20hs0hswgy.jpg) 319 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1f04uh0lb47j20hs0dc40n.jpg) 320 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1f04uh1dmsqj20hs0dc40r.jpg) 321 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1f04uh25n38j20hs0dcwhb.jpg) 322 | ![](http://ww2.sinaimg.cn/thumb150/6d860193jw1f04ugvlqg8j20hs0dcjtk.jpg) 323 | ![](http://r1.ykimg.com/0542010156938E116A0A440B071EDC18) 324 | ![](http://ww3.sinaimg.cn/thumb150/6d860193jw1ezvwdm6hgnj218g0u44c2.jpg) 325 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1ezvwdqtbwsj218g0tmtle.jpg) 326 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1ezvwdhzzwnj20wc0lythk.jpg) 327 | ![](http://ww1.sinaimg.cn/orj360/6d860193gw1ezk7v9vstyj20vk0hswk4.jpg) 328 | ![](http://ww4.sinaimg.cn/thumb150/d5d006e1gw1ezizouh8xbj21f01w07wh.jpg) 329 | ![](http://ww4.sinaimg.cn/thumb150/d5d006e1gw1ezizoydyggj21f01w0e81.jpg) 330 | ![](http://ww1.sinaimg.cn/thumb150/d5d006e1gw1ezizotdsquj21f01w0kjl.jpg) 331 | ![](http://ww1.sinaimg.cn/thumb150/d5d006e1gw1ezizov7xtfj21e71w0qv5.jpg) 332 | ![](http://ww2.sinaimg.cn/thumb150/d5d006e1gw1ezizq2ej8gj21kw128nby.jpg) 333 | ![](http://ww4.sinaimg.cn/thumb150/d5d006e1gw1ezizq26oumj21kw2dckdn.jpg) 334 | ![](http://ww4.sinaimg.cn/thumb150/d5d006e1gw1ezizqk0cvgj21kw2dcb29.jpg) 335 | ![](http://ww3.sinaimg.cn/thumb150/d5d006e1gw1ezizqp9bl3j21kw2dc4qp.jpg) 336 | 
![](http://ww3.sinaimg.cn/thumb150/d5d006e1gw1ezizqokzegj21kw2dc1kx.jpg) 337 | ![](http://ww2.sinaimg.cn/orj360/d5d006e1gw1ezbuqtx44gj20wc0lkqbe.jpg) 338 | ![](http://ww4.sinaimg.cn/orj360/6d860193jw1ez9qqshhn4j20pn0zkgr9.jpg) 339 | ![](http://ww4.sinaimg.cn/orj360/6d860193gw1ez6agbj5f6j20zk0qoq8n.jpg) 340 | ![](http://ww3.sinaimg.cn/thumb150/6d860193gw1ez4g33nv82j218g0xcwpg.jpg) 341 | ![](http://ww4.sinaimg.cn/thumb150/6d860193gw1ez4g34qfm2j20vk0hstcv.jpg) 342 | ![](http://ww3.sinaimg.cn/thumb150/6d860193gw1ez4g35hcicj20vk0hs78u.jpg) 343 | ![](http://ww2.sinaimg.cn/thumb150/6d860193gw1ez4g36m12cj20hs0vk0w7.jpg) 344 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1eyzcpozcjfj20dc0hstb7.jpg) 345 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1eyzcprcpiqj20dc0hsq5a.jpg) 346 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1eyzcpso86qj20hs0dcjtt.jpg) 347 | ![](http://ww4.sinaimg.cn/orj360/d5d006e1gw1eyvwpsambvj20ne0gjn1y.jpg) 348 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1eyidqcwmu2j20ic096jsn.jpg) 349 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1eyidqetyivj20hs0dzad0.jpg) 350 | ![](http://ww2.sinaimg.cn/thumb150/6d860193jw1eyidqfywarj20m80etq6l.jpg) 351 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1eyidqhk56qj20m80etq5d.jpg) 352 | ![](http://ww2.sinaimg.cn/thumb150/6d860193jw1eyidqbovukj20m80e9diq.jpg) 353 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1eyidqixrmoj20hs0dctba.jpg) 354 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1eyidqkysvxj20hs0dcjtn.jpg) 355 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1eyidqlvdwoj20hs0dcacj.jpg) 356 | ![](http://ww1.sinaimg.cn/orj360/6d860193jw1exxfe8l84fj20zk0g6gt6.jpg) 357 | ![](http://ww3.sinaimg.cn/orj360/d5d006e1gw1exx9rga2uxj20m80gqtbc.jpg) 358 | ![](http://ww1.sinaimg.cn/orj360/6d860193jw1exwegep8d8j20xc0m8n2g.jpg) 359 | ![](http://ww1.sinaimg.cn/orj360/6d860193jw1exwe6jyvesj20e80e8wf5.jpg) 360 | ![](http://ww4.sinaimg.cn/orj360/d5d006e1gw1exw47jn7qaj20jg0jgdmz.jpg) 361 | 
![](http://ww2.sinaimg.cn/orj360/d5d006e1jw1exriu2ehk0j20ne0x3do5.jpg) 362 | ![](http://ww2.sinaimg.cn/orj360/0068Rj1Ljw1exq51o59w3j30go0m875e.jpg) 363 | ![](http://ww1.sinaimg.cn/thumb150/6d860193gw1exqb8eerz8j20qo0zkagu.jpg) 364 | ![](http://ww3.sinaimg.cn/thumb150/6d860193gw1exqb8g1sxjj20zk0qotf0.jpg) 365 | ![](http://ww1.sinaimg.cn/thumb150/6d860193gw1exqb8hcoddj20xc18gtf6.jpg) 366 | ![](http://ww1.sinaimg.cn/thumb150/6d860193gw1exqb8jftivj20xc18gwpg.jpg) 367 | ![](http://ww4.sinaimg.cn/thumb150/006gQC4sjw1exmdgconpoj30go0m843y.jpg) 368 | ![](http://ww2.sinaimg.cn/thumb150/006gQC4sgw1exmdldvfenj307u0afgll.jpg) 369 | ![](http://ww3.sinaimg.cn/thumb150/006gQC4sgw1exmdpjb4d8j305p05p3yr.jpg) 370 | ![](http://ww4.sinaimg.cn/orj360/d5d006e1jw1exbgq8ljioj20hs0dcdi6.jpg) 371 | ![](http://ww1.sinaimg.cn/thumb150/6d860193gw1ewtw883qjyj21120kudl5.jpg) 372 | ![](http://ww1.sinaimg.cn/thumb150/6d860193gw1ewtw893dy5j20vk0hs41b.jpg) 373 | ![](http://ww2.sinaimg.cn/thumb150/6d860193gw1ewsonys031j21kw23ugx5.jpg) 374 | ![](http://ww3.sinaimg.cn/thumb150/6d860193gw1ewsonztbklj20go0m8abg.jpg) 375 | ![](http://ww3.sinaimg.cn/thumb150/d5d006e1gw1ewqfypmd1pj21kw2dcnpd.jpg) 376 | ![](http://ww2.sinaimg.cn/thumb150/d5d006e1gw1ewqfyoroboj21kw2dckjl.jpg) 377 | ![](http://ww2.sinaimg.cn/orj360/d5d006e1gw1ewnpttdz31j20kn0ykq9d.jpg) 378 | ![](http://ww1.sinaimg.cn/thumb150/d5d006e1jw1ew7qdezehcj20jg0czmyq.jpg) 379 | ![](http://ww4.sinaimg.cn/thumb150/d5d006e1jw1ew7qdeidovj20jg0czdhc.jpg) 380 | ![](http://ww1.sinaimg.cn/thumb150/6d860193jw1ew5s1uxac0j20sy0sywj7.jpg) 381 | ![](http://ww2.sinaimg.cn/thumb150/6d860193jw1ew5s1tmbqzj20f60ld75m.jpg) 382 | ![](http://ww3.sinaimg.cn/thumb150/d5d006e1gw1ew2xr0c1ixj21kw1fnafu.jpg) 383 | ![](http://ww4.sinaimg.cn/thumb150/d5d006e1gw1ew2xr0att7j21kw1fndjt.jpg) 384 | ![](http://ww3.sinaimg.cn/thumb150/d5d006e1jw1evyv88qr8cj20jg0cz0tk.jpg) 385 | ![](http://ww2.sinaimg.cn/thumb150/d5d006e1jw1evyv89i3y0j20go0b40u8.jpg) 386 | 
![](http://ww2.sinaimg.cn/thumb150/d5d006e1jw1evyv8a2b1pj20go0b4gmb.jpg) 387 | ![](http://ww3.sinaimg.cn/thumb150/d5d006e1jw1evyv8b59ihj207s05kgll.jpg) 388 | ![](http://ww3.sinaimg.cn/thumb150/d5d006e1jw1evyv8bi2l9j2050050wee.jpg) 389 | ![](http://ww4.sinaimg.cn/thumb150/d5d006e1jw1evyv8c0jntj207s0570ss.jpg) 390 | ![](http://ww3.sinaimg.cn/thumb150/d5d006e1jw1evyv8clesnj209q09qq37.jpg) 391 | ![](http://ww1.sinaimg.cn/thumb150/d5d006e1jw1evyv8dace6j207s057q30.jpg) 392 | ![](http://ww1.sinaimg.cn/thumb150/d5d006e1jw1evyv8dy40mj207s07sq35.jpg) 393 | ![](http://ww3.sinaimg.cn/orj360/6d860193jw1evrutkpa4bj20ct0hsacq.jpg) 394 | ![](http://r2.ykimg.com/0542040855E97C506A0A456D5C762FBE) 395 | ![](http://ww4.sinaimg.cn/orj360/6d860193gw1evql5pygddj2102088wgd.jpg) 396 | ![](http://ww4.sinaimg.cn/orj360/d5d006e1gw1ev2focbdacj20um2b4ww7.jpg) 397 | ![](http://ww4.sinaimg.cn/orj360/6d860193jw1euvnovhylkj20fb0hsmz1.jpg) 398 | ![](http://ww4.sinaimg.cn/orj360/d5d006e1gw1euryb2tfhgj20sg0izaem.jpg) 399 | ![](http://ww3.sinaimg.cn/orj360/6d860193jw1eundgr47h7j20xc18gtoc.jpg) 400 | ![](http://ww1.sinaimg.cn/thumb150/d5d006e1gw1eum597q3bbj21kw1sy7wh.jpg) 401 | ![](http://ww1.sinaimg.cn/thumb150/d5d006e1gw1eum59auzdhj20hs0sdgom.jpg) 402 | ![](http://ww1.sinaimg.cn/sq480/736f0c7ejw1eul3b522sbj20hs0a0jt5.jpg) 403 | ![](http://ww2.sinaimg.cn/orj360/6d860193jw1etsbowcjkbj20hs0dcq4a.jpg) 404 | ![](http://ww2.sinaimg.cn/orj360/6d860193jw1etd1swc4qej20hs0dcdjj.jpg) 405 | ![](http://ww4.sinaimg.cn/orj360/6d860193jw1etc7222kwej20hs0dc41c.jpg) 406 | ![](http://ww2.sinaimg.cn/orj360/6d860193jw1etc6c5mkxmj20hs0dc41c.jpg) 407 | ![](http://ww3.sinaimg.cn/orj360/6d860193jw1et9d3iv5dyj20hs0ckwfz.jpg) 408 | ![](http://ww1.sinaimg.cn/thumb150/6baa1682jw1esyw3hprksj20qe0hlwfz.jpg) 409 | ![](http://ww2.sinaimg.cn/thumb150/6baa1682jw1esyw3ia4yfj20qe0hl409.jpg) 410 | ![](http://ww4.sinaimg.cn/thumb150/6baa1682jw1esyw3hvidij20qe0hl77g.jpg) 411 | 
![](http://ww3.sinaimg.cn/thumb150/6baa1682jw1esyw65yvuwj20qe0hlq4r.jpg) 412 | ![](http://ww4.sinaimg.cn/orj360/d5d006e1jw1esy8ef6vn7j20co074aa8.jpg) 413 | ![](http://ww3.sinaimg.cn/thumb150/6d860193jw1eswx1r1424j20dc0hsjug.jpg) 414 | ![](http://ww4.sinaimg.cn/thumb150/6d860193jw1eswx1oeyj6j20qo0fmn26.jpg) 415 | ![](http://ww1.sinaimg.cn/orj360/6d860193jw1estmm6bpuoj20dc0hsmyg.jpg) 416 | ![](http://ww1.sinaimg.cn/orj360/6d860193jw1esasmzd2c9j20i40cydia.jpg) 417 | ![](http://ww3.sinaimg.cn/orj360/d5d006e1jw1es6ap2xpd0j20qo0zktk8.jpg) 418 | -------------------------------------------------------------------------------- /SinaWeibo/weibo_crawler.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/python 2 | # encoding:utf-8 3 | ''' 4 | 爬取微博的流程:因为微博调用接口的时候需要cookie,所以我们要用webdriver来登录微博获取cookie,微博的cookie有效期应该蛮长的,我设置过期时间6hours,未过期则去本地读取,否则重新登录获取cookie 5 | 获取cookie后则分析微博网页端的请求,找到相应接口和参数,然后去请求我们要的数据。 6 | 这个例子是去获取微博里的图片,例子爬取的微博是我伦的官方账号:MRJ台灣官方 7 | 运行代码脚本需要加5个参数 分别为 1.微博账号 2.微博密码 3.要爬取的账号的个性域名(无个性域名则输入 u/+微博id)4.要爬取的账号的ID 5.爬取页数 8 | 如:python weibo_crawler.py username password mrj168 1837498771 5 9 | ''' 10 | from selenium import webdriver 11 | import time 12 | import requests 13 | import json 14 | from bs4 import BeautifulSoup 15 | import os 16 | import sys 17 | 18 | request_params = {"ajwvr":"6","domain":"100505","domain_op":"100505","feed_type":"0","is_all":"1","is_tag":"0","is_search":"0"} 19 | profile_request_params = {"profile_ftype":"1","is_all":"1"} 20 | 21 | weibo_url = "http://weibo.com/" 22 | requset_url = "http://weibo.com/p/aj/v6/mblog/mbloglist?" 
23 | 24 | 25 | cookie_save_file = "cookie.txt"#存cookie的文件名 26 | cookie_update_time_file = "cookie_timestamp.txt"#存cookie时间戳的文件名 27 | image_result_file = "image_result.md"#存图片结果的文件名 28 | 29 | 30 | # username = 'your weibo accounts'##你的微博账号 31 | # password = 'your weibo password'##你的微博密码 32 | 33 | person_site_name = "mrj168"#想爬取的微博号的个性域名 无个性域名则换成: u/+"微博id" 如 u/12345678 34 | weibo_id = "1837498771"#微博id可以在网页端打开微博,显示网页源代码,找到关键词$CONFIG['oid']='1837498771'; 35 | page_size = 5#你要爬取的微博的页数 36 | 37 | 38 | 39 | 40 | 41 | 42 | headers = {#User-Agent需要根据每个人的电脑来修改 43 | 'Accept': '*/*', 44 | 'Accept-Encoding': 'gzip, deflate, sdch', 45 | 'Accept-Language':'zh-CN,zh;q=0.8,en;q=0.6', 46 | 'Cache-Control':'no-cache', 47 | 'Connection':'keep-alive', 48 | 'Content-Type':'application/x-www-form-urlencoded', 49 | 'Host':'weibo.com', 50 | 'Pragma':'no-cache', 51 | 'Referer':'http://weibo.com/u/3278620272?profile_ftype=1&is_all=1', 52 | 'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36', 53 | 'X-Requested-With':'XMLHttpRequest' 54 | } 55 | 56 | def get_timestamp():#获取当前系统时间戳 57 | try: 58 | tamp = time.time() 59 | timestamp = str(int(tamp))+"000" 60 | print timestamp 61 | return timestamp 62 | except Exception, e: 63 | print e 64 | finally: 65 | pass 66 | 67 | def login_weibo_get_cookies():#登录获取cookies 68 | time.sleep(2) 69 | driver.find_element_by_name("username").send_keys(username)##输入用户名 70 | driver.find_element_by_name("password").send_keys(password)##输入密码 71 | driver.find_element_by_xpath("//a[@node-type='submitBtn']").click()##点击登录按钮 72 | cookies = driver.get_cookies()##获取cookies 73 | print cookies 74 | cookie = "" 75 | ##将返回的Cookies数组转成微博需要的cookie格式 76 | for x in xrange(len(cookies)): 77 | value = cookies[x]['name']+"="+cookies[x]['value']+";" 78 | cookie = cookie+value 79 | print cookie 80 | return cookie 81 | 82 | def save_cookie(cookie):#把cookie存到本地 83 | try: 84 | f= open(cookie_save_file,'w') 
85 | f.write(cookie) 86 | f.close() 87 | except Exception, e: 88 | print e 89 | finally: 90 | pass 91 | 92 | def get_cookie_from_txt():#从本地文件里读取cookie 93 | f = open(cookie_save_file) 94 | cookie = f.read() 95 | print cookie 96 | return cookie 97 | 98 | def save_cookie_update_timestamp(timestamp):#把cookie存到本地 99 | try: 100 | f= open(cookie_update_time_file,'w') 101 | f.write(timestamp) 102 | f.write('\n') 103 | f.close() 104 | except Exception, e: 105 | print e 106 | finally: 107 | pass 108 | 109 | def get_cookie_update_time_from_txt():#获取上一次cookie更新时间 110 | try: 111 | f = open(cookie_update_time_file) 112 | lines = f.readlines() 113 | cookie_update_time = lines[0] 114 | print cookie_update_time 115 | return cookie_update_time 116 | except Exception, e: 117 | print e 118 | finally: 119 | pass 120 | 121 | def write_image_urls(image_list): 122 | try: 123 | f= open(image_result_file,'a+') 124 | for x in xrange(len(image_list)): 125 | image = image_list[x] 126 | show_image = "![]("+image+")" 127 | f.write(show_image.encode("utf-8")) 128 | f.write('\n') 129 | f.close() 130 | except Exception, e: 131 | print e 132 | finally: 133 | pass 134 | 135 | 136 | def is_valid_cookie():#判断cookie是否有效 137 | if os.path.isfile(cookie_update_time_file)==False: 138 | return False 139 | else : 140 | f = open(cookie_update_time_file) 141 | lines = f.readlines() 142 | if len(lines) == 0: 143 | return False 144 | else : 145 | last_time_stamp = get_cookie_update_time_from_txt() 146 | if long(get_timestamp()) - long(last_time_stamp) > 6*60*60*1000: 147 | return False 148 | else : 149 | return True 150 | 151 | def get_object_weibo_by_weibo_id_and_cookie(weibo_id,person_site_name,cookie,pagebar,page):#通过微博ID和cookie来调取接口 152 | try: 153 | headers["Cookie"] = cookie 154 | headers['Referer'] = weibo_url+person_site_name+"?profile_ftype=1&is_all=1" 155 | request_params["__rnd"] = get_timestamp() 156 | request_params["page"] = page 157 | request_params["pre_page"] = page 158 | request_params["pagebar"] 
= pagebar 159 | request_params["id"] = "100505"+weibo_id 160 | request_params["script_uri"] = "/"+person_site_name 161 | request_params["pl_name"] = "Pl_Official_MyProfileFeed__22" 162 | request_params["profile_ftype"] = 1 163 | response = requests.get(requset_url,headers=headers,params=request_params) 164 | print response.url 165 | html = response.json()["data"] 166 | return html 167 | except Exception, e: 168 | print e 169 | finally: 170 | pass 171 | 172 | 173 | def get_object_top_weibo_by_person_site_name_and_cookie(person_site_name,cookie,page):#每一页顶部微博 174 | try: 175 | profile_url = weibo_url+person_site_name+"?" 176 | headers["Cookie"] = cookie 177 | profile_request_params["page"] = page 178 | response = requests.get(profile_url,headers=headers,params=profile_request_params) 179 | print response.url 180 | html = response.text 181 | soup = BeautifulSoup(html,"html.parser") 182 | script_list = soup.find_all("script") 183 | script_size = len(script_list) 184 | print "script_size:"+str(script_size) 185 | tag = 0 186 | for x in xrange(script_size): 187 | if "WB_feed WB_feed_v3 WB_feed_v4" in str(script_list[x]): 188 | tag = x 189 | print "tag:"+str(tag) 190 | # print script_list[script_size-1] 191 | html_start = str(script_list[tag]).find("") 193 | # print str(script_list[tag])[html_start:html_end+4] 194 | return str(str(script_list[tag])[html_start:html_end+4]) 195 | except Exception, e: 196 | print e 197 | finally: 198 | pass 199 | 200 | 201 | 202 | def get_img_urls_form_html(html):#从返回的html格式的字符串中获取图片 203 | try: 204 | image_url_list = [] 205 | result_html = html.replace("\\","") 206 | soup = BeautifulSoup(result_html,"html.parser") 207 | div_list = soup.find_all("div",'media_box') 208 | print "div_list:"+str(len(div_list)) 209 | for x in xrange(len(div_list)): 210 | image_list = div_list[x].find_all("img") 211 | for y in xrange(len(image_list)): 212 | image_url = image_list[y].get("src").replace("\\","") 213 | print image_url 214 | 
image_url_list.append(image_url.replace("\"","")) 215 | return image_url_list 216 | except Exception, e: 217 | print e 218 | finally: 219 | pass 220 | 221 | if(len(sys.argv)==6): 222 | username = sys.argv[1] 223 | password = sys.argv[2] 224 | person_site_name = sys.argv[3] 225 | weibo_id = sys.argv[4] 226 | page_size = int(sys.argv[5]) 227 | print "微博账号:"+username 228 | print "微博密码:"+password 229 | print "要爬取的账号的个性域名(无个性域名则输入 u/+微博id ):"+person_site_name 230 | print "要爬取的账号的ID:"+weibo_id 231 | print "爬取页数:"+str(page_size) 232 | else: 233 | print "未按照指定参数输入,请按顺序输入5个指定参数 1.微博账号 2.微博密码 3.要爬取的账号的个性域名(无个性域名则输入 u/+微博id)4.要爬取的账号的ID 5.爬取页数" 234 | sys.exit(0) 235 | 236 | result = is_valid_cookie() 237 | print result 238 | if result == False: 239 | driver = webdriver.Chrome("/Users/darrenfantasy/Documents/study/python/image_crawler/SinaWeibo/chromedriver")#打开Chrome 240 | driver.maximize_window()#将浏览器最大化显示 241 | driver.get(weibo_url)#打开微博登录页面 242 | time.sleep(10)#因为加载页面需要时间,所以这里延时10s来确保页面已加载完毕 243 | cookie = login_weibo_get_cookies() 244 | save_cookie(cookie) 245 | save_cookie_update_timestamp(get_timestamp()) 246 | else : 247 | cookie = get_cookie_from_txt() 248 | for x in xrange(1,page_size+1): 249 | profile_html = get_object_top_weibo_by_person_site_name_and_cookie(person_site_name,cookie,x) 250 | image_url_list = get_img_urls_form_html(profile_html) 251 | write_image_urls(image_url_list) 252 | for y in xrange(0,2):#有两次下滑加载更多的操作 253 | print "pagebar:"+str(y) 254 | html = get_object_weibo_by_weibo_id_and_cookie(weibo_id,person_site_name,cookie,y,x) 255 | image_url_list = get_img_urls_form_html(html) 256 | write_image_urls(image_url_list) 257 | 258 | 259 | 260 | 261 | -------------------------------------------------------------------------------- /SinaWeibo/weibo_hot_topic_crawler.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | import sys 3 | reload(sys) 4 | sys.setdefaultencoding('utf-8') 5 | import requests 6 
import json
import base64
import re
import time
import pandas as pd

time1=time.time()


# ---------- Simulated Sina login ----------
def login(username, password):
    """Log in to Sina SSO and return the ``requests`` session that was used.

    Args:
        username: account name; it is base64-encoded into the ``su`` field.
        password: sent unchanged as the ``sp`` field.

    Returns:
        The session used for the login request. When the response reports
        ``retcode == "0"`` the session cookies are additionally copied into
        a ``cookie`` header; otherwise the reported reason is printed and the
        session is returned without that header.
    """
    encoded_name = base64.b64encode(username.encode('utf-8')).decode('utf-8')
    postData = {
        "entry": "sso",
        "gateway": "1",
        "from": "null",
        "savestate": "30",
        "useticket": "0",
        "pagerefer": "",
        "vsnf": "1",
        "su": encoded_name,
        "service": "sso",
        "sp": password,
        "sr": "1440*900",
        "encoding": "UTF-8",
        "cdult": "3",
        "domain": "sina.com.cn",
        "prelt": "0",
        "returntype": "TEXT",
    }
    loginURL = r'https://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.15)'
    session = requests.Session()
    res = session.post(loginURL, data=postData)
    info = json.loads(res.content.decode('gbk'))
    if info["retcode"] == "0":
        print(U"登录成功")
        # Copy the cookies into the headers; this step is required, otherwise
        # the later API calls fail.
        cookie_pairs = session.cookies.get_dict()
        session.headers["cookie"] = "; ".join(
            [key + "=" + value for key, value in cookie_pairs.items()])
    else:
        print(U"登录失败,原因: %s" % info["reason"])
    return session


def _strip_escapes(raw):
    """Drop the literal ``\\t`` / ``\\n`` escapes, every backslash and every '#'."""
    return str(raw).replace('\\t', "").replace('\\n', '').replace('\\', '').replace('#', '')


def _page_id_from_topic_url(topic_url):
    """Return *topic_url* without the ``//weibo.com/p/`` prefix and the
    ``?from=faxian_huati`` suffix.

    str.lstrip / str.rstrip take a *set of characters*, not a prefix or
    suffix, so using them here would also eat leading or trailing characters
    of the id itself (for example an id ending in ``a`` or ``f``).
    """
    prefix = "//weibo.com/p/"
    suffix = "?from=faxian_huati"
    if topic_url.startswith(prefix):
        topic_url = topic_url[len(prefix):]
    if topic_url.endswith(suffix):
        topic_url = topic_url[:-len(suffix)]
    return topic_url


def _next_since_id(action_data):
    """Return the value between ``since_id=`` and ``&isPrivate`` in
    *action_data* (with ``%23`` turned back into ``#``), or None when either
    marker is missing."""
    begin = action_data.find("since_id=")
    end = action_data.find("&isPrivate")
    if begin == -1 or end == -1:
        return None
    return action_data[begin + 9:end].replace("%23", "#")


# Placeholder credentials ("account" / "password"): replace before running.
session = login('账号', '密码')
# ---------- Lists that collect the scraped data ----------
top_name = []
url_new1=[]
url_new2=[]
pageids = []

# ---------- Crawl listing pages 1-4 ----------
for i in range(1,5):
    try:
        print("正在抓取第"+str(i)+"页。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。")
        url2="http://d.weibo.com/100803?pids=Pl_Discover_Pt6Rank__5&cfs=920&Pl_Discover_Pt6Rank__5_filter=hothtlist_type=0&Pl_Discover_Pt6Rank__5_page="+str(i)+"&ajaxpagelet=1&__ref=/100803&_t=FM_149273744327929"
        html=session.get(url2).content

        # ---------- Regular-expression matching ----------
        # NOTE(review): this pattern ends in a non-greedy group with nothing
        # after it, so every captured group is empty; the closing delimiter
        # looks lost. Kept exactly as found - restore it from the upstream file.
        name=re.findall("Pl_Discover_Pt6Rank__5(.*?)",html,re.S)
        for each in name:
            k=re.findall('"html":"(.*?)"}',each,re.S)
            for each1 in k:
                k1=_strip_escapes(each1)
                # Topic titles are taken from the alt text of the pictures.
                k2=re.findall('alt="(.*?)" class="pic">',str(k1),re.S)
                top_name.extend(k2)
                # NOTE(review): the pattern below is an empty string, which
                # matches at every position; it looks truncated as well. Kept
                # exactly as found - restore it from the upstream file.
                k3=re.findall('',str(k1),re.S)
                for topic_url in k3:
                    newUrl = "https:"+topic_url.replace("?from=faxian_huati","/topic_album?from=page_100808&mod=TAB#place")
                    pageids.append(_page_id_from_topic_url(topic_url))
                    url_new1.append(topic_url)
                    url_new2.append(newUrl)
    except Exception as e:
        # Best effort per page: report the problem and go on with the next
        # page, but let KeyboardInterrupt / SystemExit through.
        print(e)



apiUrl = "https://weibo.com/p/aj/proxy"
page_id = ""
page = 2
gifs = []
try:
    # Topics 6..15 of the listing; stop early when fewer were scraped instead
    # of running into an IndexError.
    for x in range(5, min(15, len(top_name), len(pageids))):
        print(top_name[x])
        page_id = pageids[x]
        since_id = ""
        for y in range(30):
            time.sleep(1)  # pause between consecutive API requests
            page = y+2
            params = {"api":"http://i.huati.weibo.com/pcpage/papp","ajwvr":6,"atype":"all","viewer_uid":"1942763351","since_id":since_id,"page_id":page_id,"page":page,"ajax_call":1,"appname":"album","module":"feed","is_feed":1,"_rnd":"1506317976286"}
            html = session.get(apiUrl,params = params).content
            k1=_strip_escapes(html)
            # NOTE(review): empty pattern, see the note on k3 above.
            imgs=re.findall('',k1,re.S)
            for img in imgs:
                if ".gif" in img:
                    gif_url = img.replace("thumb300","large").replace("https","http")
                    print(gif_url)
                    gifs.append(gif_url)
            since_ids = re.findall('action-data="(.*?)"',k1,re.S)
            selected_since_ids = [item for item in since_ids if "since_id" in item]
            if not selected_since_ids:
                break
            # The last matching action-data carries the cursor of the next page.
            next_since_id = _next_since_id(selected_since_ids[-1])
            if next_since_id is None:
                # Without both markers there is no usable cursor to go on with.
                break
            since_id = next_since_id
except Exception as e:
    print(e)


# --------------------------------------------------------------------------------
# /WechatOfficialAccounts/spider_wechat_official_accounts.py:
# --------------------------------------------------------------------------------
#!/usr/bin/python
#coding:utf-8
import urllib2
import requests
import json
from bs4 import BeautifulSoup

def cut_url(a):
    """Return everything from the last '.' of *a* onward (e.g. ``".png"``).

    Returns an empty string when *a* contains no '.', instead of running off
    the front of the string.
    """
    dot = a.rfind('.')
    if dot == -1:
        return ''
    return a[dot:]

def get_html(html):
    """Print the ``data-src`` URL of every ``<img>`` in *html* that has one,
    followed by a summary line with the number of URLs printed."""
    name_num = 0
    soup = BeautifulSoup(html, "html.parser")
    for detail in soup.find_all('img'):
        image = detail.get('data-src')
        if image is None:
            continue
        name_num += 1
        # Downloading is switched off; only the URL is reported:
        # pic = requests.get(image, verify=False)
        # pic_name = str(name_num) + str(cut_url(image))
        print(u'the%dphoto, img url:%s\n' % (name_num,image))
        # with open(address + '\\' + pic_name, 'wb') as fp:
        #     fp.write(pic.content)
    print(u"\nfinish all%dphoto ╰( ̄▽ ̄)╭ \n " % name_num)


def get_history_url_by_content(url):
    """Return the ``href`` of the ``<a uigs="account_image_0">`` element.

    Args:
        url: despite its name, the HTML text of the search-result page.

    Returns:
        The link target, or None when the page has no such element.
    """
    soup = BeautifulSoup(url, "html.parser")
    a = soup.find('a',uigs="account_image_0")
    if a is None:
        return None
    return a.get('href')

def _find_msg_list_script(scripts):
    """Return the text of the ``<script>`` that contains ``msgList``.

    The script at index 7 is the one this code used to read unconditionally,
    so it is tried first; every script is then searched in order.

    Raises:
        ValueError: when no script contains ``msgList``.
    """
    scripts = list(scripts)
    for script in scripts[7:8] + scripts:
        text = script.string
        if text and "msgList" in text:
            return text
    raise ValueError("no <script> element containing msgList was found")

def get_one_page_content_url(html):
    """Return the article URLs listed in the ``msgList`` JSON of a history page.

    Args:
        html: markup of the history page (a string or a file-like object, as
            accepted by BeautifulSoup).

    Returns:
        A list with one ``http://mp.weixin.qq.com...`` URL per entry of the
        JSON ``list`` array.

    Raises:
        ValueError: when the page has no ``msgList`` script or its payload is
            not valid JSON.
    """
    soup = BeautifulSoup(html, "html.parser")
    script_text = _find_msg_list_script(soup.find_all('script'))
    start = script_text.find("msgList")
    end = script_text.rfind("}")
    # Skip the 10 characters starting at "msgList" and keep everything up to
    # and including the last '}'.
    result = script_text[start+10:end+1]
    print(u'%s' % result)
    s = json.loads(result)
    urlList = []
    for message in s["list"]:
        addUrl = "http://mp.weixin.qq.com"+message["app_msg_ext_info"]["content_url"]
        # The call found here replaced '&' with '&', a no-op; it is assumed
        # the intent was to undo the HTML escaping of '&' in the embedded
        # URL - TODO confirm against the upstream file.
        urlList.append(addUrl.replace('&amp;','&'))
    return urlList

wechatIdList = ['','','']  # ids of the official accounts to crawl
for wechat_id in wechatIdList:
    print(wechat_id)
    baseUrl = 'http://weixin.sogou.com/weixin?type=1&query='+wechat_id+'&ie=utf8&_sug_=y&_sug_type_=1'
    response = urllib2.urlopen(baseUrl)
    content = response.read()
    response.close()
    historyUrl = get_history_url_by_content(content)
    print(u'historyUrl:%s' % (historyUrl))
    if historyUrl is None:
        # No account link on the result page: nothing to follow for this id.
        continue
    response2 = urllib2.urlopen(historyUrl)
    urls =get_one_page_content_url(response2)
    response2.close()
    for article_url in urls:
        print(article_url)
        response = urllib2.urlopen(article_url)
        content = response.read()
        response.close()
        get_html(content)
# --------------------------------------------------------------------------------