├── README.md
├── weibo_top.py
└── zhihu_billboard.py

/README.md:
--------------------------------------------------------------------------------
# hot_display
Crawl the trending lists of popular Chinese websites and display them in one place; integrate and display the hot billboards / ranked topics from several Chinese sites.

zhihu_billboard.py: scrapes the Zhihu hot billboard and stores the entries in a list
weibo_top.py: scrapes the Weibo trending topics and stores the entries in a list
--------------------------------------------------------------------------------
/weibo_top.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# encoding: utf-8
# @Time : 2019-10-10 20:08

__author__ = 'Ted'

import requests
from bs4 import BeautifulSoup
from urllib.parse import quote

# Weibo trending summary page; a valid User-Agent and Cookie are usually
# required to receive the full ranking table.
url = "https://s.weibo.com/top/summary"
headers = {"User-Agent": "", "Cookie": ""}
wb_response = requests.get(url, headers=headers)
webcontent = wb_response.text
soup = BeautifulSoup(webcontent, "html.parser")

# Each row of the ranking table: td-01 = rank, td-02 = title + heat value, td-03 = label.
index_list = soup.find_all("td", class_="td-01")
title_list = soup.find_all("td", class_="td-02")
level_list = soup.find_all("td", class_="td-03")

topic_list = []
for i in range(len(index_list)):
    item_index = index_list[i].get_text(strip=True)
    if item_index == "":
        # The pinned topic has no rank number; use "0" as a placeholder.
        item_index = "0"
    item_title = title_list[i].a.get_text(strip=True)
    if title_list[i].span:
        item_mark = title_list[i].span.get_text(strip=True)
    else:
        # A row without a heat value is the pinned (置顶) topic.
        item_mark = "置顶"
    item_level = level_list[i].get_text(strip=True)
    topic_list.append({"index": item_index, "title": item_title, "mark": item_mark, "level": item_level,
                       "link": f"https://s.weibo.com/weibo?q=%23{quote(item_title)}%23&Refer=top"})
print(topic_list)
--------------------------------------------------------------------------------
/zhihu_billboard.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# encoding: utf-8
# @Time : 2019-10-09 23:06

__author__ = 'Ted'

import json
import re

import requests
from bs4 import BeautifulSoup

headers = {"User-Agent": "", "Cookie": ""}
zh_url = "https://www.zhihu.com/billboard"
zh_response = requests.get(zh_url, headers=headers)

webcontent = zh_response.text
soup = BeautifulSoup(webcontent, "html.parser")

# The billboard page embeds its data as JSON inside the js-initialData <script> tag.
script_text = soup.find("script", id="js-initialData").get_text()

# Pull the "hotList" array out of the embedded JSON.
rule = r'"hotList":(.*?),"guestFeeds"'
result = re.findall(rule, script_text)

# Parse the extracted fragment with json.loads instead of eval(): it is safer
# and handles JSON literals such as null without string replacements.
hot_list = json.loads(result[0])
print(hot_list)
--------------------------------------------------------------------------------
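
Neither script yet performs the "display in one place" step described in the README; the sketch below shows one way the two result lists could be shown together. It is hypothetical and not part of the repository: the `show_boards` helper and the sample entries are made up for illustration, and only the dict keys `index`, `title`, and `mark` match what weibo_top.py actually produces (a Zhihu list would first need to be normalized to the same shape).

```python
# hot_display sketch -- assumes the scrapers' result lists are available,
# each entry being a dict with at least a "title" key.

def show_boards(boards):
    """Print several (source name -> list of entries) boards one after another."""
    for source, entries in boards.items():
        print(f"===== {source} =====")
        for entry in entries:
            rank = entry.get("index", "-")   # key produced by weibo_top.py
            mark = entry.get("mark", "")     # heat value or 置顶 marker
            print(f"{rank:>3}  {entry['title']}  {mark}")
        print()


if __name__ == "__main__":
    # Made-up sample data in the shape produced by weibo_top.py.
    sample = {
        "Weibo": [{"index": "1", "title": "example topic", "mark": "1234567"}],
        "Zhihu": [{"index": "1", "title": "example question", "mark": ""}],
    }
    show_boards(sample)
```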