├── README.md
├── weibo_top.py
└── zhihu_billboard.py

/README.md:
--------------------------------------------------------------------------------
# hot_display
Crawl the trending lists of popular Chinese websites and display them in one place; integrate and display the hot billboards / ranked topics from several Chinese sites.

zhihu_billboard.py: scrapes the Zhihu hot billboard and stores the entries in a list
weibo_top.py: scrapes the Weibo trending topics and stores the entries in a list
--------------------------------------------------------------------------------
/weibo_top.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# encoding: utf-8
# @Time : 2019-10-10 20:08

__author__ = 'Ted'

import requests
from bs4 import BeautifulSoup
from urllib.parse import quote

# Weibo trending summary page; a valid User-Agent and Cookie are usually
# required to receive the full ranking table.
url = "https://s.weibo.com/top/summary"
headers = {"User-Agent": "", "Cookie": ""}
wb_response = requests.get(url, headers=headers)
webcontent = wb_response.text
soup = BeautifulSoup(webcontent, "html.parser")

# Each row of the ranking table: td-01 = rank, td-02 = title + heat value, td-03 = label.
index_list = soup.find_all("td", class_="td-01")
title_list = soup.find_all("td", class_="td-02")
level_list = soup.find_all("td", class_="td-03")

topic_list = []
for i in range(len(index_list)):
    item_index = index_list[i].get_text(strip=True)
    if item_index == "":
        # The pinned topic has no rank number; use "0" as a placeholder.
        item_index = "0"
    item_title = title_list[i].a.get_text(strip=True)
    if title_list[i].span:
        item_mark = title_list[i].span.get_text(strip=True)
    else:
        # A row without a heat value is the pinned (置顶) topic.
        item_mark = "置顶"
    item_level = level_list[i].get_text(strip=True)
    topic_list.append({"index": item_index, "title": item_title, "mark": item_mark, "level": item_level,
                       "link": f"https://s.weibo.com/weibo?q=%23{quote(item_title)}%23&Refer=top"})
print(topic_list)
--------------------------------------------------------------------------------
/zhihu_billboard.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# encoding: utf-8
# @Time : 2019-10-09 23:06

__author__ = 'Ted'

import json
import re

import requests
from bs4 import BeautifulSoup

headers = {"User-Agent": "", "Cookie": ""}
zh_url = "https://www.zhihu.com/billboard"
zh_response = requests.get(zh_url, headers=headers)

webcontent = zh_response.text
soup = BeautifulSoup(webcontent, "html.parser")

# The billboard page embeds its data as JSON inside the js-initialData <script> tag.
script_text = soup.find("script", id="js-initialData").get_text()

# Pull the "hotList" array out of the embedded JSON.
rule = r'"hotList":(.*?),"guestFeeds"'
result = re.findall(rule, script_text)

# Parse the extracted fragment with json.loads instead of eval(): it is safer
# and handles JSON literals such as null without string replacements.
hot_list = json.loads(result[0])
print(hot_list)
--------------------------------------------------------------------------------
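
Neither script yet performs the "display in one place" step described in the README; the sketch below shows one way the two result lists could be shown together. It is hypothetical and not part of the repository: the `show_boards` helper and the sample entries are made up for illustration, and only the dict keys `index`, `title`, and `mark` match what weibo_top.py actually produces (a Zhihu list would first need to be normalized to the same shape).

```python
# hot_display sketch -- assumes the scrapers' result lists are available,
# each entry being a dict with at least a "title" key.

def show_boards(boards):
    """Print several (source name -> list of entries) boards one after another."""
    for source, entries in boards.items():
        print(f"===== {source} =====")
        for entry in entries:
            rank = entry.get("index", "-")   # key produced by weibo_top.py
            mark = entry.get("mark", "")     # heat value or 置顶 marker
            print(f"{rank:>3}  {entry['title']}  {mark}")
        print()


if __name__ == "__main__":
    # Made-up sample data in the shape produced by weibo_top.py.
    sample = {
        "Weibo": [{"index": "1", "title": "example topic", "mark": "1234567"}],
        "Zhihu": [{"index": "1", "title": "example question", "mark": ""}],
    }
    show_boards(sample)
```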