.
675 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # News-M
2 |
3 | 
4 |    
5 |
6 | ---
7 |
8 | ## 数据来源 | Data
9 |
10 | **网易财经、同花顺财经、金融界财经、凤凰财经、东方财富、新浪财经、新华网财经、松果财经、新闻联播文字版、投资界。**
11 |
12 | **NetEase Finance, Tonghuashun Finance, JRJ Finance, ifeng (Phoenix) Finance, Eastmoney, Sina Finance, Xinhuanet Finance, Songguo Finance, the text edition of CCTV's Xinwen Lianbo, and Touzijie. The data is compiled into spreadsheets and stored in the Finance folder.**
13 |
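Each source maps to a scraper module under `src/`; the pairing below follows the imports in `main.py` (the trailing comments are descriptive glosses, not module docs):

```python
from src.Wy_Finance import Wy      # NetEase (Wangyi) Finance
from src.Ths_Finance import Ths    # Tonghuashun Finance
from src.Jrj_Finance import Jrj    # JRJ Finance
from src.Fh_Finance import Fh      # ifeng (Phoenix) Finance; API currently broken
from src.East_Finance import Ew    # Eastmoney
from src.Sina_Finance import Sina  # Sina Finance
from src.Xhs_Finance import Xhs    # Xinhuanet Finance
from src.Sg_Finance import Sg      # Songguo Finance
from src.Tzj_Finance import Tzj    # Touzijie
from src.CCTV_News import CCTV     # Xinwen Lianbo text edition (off by default)
from src.Self_Stock import Stock   # per-stock processing (off by default)
```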
14 | ---
15 |
16 | ## 运行程序 | Run
17 |
18 | ```bash
19 | # Run
20 | # The CCTV Xinwen Lianbo text edition, per-stock processing, and netdisk backup are disabled by default; enable them yourself.
21 | pip install -r requirements.txt
22 | python3 main.py
23 | ```
24 |
25 | > [!IMPORTANT]
26 | > Windows新闻文件生成在桌面, Linux/Macos生成在运行目录下
27 |
28 | On Windows the news file is generated on the desktop; on Linux/macOS it is generated in the working directory
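For reference, the output paths built in `src/Platform.py` look roughly like this (a sketch; the filename embeds the local date and hour):

```python
# Windows:      <Desktop>\Finance\News\闻讯__YYYY_MM_DD_HH时.xlsx
# Linux/macOS:  ./Finance/News/闻讯__YYYY_MM_DD_HH时.xlsx
```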
29 |
30 | **使用教程 | Tutorial: [https://6923403.github.io/post/news_wps](https://vcvvvc.github.io/post/news_wps)**
31 |
32 | - 更多设置, 请查看教程
33 |
34 | - For more settings, check out the tutorial; a sketch of the optional flags follows below
35 |
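The optional features are plain flags and commented-out lines in `main.py` rather than a config file; a minimal sketch of what to flip (names taken from the current code):

```python
# main.py (sketch of the switches)
Debug = False    # True: only create the workbook and fetch the CCTV text edition
bd_flag = False  # True: sync the Finance folder to Baidu netdisk via bypy

# In get_News(), uncomment the t1/t2 threads to re-enable the CCTV text
# edition and per-stock processing (add "code + name" lines to Code.txt first).
```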
36 | ---
37 |
38 | ## 跨平台 | Cross-Platform
39 |
40 | **实机测试:**
41 |
42 | **Tested on real machines:**
43 |
44 | - **Windows7 + python3.7**
45 |
46 | - **Ubuntu20.04 + python3.8**
47 |
48 | - **Macos11.4 + python3.9**
49 |
50 | ---
51 |
52 | ## 关于 | About
53 |
54 | **提取财经新闻标题、链接进行整合排列写入表格,省去繁琐步骤, 提高效率。过去每日会大约花费一小时时间用来获取新闻资讯,效率极低,目前10分钟即可全部阅完。**
55 |
56 | **Extracting financial news headlines and links, consolidating them, and writing them into a spreadsheet removes the tedious cycle of searching a site, opening a page, reading the headline, closing the page, and moving on to the next one. Gathering the day's news used to take about an hour; now it can all be read in roughly 10 minutes.**
57 |
58 |
59 | ---
60 |
61 | ## 开源协议 | License
62 |
63 | ``GPLv3 License``
64 |
65 | ---
66 |
67 | ## Stargazers over time
68 |
69 | [![Stargazers over time](https://starchart.cc/VcSpace/News-M.svg)](https://starchart.cc/VcSpace/News-M)
70 |
71 |
72 |
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | import threading
2 | import time
3 | import logging
4 | import random
5 | import os
6 |
7 | from src.Platform import pt
8 | from src.Wy_Finance import Wy
9 | from src.Ths_Finance import Ths
10 | from src.Jrj_Finance import Jrj
11 | from src.Fh_Finance import Fh
12 | from src.East_Finance import Ew
13 | from src.Self_Stock import Stock
14 | from src.CCTV_News import CCTV
15 | from src.Sina_Finance import Sina
16 | from src.Xhs_Finance import Xhs
17 | from src.Sg_Finance import Sg
18 | from src.Tzj_Finance import Tzj
19 | import src.Baidu_upload
20 |
21 | def get_News(platform, filename, debug):
22 |     # debug: set True to enable (creates the workbook and fetches CCTV news only)
23 | if debug:
24 | Wy.create_file(filename)
25 | CCTV.main()
26 | return
27 | Wy.main(filename)
28 |     # t1 = threading.Thread(target=CCTV.main, args=()) # CCTV Xinwen Lianbo text fetch is off by default; the source breaks often and is not updated once broken
29 |     # t2 = threading.Thread(target=Stock.main, args=(filename,)) # per-stock processing is off by default; add "code + name" entries to Code.txt first; not updated once broken
30 | # t1.start()
31 | # t2.start()
32 | Xhs.main(filename)
33 | Ths.main(filename)
34 | Jrj.main(filename)
35 |     # Fh.main(filename) # API no longer works
36 | Ew.main(filename)
37 | Sina.main(filename)
38 | Tzj.main(filename)
39 | Sg.main(filename)
40 | # t1.join()
41 | # t2.join()
42 |
43 | def get_filename(platform):
44 | if platform == True:
45 | win_file = pt.win_filename()
46 | return win_file
47 | else:
48 | linux_file = pt.linux_filename()
49 | return linux_file
50 |
51 | if __name__ == '__main__':
52 | Debug = False
53 |     m_platform = pt.get_platform() # detect the OS
54 | filename = get_filename(m_platform)
55 |     get_News(m_platform, filename, Debug) # fetch the news
56 |
57 |     pt.file_move(m_platform) # move and rename the output file
58 |
59 |     """
60 |     bypy authorization steps:
61 |     command line: `bypy info` (or `python -m bypy info` / `python3 -m bypy info`)
62 |     open the printed link, log in, and authorize
63 |     copy the authorization code
64 |     paste it into the command line and press Enter
65 |     """
66 |     bd_flag = False # set to True to enable
67 | if bd_flag == False or Debug == True:
68 | print("如果需要上传到云盘备份 请自行开启bd.main \n")
69 | else:
70 | Bd = src.Baidu_upload.Baidu()
71 | Bd.main()
72 |
73 | print("操作完成")
74 |
75 | if m_platform == True:
76 | pt.pause()
77 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests==2.25.1
2 | openpyxl==3.0.6
3 | beautifulsoup4==4.11.2
4 | lxml==4.9.2
5 | bypy==1.7.12
6 | wget==3.2
--------------------------------------------------------------------------------
/src/Baidu_upload.py:
--------------------------------------------------------------------------------
1 | import os
2 | from src.Platform import pt
3 | from bypy import ByPy
4 |
5 | class Baidu(object):
6 | def __init__(self):
7 | bp = ByPy()
8 | #print(bp.info()) # or whatever instance methods of ByPy class
9 |         # Before using the upload API, apply for access at: https://pan.baidu.com/union/apply/
10 |
11 |     # def get_token(self):
12 |     #     # not used
13 |     #     url = "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials&client_id={0}&client_secret={1}".format(self.AppKey, self.SecretKey)
14 |     #     # fetch the token
15 |     #     res = requests.get(url).text
16 |     #     data = json.loads(res) # parse the JSON response into a dict
17 |     #     self.access_token = data['access_token']
18 |     #     self.filename = pt.filename()
19 |
20 | def upload(self):
21 | path = pt.getpath()
22 | print("正在同步备份文件,如果文件过多 请耐心等待")
23 | #re: https://www.jianshu.com/p/19ddb60e2b22
24 | cmd = 'bypy syncup ' + path + " /"
25 | try:
26 |             print("上传完成: ", os.system(cmd)) # os.system returns the shell exit status (0 = success), not the upload output
27 | except:
28 | print("上传出错")
29 |
30 | def main(self):
31 |         self.upload() # bypy: sync the current directory up to the cloud drive
--------------------------------------------------------------------------------
/src/CCTV_News.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from bs4 import BeautifulSoup
3 | import os
4 | import shutil
5 | from src.Platform import pt
6 | import time
7 |
8 | """
9 | http://mrxwlb.com/category/mrxwlb-text/amp/
10 | cn.govopendata.com
11 | http://www.11417.cn/cctv.html
12 | """
13 |
14 | headers = {
15 | 'User-Agent':'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36',
16 | }
17 |
18 | class CCTV_News(object):
19 | def __init__(self):
20 | pass
21 |
22 | def request(self):
23 | newslist = ''
24 | url = 'http://mrxwlb.com/category/mrxwlb-text/amp/'
25 | data = ''
26 | for ll in range(3):
27 | try:
28 | data = requests.get(url, headers=headers, timeout=120)
29 | if data.status_code == 200:
30 | break
31 | except Exception as e:
32 | pass
33 |
34 | self.soup = BeautifulSoup(data.text, "lxml")
35 | m_new = self.soup.find(class_='loop-title')
36 | m_url = m_new.find('a')
37 | self.mr_url = m_url['href']
38 | self.mr_title = m_url.get_text()
39 |
40 |
41 | def getNews(self):
42 | news = ''
43 | # self.mr_url = 'http://mrxwlb.com/2022年2月15日新闻联播文字版/amp/'
44 | # self.mr_title = '2022年2月15日新闻联播文字版'
45 | for _ in range(10):
46 | try:
47 | news = requests.get(self.mr_url, headers=headers)
48 | break
49 | except:
50 | continue
51 | soup = BeautifulSoup(news.text, "lxml")
52 | content = soup.find_all(class_='cntn-wrp artl-cnt')
53 |         # build the output filename from the article title
54 | self.filename = self.mr_title + ".md"
55 | with open(self.filename, "w+", encoding='utf-8') as f:
56 | for news in content:
57 | m_con = news.find_all('li')
58 | m_con2 = news.find_all('p')
59 | for m_cont in m_con:
60 | m_content = m_cont.get_text()
61 | f.write("- " + m_content + "\n")
62 | f.write("---" + "\n")
63 | for m_cont in m_con2:
64 | m_content = m_cont.get_text()
65 | f.write("- " + m_content + "\n")
66 |
67 | if pt.get_platform() == True:
68 | self.win_cctv_file(self.filename)
69 | else:
70 | self.lin_cctv_file(self.filename)
71 |
72 |
73 | def getfilename(self):
74 | return self.filename
75 |
76 |     def lin_cctv_file(self, filename):
77 |         path = "./Finance/CCTV_News/"
78 |         isExists = os.path.exists(path)
79 |         if not isExists:
80 |             os.makedirs(path) # makedirs also creates ./Finance/ if it is missing
81 |         shutil.move(filename, path + filename)
82 |
83 |     def win_cctv_file(self, cctv_file):
84 |         desktop_path = os.path.join(os.path.expanduser('~'),"Desktop") # get the desktop path
85 |         path = desktop_path +"\\Finance\\CCTV_News\\"
86 |         isExists = os.path.exists(path)
87 |         if not isExists:
88 |             os.makedirs(path) # create parent folders as needed
89 |
90 |         shutil.move(cctv_file, path + cctv_file)
91 |
92 | def request_114(self):
93 | newslist = ''
94 | url = 'http://www.11417.cn/cctv.html'
95 | for _ in range(10):
96 | try:
97 | newslist = requests.get(url, headers=headers, timeout=30)
98 | break
99 | except:
100 | continue
101 | self.soup = BeautifulSoup(newslist.text, "lxml")
102 | m_new = self.soup.select('#hcsticky > div.content > div.block > div:nth-child(1) > h2 > a')
103 | self.m114_url = m_new[0]['href']
104 | self.m114_title = m_new[0].get_text()
105 |
106 |
107 | def getNews_114(self):
108 | news = ''
109 | # self.m_url = 'http://www.11417.cn/7309.html'
110 | # self.m_title = '2022年01月06日新闻联播文字版完整版'
111 | for _ in range(10):
112 | try:
113 | news = requests.get(self.m114_url, headers=headers, timeout=30)
114 | break
115 | except:
116 | continue
117 |
118 | soup = BeautifulSoup(news.text, "lxml")
119 | content = soup.find_all(class_='single')
120 |
121 | self.filename = self.m114_title + ".md"
122 | with open(self.filename, "w+", encoding='utf-8') as f:
123 | for news in content:
124 | #m_con = news.find_all('li')
125 | m_con2 = news.find_all('p')
126 | for m_cont in m_con2:
127 | m_content = m_cont.get_text()
128 | f.write("- " + m_content + "\n")
129 |
130 | if pt.get_platform() == True:
131 | self.win_cctv_file(self.filename)
132 | else:
133 | self.lin_cctv_file(self.filename)
134 |
135 |
136 | def main(self):
137 |         # fetch today's/yesterday's Xinwen Lianbo; already-fetched files are overwritten
138 | CCTV.request()
139 | CCTV.getNews()
140 | # CCTV.request_114()
141 | # CCTV.getNews_114()
142 |
143 |
144 | CCTV = CCTV_News()
145 |
--------------------------------------------------------------------------------
/src/East_Finance.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from bs4 import BeautifulSoup
3 | from openpyxl import load_workbook
4 | from openpyxl.styles import Font
5 | import json
6 | import time
7 | import threading
8 |
9 | headers = {
10 | 'User-Agent':'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36',
11 | }
12 |
13 | class EastWealth(object):
14 | def __init__(self):
15 | pass
16 |
17 | def request(self):
18 | self.url = 'https://www.eastmoney.com/'
19 |
20 | for ll in range(3):
21 | try:
22 | self.data = requests.get(self.url, headers=headers, timeout=120)
23 | if self.data.status_code == 200:
24 | break
25 | except Exception as e:
26 | pass
27 | self.data.encoding = "utf-8"
28 | self.soup = BeautifulSoup(self.data.text, "lxml")
29 |
30 | self.stock_url = 'http://stock.eastmoney.com/'
31 |
32 | for ll in range(3):
33 | try:
34 | self.stock_data = requests.get(self.stock_url, headers=headers, timeout=120)
35 | if self.stock_data.status_code == 200:
36 | break
37 | except Exception as e:
38 | pass
39 | self.stock_data.encoding = "utf-8"
40 | self.stock_soup = BeautifulSoup(self.stock_data.text, "lxml")
41 |
42 | self.finance_url = 'http://finance.eastmoney.com/'
43 |
44 | for ll in range(3):
45 | try:
46 | self.finance_data = requests.get(self.finance_url, headers=headers, timeout=120)
47 | if self.finance_data.status_code == 200:
48 | break
49 | except Exception as e:
50 | pass
51 | self.finance_data.encoding = "utf-8"
52 | self.finance_soup = BeautifulSoup(self.finance_data.text, "lxml")
53 |
54 |
55 | def getTopNew(self):
56 | wb = load_workbook(self.xlsxname)
57 | sheet = wb.create_sheet("Ew")
58 | t_row = 1
59 | t_col = 1
60 | sheet.cell(row=t_row, column=t_col, value="东方财富")
61 | t_row = t_row + 1
62 | sheet.cell(row=t_row, column=t_col, value="新闻标题")
63 | sheet.cell(row=t_row, column=t_col + 1, value="新闻链接")
64 | sheet.cell(row=t_row, column=t_col + 2, value="新闻简介")
65 | sheet.cell(row=t_row, column=t_col + 3, value="新闻时间")
66 | t_row = t_row + 1
67 |
68 | datalist = self.soup.find_all(class_='nlist')
69 | for Newslist in datalist:
70 | News = Newslist.find_all('a')
71 | for m_new in News:
72 | m_title = m_new.get_text()
73 | if len(m_title) <= 3:
74 | continue
75 | m_href = m_new['href']
76 | sheet.cell(row=t_row, column=t_col, value=m_title)
77 | sheet.cell(row=t_row, column=t_col + 1, value=m_href)
78 | t_row = t_row + 1
79 | try:
80 | wb.save(self.xlsxname)
81 | except:
82 | print("EastWealth Save Error = getTopNew")
83 |
84 |
85 | def getStockNew(self):
86 | wb = load_workbook(self.xlsxname)
87 | sheet = wb.get_sheet_by_name("Ew")
88 | t_row = sheet.max_row + 2
89 | t_col = 1
90 | sheet.cell(row=t_row, column=t_col, value="股市聚焦")
91 | t_row = t_row + 1
92 | sheet.cell(row=t_row, column=t_col, value="新闻标题")
93 | sheet.cell(row=t_row, column=t_col + 1, value="新闻链接")
94 | sheet.cell(row=t_row, column=t_col + 2, value="新闻简介")
95 | sheet.cell(row=t_row, column=t_col + 3, value="新闻时间")
96 | t_row = t_row + 1
97 |
98 | datalist = self.stock_soup.find_all(class_='card_body pt0 card_gsjj')
99 | for Newslist in datalist:
100 | News = Newslist.find_all('a')
101 | for m_new in News:
102 | m_title = m_new.get_text()
103 | if len(m_title) <= 3:
104 | continue
105 | m_href = m_new['href']
106 | sheet.cell(row=t_row, column=t_col, value=m_title)
107 | sheet.cell(row=t_row, column=t_col + 1, value=m_href)
108 | t_row = t_row + 1
109 | try:
110 | wb.save(self.xlsxname)
111 | except:
112 | print("EastWealth Save Error = getStockNew")
113 |
114 |
115 |     def getIndexanalysis(self): # market-wide (index) analysis
116 | wb = load_workbook(self.xlsxname)
117 | sheet = wb.get_sheet_by_name("Ew")
118 | t_row = sheet.max_row + 2
119 | t_col = 1
120 | sheet.cell(row=t_row, column=t_col, value="主力市场")
121 | t_row = t_row + 1
122 | sheet.cell(row=t_row, column=t_col, value="新闻标题")
123 | sheet.cell(row=t_row, column=t_col + 1, value="新闻链接")
124 | sheet.cell(row=t_row, column=t_col + 2, value="新闻简介")
125 | sheet.cell(row=t_row, column=t_col + 3, value="新闻时间")
126 | t_row = t_row + 1
127 |
128 | datalist = self.stock_soup.find_all(class_='list list-md list-truncate')
129 | datalist2 = self.stock_soup.find_all(class_='card_body pt0 common_list')
130 | for Newslist in datalist:
131 | News = Newslist.find_all('a')
132 | for m_new in News:
133 | m_title = m_new.get_text()
134 | if len(m_title) <= 3:
135 | continue
136 | m_href = m_new['href']
137 | sheet.cell(row=t_row, column=t_col, value=m_title)
138 | sheet.cell(row=t_row, column=t_col + 1, value=m_href)
139 | t_row = t_row + 1
140 |
141 | for Newslist2 in datalist2:
142 | News = Newslist2.find_all('a')
143 | for m_new in News:
144 | m_title = m_new.get_text()
145 | if len(m_title) <= 3:
146 | continue
147 | m_href = m_new['href']
148 | sheet.cell(row=t_row, column=t_col, value=m_title)
149 | sheet.cell(row=t_row, column=t_col + 1, value=m_href)
150 | t_row = t_row + 1
151 |
152 | try:
153 | wb.save(self.xlsxname)
154 | except:
155 | print("EastWealth Save Error = Indexanalysis")
156 |
157 |
158 | def getMainNews(self):
159 | wb = load_workbook(self.xlsxname)
160 | sheet = wb.get_sheet_by_name("Ew")
161 | t_row = sheet.max_row + 2
162 | t_col = 1
163 | sheet.cell(row=t_row, column=t_col, value="财经要闻")
164 | t_row = t_row + 1
165 | sheet.cell(row=t_row, column=t_col, value="新闻标题")
166 | sheet.cell(row=t_row, column=t_col + 1, value="新闻链接")
167 | sheet.cell(row=t_row, column=t_col + 2, value="新闻简介")
168 | sheet.cell(row=t_row, column=t_col + 3, value="新闻时间")
169 | t_row = t_row + 1
170 |
171 | datalist = self.finance_soup.find_all(class_='yaowen panel')
172 | for Newslist in datalist:
173 | News = Newslist.find_all('a')
174 | for m_new in News:
175 | m_title = m_new.get_text()
176 | m_href = m_new['href']
177 | sheet.cell(row=t_row, column=t_col, value=m_title)
178 | sheet.cell(row=t_row, column=t_col + 1, value=m_href)
179 | t_row = t_row + 1
180 | try:
181 | wb.save(self.xlsxname)
182 | except:
183 | print("EastWealth Save Error = MainNews")
184 |
185 |
186 | def getFinanceNews(self):
187 | wb = load_workbook(self.xlsxname)
188 | sheet = wb.get_sheet_by_name("Ew")
189 | t_row = sheet.max_row + 2
190 | t_col = 1
191 | sheet.cell(row=t_row, column=t_col, value="财经导读")
192 | t_row = t_row + 1
193 | sheet.cell(row=t_row, column=t_col, value="新闻标题")
194 | sheet.cell(row=t_row, column=t_col + 1, value="新闻链接")
195 | sheet.cell(row=t_row, column=t_col + 2, value="新闻简介")
196 | sheet.cell(row=t_row, column=t_col + 3, value="新闻时间")
197 | t_row = t_row + 1
198 |
199 | datalist = self.finance_soup.find_all(class_='daodu panel')
200 | for Newslist in datalist:
201 | News = Newslist.find_all('a')
202 | for m_new in News:
203 | m_title = m_new.get_text()
204 | if len(m_title) <= 4:
205 | continue
206 | m_href = m_new['href']
207 | sheet.cell(row=t_row, column=t_col, value=m_title)
208 | sheet.cell(row=t_row, column=t_col + 1, value=m_href)
209 | t_row = t_row + 1
210 | try:
211 | wb.save(self.xlsxname)
212 | except:
213 | print("EastWealth Save Error = FinanceNews")
214 |
215 |
216 | def main(self, file_name):
217 | self.xlsxname = file_name
218 | Ew.request()
219 | Ew.getTopNew()
220 | Ew.getMainNews()
221 | Ew.getFinanceNews()
222 | Ew.getStockNew()
223 |     Ew.getIndexanalysis() # market-wide analysis
224 |
225 | Ew = EastWealth()
226 |
--------------------------------------------------------------------------------
/src/Fh_Finance.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from bs4 import BeautifulSoup
3 | from openpyxl import load_workbook, Workbook
4 | from openpyxl.styles import Font
5 | import re
6 | import json
7 | import time
8 |
9 | """
10 | https://tech.ifeng.com/24h/
11 | http://tech.ifeng.com/
12 | http://finance.ifeng.com/
13 | """
14 |
15 | headers = {
16 | 'User-Agent':'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36',
17 | }
18 |
19 | class FengHuang(object):
20 | def __init__(self):
21 | pass
22 |
23 | def request(self):
24 | #new
25 | self.url = 'http://finance.ifeng.com/'
26 |
27 | for ll in range(3):
28 | try:
29 | self.data = requests.get(self.url, headers=headers, timeout=120)
30 | if self.data.status_code == 200:
31 | break
32 | except Exception as e:
33 | pass
34 |
35 | self.soup = BeautifulSoup(self.data.text, "lxml")
36 |
37 | self.stock_url = 'http://finance.ifeng.com/stock/'
38 | self.stock_data = requests.get(self.stock_url, headers=headers)
39 | self.stock_soup = BeautifulSoup(self.stock_data.text, "lxml")
40 |
41 |
42 | def getTopNew(self):
43 | wb = load_workbook(self.xlsxname)
44 | sheet = wb.create_sheet("Fh")
45 | t_row = 1
46 | t_col = 1
47 | sheet.cell(row=t_row, column=t_col, value="凤凰财经")
48 | t_row = t_row + 1
49 | sheet.cell(row=t_row, column=t_col, value="新闻标题")
50 | sheet.cell(row=t_row, column=t_col + 1, value="新闻链接")
51 | sheet.cell(row=t_row, column=t_col + 2, value="新闻简介")
52 | sheet.cell(row=t_row, column=t_col + 3, value="新闻时间")
53 | t_row = t_row + 1
54 |
55 | #datalist = self.soup.find_all(class_='hot-1NJ2DKa4 clearfix')
56 | datalist = self.soup.select('#root > div > div.col-3u4gcc0Q.clearfix > div.col_L-3c5atSII > div.box-1bAs3EGr')
57 |
58 | for Newslist in datalist:
59 | News = Newslist.find_all('a')
60 | for m_new in News:
61 | m_title = m_new['title']
62 | m_href = m_new['href']
63 | sheet.cell(row=t_row, column=t_col, value=m_title)
64 | sheet.cell(row=t_row, column=t_col + 1, value=m_href)
65 | t_row = t_row + 1
66 | try:
67 | wb.save(self.xlsxname)
68 | except:
69 | print("FengHuang Save Error = getTopNew")
70 |
71 |
72 | def stockNewslist(self, json_str, t_row): #stockNewsList
73 | wb = load_workbook(self.xlsxname)
74 | sheet = wb.get_sheet_by_name("Fh")
75 | Newslist = json_str['stockNewsList']
76 | t_col = 1
77 | temp = 0
78 | for m_new in Newslist:
79 | temp = temp + 1
80 | if temp == 6:
81 | temp = 0
82 | t_row = t_row + 1
83 | else:
84 | m_title = m_new['title']
85 | m_time = m_new['newsTime']
86 | m_href = m_new['url']
87 | sheet.cell(row=t_row, column=t_col, value=m_title)
88 | sheet.cell(row=t_row, column=t_col + 1, value=m_href)
89 | sheet.cell(row=t_row, column=t_col + 3, value=m_time)
90 | t_row = t_row + 1
91 | try:
92 | wb.save(self.xlsxname)
93 | except:
94 | print("FengHuang Save Error = getNewsList")
95 |
96 |
97 |     def CompanyNews(self, json_str): # no top item, only the 5 entries below it
98 | wb = load_workbook(self.xlsxname)
99 | sheet = wb.get_sheet_by_name("Fh")
100 | t_row = sheet.max_row + 2
101 | t_col = 1
102 | sheet.cell(row=t_row, column=t_col, value="公司要闻")
103 | t_row = t_row + 1
104 | sheet.cell(row=t_row, column=t_col, value="新闻标题")
105 | sheet.cell(row=t_row, column=t_col + 1, value="新闻链接")
106 | sheet.cell(row=t_row, column=t_col + 2, value="新闻简介")
107 | sheet.cell(row=t_row, column=t_col + 3, value="新闻时间")
108 | t_row = t_row + 1
109 |
110 | CNews = json_str['news']
111 | m_CNews = json_str['newsList']
112 | for m_new in CNews:
113 | m_href = m_new['url']
114 | m_title = m_new['title']
115 | m_href = m_href.replace("//", "https://")
116 | sheet.cell(row=t_row, column=t_col, value=m_title)
117 | sheet.cell(row=t_row, column=t_col + 1, value=m_href)
118 | t_row = t_row + 1
119 | break
120 |
121 | for m_new in m_CNews:
122 | m_time = m_new['newsTime']
123 | m_href = m_new['url']
124 | m_title = m_new['title']
125 | sheet.cell(row=t_row, column=t_col, value=m_title)
126 | sheet.cell(row=t_row, column=t_col + 1, value=m_href)
127 | sheet.cell(row=t_row, column=t_col + 2, value=m_time)
128 | t_row = t_row + 1
129 | try:
130 | wb.save(self.xlsxname)
131 | except:
132 | print("FengHuang Save Error = CompanyNews2")
133 |
134 | def marketAnalysis(self, json_str):
135 | wb = load_workbook(self.xlsxname)
136 | sheet = wb.get_sheet_by_name("Fh")
137 | t_row = sheet.max_row + 2
138 | t_col = 1
139 | sheet.cell(row=t_row, column=t_col, value="操盘分析")
140 | t_row = t_row + 1
141 | sheet.cell(row=t_row, column=t_col, value="新闻标题")
142 | sheet.cell(row=t_row, column=t_col + 1, value="新闻链接")
143 | sheet.cell(row=t_row, column=t_col + 2, value="新闻简介")
144 | sheet.cell(row=t_row, column=t_col + 3, value="新闻时间")
145 | t_row = t_row + 1
146 |
147 | Newslist = json_str['marketAnalysis']
148 | for m_news in Newslist:
149 | m_href = m_news['url']
150 | m_title = m_news['title']
151 | m_href = m_href.replace("//", "https://")
152 | sheet.cell(row=t_row, column=t_col, value=m_title)
153 | sheet.cell(row=t_row, column=t_col + 1, value=m_href)
154 | t_row = t_row + 1
155 | try:
156 | wb.save(self.xlsxname)
157 | except:
158 | print("FengHuang Save Error = marketAnalysis")
159 |
160 |
161 |     def IPOObservation(self, json_str): # no top item, only the 5 entries below it
162 | wb = load_workbook(self.xlsxname)
163 | sheet = wb.get_sheet_by_name("Fh")
164 | t_row = sheet.max_row + 2
165 | t_col = 1
166 |
167 | sheet.cell(row=t_row, column=t_col, value="IPO观察")
168 | t_row = t_row + 1
169 | sheet.cell(row=t_row, column=t_col, value="新闻标题")
170 | sheet.cell(row=t_row, column=t_col + 1, value="新闻链接")
171 | sheet.cell(row=t_row, column=t_col + 2, value="新闻简介")
172 | sheet.cell(row=t_row, column=t_col + 3, value="新闻时间")
173 | t_row = t_row + 1
174 |
175 | NewsList = json_str['hotPlate']
176 | for m_new in NewsList:
177 | m_href = m_new['url']
178 | m_title = m_new['title']
179 | m_href = m_href.replace("//", "https://")
180 | sheet.cell(row=t_row, column=t_col, value=m_title)
181 | sheet.cell(row=t_row, column=t_col + 1, value=m_href)
182 | t_row = t_row + 1
183 | try:
184 | wb.save(self.xlsxname)
185 | except:
186 | print("FengHuang Save Error = IPOObservation")
187 |
188 |
189 | def getStockNews(self):
190 | wb = load_workbook(self.xlsxname)
191 | sheet = wb.get_sheet_by_name("Fh")
192 | t_row = sheet.max_row + 2
193 | t_col = 1
194 | sheet.cell(row=t_row, column=t_col, value="证券要闻")
195 | t_row = t_row + 1
196 | sheet.cell(row=t_row, column=t_col, value="新闻标题")
197 | sheet.cell(row=t_row, column=t_col + 1, value="新闻链接")
198 | sheet.cell(row=t_row, column=t_col + 2, value="新闻简介")
199 | sheet.cell(row=t_row, column=t_col + 3, value="新闻时间")
200 | t_row = t_row + 1
201 | soup = str(self.stock_soup)
202 |
203 | #sear = re.search(r'var allData = (.*?) .*\"\}\}\;', soup, re.M | re.I)
204 | sear = re.search('var allData = (.*?)var adData', soup, re.M | re.I | re.S)
205 | data = sear.group(1)
206 | data = data.replace(";", "")
207 | json_str = json.loads(data)
208 | #print(data)
209 | m_row = t_row + 1
210 |         stockNews = json_str['stockNews'] # top news
211 | for m_new in stockNews:
212 | m_title = m_new['title']
213 | href = m_new['url']
214 | m_href = href.replace("//", "https://")
215 | sheet.cell(row=t_row, column=t_col, value=m_title)
216 | sheet.cell(row=t_row, column=t_col + 1, value=m_href)
217 | t_row = t_row + 1
218 |
219 | try:
220 | wb.save(self.xlsxname)
221 | except:
222 | print("FengHuang Save Error = getStockNews")
223 |
224 |         self.stockNewslist(json_str, m_row) # no top item, only the 5 entries below it
225 |         self.CompanyNews(json_str)
226 |         self.marketAnalysis(json_str)
227 |         self.IPOObservation(json_str)
228 |
229 | def main(self, filename):
230 | self.xlsxname = filename
231 | #Fh.Style()
232 | Fh.request()
233 | Fh.getTopNew()
234 | Fh.getStockNews()
235 |
236 | Fh = FengHuang()
237 |
--------------------------------------------------------------------------------
/src/Jrj_Finance.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from bs4 import BeautifulSoup
3 | from openpyxl import load_workbook
4 | from openpyxl.styles import Font
5 | import json
6 | import time
7 | import datetime
8 |
9 | headers = {
10 | 'User-Agent':'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36',
11 | }
12 |
13 | class JinRongJie(object):
14 | def __init__(self):
15 | pass
16 |
17 | def Style(self):
18 | self.m_font = Font(
19 | size=12,
20 | bold=True,
21 | )
22 |
23 | self.head_font = Font(
24 | size=14,
25 | bold=True,
26 | )
27 |
28 | def get_TopNews(self):
29 | url = 'http://finance.jrj.com.cn/'
30 | for ll in range(3):
31 | try:
32 | self.data = requests.get(url, headers=headers, timeout=120)
33 | if self.data.status_code == 200:
34 | break
35 | except Exception as e:
36 | pass
37 |
38 | soup = BeautifulSoup(self.data.text, "lxml")
39 |
40 | wb = load_workbook(self.xlsxname)
41 | sheet = wb.create_sheet('Jrj')
42 | t_row = 1
43 | t_col = 1
44 |
45 | sheet.cell(row=t_row, column=t_col + 0, value="金融界财经")
46 | t_row = t_row + 1
47 | sheet.cell(row=t_row, column=t_col + 0, value="新闻标题")
48 | sheet.cell(row=t_row, column=t_col + 1, value="新闻链接")
49 | sheet.cell(row=t_row, column=t_col + 2, value="新闻简介")
50 | sheet.cell(row=t_row, column=t_col + 3, value="新闻时间")
51 | t_row = t_row + 1
52 |
53 | datalist = soup.find_all(class_="l1_top")
54 | for News in datalist:
55 | New = News.select('dl dt p')
56 | for m_new in New:
57 | data = m_new.find('a')
58 | m_url = data['href']
59 | m_title = data.get_text()
60 | sheet.cell(row=t_row, column=t_col, value=m_title)
61 | sheet.cell(row=t_row, column=t_col + 1, value=m_url)
62 | t_row = t_row + 1
63 | try:
64 | wb.save(self.xlsxname)
65 | except Exception:
66 | print("JRJ Save Error = 1")
67 |
68 |     def get_FinanceNews(self): # hot news from roughly the last 4-5 hours
69 |         fin_time1 = time.strftime("%Y%m", time.localtime()) # year-month
70 |         fin_time2 = time.strftime("%Y%m%d", time.localtime()) # year-month-day
71 | fin_url = 'http://finance.jrj.com.cn/xwk/{}/{}_1.shtml'.format(fin_time1, fin_time2)
72 |
73 | wb = load_workbook(self.xlsxname)
74 | sheet = wb.get_sheet_by_name('Jrj')
75 | t_row = sheet.max_row + 1
76 | t_col = 1
77 |
78 | sheet.cell(row=t_row + 1, column=t_col, value="财经频道新闻")
79 | sheet.cell(row=t_row + 2, column=t_col, value="新闻标题")
80 | sheet.cell(row=t_row + 2, column=t_col + 1, value="新闻链接")
81 | sheet.cell(row=t_row + 2, column=t_col + 2, value="新闻简介")
82 | sheet.cell(row=t_row + 2, column=t_col + 3, value="新闻时间")
83 | t_row = t_row + 3
84 | time_row = t_row
85 |
86 | data = None
87 | for ll in range(3):
88 | try:
89 | data = requests.get(fin_url, headers=headers, timeout=120)
90 | if data.status_code == 200:
91 | break
92 | except Exception as e:
93 | pass
94 |
95 | soup = BeautifulSoup(data.text, "lxml")
96 | datalist = soup.find_all(class_="list")
97 |         flag = False # the <a> tags alternate; only every second one is written
98 | for Newslist in datalist:
99 | #News = Newslist.select('li')
100 | News = Newslist.find_all('a')
101 | m_NewsTime = Newslist.find_all('span')
102 | for m_new in News:
103 | if flag == True:
104 | flag = False
105 | m_url = m_new['href']
106 | m_title = m_new.get_text()
107 | sheet.cell(row=t_row, column=t_col, value=m_title)
108 | sheet.cell(row=t_row, column=t_col + 1, value=m_url)
109 | t_row = t_row + 1
110 | else:
111 | flag = True
112 | for new_time in m_NewsTime:
113 | m_time = new_time.get_text()
114 | sheet.cell(row=time_row, column=t_col + 3, value=m_time)
115 | time_row = time_row + 1
116 | try:
117 | wb.save(self.xlsxname)
118 | except Exception:
119 | print("JRJ Save Error = 2")
120 |
121 | def get_todayHot(self):
122 | url = 'http://biz.jrj.com.cn/biz_index.shtml'
123 | for ll in range(3):
124 | try:
125 | data = requests.get(url, headers=headers, timeout=120)
126 | if data.status_code == 200:
127 | break
128 | except Exception as e:
129 | pass
130 |
131 | soup = BeautifulSoup(data.text, "lxml")
132 | datalist = soup.find_all(class_="jrj-top10")
133 |
134 | wb = load_workbook(self.xlsxname)
135 | sheet = wb.get_sheet_by_name('Jrj')
136 | t_row = sheet.max_row + 2
137 | t_col = 1
138 |
139 | sheet.cell(row=t_row, column=t_col, value="24小时间热闻点击排行榜")
140 | t_row = t_row + 1
141 |         sheet.cell(row=t_row, column=t_col, value="新闻标题")
143 | sheet.cell(row=t_row, column=t_col + 1, value="新闻链接")
144 | sheet.cell(row=t_row, column=t_col + 2, value="新闻简介")
145 | sheet.cell(row=t_row, column=t_col + 3, value="新闻时间")
146 | t_row = t_row + 1
147 |
148 |         flag = True # process only the first jrj-top10 block; cleared after the first pass
149 | for Newslist in datalist:
150 | if flag == True:
151 | News = Newslist.find_all('a')
152 | for m_new in News:
153 | m_title = m_new['title']
154 | m_url = m_new['href']
155 | sheet.cell(row=t_row, column=t_col, value=m_title)
156 | sheet.cell(row=t_row, column=t_col + 1, value=m_url)
157 | t_row = t_row + 1
158 | flag = False
159 | try:
160 | wb.save(self.xlsxname)
161 | except Exception:
162 | print("JRJ Save Error = 5")
163 |
164 |
165 | def get_Business(self):
166 |         bu_time1 = time.strftime("%Y%m", time.localtime()) # year-month
167 |         bu_time2 = time.strftime("%Y%m%d", time.localtime()) # year-month-day
168 | bus_url = 'http://biz.jrj.com.cn/xwk/{}/{}_1.shtml'.format(bu_time1, bu_time2)
169 |
170 | wb = load_workbook(self.xlsxname)
171 | sheet = wb.get_sheet_by_name('Jrj')
172 | t_row = sheet.max_row + 1
173 | t_col = 1
174 |
175 | sheet.cell(row=t_row + 1, column=t_col, value="商业频道新闻")
176 | sheet.cell(row=t_row + 2, column=t_col, value="新闻标题")
177 | sheet.cell(row=t_row + 2, column=t_col + 1, value="新闻链接")
178 | sheet.cell(row=t_row + 2, column=t_col + 2, value="新闻简介")
179 | sheet.cell(row=t_row + 2, column=t_col + 3, value="新闻时间")
180 | t_row = t_row + 3
181 | time_row = t_row
182 |
183 | data = None
184 | for ll in range(3):
185 | try:
186 | data = requests.get(bus_url, headers=headers, timeout=120)
187 | if data.status_code == 200:
188 | break
189 | except Exception as e:
190 | pass
191 |
192 | soup = BeautifulSoup(data.text, "lxml")
193 | datalist = soup.find_all(class_="list")
194 | flag = False
195 | for Newslist in datalist:
196 | # News = Newslist.select('li')
197 | News = Newslist.find_all('a')
198 | m_NewsTime = Newslist.find_all('span')
199 | for m_new in News:
200 | if flag == True:
201 | flag = False
202 | m_url = m_new['href']
203 | m_title = m_new.get_text()
204 | sheet.cell(row=t_row, column=t_col, value=m_title)
205 | sheet.cell(row=t_row, column=t_col + 1, value=m_url)
206 | t_row = t_row + 1
207 | else:
208 | flag = True
209 | for new_time in m_NewsTime:
210 | m_time = new_time.get_text()
211 | sheet.cell(row=time_row, column=t_col + 3, value=m_time)
212 | time_row = time_row + 1
213 | try:
214 | wb.save(self.xlsxname)
215 | except Exception:
216 | print("JRJ Save Error = 3")
217 |
218 | """
219 | def get_Science(self):
220 | url = 'http://finance.jrj.com.cn/tech/tech_index.shtml'
221 | sci_time = time.strftime("%Y-%m-%d", time.localtime()) # year-month-day-hour-minute
222 | """
223 |
224 | def get_yesScience(self):
225 |         sci_time1 = time.strftime("%Y%m", time.localtime()) # year-month
226 |         sci_time2 = time.strftime("%Y%m%d", time.localtime()) # year-month-day
227 | """
228 | yesterday = time.strftime("%d", time.localtime()) # year-month-day-hour-minute
229 | yesterday = 1
230 | if yesterday == 1:
231 | # 获取当前日期
232 | now_time = datetime.datetime.now()
233 | # 获取本月的第一天
234 | end_day_in_mouth = now_time.replace(day=1)
235 | # 获取上月的最后一天
236 | next_mouth = end_day_in_mouth - datetime.timedelta(days=1)
237 | print(next_mouth.month)
238 | """
239 |         sci_url = 'http://biz.jrj.com.cn/xwk/{}/{}_1.shtml'.format(sci_time1, sci_time2) # note: points at the biz channel, not a tech-specific one
240 |
241 | wb = load_workbook(self.xlsxname)
242 | sheet = wb.get_sheet_by_name('Jrj')
243 | t_row = sheet.max_row + 1
244 | t_col = 1
245 |
246 | sheet.cell(row=t_row + 1, column=t_col, value="科技频道新闻")
247 | sheet.cell(row=t_row + 2, column=t_col, value="新闻标题")
248 | sheet.cell(row=t_row + 2, column=t_col + 1, value="新闻链接")
249 | sheet.cell(row=t_row + 2, column=t_col + 2, value="新闻简介")
250 | sheet.cell(row=t_row + 2, column=t_col + 3, value="新闻时间")
251 | t_row = t_row + 3
252 | time_row = t_row
253 | data = ''
254 | for ll in range(3):
255 | try:
256 | data = requests.get(sci_url, headers=headers, timeout=120)
257 | if data.status_code == 200:
258 | break
259 | except Exception as e:
260 | pass
261 | soup = BeautifulSoup(data.text, "lxml")
262 | datalist = soup.find_all(class_="list")
263 | flag = False
264 | for Newslist in datalist:
265 | # News = Newslist.select('li')
266 | News = Newslist.find_all('a')
267 | m_NewsTime = Newslist.find_all('span')
268 | for m_new in News:
269 | if flag == True:
270 | flag = False
271 | m_url = m_new['href']
272 | m_title = m_new.get_text()
273 | sheet.cell(row=t_row, column=t_col, value=m_title)
274 | sheet.cell(row=t_row, column=t_col + 1, value=m_url)
275 | t_row = t_row + 1
276 | else:
277 | flag = True
278 | for new_time in m_NewsTime:
279 | m_time = new_time.get_text()
280 | sheet.cell(row=time_row, column=t_col + 3, value=m_time)
281 | time_row = time_row + 1
282 | try:
283 | wb.save(self.xlsxname)
284 | except Exception:
285 | print("JRJ Save Error = 4")
286 |
287 | def main(self, filename):
288 | self.xlsxname = filename
289 | Jrj.get_TopNews()
290 | Jrj.get_todayHot()
291 | Jrj.get_FinanceNews()
292 | Jrj.get_Business()
293 | # Jrj.get_Science()
294 | Jrj.get_yesScience()
295 |
296 | Jrj = JinRongJie()
297 |
--------------------------------------------------------------------------------
/src/Platform.py:
--------------------------------------------------------------------------------
1 | import platform
2 | import os
3 | import shutil
4 | import time
5 |
6 | class Files(object):
7 | def __init__(self):
8 | pass
9 |
10 | def get_platform(self):
11 | sys = platform.system()
12 | if sys == "Windows":
13 | return True
14 |         else: # Linux, macOS (Darwin), etc. all take the non-Windows path
15 |             return False
16 |
17 | def file_move(self, platform):
18 | if platform:
19 | self.win_News()
20 | else:
21 | self.lin_News()
22 |
23 | def lin_News(self):
24 | path = "./Finance/News/"
25 | filename = "./News_Finance.xlsx"
26 | isExists = os.path.exists(path)
27 | if not isExists:
28 | os.mkdir(path)
29 | #filetime = time.strftime("%Y_%m_%d_%H_%M", time.localtime()) # year-month-day-hour-minute
30 |         filetime = time.strftime("%Y_%m_%d_%H", time.localtime()) # year-month-day-hour
31 | path2 = path + "闻讯__" + filetime + "时.xlsx"
32 | shutil.move(filename, path2)
33 |
34 |
35 | def win_News(self):
36 |         desktop_path = os.path.join(os.path.expanduser('~'),"Desktop") # get the desktop path
37 | filename = desktop_path + "\\Finance\\News_Finance.xlsx"
38 | path = desktop_path +"\\Finance\\News\\"
39 |
40 | isExists = os.path.exists(path)
41 | if not isExists:
42 | os.mkdir(path)
43 | #filetime = time.strftime("%Y_%m_%d_%H_%M", time.localtime()) #year-month-day-hour-minute
44 |         filetime = time.strftime("%Y_%m_%d_%H", time.localtime()) # year-month-day-hour
45 | path2 = path + "闻讯__" + filetime + "时.xlsx"
46 | shutil.move(filename, path2)
47 |
48 | def linux_filename(self):
49 | path = "./Finance/"
50 | isExists = os.path.exists(path)
51 | if not isExists:
52 | os.mkdir(path)
53 | # self.lin_History()
54 | linux_file = "./News_Finance.xlsx"
55 | return linux_file
56 |
57 | def win_filename(self):
58 |         desktop_path = os.path.join(os.path.expanduser('~'), "Desktop") # get the desktop path
59 | path = desktop_path + "\\Finance\\"
60 | isExists = os.path.exists(path)
61 | if not isExists:
62 | os.mkdir(path)
63 |
64 | win_file = desktop_path + "\\Finance\\News_Finance.xlsx"
65 | return win_file
66 |
67 | def pause(self):
68 | os.system('pause')
69 |
70 | """
71 | def win_mkdir(self):
72 | desktop_path = os.path.join(os.path.expanduser('~'), "Desktop") # 获取桌面路径
73 | dir = os.path.exists(desktop_path + "\\Finance")
74 | if not dir:
75 | os.mkdir(dir)
76 | """
77 | def getwinfile(self):
78 |         desktop_path = os.path.join(os.path.expanduser('~'), "Desktop") # get the desktop path
79 | filename = desktop_path + "\\Finance\\News_Finance.xlsx"
80 | path = desktop_path + "\\Finance\\News\\"
81 | # filetime = time.strftime("%Y_%m_%d_%H_%M", time.localtime()) #year-month-day-hour-minute
82 |         filetime = time.strftime("%Y_%m_%d", time.localtime()) # year-month-day
83 | path2 = path + "闻讯__" + filetime + ".xlsx"
84 | return path2
85 |
86 | def getlinfile(self):
87 | path = "./Finance/News/"
88 | filename = "./News_Finance.xlsx"
89 | isExists = os.path.exists(path)
90 | # filetime = time.strftime("%Y_%m_%d_%H_%M", time.localtime()) # year-month-day-hour-minute
91 |         filetime = time.strftime("%Y_%m_%d", time.localtime()) # year-month-day
92 | path2 = path + "闻讯__" + filetime + ".xlsx"
93 | return path2
94 |
95 |
96 | def getfilename(self):
97 | if self.get_platform() == True:
98 | return self.getwinfile()
99 | else:
100 | return self.getlinfile()
101 |
102 | def filename(self):
103 |         filetime = time.strftime("%Y_%m_%d", time.localtime()) # year-month-day
104 | path2 = "闻讯__" + filetime + ".xlsx"
105 | return path2
106 |
107 | def getwinpath(self):
108 |         desktop_path = os.path.join(os.path.expanduser('~'), "Desktop") # get the desktop path
109 | path = desktop_path + "\\Finance\\" + " \\"
110 | return path
111 |
112 | def getlinpath(self):
113 | path = "./Finance/" + " /"
114 | return path
115 |
116 | def getpath(self):
117 | if self.get_platform() == True:
118 | return self.getwinpath()
119 | else:
120 | return self.getlinpath()
121 |
122 |
123 | pt = Files()
--------------------------------------------------------------------------------
/src/Self_Stock.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from bs4 import BeautifulSoup
3 | from openpyxl import load_workbook, Workbook
4 | import json
5 | import threading
6 | import time
7 | import re
8 | import os
9 | import wget
10 | from src.Platform import pt
11 | # Per-stock version: trade details, bid/ask queues, brokerages/institutions, etc.
12 | # Add codes to Code.txt in the parent directory
13 | # No longer updated once the API breaks; capture the endpoints yourself.
14 |
15 |
16 | headers = {
17 | 'User-Agent':'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36',
18 | }
19 |
20 | class SelfStock(object):
21 | threadLock = threading.Lock()
22 | def __init__(self):
23 | self.stop = True
24 | #self.xlsxname = file_name
25 |
26 | def Deal_Xq_quote(self, data, name):
27 | self.threadLock.acquire()
28 | file_name = self.get_filename(name)
29 | try:
30 | wb = load_workbook(file_name)
31 | try:
32 | sheet = wb.get_sheet_by_name(name)
33 | ws = wb[name]
34 | wb.remove(ws)
35 | sheet = wb.create_sheet(name, 0)
36 | except:
37 | sheet = wb.create_sheet(name)
38 | except:
39 | wb = Workbook()
40 | ws = wb['Sheet']
41 | wb.remove(ws)
42 | sheet = wb.create_sheet(name)
43 |
44 | t_row = 1
45 | t_col = 1
46 |
47 | data_json = data['data']
48 | data_items = data_json['items']
49 | data_mk = data_items[0]['market']
50 | data_quote = data_items[0]['quote']
51 | #print(data_quote)
52 |
53 | sheet.cell(row=t_row, column=t_col, value="股票代码")
54 | sheet.cell(row=t_row, column=t_col + 1, value="股票名称")
55 | sheet.cell(row=t_row, column=t_col + 2, value="交易状态")
56 | sheet.cell(row=t_row, column=t_col + 3, value="更新时间")
57 | t_row = t_row + 1
58 | m_status = data_mk['status']
59 | stock_code = data_quote['symbol']
60 | stock_name = data_quote['name']
61 | m_time = data_quote['timestamp']
62 |         timeStamp = float(m_time / 1000) # 13-digit (millisecond) timestamp
63 | timeArray = time.localtime(timeStamp)
64 | otherStyleTime = time.strftime("%Y-%m-%d %H:%M:%S", timeArray)
65 |
66 | sheet.cell(row=t_row, column=t_col, value=stock_code)
67 | sheet.cell(row=t_row, column=t_col + 1, value=stock_name)
68 | sheet.cell(row=t_row, column=t_col + 2, value=m_status)
69 | sheet.cell(row=t_row, column=t_col + 3, value=otherStyleTime)
70 | t_row = t_row + 2
71 |
72 | sheet.cell(row=t_row, column=t_col + 0, value="当前价格")
73 | sheet.cell(row=t_row, column=t_col + 1, value="涨跌价格")
74 | sheet.cell(row=t_row, column=t_col + 2, value="涨跌幅度")
75 | sheet.cell(row=t_row, column=t_col + 3, value="开盘价格")
76 | sheet.cell(row=t_row, column=t_col + 4, value="当前新高")
77 | sheet.cell(row=t_row, column=t_col + 5, value="当前新低")
78 | sheet.cell(row=t_row, column=t_col + 6, value="昨日收盘")
79 | sheet.cell(row=t_row, column=t_col + 7, value="平均价格")
80 | sheet.cell(row=t_row, column=t_col + 8, value="涨停价格")
81 | sheet.cell(row=t_row, column=t_col + 9, value="跌停价格")
82 | sheet.cell(row=t_row, column=t_col + 10, value="52周最高")
83 | sheet.cell(row=t_row, column=t_col + 11, value="52周最低")
84 | t_row = t_row + 1
85 |
86 | m_current = data_quote['current']
87 |         m_percent = data_quote['percent'] # percent change
88 |         m_chg = data_quote['chg'] # price change
89 |         m_open = data_quote['open'] # opening price
90 |         m_yesclose = data_quote['last_close'] # previous close
91 |         m_high = data_quote['high']
92 |         m_low = data_quote['low']
93 |         m_avg_price = data_quote['avg_price']
94 |         m_limit_up = data_quote['limit_up'] # limit-up price
95 |         m_limit_down = data_quote['limit_down'] # limit-down price
96 |         m_high52w = data_quote['high52w'] # 52-week high
97 |         m_low52w = data_quote['low52w'] # 52-week low
98 | sheet.cell(row=t_row, column=t_col + 0, value=m_current)
99 | sheet.cell(row=t_row, column=t_col + 1, value=m_chg)
100 | sheet.cell(row=t_row, column=t_col + 2, value=str(m_percent) + "%")
101 | sheet.cell(row=t_row, column=t_col + 3, value=m_open)
102 | sheet.cell(row=t_row, column=t_col + 4, value=m_high)
103 | sheet.cell(row=t_row, column=t_col + 5, value=m_low)
104 | sheet.cell(row=t_row, column=t_col + 6, value=m_yesclose)
105 | sheet.cell(row=t_row, column=t_col + 7, value=m_avg_price)
106 | sheet.cell(row=t_row, column=t_col + 8, value=m_limit_up)
107 | sheet.cell(row=t_row, column=t_col + 9, value=m_limit_down)
108 | sheet.cell(row=t_row, column=t_col + 10, value=m_high52w)
109 | sheet.cell(row=t_row, column=t_col + 11, value=m_low52w)
110 | t_row = t_row + 2
111 |
112 |
113 | sheet.cell(row=t_row, column=t_col + 0, value="成交额(/万)")
114 | sheet.cell(row=t_row, column=t_col + 1, value="成交量(/万手)")
115 | sheet.cell(row=t_row, column=t_col + 2, value="换手率")
116 | sheet.cell(row=t_row, column=t_col + 3, value="量比")
117 | sheet.cell(row=t_row, column=t_col + 4, value="振幅")
118 | sheet.cell(row=t_row, column=t_col + 5, value="市盈率TTM")
119 | sheet.cell(row=t_row, column=t_col + 6, value="市盈率(动)")
120 | sheet.cell(row=t_row, column=t_col + 7, value="市盈率(静)")
121 | sheet.cell(row=t_row, column=t_col + 8, value="市净率")
122 | sheet.cell(row=t_row, column=t_col + 9, value="总股本(/万)")
123 | sheet.cell(row=t_row, column=t_col + 10, value="流通股(/万)")
124 | sheet.cell(row=t_row, column=t_col + 11, value="总市值(/亿)")
125 | sheet.cell(row=t_row, column=t_col + 12, value="流通市值(/亿)")
126 | t_row = t_row + 1
127 |
128 |         m_amount = data_quote['amount'] # trade amount (turnover)
129 |         if m_amount is None: # guard: suspended/delisted stocks stop updating quotes
130 | self.stop = False
131 | self.threadLock.release()
132 | return
133 |         m_turnover_rate = data_quote['turnover_rate'] # turnover rate, e.g. 0.74%
134 |         m_volume = data_quote['volume'] # trading volume
135 |         m_amplitude = data_quote['amplitude'] # amplitude
136 |         m_total_shares = data_quote['total_shares'] # total shares
137 |         m_float_shares = data_quote['float_shares'] # floating shares
138 |         m_volume_ratio = data_quote['volume_ratio'] # volume ratio
139 |         m_pe_ttm = data_quote['pe_ttm'] # P/E (TTM)
140 |         m_pe_forecast = data_quote['pe_forecast'] # forward (dynamic) P/E
141 |         m_pe_lyr = data_quote['pe_lyr'] # static (trailing) P/E
142 |         m_pb = data_quote['pb'] # P/B ratio
143 |         m_profit = data_quote['profit'] # annual-report profit
144 |         m_profit_four = data_quote['profit_four'] # also a profit figure; exact meaning unclear
145 |         m_profit_forecast = data_quote['profit_forecast']
146 |         m_market_capital = data_quote['market_capital'] # total market cap
147 |         m_float_market_capital = data_quote['float_market_capital'] # float market cap
148 | sheet.cell(row=t_row, column=t_col + 0, value=round(m_amount / 10000, 2))
149 | sheet.cell(row=t_row, column=t_col + 1, value=round(m_volume / 10000, 2))
150 | sheet.cell(row=t_row, column=t_col + 2, value=str(m_turnover_rate) + "%")
151 | sheet.cell(row=t_row, column=t_col + 3, value=m_volume_ratio)
152 | sheet.cell(row=t_row, column=t_col + 4, value=str(m_amplitude) + "%")
153 | sheet.cell(row=t_row, column=t_col + 5, value=m_pe_ttm)
154 | sheet.cell(row=t_row, column=t_col + 6, value=m_pe_forecast)
155 | sheet.cell(row=t_row, column=t_col + 7, value=m_pe_lyr)
156 | sheet.cell(row=t_row, column=t_col + 8, value=m_pb)
157 | sheet.cell(row=t_row, column=t_col + 9, value=round(m_total_shares / 10000, 2))
158 | sheet.cell(row=t_row, column=t_col + 10, value=round(m_float_shares / 10000, 2))
159 | sheet.cell(row=t_row, column=t_col + 11, value=round(m_market_capital / 100000000, 2))
160 | sheet.cell(row=t_row, column=t_col + 12, value=round(m_float_market_capital / 100000000, 2))
161 |
162 | self.threadLock.release()
163 |
164 | try:
165 | wb.save(file_name)
166 | except Exception:
167 | print("Self_Stock Save Error = Xq_quote")
168 |
169 | def Deal_Xq_distribution(self, data, name):
170 | self.threadLock.acquire()
171 | file_name = self.get_filename(name)
172 | wb = load_workbook(file_name)
173 | sheet = wb.get_sheet_by_name(name)
174 |
175 | t_row = sheet.max_row + 3
176 | t_col = 1
177 |
178 |         m_text = data['data']['analysis'][0] # summary line, e.g. "today's main-force net inflow: XX yi"
179 | sheet.cell(row=t_row, column=t_col, value=m_text)
180 | t_row = t_row + 1
181 |
182 | sheet.cell(row=t_row, column=t_col, value="资金成交分布(/万)")
183 | sheet.cell(row=t_row, column=6, value="净流入(/万)")
184 | t_row = t_row + 1
185 |
186 | m_data = data['data']['distribution']
187 |         m_sell = m_data['sell']
188 | m_buy = m_data['buy']
189 |
190 | sheet.cell(row=t_row, column=t_col, value="特大单卖出")
191 | sheet.cell(row=t_row + 1, column=t_col, value="大单卖出")
192 | sheet.cell(row=t_row + 2, column=t_col, value="中单卖出")
193 | sheet.cell(row=t_row + 3, column=t_col, value="小单卖出")
194 | sheet.cell(row=t_row + 4, column=t_col, value="合计")
195 | se_xlarge = m_sell['xlarge']
196 | se_large = m_sell['large']
197 | se_medium = m_sell['medium']
198 | se_small = m_sell['small']
199 |         sum_sell = se_xlarge + se_large + se_medium + se_small
200 |
201 | by_xlarge = m_buy['xlarge']
202 | by_large = m_buy['large']
203 | by_medium = m_buy['medium']
204 | by_small = m_buy['small']
205 | sum_buy = by_xlarge + by_large + by_medium + by_small
206 | t_col = t_col + 1
207 | sheet.cell(row=t_row, column=t_col, value=round(se_xlarge / 10000, 2))
208 | sheet.cell(row=t_row + 1, column=t_col, value=round(se_large / 10000, 2))
209 | sheet.cell(row=t_row + 2, column=t_col, value=round(se_medium / 10000, 2))
210 | sheet.cell(row=t_row + 3, column=t_col, value=round(se_small / 10000, 2))
211 | sheet.cell(row=t_row + 4, column=t_col, value=round(sum_sell / 10000, 2))
212 | t_col = t_col + 2
213 |
214 | sheet.cell(row=t_row, column=t_col, value=round(by_xlarge / 10000, 2))
215 | sheet.cell(row=t_row + 1, column=t_col, value=round(by_large / 10000, 2))
216 | sheet.cell(row=t_row + 2, column=t_col, value=round(by_medium / 10000, 2))
217 | sheet.cell(row=t_row + 3, column=t_col, value=round(by_small / 10000, 2))
218 | sheet.cell(row=t_row + 4, column=t_col, value=round(sum_buy / 10000, 2))
219 | t_col = t_col + 1
220 |
221 | sheet.cell(row=t_row, column=t_col, value="特大单买入")
222 | sheet.cell(row=t_row + 1, column=t_col, value="大单买入")
223 | sheet.cell(row=t_row + 2, column=t_col, value="中单买入")
224 | sheet.cell(row=t_row + 3, column=t_col, value="小单买入 ")
225 | sheet.cell(row=t_row + 4, column=t_col, value="净流入 ")
226 |
227 | m_xlarge = round((by_xlarge / 10000) - (se_xlarge / 10000), 2)
228 | m_large = round((by_large / 10000) - (se_large / 10000), 2)
229 | m_medium = round((by_medium / 10000) - (se_medium / 10000), 2)
230 | m_small = round((by_small / 10000) - (se_small / 10000), 2)
231 | sheet.cell(row=t_row, column=6, value=m_xlarge)
232 | sheet.cell(row=t_row + 1, column=6, value=m_large)
233 | sheet.cell(row=t_row + 2, column=6, value=m_medium)
234 | sheet.cell(row=t_row + 3, column=6, value=m_small)
235 |         sheet.cell(row=t_row + 4, column=6, value=round((sum_buy - sum_sell) / 10000, 2))
236 |
237 | self.threadLock.release()
238 |
239 | try:
240 | wb.save(file_name)
241 | except Exception:
242 | print("Self_Stock Save Error = distribution")
243 |
244 |
245 |     def Deal_Xq_query(self, data, name): # main-force history updates continuously; save it to its own sheet, handling both first use and incremental updates
246 | self.threadLock.acquire()
247 | file_name = self.get_filename(name)
248 | wb = load_workbook(file_name)
249 | new_sheet = False
250 | try:
251 | sheet = wb.get_sheet_by_name("资金流向历史")
252 | except:
253 | sheet = wb.create_sheet("资金流向历史")
254 | new_sheet = True
255 | sheet.cell(row=1, column=1, value="日期")
256 | sheet.cell(row=1, column=2, value="收盘价")
257 | sheet.cell(row=1, column=3, value="涨跌幅")
258 | sheet.cell(row=1, column=4, value="主力资金净流入")
259 | sheet.cell(row=1, column=5, value="特大单净流入")
260 | sheet.cell(row=1, column=6, value="大单净流入")
261 | sheet.cell(row=1, column=7, value="中单净流入")
262 | sheet.cell(row=1, column=8, value="小单净流入")
263 |
264 | m_data = data['data']['items']
265 |         new_max_line = 20 + 1 # one header row + 20 content rows
266 | t_row = sheet.max_row + 1
267 | n = 0
268 | t_col = 1
269 | for m_json in m_data:
270 | m_small = m_json['small']
271 | m_large = m_json['large']
272 | m_xlarge = m_json['xlarge']
273 | m_medium = m_json['medium']
274 |             m_close = m_json['close'] # closing price
275 |             m_percent = m_json['percent'] # percent change
276 |             m_amount = m_json['amount'] # main-force net flow
277 |             m_time = m_json['timestamp']
278 |             timeStamp = float(m_time / 1000) # 13-digit (millisecond) timestamp
279 |             timeArray = time.localtime(timeStamp)
280 |             otherStyleTime = time.strftime("%Y-%m-%d", timeArray) # year-month-day
281 |
282 | if new_sheet == False:
283 | cell = sheet.cell(t_row - 1, 1).value
284 | if cell == otherStyleTime:
285 | t_row = t_row - 1
286 | sheet.cell(row=t_row, column=t_col, value=otherStyleTime)
287 | sheet.cell(row=t_row, column=t_col + 1, value=m_close)
288 | sheet.cell(row=t_row, column=t_col + 2, value=str(m_percent) + "%")
289 | sheet.cell(row=t_row, column=t_col + 3, value=round(m_amount / 10000, 2))
290 | sheet.cell(row=t_row, column=t_col + 4, value=round(m_xlarge / 10000, 2))
291 | sheet.cell(row=t_row, column=t_col + 5, value=round(m_large / 10000, 2))
292 | sheet.cell(row=t_row, column=t_col + 6, value=round(m_medium / 10000, 2))
293 | sheet.cell(row=t_row, column=t_col + 7, value=round(m_small / 10000, 2))
294 | break
295 | elif n < 2:
296 | sheet.cell(row=t_row, column=t_col, value=otherStyleTime)
297 | sheet.cell(row=t_row, column=t_col + 1, value=m_close)
298 | sheet.cell(row=t_row, column=t_col + 2, value=str(m_percent) + "%")
299 | sheet.cell(row=t_row, column=t_col + 3, value=round(m_amount / 10000, 2))
300 | sheet.cell(row=t_row, column=t_col + 4, value=round(m_xlarge / 10000, 2))
301 | sheet.cell(row=t_row, column=t_col + 5, value=round(m_large / 10000, 2))
302 | sheet.cell(row=t_row, column=t_col + 6, value=round(m_medium / 10000, 2))
303 | sheet.cell(row=t_row, column=t_col + 7, value=round(m_small / 10000, 2))
304 | t_row = t_row - 1
305 | n = n + 1
306 | else:
307 | break
308 |             elif new_sheet == True: # new file/sheet: write all dates
309 | if new_max_line > 1:
310 | sheet.cell(row=new_max_line, column=t_col, value=otherStyleTime)
311 | sheet.cell(row=new_max_line, column=t_col + 1, value=m_close)
312 | sheet.cell(row=new_max_line, column=t_col + 2, value=str(m_percent) + "%")
313 | sheet.cell(row=new_max_line, column=t_col + 3, value=round(m_amount / 10000, 2))
314 | sheet.cell(row=new_max_line, column=t_col + 4, value=round(m_xlarge / 10000, 2))
315 | sheet.cell(row=new_max_line, column=t_col + 5, value=round(m_large / 10000, 2))
316 | sheet.cell(row=new_max_line, column=t_col + 6, value=round(m_medium / 10000, 2))
317 | sheet.cell(row=new_max_line, column=t_col + 7, value=round(m_small / 10000, 2))
318 | new_max_line = new_max_line - 1
319 |
320 | self.threadLock.release()
321 |
322 | try:
323 | wb.save(file_name)
324 | except:
325 | print("Self_Stock Save Error = query")
326 |
327 | """
328 | flag = True #已经存在为true
329 | new_sheet = True #新sheet
330 |
331 | if if_os == True:
332 | desktop_path = os.path.join(os.path.expanduser('~'), "Desktop") # 获取桌面路径
333 | path = desktop_path + "\\Finance\\History\\闻讯_主力资金历史.xlsx"
334 | isExists = os.path.exists(path)
335 | if not isExists:
336 | flag = False
337 | new_sheet = True
338 | if self.first == 0:
339 | self.wb = Workbook()
340 | ws = self.wb['Sheet']
341 | self.wb.remove(ws)
342 | self.sheet = self.wb.create_sheet(name)
343 | self.first += 1
344 | else:
345 | self.wb = load_workbook(desktop_path + "\\Finance\\Main_History.xlsx")
346 | try:
347 | self.sheet = self.wb.get_sheet_by_name(name)
348 | new_sheet = False
349 | except:
350 | self.sheet = self.wb.create_sheet(name)
351 | new_sheet = True
352 | else:
353 | self.wb = load_workbook(path)
354 | try:
355 | self.sheet = self.wb.get_sheet_by_name(name)
356 | new_sheet = False
357 | except:
358 | self.sheet = self.wb.create_sheet(name)
359 | new_sheet = True
360 | elif if_os == False:
361 | d_path = "./Finance/History/"
362 | paths = d_path + "闻讯_主力资金历史.xlsx"
363 | isExists = os.path.exists(paths)
364 | if not isExists:
365 | flag = False #不存在
366 | new_sheet = True
367 | if self.first == 0:
368 | self.wb = Workbook()
369 | ws = self.wb['Sheet']
370 | self.wb.remove(ws)
371 | self.sheet = self.wb.create_sheet(name)
372 | self.first += 1
373 | else:
374 | self.wb = load_workbook("./Main_History.xlsx")
375 | try:
376 | self.sheet = self.wb.get_sheet_by_name(name)
377 | new_sheet = False
378 | except:
379 | self.sheet = self.wb.create_sheet(name)
380 | new_sheet = True
381 | else:
382 | self.wb = load_workbook(paths)
383 | try:
384 | self.sheet = self.wb.get_sheet_by_name(name)
385 | new_sheet = False
386 | except:
387 | self.sheet = self.wb.create_sheet(name)
388 | new_sheet = True
389 |
390 | if new_sheet == True:
391 | self.sheet.cell(row=1, column=1, value="日期")
392 | self.sheet.cell(row=1, column=2, value="收盘价")
393 | self.sheet.cell(row=1, column=3, value="涨跌幅")
394 | self.sheet.cell(row=1, column=4, value="主力资金净流入")
395 | self.sheet.cell(row=1, column=5, value="特大单净流入")
396 | self.sheet.cell(row=1, column=6, value="大单净流入")
397 | self.sheet.cell(row=1, column=7, value="中单净流入")
398 | self.sheet.cell(row=1, column=8, value="小单净流入")
399 |
400 | max_row = 20 + 1 #标题一行 内容20行
401 | n = 0
402 | t_row = self.sheet.max_row + 1
403 | t_col = 1
404 | for m_json in m_data:
405 | m_small = m_json['small']
406 | m_large = m_json['large']
407 | m_xlarge = m_json['xlarge']
408 | m_medium = m_json['medium']
409 | m_close = m_json['close'] # 收盘价
410 | m_percent = m_json['percent'] # 涨跌幅
411 | m_amount = m_json['amount'] #主力
412 | m_time = m_json['timestamp']
413 | timeStamp = float(m_time / 1000) # 13位时间戳
414 | timeArray = time.localtime(timeStamp)
415 | otherStyleTime = time.strftime("%Y-%m-%d", timeArray) # 年月日
416 |
417 | if flag == True:
418 | if new_sheet == False:
419 | cell = self.sheet.cell(t_row - 1, 1).value
420 | if cell == otherStyleTime:
421 | break
422 | elif n < 2:
423 | self.sheet.cell(row=t_row, column= t_col, value=otherStyleTime)
424 | self.sheet.cell(row=t_row, column= t_col + 1, value=m_close)
425 | self.sheet.cell(row=t_row, column= t_col + 2, value=str(m_percent) + "%")
426 | self.sheet.cell(row=t_row, column= t_col + 3, value=round(m_amount / 10000, 2))
427 | self.sheet.cell(row=t_row, column= t_col + 4, value=round(m_xlarge / 10000, 2))
428 | self.sheet.cell(row=t_row, column= t_col + 5, value=round(m_large / 10000, 2))
429 | self.sheet.cell(row=t_row, column= t_col + 6, value=round(m_medium / 10000, 2))
430 | self.sheet.cell(row=t_row, column= t_col + 7, value=round(m_small / 10000, 2))
431 | t_row = t_row - 1
432 | n = n + 1
433 | else:
434 | break
435 | elif new_sheet == True: #new file/new sheet: write every date
436 | if max_row > 1:
437 | self.sheet.cell(row=max_row, column=t_col, value=otherStyleTime)
438 | self.sheet.cell(row=max_row, column=t_col + 1, value=m_close)
439 | self.sheet.cell(row=max_row, column=t_col + 2, value=str(m_percent) + "%")
440 | self.sheet.cell(row=max_row, column= t_col + 3, value=round(m_amount / 10000, 2))
441 | self.sheet.cell(row=max_row, column= t_col + 4, value=round(m_xlarge / 10000, 2))
442 | self.sheet.cell(row=max_row, column= t_col + 5, value=round(m_large / 10000, 2))
443 | self.sheet.cell(row=max_row, column= t_col + 6, value=round(m_medium / 10000, 2))
444 | self.sheet.cell(row=max_row, column= t_col + 7, value=round(m_small / 10000, 2))
445 | max_row = max_row - 1
446 | elif flag == False: # new file/new sheet: write every date
447 | if max_row > 1:
448 | self.sheet.cell(row=max_row, column=t_col, value=otherStyleTime)
449 | self.sheet.cell(row=max_row, column=t_col + 1, value=m_close)
450 | self.sheet.cell(row=max_row, column=t_col + 2, value=str(m_percent) + "%")
451 | self.sheet.cell(row=max_row, column=t_col + 3, value=round(m_amount / 10000, 2))
452 | self.sheet.cell(row=max_row, column=t_col + 4, value=round(m_xlarge / 10000, 2))
453 | self.sheet.cell(row=max_row, column=t_col + 5, value=round(m_large / 10000, 2))
454 | self.sheet.cell(row=max_row, column=t_col + 6, value=round(m_medium / 10000, 2))
455 | self.sheet.cell(row=max_row, column=t_col + 7, value=round(m_small / 10000, 2))
456 | max_row = max_row - 1
457 |
458 | self.threadLock.release()
459 | if if_os == True:
460 | if flag == True:
461 | try:
462 | desktop_path = os.path.join(os.path.expanduser('~'), "Desktop") # get the desktop path
463 | path = desktop_path + "\\Finance\\History\\闻讯_主力资金历史.xlsx"
464 | self.wb.save(path)
465 | except:
466 | print("Self_Stock Save Error = query_2_1")
467 | elif flag == False:
468 | try:
469 | desktop_path = os.path.join(os.path.expanduser('~'), "Desktop")
470 | path = desktop_path + "\\Finance\\Main_History.xlsx"
471 | self.wb.save(path)
472 | except:
473 | print("Self_Stock Save Error = query_2_2")
474 | elif if_os == False:
475 | if flag == True:
476 | try:
477 | d_path = "./Finance/History/"
478 | path = d_path + "闻讯_主力资金历史.xlsx"
479 | self.wb.save(path)
480 | except:
481 | print("Self_Stock Save Error = query_2_1")
482 | elif flag == False:
483 | try:
484 | file_name = "./Main_History.xlsx"
485 | self.wb.save(file_name)
486 | except:
487 | print("Self_Stock Save Error = query_2_2")
488 | """
489 |
490 | def Deal_Xq_blocktrans(self, data, name):
491 | self.threadLock.acquire()
492 | file_name = self.get_filename(name)
493 | wb = load_workbook(file_name)
494 | sheet = wb.get_sheet_by_name(name)
495 |
496 | t_row = sheet.max_row + 3
497 | t_col = 1
498 | sheet.cell(row=t_row, column=t_col, value="大宗交易")
499 | t_row = t_row + 1
500 | sheet.cell(row=t_row, column=t_col, value="成交价")
501 | sheet.cell(row=t_row, column=t_col + 1, value="成交量(/股)")
502 | sheet.cell(row=t_row, column=t_col + 2, value="成交额(/万)")
503 | sheet.cell(row=t_row, column=t_col + 3, value="溢价率")
504 | sheet.cell(row=t_row, column=t_col + 4, value="交易时间")
505 | sheet.cell(row=t_row, column=t_col + 5, value="买方营业部")
506 | sheet.cell(row=t_row, column=t_col + 6, value="卖方营业部")
507 | t_row = t_row + 1
508 |
509 | m_data = data['data']
510 | data_items = m_data['items']
511 |
512 | for m_json in data_items:
513 | m_vol = m_json['vol']
514 | m_seller = m_json['sell_branch_org_name']
515 | m_premium = m_json['premium_rat']
516 | m_trans = m_json['trans_amt']
517 | m_time = m_json['td_date']
518 | m_buyer = m_json['buy_branch_org_name']
519 | m_price = m_json['trans_price']
520 |
521 | timeStamp = float(m_time / 1000) # 13-digit (millisecond) timestamp
522 | timeArray = time.localtime(timeStamp)
523 | m_date = time.strftime("%Y-%m-%d", timeArray)
524 |
525 | sheet.cell(row=t_row, column=t_col, value=m_price)
526 | sheet.cell(row=t_row, column=t_col + 1, value=m_vol)
527 | sheet.cell(row=t_row, column=t_col + 2, value=round(m_trans / 10000, 2))
528 | sheet.cell(row=t_row, column=t_col + 3, value=str(m_premium) + "%")
529 | sheet.cell(row=t_row, column=t_col + 4, value=m_date)
530 | sheet.cell(row=t_row, column=t_col + 5, value=m_buyer) #buyer branch, matching the header written above
531 | sheet.cell(row=t_row, column=t_col + 6, value=m_seller) #seller branch
532 | t_row = t_row + 1
533 |
534 | self.threadLock.release()
535 |
536 | try:
537 | wb.save(file_name)
538 | except:
539 | print("Self_Stock Save Error = blocktrans")
540 |
541 |
542 | def mkdir_stock(self):
543 | platform = pt.get_platform()
544 | if platform == True:
545 | desktop_path = os.path.join(os.path.expanduser('~'), "Desktop") # get the desktop path
546 | path = desktop_path + "\\Finance\\Stock\\"
547 | isExists = os.path.exists(path)
548 | if not isExists:
549 | os.mkdir(path)
550 | return path
551 | else:
552 | path = "./Finance/Stock/"
553 | isExists = os.path.exists(path)
554 | if not isExists:
555 | os.mkdir(path)
556 | return path
557 |
558 |
559 | def get_path(self, name):
560 | platform = pt.get_platform()
561 | if platform == True:
562 | desktop_path = os.path.join(os.path.expanduser('~'), "Desktop") # get the desktop path
563 | path = desktop_path + "\\Finance\\Stock\\{}\\".format(name)
564 | isExists = os.path.exists(path)
565 | if not isExists:
566 | os.mkdir(path)
567 | return path
568 | else:
569 | path = "./Finance/Stock/{}/".format(name)
570 | isExists = os.path.exists(path)
571 | if not isExists:
572 | os.mkdir(path)
573 | return path
574 |
575 |
576 | def get_filename(self, name):
577 | platform = pt.get_platform()
578 | if platform == True:
579 | desktop_path = os.path.join(os.path.expanduser('~'), "Desktop") # get the desktop path
580 | win_file = desktop_path + "\\Finance\\Stock\\{}\\".format(name) + name + ".xlsx"
581 | return win_file
582 | else:
583 | lin_file = "./Finance/Stock/{}/".format(name) + name + ".xlsx"
584 | return lin_file
585 |
586 |
587 | def Download_Xlsx(self, m_url, path, name, m_time): #originally wanted to write this into the Yili xlsx, but the format keeps openpyxl from loading it; leave it like this for now
588 | #filetime = time.strftime("%Y-", time.localtime()) # year-month-day-hour-minute
589 | filename = path + name + "成交明细_" + m_time + ".xlsx"
590 | if os.path.exists(filename):
591 | os.remove(filename)
592 | for _ in range(3):
593 | try:
594 | wget.download(m_url, out=filename)
595 | break
596 | except:
597 | continue
598 |
599 | #filename = path + name + "成交明细_" + "2020_12_18" + ".xlsx" #backfill a specific date
600 |
601 | con = 0
602 | def Deal_Xq(self, data, name): #forgot why it was written this way; it runs, so leave it alone
603 | con = self.con
604 | if self.stop == False:
605 | return
606 | if con < 4:
607 | if con == 0:
608 | con = con + 1
609 | t1 = threading.Thread(target=self.Deal_Xq_quote, args=(data, name, ))
610 | t1.start()
611 | t1.join()
612 | elif con == 1:
613 | con = con + 1
614 | t2 = threading.Thread(target=self.Deal_Xq_distribution, args=(data, name, ))
615 | t2.start()
616 | t2.join()
617 | elif con == 2:
618 | con = con + 1
619 | t3 = threading.Thread(target=self.Deal_Xq_query, args=(data, name, ))
620 | t3.start()
621 | t3.join()
622 | elif con == 3:
623 | con = con + 1
624 | t4 = threading.Thread(target=self.Deal_Xq_blocktrans, args=(data, name,))
625 | t4.start()
626 | t4.join()
627 | #elif con < 5:
628 | self.con = self.con + 1
629 |
630 | def Analysis_date(self, s_url):
631 | for _ in range(3):
632 | try:
633 | self.xlsxdata = requests.get(s_url, headers=headers, timeout=(10, 30))
634 | break
635 | except Exception as e:
636 | continue
637 |
638 | soup = BeautifulSoup(self.xlsxdata.text, "lxml")
639 | xlslist = soup.select('#historyData > div.bd > a:nth-child(1)')
640 | for xlsx in xlslist:
641 | return xlsx.get_text()
642 |
643 | def get_SelfStock(self):
644 | self.mkdir_stock()
645 | #url_list = list()
646 | name_list = dict()
647 | t = time.time()
648 | m_time = int(t)
649 | m_file_time = ''
650 | with open("Code.txt", "r", encoding='utf-8') as f: #one entry per line in the form CODE#NAME
651 | for line in f.readlines():
652 | m_line = line.strip('\n') # strip the trailing newline from each line
653 | if m_line == '':
654 | continue
655 | sep = '#'
656 | code = line.split(sep, 1)[0]
657 | if code != '':
658 | name = m_line.split(sep)[1]
659 | m_code = re.sub('[a-zA-Z]', "", code) #digits only: SH000001 -> 000001
660 | m_low_code = code.lower() #SH000001 -> sh000001
661 | m_char = re.sub('[0-9]', "", code) #letters only: SH000001 -> SH
662 | m_low_char = m_char.lower() #lowercase exchange prefix
663 | #Xueqiu
664 | xq_url_quote = 'https://stock.xueqiu.com/v5/stock/quote.json?symbol={0}&extend=detail'.format(code) #stock quote
665 | # xq_url_quote = 'https://stock.xueqiu.com/v5/stock/batch/quote.json?extend=detail&is_delay_ft=1&is_delay_hk=0&symbol={}'.format(code) #stock quote
666 | xq_url_distrbution = 'https://stock.xueqiu.com/v5/stock/capital/distribution.json?symbol={}&_={}'.format(code, m_time) #today's capital flow
667 | xq_url_query = 'https://stock.xueqiu.com/v5/stock/capital/query.json?count=20&symbol={}&_={}'.format(code, m_time) #capital flow history
668 | xq_url_blocktrans = 'https://stock.xueqiu.com/v5/stock/capital/blocktrans.json?symbol={}'.format(code) #block trades
669 |
670 | year_filetime = time.strftime("%Y", time.localtime()) # current year, e.g. 2021
671 | #filetime = time.strftime("%Y%m%d", time.localtime()) # year-month-day-hour-minute
672 | num = 0
673 | if int(m_code) < 600000: # the NetEase xls URL prefixes Shenzhen codes (below 600000) with 1, Shanghai codes with 0
674 | num = 1
675 | #wyurl = 'http://quotes.money.163.com/cjmx/2021/20210929/1002241.xls'
676 | s_url = 'http://quotes.money.163.com/trade/cjmx_{0}.html'.format(m_code)
677 | self.filetime = self.Analysis_date(s_url)
678 | wyurl = ''
679 | if self.filetime: # Analysis_date may return None or '' when no link is found
680 | m_file_time = self.filetime
681 | wyfiletime = self.filetime.replace('-','')
682 | #'http://quotes.money.163.com/cjmx/2021/20210809/1300033.xls'
683 | wyurl = 'http://quotes.money.163.com/cjmx/{0}/{1}/{2}{3}.xls'.format(year_filetime, wyfiletime, num, m_code)
684 |
685 | #Tencent Securities
686 | #tx_url_detail = 'http://stock.gtimg.cn/data/index.php?appn=detail&action=downlddoad&c={}&d={}'.format(m_low_code, filetime) #no longer works
687 | #tx_url_detail = 'http://stock.gtimg.cn/data/index.php?appn=detail&action=download&c={}&d={}'.format(m_low_code, 20201218) #backfill date; no longer works
688 |
689 | name_list.setdefault(name, [])
690 | name_list[name].append(xq_url_quote)
691 | name_list[name].append(xq_url_distrbution)
692 | if wyurl != '':
693 | name_list[name].append(wyurl)
694 | name_list[name].append(xq_url_query)
695 | name_list[name].append(xq_url_blocktrans)
696 | self.filetime = ''
697 |
698 | url = 'https://xueqiu.com'
699 | session = requests.session()
700 |
701 | session.get(url, headers=headers)
702 | for name in name_list:
703 | self.stop = True
704 | path = self.get_path(name)
705 | for m_url in name_list[name]:
706 | resp = None
707 | res = "xueqiu" in m_url
708 | if res == True:
709 | for ll in range(3):
710 | try:
711 | resp = requests.get(m_url, headers=headers, timeout=120)
712 | if resp.status_code == 200:
713 | break
714 | except Exception as e:
715 | pass
716 | if resp is None: #all three attempts failed
717 | continue
718 | data = json.loads(resp.text)
719 | if data is None:
720 | continue
721 |
722 | self.Deal_Xq(data, name)
723 | res = False
724 |
725 | res = "quotes" in m_url
726 | if res == True:
727 | self.Download_Xlsx(m_url, path, name, m_file_time)
728 | res = False
729 | time.sleep(1) # slow down to avoid hammering the endpoints
730 | self.con = 0
731 |
732 | del name_list #free the dict
733 | self.filetime = ''
734 |
735 |
736 | def main(self, filename):
737 | #Stock = SelfStock(file_name)
738 | #Stock.get_filename()
739 | Stock.get_SelfStock()
740 |
741 | Stock = SelfStock()
742 |
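
> The Code.txt handling above interleaves several regex normalizations with URL building. A minimal standalone sketch of the same parsing idea, assuming the CODE#NAME line format the code splits on (`parse_code_line` and the sample name are hypothetical, not part of the repo):

```python
import re

def parse_code_line(line):
    # One Code.txt entry looks like "SH600000#SomeName": the display name
    # follows '#', the exchange-prefixed code precedes it.
    line = line.strip()
    if not line or '#' not in line:
        return None
    code, name = line.split('#', 1)
    return {
        'name': name,
        'code': code,                            # e.g. SH600000
        'digits': re.sub('[a-zA-Z]', '', code),  # 600000
        'lower': code.lower(),                   # sh600000
        'exchange': re.sub('[0-9]', '', code),   # SH
    }

print(parse_code_line("SH600000#ExampleName"))
```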
--------------------------------------------------------------------------------
/src/Sg_Finance.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from bs4 import BeautifulSoup
3 | from openpyxl import load_workbook
4 | from queue import Queue
5 |
6 | headers = {
7 | 'User-Agent':'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36',
8 | }
9 |
10 | class SongGuo(object):
11 | def __init__(self):
12 | pass
13 |
14 | def request(self):
15 | self.url = 'http://www.songguocaijing.com/'
16 | for _ in range(3):
17 | try:
18 | self.data = requests.get(self.url, headers=headers, timeout=(10, 40))
19 | break
20 | except:
21 | continue
22 | self.data.encoding = "utf-8"
23 | self.soup = BeautifulSoup(self.data.text, "lxml")
24 |
25 |
26 | def get_news(self):
27 | wb = load_workbook(self.xlsxname)
28 | sheet = wb.create_sheet("Sg")
29 | t_row = 1
30 | t_col = 1
31 | sheet.cell(row=t_row, column=t_col, value="松果财经")
32 | t_row = t_row + 1
33 | sheet.cell(row=t_row, column=t_col, value="新闻标题")
34 | sheet.cell(row=t_row, column=t_col + 1, value="新闻链接")
35 | sheet.cell(row=t_row, column=t_col + 2, value="新闻简介")
36 | sheet.cell(row=t_row, column=t_col + 3, value="新闻时间")
37 | t_row = t_row + 1
38 |
39 | datalist = self.soup.find_all(class_='title-wp')
40 | da = ''
41 | da2 = ''
42 | da2 = self.soup.find_all(class_='article-description')
43 | m_queue = Queue(20)
44 | for des in da2:
45 | m_queue.put(des.get_text())
46 |
47 | for da in datalist:
48 | news_links = da.find_all('a')
49 | for news in news_links:
50 | m_href = 'http://www.songguocaijing.com/' + news['href']
51 | m_title = news['title']
52 | sheet.cell(row=t_row, column=t_col, value=m_title)
53 | sheet.cell(row=t_row, column=t_col + 1, value=m_href)
54 | sheet.cell(row=t_row, column=t_col + 2, value=m_queue.get())
55 | t_row = t_row + 1
56 |
57 | try:
58 | wb.save(self.xlsxname)
59 | except:
60 | print("Songguo Save Error")
61 |
62 | def main(self, file_name):
63 | self.xlsxname = file_name
64 | Sg.request()
65 | Sg.get_news()
66 |
67 | Sg = SongGuo()
68 |
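
> get_news pairs each headline with its blurb through a bounded Queue. Assuming the title-wp links and article-description nodes come back in the same page order, plain zip() expresses the same pairing; a sketch on hypothetical markup:

```python
from bs4 import BeautifulSoup

# Hypothetical fragment mirroring the classes the scraper selects on.
html = """
<div class="title-wp"><a href="/a1" title="T1">T1</a></div>
<p class="article-description">first summary</p>
<div class="title-wp"><a href="/a2" title="T2">T2</a></div>
<p class="article-description">second summary</p>
"""
soup = BeautifulSoup(html, "lxml")
links = [a for wp in soup.find_all(class_="title-wp") for a in wp.find_all("a")]
descs = [d.get_text() for d in soup.find_all(class_="article-description")]
for a, desc in zip(links, descs):  # zip keeps titles and blurbs aligned by position
    print(a["title"], a["href"], desc)
```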
--------------------------------------------------------------------------------
/src/Sina_Finance.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from bs4 import BeautifulSoup
3 | from openpyxl import load_workbook
4 | from openpyxl.styles import Font
5 | import datetime
6 | import time
7 | import json
8 | import threading
9 |
10 | """
11 | https://finance.sina.com.cn/
12 | http://finance.sina.com.cn/chanjing/
13 | http://feed.mix.sina.com.cn/api/roll/get?pageid=164&lid=1694&num=10&page=1&callback=feedCardJsonpCallback&_=1611561556802 公司新闻
14 | http://feed.mix.sina.com.cn/api/roll/get?pageid=164&lid=1695&num=10&page=1&callback=feedCardJsonpCallback&_=1611561513495 产业新闻
15 |
16 | http://finance.sina.com.cn/china/
17 | http://feed.mix.sina.com.cn/api/roll/get?pageid=155&lid=1687&num=10&page=1&callback=feedCardJsonpCallback&_=1611573471132 国内新闻
18 | http://feed.mix.sina.com.cn/api/roll/get?pageid=155&lid=1687&num=10&page=1&callback=feedCardJsonpCallback&_=1611573513741 宏观经济
19 | http://feed.mix.sina.com.cn/api/roll/get?pageid=155&lid=1689&num=10&page=1&callback=feedCardJsonpCallback&_=1611573537003 部委动态
20 | http://feed.mix.sina.com.cn/api/roll/get?pageid=155&lid=1690&num=10&page=1&callback=feedCardJsonpCallback&_=1611573555700 金融新闻
21 | http://feed.mix.sina.com.cn/api/roll/get?pageid=155&lid=1688&num=10&page=1&callback=feedCardJsonpCallback&_=1611573598249 地方经济
22 | """
23 |
24 | headers = {
25 | 'User-Agent':'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36',
26 | }
27 |
28 | class SinaNews(object):
29 | def __init__(self):
30 | pass
31 |
32 | def request(self):
33 | self.url = 'https://finance.sina.com.cn/'
34 |
35 | for ll in range(3):
36 | try:
37 | self.data = requests.get(self.url, headers=headers, timeout=120)
38 | if self.data.status_code == 200:
39 | break
40 | except Exception as e:
41 | pass
42 | self.data.encoding = "utf-8"
43 | self.soup = BeautifulSoup(self.data.text, "lxml")
44 |
45 |
46 | def getTopNew(self):
47 | wb = load_workbook(self.xlsxname)
48 | sheet = wb.create_sheet("Sina")
49 | t_row = 1
50 | t_col = 1
51 | sheet.cell(row=t_row, column=t_col, value="新浪财经")
52 | t_row = t_row + 1
53 | sheet.cell(row=t_row, column=t_col, value="新闻标题")
54 | sheet.cell(row=t_row, column=t_col + 1, value="新闻链接")
55 | sheet.cell(row=t_row, column=t_col + 2, value="新闻简介")
56 | sheet.cell(row=t_row, column=t_col + 3, value="新闻时间")
57 | t_row = t_row + 1
58 |
59 | datalist = self.soup.find_all(class_='fin_tabs0_c0')
60 | datal = None
61 | for datal in datalist:
62 | t = 0 #placeholder; keep for now
63 |
64 | for ds in datal:
65 | ds = datal.find_all('a')
66 | for m_data in ds:
67 | m_href = m_data['href']
68 | m_title = m_data.get_text()
69 | if len(m_title) < 4:
70 | continue
71 | sheet.cell(row=t_row, column=t_col, value=m_title)
72 | sheet.cell(row=t_row, column=t_col + 1, value=m_href)
73 | t_row = t_row + 1
74 | break
75 |
76 | try:
77 | wb.save(self.xlsxname)
78 | except:
79 | print("SinaNews Save Error = getTopNew")
80 |
81 | def getStockNew(self):
82 | wb = load_workbook(self.xlsxname)
83 | sheet = wb.get_sheet_by_name("Sina")
84 | t_row = sheet.max_row + 3
85 | t_col = 1
86 | sheet.cell(row=t_row, column=t_col, value="证券新闻")
87 | t_row = t_row + 1
88 | sheet.cell(row=t_row, column=t_col, value="新闻标题")
89 | sheet.cell(row=t_row, column=t_col + 1, value="新闻链接")
90 | sheet.cell(row=t_row, column=t_col + 2, value="新闻简介")
91 | sheet.cell(row=t_row, column=t_col + 3, value="新闻时间")
92 | t_row = t_row + 1
93 |
94 | datalist = self.soup.find_all(class_='m-p1-m-blk2')
95 | for dastl in datalist:
96 | t = 1
97 |
98 | for ds in dastl:
99 | ds = dastl.find_all('a')
100 | for m_data in ds:
101 | try:
102 | m_href = m_data['href']
103 | m_title = m_data.get_text()
104 | if len(m_title) < 4:
105 | continue
106 | sheet.cell(row=t_row, column=t_col, value=m_title)
107 | sheet.cell(row=t_row, column=t_col + 1, value=m_href)
108 | t_row = t_row + 1
109 | except:
110 | continue
111 | break
112 |
113 | try:
114 | wb.save(self.xlsxname)
115 | except:
116 | print("SinaNews Save Error = getStockNew")
117 |
118 |
119 | def getIndustryNew(self):
120 | t = time.time() * 1000
121 | n_time = int(t)
122 | new_list = list()
123 |
124 | j_url1 = 'http://feed.mix.sina.com.cn/api/roll/get?pageid=164&lid=1694&num=10&page=1&callback=feedCardJsonpCallback&_={}'.format(n_time) #公司新闻
125 | j_url2 = 'http://feed.mix.sina.com.cn/api/roll/get?pageid=164&lid=1695&num=10&page=1&callback=feedCardJsonpCallback&_={}'.format(n_time) #产业新闻
126 |
127 | cn_url1 = 'http://feed.mix.sina.com.cn/api/roll/get?pageid=155&lid=3231&num=10&page=1&callback=feedCardJsonpCallback&_={}'.format(n_time) #财经top10
128 | cn_url2 = 'http://feed.mix.sina.com.cn/api/roll/get?pageid=155&lid=1686&num=10&page=1&callback=feedCardJsonpCallback&_={}'.format(n_time) #国内新闻
129 | cn_url3 = 'http://feed.mix.sina.com.cn/api/roll/get?pageid=155&lid=1687&num=10&page=1&callback=feedCardJsonpCallback&_={}'.format(n_time) #宏观经济
130 | cn_url4 = 'http://feed.mix.sina.com.cn/api/roll/get?pageid=155&lid=1689&num=10&page=1&callback=feedCardJsonpCallback&_={}'.format(n_time) #部委动态
131 | cn_url5 = 'http://feed.mix.sina.com.cn/api/roll/get?pageid=155&lid=1690&num=10&page=1&callback=feedCardJsonpCallback&_={}'.format(n_time) #金融新闻
132 | cn_url6 = 'http://feed.mix.sina.com.cn/api/roll/get?pageid=155&lid=1688&num=10&page=1&callback=feedCardJsonpCallback&_={}'.format(n_time) #地方新闻
133 | new_list = {
134 | "财经Top10": cn_url1,
135 | "公司新闻": j_url1,
136 | "产业新闻": j_url2,
137 | "国内新闻": cn_url2,
138 | "宏观经济": cn_url3,
139 | "部委动态": cn_url4,
140 | "金融新闻": cn_url5,
141 | "地方新闻": cn_url6,
142 | }
143 | for newname in new_list:
144 | m_url = new_list[newname]
145 | self.Deal_News(m_url, newname)
146 |
147 |
148 | def Deal_News(self, url, new_name):
149 | wb = load_workbook(self.xlsxname)
150 | sheet = wb.get_sheet_by_name("Sina")
151 | t_row = sheet.max_row + 2
152 | t_col = 1
153 | sheet.cell(row=t_row, column=t_col, value="{}".format(new_name))
154 | t_row = t_row + 1
155 | sheet.cell(row=t_row, column=t_col, value="新闻标题")
156 | sheet.cell(row=t_row, column=t_col + 1, value="新闻链接")
157 | sheet.cell(row=t_row, column=t_col + 2, value="新闻简介")
158 | sheet.cell(row=t_row, column=t_col + 3, value="新闻时间")
159 | t_row = t_row + 1
160 |
161 | data = None
162 | for ll in range(3):
163 | try:
164 | data = requests.get(url, headers=headers, timeout=120)
165 | if data.status_code == 200:
166 | break
167 | except Exception as e:
168 | pass
169 | t1 = 'try{feedCardJsonpCallback('
170 | t2 = ');}catch(e){};\n'
171 | soup = BeautifulSoup(data.text, "lxml")
172 | data = str(soup)
173 | data = data.replace(t1, "")
174 | data = data.replace(t2, "")
175 | json_str = json.loads(data)
176 | json_str = json_str['result']['data']
177 | for m_data in json_str:
178 | m_href = m_data['url']
179 | m_title = m_data['title']
180 | tmp_m_time = m_data['ctime']
181 | timeStamp = int(tmp_m_time)
182 | timeArray = time.localtime(timeStamp)
183 | m_time = time.strftime("%Y-%m-%d %H:%M:%S", timeArray)
184 |
185 | sheet.cell(row=t_row, column=t_col, value=m_title)
186 | sheet.cell(row=t_row, column=t_col + 1, value=m_href)
187 | sheet.cell(row=t_row, column=t_col + 3, value=m_time)
188 | t_row = t_row + 1
189 |
190 |
191 | """
192 | t_row = t_row + 2
193 | sheet.cell(row=t_row, column=t_col, value="产业新闻")
194 | t_row = t_row + 1
195 | sheet.cell(row=t_row, column=t_col, value="新闻标题")
196 | sheet.cell(row=t_row, column=t_col + 1, value="新闻链接")
197 | sheet.cell(row=t_row, column=t_col + 2, value="新闻简介")
198 | sheet.cell(row=t_row, column=t_col + 3, value="新闻时间")
199 | t_row = t_row + 1
200 |
201 | data = requests.get(j_url2, headers=headers)
202 | soup = BeautifulSoup(data.text, "lxml")
203 | data = str(soup)
204 | m1 = 'try{feedCardJsonpCallback('
205 | m2 = ');}catch(e){};\n'
206 | data = data.replace(m1, "")
207 | data = data.replace(m2, "")
208 | json_str2 = json.loads(data)
209 | json_s = json_str2['result']['data']
210 | for m_data in json_s:
211 | m_href = m_data['url']
212 | m_title = m_data['title']
213 | tmp_m_time = m_data['ctime']
214 | timeStamp = int(tmp_m_time)
215 | timeArray = time.localtime(timeStamp)
216 | m_time = time.strftime("%Y-%m-%d %H:%M:%S", timeArray)
217 |
218 | sheet.cell(row=t_row, column=t_col, value=m_title)
219 | sheet.cell(row=t_row, column=t_col + 1, value=m_href)
220 | sheet.cell(row=t_row, column=t_col + 3, value=m_time)
221 | t_row = t_row + 1
222 | """
223 | try:
224 | wb.save(self.xlsxname)
225 | except:
226 | print("Sina getIndustryNew Save Error")
227 |
228 | def getCnNew(self):
229 |
230 | t = time.time() * 1000
231 | n_time = int(t)
232 |
233 |
234 |
235 | def main(self, file_name):
236 | self.xlsxname = file_name
237 | Sina.request()
238 | Sina.getTopNew()
239 | Sina.getStockNew()
240 | Sina.getIndustryNew()
241 | Sina.getCnNew()
242 |
243 | Sina = SinaNews()
244 |
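
> Deal_News peels the `try{feedCardJsonpCallback(...);}catch(e){};` wrapper off with two string replaces. A regex-based sketch of the same unwrapping, assuming the wrapper shape matches the t1/t2 literals above (`unwrap_jsonp` is a hypothetical helper):

```python
import json
import re

def unwrap_jsonp(text):
    # Capture everything between the callback's opening paren and the
    # closing ");}catch" of the wrapper, then parse it as JSON.
    match = re.search(r'feedCardJsonpCallback\((.*)\)\s*;?\s*}\s*catch', text, re.S)
    if match is None:
        raise ValueError("unexpected JSONP wrapper")
    return json.loads(match.group(1))

sample = 'try{feedCardJsonpCallback({"result": {"data": []}});}catch(e){};'
print(unwrap_jsonp(sample))
```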
--------------------------------------------------------------------------------
/src/Ths_Finance.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from bs4 import BeautifulSoup
3 | from openpyxl import load_workbook
4 | from openpyxl.styles import Font
5 | import json
6 | import time
7 | import threading
8 |
9 | headers = {
10 | 'User-Agent':'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36',
11 | }
12 |
13 | class TongHuaShun(object):
14 | def __init__(self):
15 | pass
16 |
17 | def request(self):
18 | self.url = 'http://www.10jqka.com.cn/'
19 | for ll in range(3):
20 | try:
21 | self.data = requests.get(self.url, headers=headers, timeout=120)
22 | if self.data.status_code == 200:
23 | break
24 | except Exception as e:
25 | pass
26 |
27 | self.data.encoding = "gbk"
28 | self.soup = BeautifulSoup(self.data.text, "lxml")
29 |
30 | #calendar
31 | calendar_time = time.strftime("%Y%m", time.localtime()) # year+month, e.g. 202105
32 | #self.url_calendar = 'http://stock.10jqka.com.cn/fincalendar.shtml#{}'.format(datatime)
33 | self.url_calendar = 'http://comment.10jqka.com.cn/tzrl/getTzrlData.php?callback=callback_dt&type=data&date={}'.format(calendar_time)
34 |
35 | for ll in range(3):
36 | try:
37 | self.data_calendar = requests.get(self.url_calendar, headers=headers, timeout=120)
38 | if self.data_calendar.status_code == 200:
39 | break
40 | except Exception as e:
41 | pass
42 |
43 |
44 | def Style(self):
45 | self.m_font = Font(
46 | size=12,
47 | bold=True,
48 | )
49 |
50 | self.head_font = Font(
51 | size=14,
52 | bold=True,
53 | )
54 |
55 | def getNew(self):
56 | wb = load_workbook(self.xlsxname)
57 | sheet = wb.create_sheet('Ths')
58 | datalist = self.soup.find_all(class_="item_txt")
59 | t_row = 1
60 | t_col = 1
61 |
62 | sheet.cell(row=t_row + 0, column=t_col + 0, value="同花顺财经")
63 | sheet.cell(row=t_row + 1, column=t_col + 0, value="新闻标题")
64 | sheet.cell(row=t_row + 1, column=t_col + 1, value="新闻链接")
65 | sheet.cell(row=t_row + 1, column=t_col + 2, value="新闻简介")
66 | sheet.cell(row=t_row + 1, column=t_col + 3, value="新闻时间")
67 | t_row = t_row + 2
68 |
69 | for news in datalist:
70 | newlist2 = news.select('p a')
71 | for m_new in newlist2:
72 | m_url = m_new['href']
73 | m_title = m_new['title']
74 | sheet.cell(row=t_row, column=t_col, value=m_title)
75 | sheet.cell(row=t_row, column=t_col + 1, value=m_url)
76 | t_row = t_row + 1
77 | try:
78 | wb.save(self.xlsxname)
79 | except Exception:
80 | print("THS Save Error = 1")
81 |
82 | def getInvestment(self):
83 | wb = load_workbook(self.xlsxname)
84 | sheet = wb.get_sheet_by_name('Ths')
85 | t_row = sheet.max_row
86 |
87 | datalist = self.soup.find_all(class_="content newhe") #upper half of the investment-opportunity section: industry news and selected research reports
88 | t_col = 1
89 | index = 0
90 | for newlist in datalist:
91 | #the results contain one duplicate entry; skip it with an index check
92 | news = newlist.select('li a')
93 | for m_new in news:
94 | if index != 1:
95 | m_url = m_new['href']
96 | m_title = m_new['title']
97 | sheet.cell(row=t_row, column=t_col, value=m_title)
98 | sheet.cell(row=t_row, column=t_col + 1, value=m_url)
99 | t_row = t_row + 1
100 | index = index + 1
101 | try:
102 | wb.save(self.xlsxname)
103 | except Exception:
104 | print("THS Save Error = 2")
105 |
106 | def getInvestment2(self):
107 | wb = load_workbook(self.xlsxname)
108 | sheet = wb.get_sheet_by_name('Ths')
109 | t_row = sheet.max_row
110 | t_col = 1
111 | #fetch the lower half of the investment-opportunity section
112 | datalist = self.soup.find_all(class_="last")
113 |
114 | i = 0
115 | for newlist in datalist:
116 | news = newlist.select('li a')
117 | for m_new in news:
118 | #this selector returns many duplicates, but the count is fixed, so only items 5-10 are kept
119 | if i >= 5:
120 | if i < 11:
121 | m_title = m_new.get_text()
122 | m_url = m_new['href']
123 | sheet.cell(row=t_row, column=t_col, value=m_title)
124 | sheet.cell(row=t_row, column=t_col + 1, value=m_url)
125 | t_row = t_row + 1
126 | i = i + 1
127 | try:
128 | wb.save(self.xlsxname)
129 | except Exception:
130 | print("THS Save Error = 3")
131 |
132 | def get_Newspaper(self):
133 | url_Newspaper = 'http://stock.10jqka.com.cn/bktt_list/'
134 | data_paper = requests.get(url_Newspaper, headers=headers)
135 | soup_paper = BeautifulSoup(data_paper.text, "lxml")
136 |
137 | datalist = soup_paper.select('body > div.content-1200 > div.module-l.fl > div.list-con > ul > li:nth-child(1) > span > a')
138 | datalist2 = soup_paper.select('body > div.content-1200 > div.module-l.fl > div.list-con > ul > li:nth-child(1) > a')
139 | m_url = datalist[0]['href']
140 | m_title = datalist[0]['title']
141 | m_content = datalist2[0].get_text() + "del"
142 | m_content = m_content.replace("...del", "")
143 |
144 | wb = load_workbook(self.xlsxname)
145 | sheet = wb.get_sheet_by_name('Ths')
146 | t_row = sheet.max_row + 1
147 | t_col = 1
148 |
149 | sheet.cell(row=t_row + 1, column=t_col, value="报刊头条") #saves the next function (investment calendar) one xlwt pass
150 | t_row = t_row + 2
151 | sheet.cell(row=t_row, column=t_col, value=m_title)
152 | sheet.cell(row=t_row, column=t_col + 1, value=m_url)
153 | sheet.cell(row=t_row, column=t_col + 2, value=m_content)
154 |
155 | # save the next function (investment calendar) one xlwt pass
156 | sheet.cell(row=t_row + 2, column=t_col, value="投资日历")
157 | sheet.cell(row=t_row + 3 ,column=t_col, value="会议事件")
158 | sheet.cell(row=t_row + 3, column=t_col + 1, value="会议地点")
159 | sheet.cell(row=t_row + 3, column=t_col + 2, value="会议时间")
160 | try:
161 | wb.save(self.xlsxname)
162 | except Exception:
163 | print("THS Save Error = 4")
164 |
165 | threadLock = threading.Lock()
166 | def dealjson(self, json): #time/event/place; the related-sector field in the JSON is unordered, so it is not written (the parameter shadows the json module, which this method never uses)
167 | self.threadLock.acquire()
168 | wb = load_workbook(self.xlsxname)
169 | sheet = wb.get_sheet_by_name('Ths')
170 | t_row = sheet.max_row + 1
171 | t_col = 1
172 |
173 | for json_event in json['events']:
174 | m_event = json_event[0]
175 | m_place = json_event[2]
176 | sheet.cell(row=t_row, column=t_col, value=m_event)
177 | sheet.cell(row=t_row, column=t_col + 1, value=m_place)
178 | m_date = json['date']
179 | m_week = json['week']
180 | sheet.cell(row=t_row, column=t_col + 2, value=m_date + "-" + m_week)
181 | t_row = t_row + 1
182 | try:
183 | wb.save(self.xlsxname)
184 | self.threadLock.release()
185 | except Exception:
186 | self.threadLock.release()
187 | print("THS Save Error = 5")
188 |
189 |
190 | def get_Calendar(self): #investment calendar
191 | data = self.data_calendar.text + "del"
192 | data = data.replace("callback_dt(", "")
193 | data = data.replace(");del", "")
194 | json_str = json.loads(data)
195 | t = 0
196 | #pool = ThreadPoolExecutor(max_workers=2)
197 | for m_json in json_str['data']:
198 | #future1 = pool.submit(self.dealjson, m_json)
199 | t1 = threading.Thread(target=self.dealjson, args=(m_json, ))
200 | t1.start()
201 | t1.join()
202 |
203 | def main(self, file_name):
204 | self.xlsxname = file_name
205 | Ths.request()
206 | Ths.getNew()
207 | Ths.getInvestment()
208 | Ths.getInvestment2()
209 | Ths.get_Newspaper()
210 | Ths.get_Calendar()
211 |
212 | Ths = TongHuaShun()
213 |
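
> dealjson releases its lock by hand on both the success and the failure path. A sketch of the same save-under-lock step using a with-statement, which releases the lock even if save() raises (`save_under_lock` is a hypothetical helper, not the repo's code):

```python
import threading

lock = threading.Lock()

def save_under_lock(wb, path):
    # The context manager guarantees the release, so no separate
    # lock.release() call is needed on each branch.
    with lock:
        try:
            wb.save(path)
        except Exception:
            print("save failed:", path)
```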
--------------------------------------------------------------------------------
/src/Tzj_Finance.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from bs4 import BeautifulSoup
3 | from openpyxl import load_workbook
4 | import re
5 |
6 | headers = {
7 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15',
8 | }
9 |
10 | class Touzijie(object):
11 | def __init__(self):
12 | pass
13 |
14 | def request(self):
15 | self.url = 'https://www.pedaily.cn'
16 | for ll in range(3):
17 | try:
18 | self.data = requests.get(self.url, headers=headers, timeout=120)
19 | if self.data.status_code == 200:
20 | break
21 | except Exception as e:
22 | pass
23 | self.data.encoding = "utf-8"
24 | self.soup = BeautifulSoup(self.data.text, "lxml")
25 |
26 | def get_topnews(self):
27 | wb = load_workbook(self.xlsxname)
28 | sheet = wb.create_sheet("Tzj")
29 | t_row = 1
30 | t_col = 1
31 |
32 | sheet.cell(row=t_row, column=t_col, value="每日TOP5")
33 | t_row = t_row + 1
34 | sheet.cell(row=t_row, column=t_col, value="新闻标题")
35 | sheet.cell(row=t_row, column=t_col + 1, value="新闻链接")
36 | sheet.cell(row=t_row, column=t_col + 2, value="新闻简介")
37 | sheet.cell(row=t_row, column=t_col + 3, value="新闻时间")
38 | t_row = t_row + 1
39 |
40 | datalist = self.soup.select('#box-fix-content > div.tab-content > ul:nth-child(1)')
41 | for data in datalist:
42 | news = data.find_all('a')
43 | for m_new in news:
44 | m_href = m_new['href']
45 | m_title = m_new.get_text()
46 | sheet.cell(row=t_row, column=t_col, value=m_title)
47 | sheet.cell(row=t_row, column=t_col + 1, value=m_href)
48 | t_row = t_row + 1
49 |
50 | try:
51 | wb.save(self.xlsxname)
52 | except Exception:
53 | print("Tzj Save Error = 1")
54 |
55 | def get_news(self):
56 | wb = load_workbook(self.xlsxname)
57 | sheet = wb.get_sheet_by_name("Tzj")
58 | t_row = sheet.max_row + 2
59 | t_col = 1
60 | sheet.cell(row=t_row, column=t_col, value="最新资讯")
61 | t_row = t_row + 1
62 | sheet.cell(row=t_row, column=t_col, value="新闻标题")
63 | sheet.cell(row=t_row, column=t_col + 1, value="新闻链接")
64 | sheet.cell(row=t_row, column=t_col + 2, value="新闻简介")
65 | sheet.cell(row=t_row, column=t_col + 3, value="新闻时间")
66 | t_row = t_row + 1
67 | datalist = self.soup.find_all(class_='news-list news-list-bottom news-list-special')
68 | for data in datalist:
69 | news = data.find_all(class_='txt')
70 | for m_news in news:
71 | m_news = m_news.find_all('h3')
72 | pattern = re.compile(r'((https|http)?:\/\/)[^\s]+[^" t]')
73 | searchObj = re.search(r'target="_blank">(.*?)</a>', str(m_news), re.M | re.I) #a trailing lazy group must be bounded by </a>, or it matches ''
74 | m_href = pattern.search(str(m_news)).group()
75 | m_title = searchObj.group(1)
76 | sheet.cell(row=t_row, column=t_col, value=m_title)
77 | sheet.cell(row=t_row, column=t_col + 1, value=m_href)
78 | t_row = t_row + 1
79 | try:
80 | wb.save(self.xlsxname)
81 | except Exception:
82 | print("Tzj Save Error = 2")
83 |
84 | def get_Instantnews(self):
85 | wb = load_workbook(self.xlsxname)
86 | sheet = wb.get_sheet_by_name("Tzj")
87 | t_row = sheet.max_row + 2
88 | t_col = 1
89 | sheet.cell(row=t_row, column=t_col, value="即时快讯")
90 | t_row = t_row + 1
91 | sheet.cell(row=t_row, column=t_col, value="新闻标题")
92 | sheet.cell(row=t_row, column=t_col + 1, value="新闻链接")
93 | sheet.cell(row=t_row, column=t_col + 2, value="新闻简介")
94 | sheet.cell(row=t_row, column=t_col + 3, value="新闻时间")
95 | t_row = t_row + 1
96 | datalist = self.soup.find_all(class_='list-time hot-online')
97 |
98 | for trtag in datalist:
99 | data = trtag.find_all('li') # find every li entry under the list container
100 | for news in data:
101 | m_href = news['data-url']
102 | m_title = news['data-title']
103 | sheet.cell(row=t_row, column=t_col, value=m_title)
104 | sheet.cell(row=t_row, column=t_col + 1, value=m_href)
105 | t_row = t_row + 1
106 |
107 | try:
108 | wb.save(self.xlsxname)
109 | except Exception:
110 | print("Tzj Save Error = 3")
111 |
112 | def get_invest(self):
113 | datalist = self.soup.find_all(class_='list-invest')
114 | print(datalist)
115 |
116 | def get_ipo(self):
117 | datalist = self.soup.find_all(class_='list-ipo')
118 | print(datalist)
119 |
120 |
121 | def main(self, file_name):
122 | self.xlsxname = file_name
123 | try:
124 | Tzj.request()
125 | Tzj.get_topnews()
126 | Tzj.get_news()
127 | Tzj.get_Instantnews()
128 | except Exception:
129 | pass
130 |
131 |
132 | Tzj = Touzijie()
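
> get_news recovers the link and title from stringified h3 tags with regexes. Assuming the markup the regex implies (an `<a target="_blank">` inside each h3), the same extraction can stay inside BeautifulSoup; a sketch on a hypothetical fragment:

```python
from bs4 import BeautifulSoup

html = '<h3><a href="https://www.pedaily.cn/x" target="_blank">Example title</a></h3>'
soup = BeautifulSoup(html, "lxml")
for a in soup.find_all('a', target='_blank'):
    # Attribute and text access replace both regexes at once.
    print(a['href'], a.get_text())
```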
--------------------------------------------------------------------------------
/src/UA.py:
--------------------------------------------------------------------------------
1 | USER_AGENT_LIST = [
2 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
3 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
4 | "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
5 | "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
6 | "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
7 | "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
8 | "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
9 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
10 | "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
11 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
12 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
13 | "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
14 | "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
15 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
16 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
17 | "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
18 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11",
19 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
20 | "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)",
21 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)",
22 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 LBBROWSER",
23 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)",
24 | "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)",
25 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
26 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; 360SE)",
27 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
28 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)",
29 | "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1",
30 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1",
31 | "Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5",
32 | "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0b13pre) Gecko/20110307 Firefox/4.0b13pre",
33 | "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0",
34 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
35 | "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10",
36 | "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
37 | ]
38 | import random #whether this file's UA list is used or not makes no difference
39 | USER_AGENT = random.choice(USER_AGENT_LIST)
40 | # build the request headers
41 | headers = {'user-agent': USER_AGENT}
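
> Since USER_AGENT is chosen once at import, every request reuses the same string. A usage sketch that re-rolls the User-Agent per call instead (`fetch` and the timeout values are assumptions, not repo code):

```python
import random

import requests

from src.UA import USER_AGENT_LIST

def fetch(url):
    # Pick a fresh User-Agent on every call rather than once at import time.
    headers = {'User-Agent': random.choice(USER_AGENT_LIST)}
    return requests.get(url, headers=headers, timeout=(10, 30))
```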
--------------------------------------------------------------------------------
/src/Wy_Finance.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from bs4 import BeautifulSoup
3 | import os
4 | from openpyxl import load_workbook, Workbook
5 | from openpyxl.styles import Font
6 | import json
7 | import time
8 | import threading
9 |
10 | """
11 | https://www.pynote.net/archives/2229
12 | font: font size, font color, underline, etc.
13 | fill: fill colors, etc.
14 | border: cell borders
15 | alignment: alignment
16 | number_format: data format
17 | protection: write protection
18 | """
19 |
20 | headers = {
21 | 'User-Agent':'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36',
22 | }
23 |
24 | class WangYi(object):
25 | def __init__(self):
26 | pass
27 |
28 | def request(self):
29 | #new
30 | self.url = 'https://money.163.com/'
31 | for ll in range(3):
32 | try:
33 | self.data = requests.get(self.url, headers=headers, timeout=120)
34 | if self.data.status_code == 200:
35 | break
36 | except Exception as e:
37 | pass
38 |
39 | self.soup = BeautifulSoup(self.data.text, "lxml")
40 | self.time = time.time()
41 |
42 | def Style(self):
43 | self.m_font = Font(name="微软雅黑",size=15,bold=True,italic=True,color="FF0000")
44 | self.head_font = Font(name="微软雅黑",size=20,bold=True,italic=True,color="FF0000")
45 |
46 |
47 | def getTopNew(self):
48 | datalist = self.soup.select("ul li h2")
49 | wb = Workbook()
50 | ws = wb['Sheet']
51 | wb.remove(ws)
52 | sheet = wb.create_sheet("Wy")
53 | """
54 | alignment = xlwt.Alignment
55 | alignment.horz = xlwt.Alignment.HORZ_CENTER
56 | alignment.vert = xlwt.Alignment.VERT_CENTER
57 | style2 = xlwt.XFStyle()
58 | style2.alignment = alignment
59 | """
60 | t_row = 6
61 | t_col = 1
62 | sheet.cell(row=t_row, column=t_col, value="网易财经")
63 | t_row = t_row + 1
64 | sheet.cell(row=t_row, column=t_col, value="新闻标题")
65 | sheet.cell(row=t_row, column=t_col + 1, value="新闻链接")
66 | sheet.cell(row=t_row, column=t_col + 2, value="新闻简介")
67 | sheet.cell(row=t_row, column=t_col + 3, value="新闻时间")
68 | t_row = t_row + 1
69 |
70 | for li in datalist:
71 | url = li.find('a')['href']
72 | title = li.get_text()
73 | sheet.cell(row=t_row, column=t_col, value=title)
74 | sheet.cell(row=t_row, column=t_col + 1, value=url)
75 | t_row = t_row + 1
76 | try:
77 | wb.save(self.xlsxname)
78 | except Exception:
79 | print("Wy Save Error = 1")
80 |
81 | def getlist2(self):
82 | datalist2 = self.soup.find_all(class_='topnews_nlist topnews_nlist2')
83 |
84 | wb = load_workbook(self.xlsxname)
85 | sheet = wb.get_sheet_by_name('Wy')
86 | t_row = sheet.max_row # current last row
87 | t_col = 1
88 | for tp in datalist2:
89 | datalist3 = tp.select("li h3")
90 | for tn in datalist3:
91 | url = tn.find('a')['href']
92 | title = tn.get_text()
93 | sheet.cell(row=t_row, column=t_col, value=title)
94 | sheet.cell(row=t_row, column=t_col + 1, value=url)
95 | t_row = t_row + 1
96 | try:
97 | wb.save(self.xlsxname)
98 | except:
99 | print("Wangyi Save Error = 2")
100 |
101 | def getstock(self):
102 | #stock
103 | stockurl = 'https://money.163.com/stock'
104 | stockdata = requests.get(stockurl, headers=headers)
105 | soup = BeautifulSoup(stockdata.text, "lxml")
106 | stockl = soup.select('#stock2016_wrap > div > div.stock2016_content > div.idx_main.common_wrap.clearfix > div.news_main > div.news_main_wrap > div.topnews > div.topnews_first > h2 > a')
107 | top_url = stockl[0]['href']
108 | top_title = stockl[0].get_text()
109 |
110 | wb = load_workbook(self.xlsxname)
111 | sheet = wb.get_sheet_by_name('Wy')
112 | t_row = sheet.max_row # current last row
113 | t_col = 1
114 |
115 | stocknewlist = soup.find_all(class_='topnews_list')
116 | for s_new in stocknewlist:
117 | news = s_new.find_all('a')
118 | for tn in news:
119 | t_url = tn['href']
120 | t_title = tn.get_text()
121 | sheet.cell(row=t_row, column=t_col, value=t_title)
122 | sheet.cell(row=t_row, column=t_col + 1, value=t_url)
123 | t_row = t_row + 1
124 | try:
125 | wb.save(self.xlsxname)
126 | except:
127 | print("Wangyi Save Error = 4")
128 |
129 | threadLock = threading.Lock()
130 | def get_num(self, num, row, if_write):
131 | self.threadLock.acquire()
132 |
133 | num_price = num['price']
134 | num_open = num['open']
135 | num_updown = num['updown']
136 | num_high = num['high']
137 | num_low = num['low']
138 | num_yestclose = num['yestclose']
139 | num_percent = num['percent'] #num_percent is not written out directly
140 | num_update = num['update']
141 | m_percent = num_percent * 10000 #e.g. -0.020937 -> -2.09%
142 | percent = int(m_percent) / 100
143 |
144 | wb = load_workbook(self.xlsxname)
145 | sheet = wb.get_sheet_by_name('Wy')
146 | t_col = 1
147 |
148 | if if_write == True:
149 | t_row = 1
150 | sheet.cell(row=t_row, column=t_col, value='大盘指数')
151 | sheet.cell(row=t_row, column=t_col + 1, value='当前价位')
152 | sheet.cell(row=t_row, column=t_col + 2, value='今日涨幅')
153 | sheet.cell(row=t_row, column=t_col + 3, value='涨跌价格')
154 | sheet.cell(row=t_row, column=t_col + 4, value='开盘价位')
155 | sheet.cell(row=t_row, column=t_col + 5, value='今日最高')
156 | sheet.cell(row=t_row, column=t_col + 6, value='今日最低')
157 | sheet.cell(row=t_row, column=t_col + 7, value='昨日收盘')
158 | sheet.cell(row=t_row, column=t_col + 8, value='更新时间')
159 | sheet.cell(row=t_row + 2, column=t_col, value='深证成指')
160 | sheet.cell(row=t_row + 1, column=t_col, value='上证指数')
161 | sheet.cell(row=t_row + 3, column=t_col, value='沪深300')
162 | t_row = t_row + 1
163 |
164 | sheet.cell(row=row, column=2, value=num_price)
165 | sheet.cell(row=row, column=3, value=str(percent) + "%")
166 | sheet.cell(row=row, column=4, value=num_updown)
167 | sheet.cell(row=row, column=5, value=num_open)
168 | sheet.cell(row=row, column=6, value=num_high)
169 | sheet.cell(row=row, column=7, value=num_low)
170 | sheet.cell(row=row, column=8, value=num_yestclose)
171 | sheet.cell(row=row, column=9, value=num_update)
172 | try:
173 | wb.save(self.xlsxname)
174 | self.threadLock.release()
175 | except:
176 | self.threadLock.release()
177 | print("Wangyi Save Error = 5")
178 |
179 |
180 | def getindex(self):
181 | #index
182 | indexurl = 'http://api.money.126.net/data/feed/1399001,1399300,0000001,HSRANK_COUNT_SHA,HSRANK_COUNT_SZA,HSRANK_COUNT_SH3?callback=ne_{}&[object%20Object]'.format(int(self.time))
183 | indexdata = requests.get(indexurl, headers=headers)
184 | #soup = BeautifulSoup(self.indexdata.text,)
185 | data = indexdata.text + "del"
186 | time = int(self.time)
187 | data = data.replace('ne_' + str(time) + '(', '')
188 | data = data.replace(');del', '')
189 | #json_d = json.dumps(data) # encode
190 | json_str = json.loads(data) # decode
191 |
192 | if_write = True
193 | n_data1 = json_str['0000001'] #SSE Composite Index (0000001)
194 | n_data2 = json_str['1399001'] #SZSE Component Index (1399001)
195 | n_data3 = json_str['1399300'] #CSI 300 (1399300)
196 | t1 = threading.Thread(target=self.get_num, args=(n_data1, 2, if_write, ))
197 | t1.start()
198 | if_write = False
199 | t1.join()
200 | t2 = threading.Thread(target=self.get_num, args=(n_data2, 3, if_write, ))
201 | t3 = threading.Thread(target=self.get_num, args=(n_data3, 4, if_write, ))
202 | t2.start()
203 | t3.start()
204 | t2.join()
205 | t3.join()
206 | #self.get_num(n2, 2, if_write)
207 | #self.get_num(n3, 3, if_write)
208 |
209 | def get_bu(self, soup, if_write):
210 | wb = load_workbook(self.xlsxname)
211 | sheet = wb.get_sheet_by_name('Wy')
212 | t_row = sheet.max_row + 1
213 | t_col = 1
214 |
215 | if if_write == True:
216 | sheet.cell(row=t_row + 1, column=t_col, value="市场资讯")
217 | sheet.cell(row=t_row + 2, column=t_col, value="新闻标题")
218 | sheet.cell(row=t_row + 2, column=t_col + 1, value="新闻链接")
219 | sheet.cell(row=t_row + 2, column=t_col + 2, value="新闻简介")
220 | sheet.cell(row=t_row + 2, column=t_col + 3, value="新闻时间")
221 | t_row = t_row + 3 # rows consumed so far
222 |
223 | datalist1 = soup.find_all(class_='list_item clearfix')
224 | for Newslist in datalist1:
225 | News = Newslist.find_all(class_='item_top')
226 | for m_new in News:
227 | m_new1 = m_new.find('a')
228 | m_new2 = m_new.find(class_='time')
229 | m_title = m_new1.get_text()
230 | m_url = m_new1['href']
231 | m_time = m_new2.get_text()
232 | sheet.cell(row=t_row, column=t_col, value=m_title)
233 | sheet.cell(row=t_row, column=t_col + 1, value=m_url)
234 | sheet.cell(row=t_row, column=t_col + 3, value=m_time)
235 | t_row = t_row + 1
236 | try:
237 | wb.save(self.xlsxname)
238 | except:
239 | print("Wangyi Save Error = 6")
240 |
241 |
242 | def getBusiness(self): #market news; fetch two pages
243 | bu_url = 'http://money.163.com/special/00251LR5/cpznList.html'
244 | bu_url2 = 'http://money.163.com/special/00251LR5/cpznList_02.html' #page 2
245 |
246 | bu_data1 = requests.get(bu_url, headers=headers)
247 | soup1 = BeautifulSoup(bu_data1.text, "lxml")
248 |
249 | bu_data2 = requests.get(bu_url2, headers=headers)
250 | soup2 = BeautifulSoup(bu_data2.text, "lxml")
251 |
252 | if_write = True
253 | t1 =threading.Thread(target=self.get_bu, args=(soup1, if_write, ))
254 | t1.start()
255 | if_write = False
256 | t1.join()
257 | t2 = threading.Thread(target=self.get_bu, args=(soup2, if_write, )) #page 2 uses soup2
258 | t2.start()
259 | t2.join()
260 |
261 |
262 | def get_Indu(self, soup, if_write):
263 | wb = load_workbook(self.xlsxname)
264 | sheet = wb.get_sheet_by_name('Wy')
265 | t_row = sheet.max_row + 1
266 | t_col = 1
267 | if if_write == True:
268 | sheet.cell(row=t_row + 1, column=t_col, value="行业板块")
269 | sheet.cell(row=t_row + 2, column=t_col, value="新闻标题")
270 | sheet.cell(row=t_row + 2, column=t_col + 1, value="新闻链接")
271 | sheet.cell(row=t_row + 2, column=t_col + 2, value="新闻简介")
272 | sheet.cell(row=t_row + 2, column=t_col + 3, value="新闻时间")
273 | t_row = t_row + 3
274 |
275 | datalist = soup.find_all(class_="col_l")
276 | for Newslist in datalist:
277 | News = Newslist.find_all(class_="list_item clearfix")
278 | for newlist in News:
279 | news = newlist.find_all(class_="item_top")
280 | for new in news:
281 | m_new = new.select('h2 a')
282 | m_url = m_new[0]['href']
283 | m_title = m_new[0].get_text()
284 | m_new2 = new.select('p span')
285 | m_time = m_new2[0].get_text()
286 | sheet.cell(row=t_row, column=t_col, value=m_title)
287 | sheet.cell(row=t_row, column=t_col + 1, value=m_url)
288 | sheet.cell(row=t_row, column=t_col + 3, value=m_time)
289 | t_row = t_row + 1
290 | try:
291 | wb.save(self.xlsxname)
292 | except Exception:
293 | print("Wangyi Save Error = 7")
294 |
295 | def getIndustry(self): #industry news; first two pages
296 | url = 'http://money.163.com/special/00251LJV/hyyj.html'
297 | url2 = 'http://money.163.com/special/00251LJV/hyyj_02.html'
298 |
299 | Industry_data1 = requests.get(url, headers=headers)
300 | soup1 = BeautifulSoup(Industry_data1.text, "lxml")
301 | Industry_data2 = requests.get(url2, headers=headers)
302 | soup2 = BeautifulSoup(Industry_data2.text, "lxml")
303 | if_write = True
304 | t1 = threading.Thread(target=self.get_Indu, args=(soup1, if_write, ))
305 | t1.start()
306 | if_write = False
307 | t1.join()
308 | t2 = threading.Thread(target=self.get_Indu, args=(soup2, if_write, ))
309 | t2.start()
310 | t2.join()
311 | #self.get_Indu(soup2, if_write)
312 |
313 | def create_file(self, filename):
314 | wb = Workbook()
315 | ws = wb['Sheet']
316 | wb.remove(ws)
317 | sheet = wb.create_sheet("Wy")
318 | try:
319 | wb.save(filename)
320 | except Exception:
321 | print("Wy create_error = 1")
322 |
323 | def main(self, file_name):
324 | self.xlsxname = file_name
325 | Wy.request()
326 | Wy.getTopNew()
327 | Wy.getlist2()
328 | #stock
329 | Wy.getstock()
330 | Wy.getindex() # index board to the right of the homepage's original-content column
331 | Wy.getBusiness()
332 | Wy.getIndustry()
333 |
334 | Wy = WangYi()
335 |
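
> Every function in this module either creates the workbook (getTopNew, create_file) or reloads it from disk. A sketch of a shared load-or-create helper following the same drop-the-default-'Sheet' idiom (`open_or_create` is a hypothetical name, not part of the repo):

```python
import os

from openpyxl import Workbook, load_workbook

def open_or_create(path, sheet_name):
    # Reuse the existing workbook when present; otherwise create one and
    # remove the default 'Sheet' the way create_file/getTopNew do.
    if os.path.exists(path):
        wb = load_workbook(path)
        sheet = wb[sheet_name] if sheet_name in wb.sheetnames else wb.create_sheet(sheet_name)
    else:
        wb = Workbook()
        wb.remove(wb['Sheet'])
        sheet = wb.create_sheet(sheet_name)
    return wb, sheet
```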
--------------------------------------------------------------------------------
/src/Xhs_Finance.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from bs4 import BeautifulSoup
3 | from openpyxl import load_workbook
4 |
5 | headers = {
6 | 'User-Agent':'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36',
7 | }
8 |
9 | class XinHuaNet(object):
10 | def __init__(self):
11 | pass
12 |
13 | def request(self):
14 | self.url = 'http://www.xinhuanet.com/fortunepro/'
15 | for ll in range(3):
16 | try:
17 | self.data = requests.get(self.url, headers=headers, timeout=120)
18 | if self.data.status_code == 200:
19 | break
20 | except Exception as e:
21 | pass
22 |
23 | self.data.encoding = "utf-8"
24 | self.soup = BeautifulSoup(self.data.text, "lxml")
25 |
26 |
27 | def getTopNew(self):
28 | wb = load_workbook(self.xlsxname)
29 | sheet = wb.create_sheet("Xhs")
30 | t_row = 1
31 | t_col = 1
32 |
33 | sheet.cell(row=t_row, column=t_col, value="财经TOP10")
34 | t_row = t_row + 1
35 | sheet.cell(row=t_row, column=t_col, value="新闻标题")
36 | sheet.cell(row=t_row, column=t_col + 1, value="新闻链接")
37 | sheet.cell(row=t_row, column=t_col + 2, value="新闻简介")
38 | sheet.cell(row=t_row, column=t_col + 3, value="新闻时间")
39 | t_row = t_row + 1
40 |
41 | datalist = self.soup.find_all(class_='cjtop')
42 | for newlist in datalist:
43 | news = newlist.find_all('a')
44 | for m_new in news:
45 | m_href = m_new['href']
46 | m_title = m_new.get_text()
47 | sheet.cell(row=t_row, column=t_col, value=m_title)
48 | sheet.cell(row=t_row, column=t_col + 1, value=m_href)
49 | t_row = t_row + 1
50 |
51 | try:
52 | wb.save(self.xlsxname)
53 | except Exception:
54 | print("Xhs Save Error = 1")
55 |
56 | def getnews(self):
57 | wb = load_workbook(self.xlsxname)
58 | sheet = wb.get_sheet_by_name("Xhs")
59 | t_row = sheet.max_row + 2
60 | t_col = 1
61 | sheet.cell(row=t_row, column=t_col, value="财经新闻")
62 | t_row = t_row + 1
63 | sheet.cell(row=t_row, column=t_col, value="新闻标题")
64 | sheet.cell(row=t_row, column=t_col + 1, value="新闻链接")
65 | sheet.cell(row=t_row, column=t_col + 2, value="新闻简介")
66 | sheet.cell(row=t_row, column=t_col + 3, value="新闻时间")
67 | t_row = t_row + 1
68 | datalist = self.soup.find_all(class_='xpage-content-list')
69 | for data in datalist:
70 | news = data.find_all('a')
71 | for m_new in news:
72 | m_href = m_new['href']
73 | m_title = m_new.get_text()
74 | if m_title == "":
75 | continue
76 | sheet.cell(row=t_row, column=t_col, value=m_title)
77 | sheet.cell(row=t_row, column=t_col + 1, value=m_href)
78 | t_row = t_row + 1
79 | try:
80 | wb.save(self.xlsxname)
81 | except Exception:
82 | print("Xhs Save Error = 2")
83 |
84 |
85 | def get_research_report(self):
86 | wb = load_workbook(self.xlsxname)
87 | sheet = wb.get_sheet_by_name("Xhs")
88 | t_row = sheet.max_row + 2
89 | t_col = 1
90 | sheet.cell(row=t_row, column=t_col, value="行业研报")
91 | t_row = t_row + 1
92 | sheet.cell(row=t_row, column=t_col, value="新闻标题")
93 | sheet.cell(row=t_row, column=t_col + 1, value="新闻链接")
94 | sheet.cell(row=t_row, column=t_col + 2, value="新闻简介")
95 | sheet.cell(row=t_row, column=t_col + 3, value="新闻时间")
96 | t_row = t_row + 1
97 | datalist = self.soup.find_all(class_='rtlist')
98 | for data in datalist:
99 | news = data.find_all('a')
100 | for m_new in news:
101 | m_href = m_new['href']
102 | m_title = m_new.get_text()
103 | if len(m_title) <= 4:
104 | continue
105 | sheet.cell(row=t_row, column=t_col, value=m_title)
106 | sheet.cell(row=t_row, column=t_col + 1, value=m_href)
107 | t_row = t_row + 1
108 |
109 | try:
110 | wb.save(self.xlsxname)
111 | except Exception:
112 | print("Xhs Save Error = 3")
113 |
114 |
115 | def main(self, file_name):
116 | self.xlsxname = file_name
117 | Xhs.request()
118 | Xhs.getTopNew()
119 | Xhs.getnews()
120 | Xhs.get_research_report()
121 |
122 |
123 | Xhs = XinHuaNet()
124 |
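
> The three-attempt request loop in request() is copied into nearly every module. A minimal sketch of that retry pattern as a shared helper (`get_with_retry` is a hypothetical name; the defaults mirror the loops above):

```python
import requests

def get_with_retry(url, headers, attempts=3, timeout=120):
    # Mirror the per-module retry loops: return the first 200 response,
    # or None when every attempt fails.
    for _ in range(attempts):
        try:
            resp = requests.get(url, headers=headers, timeout=timeout)
            if resp.status_code == 200:
                return resp
        except requests.RequestException:
            continue
    return None
```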
--------------------------------------------------------------------------------
/src/Xq_Community.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from bs4 import BeautifulSoup
3 | import xlrd, xlwt
4 | import xlutils
5 | import json
6 | from requests.cookies import RequestsCookieJar
7 |
8 | headers = {
9 | 'User-Agent':'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36',
10 | #'Cookie': 'xqat=3e14cc861fdd960a5d84e7316165286b1bfeafe3;',
11 | }
12 |
13 | class XueQiu(object):
14 | def __init__(self, file_name):
15 | self.xlsxname = file_name
16 |
17 |
18 | def Style(self):
19 | font = xlwt.Font() # body font
20 | font2 = xlwt.Font() # heading font
21 | font3 = xlwt.Font() # index font
22 | font.height = 20 * 11
23 | font2.height = 20 * 12
24 | font2.bold = True
25 | font3.height = 20 * 13
26 | self.style = xlwt.XFStyle() #title/link style
27 | self.style_head = xlwt.XFStyle() #category column style
28 | self.style_index = xlwt.XFStyle() #index style
29 |
30 | self.style.font = font
31 | self.style_head.font = font2
32 | self.style_index.font = font3
33 |
34 | def Today(self):
35 | url = 'https://xueqiu.com/'
36 | session = requests.session()
37 | data = session.get(url, headers=headers)
38 | soup = BeautifulSoup(data.text, "lxml")
39 | """
40 | #cannot get links here, only JSON; decide later whether to add this
41 | #url = xueqiu.com
42 | #data_url = xueqiu.com/today
43 | session = requests.session()
44 | session.get(url, headers=headers)
45 | resp = session.get(data_url, headers=headers).json()
46 | for items in resp['items']:
47 | title = items['original_status']['title']
48 | description = items['original_status']['description']
49 | keyword = items['original_status']['text']
50 |
51 | def get_SelfStock(self):
52 | url_list = list()
53 | with open("Code.txt", "r") as f:
54 | for line in f.readlines():
55 | line = line.strip('\n') # strip the trailing newline
56 | sep = '#'
57 | line = line.split(sep, 1)[0]
58 | if line != '':
59 | #url = 'https://xueqiu.com/S/{}'.format(line)
60 | url = 'https://stock.xueqiu.com/v5/stock/batch/quote.json?extend=detail&is_delay_ft=1&is_delay_hk=0&symbol={}'.format(line)
61 | url_list.append(url)
62 |
63 | url = 'https://xueqiu.com'
64 | session = requests.session()
65 | session.get(url, headers=headers)
66 | for url in url_list:
67 | resp = session.get(url, headers=headers)
68 | data = json.loads(resp.text)
69 | self.Deal_Xq_data()
70 | print(data)
71 | #data = requests.get(url, headers=headers)
72 |
73 | """
74 |
75 |     def main(self, file_name):
76 |         Xq = XueQiu(file_name)
77 |         Xq.Style()
78 |         # Xq.Today()          # disabled: xueqiu.com/today only serves JSON (see above)
79 |         # Xq.get_SelfStock()  # disabled: the implementation sits in the block above
80 |
81 |
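82 | # Hedged sketch of the Code.txt format that the disabled get_SelfStock expects:
83 | # one entry per line, code first, with '#' starting an inline comment, e.g.
84 | # "SH600519#<stock name>". load_codes is a hypothetical helper, not part of
85 | # the original module.
86 | def load_codes(path="Code.txt"):
87 |     codes = []
88 |     with open(path, "r", encoding="utf-8") as f:
89 |         for line in f:
90 |             code = line.split('#', 1)[0].strip()  # drop the name/comment part
91 |             if code:
92 |                 codes.append(code)
93 |     return codes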
--------------------------------------------------------------------------------
/src/search_us.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import json
3 | import os
4 | import time
5 |
6 | """
7 | POST https://www.entobit.cn/trending/top/getWeiboRankSearch.do HTTP/1.1
9 | Host: www.entobit.cn
10 | Connection: keep-alive
11 | Content-Length: 33
12 | sec-ch-ua: "Not_A Brand";v="99", "Google Chrome";v="109", "Chromium";v="109"
13 | Accept: application/json, text/plain, */*
14 | Content-Type: application/x-www-form-urlencoded
15 | sec-ch-ua-mobile: ?0
16 | type: restful
17 | User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36
18 | sec-ch-ua-platform: "Windows"
19 | Origin: https://www.entobit.cn
20 | Sec-Fetch-Site: same-origin
21 | Sec-Fetch-Mode: cors
22 | Sec-Fetch-Dest: empty
23 | Referer: https://www.entobit.cn/hot-search/nav/home?accessToken=Cjr8dOf3BVcz6kQELQ24/SHCztOETa%2BGJuB%2BR4c2SaWErDW6BR7ZBuKo5idM1TWHQ2YHNf6GjPn6Vxb971zAPw==&bindPhone=false&isIos=&isWx=
25 | Accept-Encoding: gzip, deflate, br
26 | Accept-Language: zh-CN,zh;q=0.9
27 |
28 | keyword=%E7%BE%8E%E5%85%83&from=1
29 |
30 | HTTP/1.1 200 OK
31 | Server: nginx/1.18.0
32 | Date: Thu, 11 May 2023 08:52:42 GMT
33 | Content-Type: application/json;charset=UTF-8
34 | Content-Length: 6209
35 | Connection: keep-alive
36 |
37 | {"rows":[{"duration":9840,"searchNums":29164,"keywords":"莫兰特损失4000万美元","updateTime":1683794940,"type":"realTimeHotSpots","topRanking":21,"url":"https://s.weibo.com/weibo?q=%23%E8%8E%AB%E5%85%B0%E7%89%B9%E6%8D%9F%E5%A4%B14000%E4%B8%87%E7%BE%8E%E5%85%83%23","firstRankingTime":1683785700},{"duration":82260,"searchNums":1494363,"keywords":"特朗普性侵罪成 向女作家赔500万美元","updateTime":1683761340,"type":"toutiao","topRanking":1,"url":"https://m.toutiao.com/search/?keyword=特朗普性侵罪成 向女作家赔500万美元&pd=synthesis&source=trending_list&traffic_source=","firstRankingTime":1683675000},{"duration":3720,"searchNums":37860,"keywords":"美国宣布再向乌提供12亿美元军援","updateTime":1683737160,"type":"toutiao","topRanking":35,"url":"https://m.toutiao.com/search/?keyword=美国宣布再向乌提供12亿美元军援&pd=synthesis&source=trending_list&traffic_source=","firstRankingTime":1683733140},{"duration":6480,"searchNums":283486,"keywords":"外媒:“去美元化”已成当下趋势","updateTime":1683712200,"type":"toutiao","topRanking":8,"url":"https://m.toutiao.com/search/?keyword=外媒:“去美元化”已成当下趋势&pd=synthesis&source=trending_list&traffic_source=","firstRankingTime":1683700200},{"duration":12180,"searchNums":8581651,"keywords":"美将向乌提供12亿美元军事援助","updateTime":1683676080,"type":"kwai","topRanking":1,"url":"","firstRankingTime":1683614220},{"duration":18660,"searchNums":22312,"keywords":"崩铁希儿吸金近三千万美元","updateTime":1683606420,"type":"realTimeHotSpots","topRanking":21,"url":"https://s.weibo.com/weibo?q=%23%E5%B4%A9%E9%93%81%E5%B8%8C%E5%84%BF%E5%90%B8%E9%87%91%E8%BF%91%E4%B8%89%E5%8D%83%E4%B8%87%E7%BE%8E%E5%85%83%23","firstRankingTime":1683587820},{"duration":2460,"searchNums":164917,"keywords":"沙特将向苏丹提供价值1亿美元援助","updateTime":1683527040,"type":"toutiao","topRanking":31,"url":"https://m.toutiao.com/search/?keyword=沙特将向苏丹提供价值1亿美元援助&pd=synthesis&source=trending_list&traffic_source=","firstRankingTime":1683517140},{"duration":5100,"searchNums":2465723,"keywords":"美媒:好莱坞编剧罢工损失超100亿美元","updateTime":1683526800,"type":"baidu","topRanking":6,"url":"https://www.baidu.com/s?wd=%E7%BE%8E%E5%AA%92%3A%E5%A5%BD%E8%8E%B1%E5%9D%9E%E7%BC%96%E5%89%A7%E7%BD%A2%E5%B7%A5%E6%8D%9F%E5%A4%B1%E8%B6%85100%E4%BA%BF%E7%BE%8E%E5%85%83&sa=fyb_news&rsv_dl=fyb_news","firstRankingTime":1683506100},{"duration":58020,"searchNums":2320820,"keywords":"截至4月末外汇储备规模32048亿美元","updateTime":1683512820,"type":"toutiao","topRanking":2,"url":"https://m.toutiao.com/search/?keyword=截至4月末外汇储备规模32048亿美元&pd=synthesis&source=trending_list&traffic_source=","firstRankingTime":1683429660},{"duration":11160,"searchNums":52850,"keywords":"2023年全球票房破100亿美元","updateTime":1683455700,"type":"toutiao","topRanking":8,"url":"https://m.toutiao.com/search/?keyword=2023年全球票房破100亿美元&pd=synthesis&source=trending_list&traffic_source=","firstRankingTime":1683442080},{"duration":20520,"searchNums":24139,"keywords":"国际奥委会向中国奥委会捐赠1040万美元","updateTime":1683455640,"type":"realTimeHotSpots","topRanking":21,"url":"https://s.weibo.com/weibo?q=%23%E5%9B%BD%E9%99%85%E5%A5%A5%E5%A7%94%E4%BC%9A%E5%90%91%E4%B8%AD%E5%9B%BD%E5%A5%A5%E5%A7%94%E4%BC%9A%E6%8D%90%E8%B5%A01040%E4%B8%87%E7%BE%8E%E5%85%83%23","firstRankingTime":1683434700},{"duration":480,"searchNums":137737,"keywords":"巴菲特:美元未来不一定是储备货币","updateTime":1683436680,"type":"toutiao","topRanking":43,"url":"https://m.toutiao.com/search/?keyword=巴菲特:美元未来不一定是储备货币&pd=synthesis&source=trending_list&traffic_source=","firstRankingTime":1683436260},{"duration":11340,"searchNums":6937663,"keywords":"拜登政府对台提供5亿美元军援","updateTime":1683435360,"type":"kwai","topRanking":6,"url":"","firstRankingTime":1683359460},{"duration":660,"searchNums":21969,"keywords":"2023全球票房破100亿美元","updateTime":1683432660,"type":"realTimeHotSpots","topRanking":63,"url":"https://s.weibo.com/weibo?q=%232023%E5%85%A8%E7%90%83%E7%A5%A8%E6%88%BF%E7%A0%B4100%E4%BA%BF%E7%BE%8E%E5%85%83%23","firstRankingTime":1683430980},{"duration":41580,"searchNums":206584,"keywords":"中国奥委会获捐1040万美元","updateTime":1683418440,"type":"toutiao","topRanking":1,"url":"https://m.toutiao.com/search/?keyword=中国奥委会获捐1040万美元&pd=synthesis&source=trending_list&traffic_source=","firstRankingTime":1683376920},{"duration":28800,"searchNums":41653,"keywords":"外媒:美拟向台湾提供价值5亿美元武器","updateTime":1683416400,"type":"toutiao","topRanking":18,"url":"https://m.toutiao.com/search/?keyword=外媒:美拟向台湾提供价值5亿美元武器&pd=synthesis&source=trending_list&traffic_source=","firstRankingTime":1683350700},{"duration":2820,"searchNums":18464,"keywords":"4月中国游戏厂商全球吸金20亿美元","updateTime":1683368460,"type":"realTimeHotSpots","topRanking":63,"url":"https://s.weibo.com/weibo?q=%234%E6%9C%88%E4%B8%AD%E5%9B%BD%E6%B8%B8%E6%88%8F%E5%8E%82%E5%95%86%E5%85%A8%E7%90%83%E5%90%B8%E9%87%9120%E4%BA%BF%E7%BE%8E%E5%85%83%23","firstRankingTime":1683364500},{"duration":2700,"searchNums":0,"keywords":"Intel净亏损28亿美元","updateTime":1683356400,"type":"bilibili","topRanking":19,"url":"https://search.bilibili.com/all?keyword=Intel%E5%87%80%E4%BA%8F%E6%8D%9F28%E4%BA%BF%E7%BE%8E%E5%85%83&from_source=webtop_search&spm_id_from=333.851","firstRankingTime":1683353700},{"duration":2880,"searchNums":7215648,"keywords":"各国放弃美元怎么办","updateTime":1683355320,"type":"kwai","topRanking":33,"url":"","firstRankingTime":1683348540},{"duration":1440,"searchNums":295947,"keywords":"是否担心多国去美元化?白宫回应","updateTime":1683354180,"type":"toutiao","topRanking":19,"url":"https://m.toutiao.com/search/?keyword=是否担心多国去美元化?白宫回应&pd=synthesis&source=trending_list&traffic_source=","firstRankingTime":1683350160}],"total":3119}
38 | """
39 | session = requests.Session()
40 |
41 | headers = {
42 |     'Host': 'www.entobit.cn',
43 |     'Origin': 'https://www.entobit.cn',
44 | 'Connection': 'keep-alive',
45 | 'Accept': 'application/json, text/plain, */*',
46 | 'Content-Type': 'application/x-www-form-urlencoded',
47 |     # Content-Length is computed by requests; the captured value '33' only fits the original keyword
48 | # 'Referer': 'https://www.entobit.cn/hot-search/nav/home?accessToken=Cjr8dOf3BVcz6kQELQ24/SHCztOETa%2BGJuB%2BR4c2SaWErDW6BR7ZBuKo5idM1TWHQ2YHNf6GjPn6Vxb971zAPw==&bindPhone=false&isIos=&isWx=',
49 | 'User-Agent':'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36'
50 | }
51 | def request(text, num):
52 |     url = 'https://www.entobit.cn/trending/top/getWeiboRankSearch.do'
53 |
54 | key = {
55 |         'keyword': text,
56 |         'from': str(num)
57 | }
58 |
59 | for _ in range(3):
60 | try:
61 | res = session.post(url, headers=headers, data=key, timeout=(20, 30))
62 | if res.status_code == 200:
63 |                 if res.text == "{\"rows\":[],\"total\":23}":  # an empty rows payload appears to signal throttling
64 |                     print("rate limited, sleeping 10000 s")
65 |                     time.sleep(10000)  # back off roughly 2.8 hours before continuing
66 |                 with open("./{0}.txt".format(text), "a", encoding="utf-8") as file:  # utf-8 keeps Chinese keywords intact
67 | file.write(res.text + '\n')
68 | return True
69 | except Exception as e:
70 | print(e)
71 |
72 | return False
73 |
74 |
75 | if __name__ == '__main__':
76 | num = 1
77 |     text = 'NFT'  # search keyword; responses are appended to ./NFT.txt
78 | while True:
79 | print("cur = ", num)
80 | ret = request(text, num)
81 | time.sleep(5)
82 |         if not ret:
83 |             print("error: ", num)  # note: num still advances below, so the failed page is skipped
84 |             time.sleep(10)
85 |
86 | num = num + 1
87 |
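88 | # Hedged sketch, not in the original script: each successful request() call above
89 | # appends one raw JSON response per line to ./<keyword>.txt; parse_saved is a
90 | # hypothetical helper that extracts a few of the fields shown in the captured
91 | # response documented at the top of this file.
92 | def parse_saved(path):
93 |     records = []
94 |     with open(path, "r", encoding="utf-8") as f:
95 |         for line in f:
96 |             line = line.strip()
97 |             if not line:
98 |                 continue
99 |             for row in json.loads(line).get("rows", []):
100 |                 records.append((row["keywords"], row["type"], row["topRanking"]))
101 |     return records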
--------------------------------------------------------------------------------