├── .project
├── .pydevproject
├── .settings
│   └── org.eclipse.core.resources.prefs
├── README.md
└── src
    ├── base_info.py
    ├── finmain.py
    ├── getcookies.py
    ├── hq.py
    ├── kline.py
    ├── test_tushare.py
    └── xueqiu_test.py
/.project:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <projectDescription>
3 | <name>stock</name>
4 | <comment></comment>
5 | <projects>
6 | </projects>
7 | <buildSpec>
8 | <buildCommand>
9 | <name>org.python.pydev.PyDevBuilder</name>
10 | <arguments>
11 | </arguments>
12 | </buildCommand>
13 | </buildSpec>
14 | <natures>
15 | <nature>org.python.pydev.pythonNature</nature>
16 | </natures>
17 | </projectDescription>
18 |
--------------------------------------------------------------------------------
/.pydevproject:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="no"?>
2 | <?eclipse-pydev version="1.0"?><pydev_project>
3 | <pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
4 | <pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python interpreter</pydev_property>
5 | <pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
6 | <path>/${PROJECT_DIR_NAME}/src</path>
7 | </pydev_pathproperty>
8 | </pydev_project>
9 |
--------------------------------------------------------------------------------
/.settings/org.eclipse.core.resources.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | encoding//src/base_info.py=UTF-8
3 | encoding//src/finmain.py=UTF-8
4 | encoding//src/getcookies.py=UTF-8
5 | encoding//src/hello.py=UTF-8
6 | encoding//src/hq.py=UTF-8
7 | encoding//src/test_tushare.py=utf-8
8 | encoding//src/xueqiu_test.py=UTF-8
9 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # stock
2 | Scrape stock data from the Xueqiu website with Python (stock symbols, company fundamentals, financial data, daily K-line data, ex-rights information, and so on) and store it in a local MySQL database for analysis.
3 |
--------------------------------------------------------------------------------
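Note: every script under src/ writes to a local MySQL database named stock on port 3311 (see the pymysql.connect calls below), but no schema ships with the repository. A minimal, hypothetical sketch of the stocks table that src/hq.py fills: only the column names are taken from its INSERT statement, while the data types are assumptions.

# Hypothetical schema sketch: column names come from the INSERT in src/hq.py,
# the data types are assumptions, not taken from the repository.
import pymysql

DDL = """
CREATE TABLE IF NOT EXISTS stocks (
    symbol        VARCHAR(16) PRIMARY KEY,
    compcode      VARCHAR(16),
    compsname     VARCHAR(64),
    `current`     DECIMAL(12,2),
    percent       DECIMAL(8,2),
    high52w       DECIMAL(12,2),
    low52w        DECIMAL(12,2),
    marketcapital DECIMAL(20,2),
    amount        DECIMAL(20,2),
    volume        BIGINT,
    pe_ttm        DECIMAL(12,2),
    `timestamp`   TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
)
"""

db = pymysql.connect(host="localhost", user="test", password="test",
                     db="stock", port=3311, charset="utf8mb4")
with db.cursor() as cursor:
    cursor.execute(DDL)
db.commit()
db.close()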
/src/base_info.py:
--------------------------------------------------------------------------------
1 | import json
2 | import requests
3 | import pymysql
4 | from http.cookiejar import Cookie
5 | #the next three imports were added for getcookiefromchrome
6 | import os
7 | import sqlite3
8 | from win32.win32crypt import CryptUnprotectData
9 | #used to slow down the crawl
10 | import time
11 | from pymysql.times import Timestamp
12 | from goto import with_goto
13 | #import win32crypt
14 |
15 |
16 |
17 |
18 |
19 | def getcookiefromchrome(host='.xueqiu.com'):
20 | cookiepath=os.environ['LOCALAPPDATA']+r"\Google\Chrome\User Data\Default\Cookies"
21 | sql="select host_key,name,encrypted_value from cookies where host_key='%s'" % host
22 | with sqlite3.connect(cookiepath) as conn:
23 | cu=conn.cursor()
24 | cookies={name:CryptUnprotectData(encrypted_value)[1].decode() for host_key,name,encrypted_value in cu.execute(sql).fetchall()}
25 | #print(cookies)
26 | return cookies
27 |
28 |
29 | # mysql_conn wraps the database access so the connection is opened only once
30 | #
31 | class mysql_conn(object):
32 | # magic method: constructor
33 | def __init__(self):
34 | self.db = pymysql.connect(host="localhost",user="test",password="test",db="stock",port=3311, charset="utf8mb4" )
35 | self.cursor = self.db.cursor()
36 | # execute modification (INSERT/UPDATE) statements and commit
37 | def execute_modify_mysql(self, sql):
38 | self.cursor.execute(sql)
39 | self.db.commit()
40 | # run a SELECT and return the full result set
41 | def execute_select(self,sql):
42 | result=[]
43 | self.cursor.execute(sql)
44 | result = self.cursor.fetchall()
45 | return result
46 |
47 | # magic method: destructor
48 | def __del__(self):
49 | self.cursor.close()
50 | self.db.close()
51 |
52 |
53 |
54 |
55 | # the site rejects bare requests, so send browser-like headers
56 | headers = {
57 | 'Accept': 'application/json, text/javascript, */*; q=0.01',
58 | 'Accept-Encoding': 'gzip, deflate, br',
59 | 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
60 | 'cache-control': 'no-cache',
61 | 'Connection': 'keep-alive',
62 | #'Cookie': 'aliyungf_tc=AQAAALoQF3p02gsAUhVFebQ3uBBNZn+H; xq_a_token=584d0cf8d5a5a9809761f2244d8d272bac729ed4; xq_a_token.sig=x0gT9jm6qnwd-ddLu66T3A8KiVA; xq_r_token=98f278457fc4e1e5eb0846e36a7296e642b8138a; xq_r_token.sig=2Uxv_DgYTcCjz7qx4j570JpNHIs; _ga=GA1.2.516718356.1534295265; _gid=GA1.2.1050085592.1534295265; u=301534295266356; device_id=f5c21e143ce8060c74a2de7cbcddf0b8; Hm_lvt_1db88642e346389874251b5a1eded6e3=1534295265,1534295722; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1534295722',
63 | #'Cookie':'Hm_lpvt_1db88642e346389874251b5a1eded6e3=1538206459;Hm_lvt_1db88642e346389874251b5a1eded6e3=1538120732,1538144989,1538206010,1538206459', '_ga': 'GA1.2.1715450264.1532421729', '_gid': 'GA1.2.1367684432.1537964115', 'bid': '9bff933477b6c75b2ff40032e613edb6_jjzgesh9', 'device_id': '61f5d2eff7db22470fda980ead33cda9', 'remember': '1', 'remember.sig': 'K4F3faYzmVuqC0iXIERCQf55g2Y', 's': 'er17v6p058', 'snbim_minify': 'true', 'u': '1781168269', 'u.sig': 'cMmZfQkGyfjC5lehGsI4jsHDp-w', 'xq_a_token': '8a8848e34abe1b04ab2fb720b9d124b2368ec1b4', 'xq_a_token.sig': 'gUjJ-JIAMsQ2dcAIqZKMZbpclYU', 'xq_is_login': '1', 'xq_is_login.sig': 'J3LxgPVPUzbBg3Kee_PquUfih7Q', 'xq_r_token': '2827c657061f1072f18dd4208a8e548799fdf31b', 'xq_r_token.sig': 'y3_9YXXKVvXnZeppIJoOCI923S4'}'
64 | 'Cookie':'device_id=61f5d2eff7db22470fda980ead33cda9; _ga=GA1.2.1715450264.1532421729; s=er17v6p058; bid=9bff933477b6c75b2ff40032e613edb6_jjzgesh9; remember=1; remember.sig=K4F3faYzmVuqC0iXIERCQf55g2Y; xq_a_token=8a8848e34abe1b04ab2fb720b9d124b2368ec1b4; xq_a_token.sig=gUjJ-JIAMsQ2dcAIqZKMZbpclYU; xq_r_token=2827c657061f1072f18dd4208a8e548799fdf31b; xq_r_token.sig=y3_9YXXKVvXnZeppIJoOCI923S4; xq_is_login=1; xq_is_login.sig=J3LxgPVPUzbBg3Kee_PquUfih7Q; u=1781168269; u.sig=cMmZfQkGyfjC5lehGsI4jsHDp-w; _gid=GA1.2.1367684432.1537964115; aliyungf_tc=AQAAAHiOFxzQvAkAnHQGcEZiz5DUijt0; snbim_minify=true; __utmc=1; __utmz=1.1538233687.73.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; Hm_lvt_1db88642e346389874251b5a1eded6e3=1538233575,1538233656,1538233685,1538276572; __utma=1.1715450264.1532421729.1538270997.1538281139.76; __utmt=1; __utmb=1.23.10.1538281139; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1538283036',
65 | 'Host': 'xueqiu.com',
66 | 'Referer': 'https://xueqiu.com/S',
67 | 'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
68 | 'X-Requested-With': 'XMLHttpRequest'
69 | }
70 |
71 | # request URL template used below
72 | url = 'https://xueqiu.com/stock/f10/compinfo.json?symbol={symbol}&page=1&size=4&_={timestamp}'
73 |
74 | #stock list endpoint
75 | #https://xueqiu.com/stock/cata/stocklist.json?page=1&size=30&order=desc&orderby=percent&type=11%2C12&_=1538234389909
76 |
77 | #company base info endpoint
78 | #"https://xueqiu.com/stock/f10/compinfo.json?symbol=SZ000001"
79 |
80 | #connect to the database
81 | mc = mysql_conn()
82 | stock_symbol_list = mc.execute_select('select symbol from stocks order by symbol asc')
83 | stock_count=0
84 |
85 |
86 | #fetch base company info for every stock
87 | for (stock_symbol,) in stock_symbol_list:
88 |
89 | stock_count=stock_count+1
90 | timestp=int(round(time.time() * 1000))
91 | #throttle the crawl if needed
92 | #time.sleep(0.3)
93 |
94 | print('stock #%d, %s'%(stock_count,stock_symbol))
95 | #print(url.format(symbol=stock_symbol,timestamp=timestp))
96 | response = requests.get(url.format(symbol=stock_symbol,timestamp=timestp), headers=headers)
97 | #response = requests.get(url.format(symbol=stock_symbol,timestamp=timestp), headers=headers,cookies=getcookiefromchrome('.xueqiu.com'))
98 | #print(response.status_code)
99 | #retry until the request succeeds
100 | while response.status_code != requests.codes.ok:
101 | print('retrying stock #%d, %s'%(stock_count,stock_symbol))
102 | time.sleep(0.3)
103 | timestp=int(round(time.time() * 1000))
104 | response = requests.get(url.format(symbol=stock_symbol,timestamp=timestp), headers=headers)
105 | else:
106 | #store the data
107 | res_dict = json.loads(response.text)
108 | #continue
109 | #print(res_dict)
110 |
111 | compinfo = res_dict['tqCompInfo']
112 | #print(compinfo)
113 | #
114 | data = {}
115 |
116 | #data_dict = json.loads(data_str)
117 | data['compcode'] = stock_symbol
118 | data['compname'] = compinfo['compname']
119 | data['engname'] = compinfo['engname']
120 | data['founddate'] = compinfo['founddate']
121 | data['regcapital'] = compinfo['regcapital']
122 | data['chairman'] = compinfo['chairman']
123 | data['manager'] = compinfo['manager']
124 | data['leconstant'] = compinfo['leconstant']
125 | data['accfirm'] = compinfo['accfirm']
126 | data['regaddr'] = compinfo['regaddr']
127 | data['officeaddr'] = compinfo['officeaddr']
128 | data['compintro'] = compinfo['compintro'].replace('"',' ')
129 | data['bizscope'] = compinfo['bizscope'].replace('"',' ')
130 | data['majorbiz'] = compinfo['majorbiz'].replace('"',' ')
131 | data['compsname'] = compinfo['compsname']
132 | data['region'] = compinfo['region']
133 | #print(data)
134 | try:
135 | sql = 'insert into comp(compname,engname,founddate,regcapital,chairman,manager,leconstant,accfirm,regaddr,officeaddr,compintro,bizscope,majorbiz,compcode,compsname,region) \
136 | values("{compname}","{engname}","{founddate}","{regcapital}","{chairman}","{manager}","{leconstant}","{accfirm}","{regaddr}","{officeaddr}","{compintro}","{bizscope}","{majorbiz}","{compcode}","{compsname}","{region}")\
137 | on duplicate key update compname="{compname}",engname="{engname}",regcapital="{regcapital}",chairman="{chairman}",manager="{manager}",leconstant="{leconstant}",accfirm="{accfirm}",regaddr="{regaddr}",\
138 | officeaddr="{officeaddr}",compintro="{compintro}",bizscope="{bizscope}",majorbiz="{majorbiz}",compsname="{compsname}",region="{region}",timestamp=CURRENT_TIMESTAMP'.format(**data)
139 | mc.execute_modify_mysql(sql)
140 | print('%s*%s scraped successfully' %(data['compcode'],data['compsname']))
141 | #print('-' * 50)
142 | except Exception as e :
143 | print('the record above failed and was not saved to the database')
144 | print('-' * 50)
145 | print(e)
146 |
--------------------------------------------------------------------------------
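The insert above is built with str.format, which is why base_info.py scrubs double quotes out of compintro, bizscope and majorbiz before formatting. A minimal sketch of the same upsert written with pymysql parameter binding, which removes the need for that scrubbing; only three of the sixteen comp columns are shown and the helper name is hypothetical:

# Sketch only: the comp upsert with bound parameters instead of str.format.
UPSERT_COMP = (
    "insert into comp (compcode, compname, compintro) "
    "values (%s, %s, %s) "
    "on duplicate key update compname = VALUES(compname), "
    "compintro = VALUES(compintro), timestamp = CURRENT_TIMESTAMP"
)

def upsert_comp(mc, data):
    # mc is the mysql_conn instance created in base_info.py
    mc.cursor.execute(UPSERT_COMP, (data['compcode'], data['compname'], data['compintro']))
    mc.db.commit()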
/src/finmain.py:
--------------------------------------------------------------------------------
1 | import json
2 | import requests
3 | import pymysql
4 | from http.cookiejar import Cookie
5 | #the next three imports were added for getcookiefromchrome
6 | import os
7 | import sqlite3
8 | from win32.win32crypt import CryptUnprotectData
9 | #used to slow down the crawl
10 | import time
11 | from pymysql.times import Timestamp
12 |
13 | def getcookiefromchrome(host='.xueqiu.com'):
14 | cookiepath=os.environ['LOCALAPPDATA']+r"\Google\Chrome\User Data\Default\Cookies"
15 | sql="select host_key,name,encrypted_value from cookies where host_key='%s'" % host
16 | with sqlite3.connect(cookiepath) as conn:
17 | cu=conn.cursor()
18 | cookies={name:CryptUnprotectData(encrypted_value)[1].decode() for host_key,name,encrypted_value in cu.execute(sql).fetchall()}
19 | #print(cookies)
20 | return cookies
21 |
22 |
23 | # mysql_conn wraps the database access so the connection is opened only once
24 | #
25 | class mysql_conn(object):
26 | # magic method: constructor
27 | def __init__(self):
28 | self.db = pymysql.connect(host="localhost",user="test",password="test",db="stock",port=3311, charset="utf8mb4" )
29 | self.cursor = self.db.cursor()
30 | # execute modification (INSERT/UPDATE) statements and commit
31 | def execute_modify_mysql(self, sql):
32 | self.cursor.execute(sql)
33 | self.db.commit()
34 | # run a SELECT and return the full result set
35 | def execute_select(self,sql):
36 | result=[]
37 | self.cursor.execute(sql)
38 | result = self.cursor.fetchall()
39 | return result
40 |
41 | # magic method: destructor
42 | def __del__(self):
43 | self.cursor.close()
44 | self.db.close()
45 |
46 | # the site rejects bare requests, so send browser-like headers
47 | headers = {
48 | 'Accept': 'application/json, text/javascript, */*; q=0.01',
49 | 'Accept-Encoding': 'gzip, deflate, br',
50 | 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
51 | 'cache-control': 'no-cache',
52 | 'Connection': 'keep-alive',
53 | 'Host': 'xueqiu.com',
54 | 'Referer': 'https://xueqiu.com/S',
55 | 'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
56 | 'X-Requested-With': 'XMLHttpRequest'
57 | }
58 |
59 | # request URL template used below
60 | url = 'https://xueqiu.com/stock/f10/finmainindex.json?symbol={symbol}&page=1&size=100&_={timestamp}'
61 |
62 | ##company base info endpoint
63 | #'https://xueqiu.com/stock/f10/compinfo.json?symbol={symbol}&page=1&size=4&_={timestamp}'
64 |
65 | #stock list endpoint
66 | #https://xueqiu.com/stock/cata/stocklist.json?page=1&size=30&order=desc&orderby=percent&type=11%2C12&_=1538234389909
67 |
68 | #connect to the database
69 | mc = mysql_conn()
70 | stock_symbol_list = mc.execute_select('select a.symbol from stocks a LEFT JOIN finmain_log b on a.symbol=b.compcode where b.compcode is null order by a.symbol asc')
71 | stock_count=0
72 |
73 | #fetch the key financial indicators for every stock
74 | for (stock_symbol,) in stock_symbol_list:
75 |
76 | stock_count=stock_count+1
77 | timestp=int(round(time.time() * 1000))
78 | #throttle the crawl if needed
79 | #time.sleep(0.3)
80 |
81 | print('stock #%d, %s'%(stock_count,stock_symbol))
82 |
83 | response = requests.get(url.format(symbol=stock_symbol,timestamp=timestp), headers=headers,cookies=getcookiefromchrome('.xueqiu.com'))
84 |
85 | #retry until the request succeeds
86 | while response.status_code != requests.codes.ok:
87 | print('retrying stock #%d, %s'%(stock_count,stock_symbol))
88 | time.sleep(0.3)
89 | timestp=int(round(time.time() * 1000))
90 | response = requests.get(url.format(symbol=stock_symbol,timestamp=timestp), headers=headers,cookies=getcookiefromchrome('.xueqiu.com'))
91 |
92 | else:
93 | pass
94 | #store the data
95 | res_dict = json.loads(response.text.replace('null', '0'))
96 | reps = res_dict['list']
97 | #print(reps)
98 | #
99 | data = {}
100 | for reps_item in reps:
101 | try:
102 | #replace Xueqiu's internal company id with the stock symbol
103 | reps_item['compcode']=stock_symbol
104 | sql = 'insert into finmain(compcode,reportdate,basiceps,epsdiluted,epsweighted,naps,opercashpershare,peropecashpershare,netassgrowrate,dilutedroe,weightedroe,mainbusincgrowrate,netincgrowrate,totassgrowrate,salegrossprofitrto,mainbusiincome,mainbusiprofit,totprofit,netprofit,totalassets,totalliab,totsharequi,operrevenue,invnetcashflow,finnetcflow,chgexchgchgs,cashnetr,cashequfinbal)\
105 | values("{compcode}","{reportdate}","{basiceps}","{epsdiluted}","{epsweighted}","{naps}","{opercashpershare}","{peropecashpershare}","{netassgrowrate}","{dilutedroe}","{weightedroe}","{mainbusincgrowrate}","{netincgrowrate}","{totassgrowrate}","{salegrossprofitrto}","{mainbusiincome}","{mainbusiprofit}","{totprofit}","{netprofit}","{totalassets}","{totalliab}","{totsharequi}","{operrevenue}","{invnetcashflow}","{finnetcflow}","{chgexchgchgs}","{cashnetr}","{cashequfinbal}")\
106 | on duplicate key update basiceps="{basiceps}",epsdiluted="{epsdiluted}",epsweighted="{epsweighted}",naps="{naps}",opercashpershare="{opercashpershare}",peropecashpershare="{peropecashpershare}",netassgrowrate="{netassgrowrate}",dilutedroe="{dilutedroe}",weightedroe="{weightedroe}",mainbusincgrowrate="{mainbusincgrowrate}",netincgrowrate="{netincgrowrate}",totassgrowrate="{totassgrowrate}",salegrossprofitrto="{salegrossprofitrto}",mainbusiincome="{mainbusiincome}",mainbusiprofit="{mainbusiprofit}",totprofit="{totprofit}",netprofit="{netprofit}",totalassets="{totalassets}",totalliab="{totalliab}",totsharequi="{totsharequi}",operrevenue="{operrevenue}",invnetcashflow="{invnetcashflow}",finnetcflow="{finnetcflow}",chgexchgchgs="{chgexchgchgs}",cashnetr="{cashnetr}",cashequfinbal="{cashequfinbal}"'.format(**reps_item)
107 | #print(sql)
108 | mc.execute_modify_mysql(sql)
109 | print('%s*%s financial report scraped successfully' %(stock_symbol,reps_item['reportdate']))
110 | sql = 'insert into finmain_log (compcode,reportdate,timestamp) \
111 | values("{compcode}","{reportdate}",CURRENT_TIMESTAMP)'.format(**reps_item)
112 | mc.execute_modify_mysql(sql)
113 | except Exception as e :
114 | print('the record above failed and was not saved to the database')
115 | print(e)
116 | print('-' * 50)
117 | print('-' * 50)
118 |
--------------------------------------------------------------------------------
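The retry-until-HTTP-200 loop above also appears in base_info.py, hq.py and kline.py. A small sketch of that shared pattern factored into a helper, hypothetical rather than part of the repository, which any of those loops could call:

import time
import requests

def get_with_retry(url, headers, cookies=None, delay=0.3):
    """Re-issue the GET request until the server answers with HTTP 200."""
    response = requests.get(url, headers=headers, cookies=cookies)
    while response.status_code != requests.codes.ok:
        time.sleep(delay)
        response = requests.get(url, headers=headers, cookies=cookies)
    return response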
/src/getcookies.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sqlite3
3 | import requests
4 | from win32.win32crypt import CryptUnprotectData
5 | #import win32crypt
6 |
7 | def getcookiefromchrome(host='.xueqiu.com'):
8 | cookiepath=os.environ['LOCALAPPDATA']+r"\Google\Chrome\User Data\Default\Cookies"
9 | sql="select host_key,name,encrypted_value from cookies where host_key='%s'" % host
10 | with sqlite3.connect(cookiepath) as conn:
11 | cu=conn.cursor()
12 | cookies={name:CryptUnprotectData(encrypted_value)[1].decode() for host_key,name,encrypted_value in cu.execute(sql).fetchall()}
13 | print(cookies)
14 | return cookies
15 |
16 |
17 | #getcookiefromchrome()
18 | #getcookiefromchrome('.baidu.com')
19 |
20 | url='http://www.xueqiu.com/'
21 |
22 | httphead={'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',}
23 |
24 |
25 | r=requests.get(url,headers=httphead,cookies=getcookiefromchrome('.xueqiu.com'),allow_redirects=1)
26 | print(r.text)
--------------------------------------------------------------------------------
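One caveat for getcookiefromchrome: Chrome keeps its Cookies database locked while the browser is running, so sqlite3.connect on the live file can fail with "database is locked". A hedged workaround sketch (the helper name and copy path are hypothetical) that queries a temporary copy instead:

import os
import shutil
import sqlite3
import tempfile

def open_cookie_db(cookiepath):
    """Copy Chrome's Cookies file to a temp location and open the copy."""
    tmp = os.path.join(tempfile.gettempdir(), 'Cookies_copy')
    shutil.copyfile(cookiepath, tmp)
    return sqlite3.connect(tmp)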
/src/hq.py:
--------------------------------------------------------------------------------
1 | import json
2 | import requests
3 | import pymysql
4 | from http.cookiejar import Cookie
5 | #the next three imports were added for getcookiefromchrome
6 | import os
7 | import sqlite3
8 | from win32.win32crypt import CryptUnprotectData
9 | import time
10 | from pymysql.times import Timestamp
11 |
12 | def getcookiefromchrome(host='.xueqiu.com'):
13 | cookiepath=os.environ['LOCALAPPDATA']+r"\Google\Chrome\User Data\Default\Cookies"
14 | sql="select host_key,name,encrypted_value from cookies where host_key='%s'" % host
15 | with sqlite3.connect(cookiepath) as conn:
16 | cu=conn.cursor()
17 | cookies={name:CryptUnprotectData(encrypted_value)[1].decode() for host_key,name,encrypted_value in cu.execute(sql).fetchall()}
18 | #print(cookies)
19 | return cookies
20 |
21 |
22 | # mysql_conn wraps the database access so the connection is opened only once
23 | #
24 | class mysql_conn(object):
25 | # magic method: constructor
26 | def __init__(self):
27 | self.db = pymysql.connect(host="localhost",user="test",password="test",db="stock",port=3311, charset="utf8mb4" )
28 | self.cursor = self.db.cursor()
29 | # execute modification (INSERT/UPDATE) statements and commit
30 | def execute_modify_mysql(self, sql):
31 | self.cursor.execute(sql)
32 | self.db.commit()
33 | # magic method: destructor
34 | def __del__(self):
35 | self.cursor.close()
36 | self.db.close()
37 |
38 |
39 |
40 |
41 | # the site rejects bare requests, so send browser-like headers
42 | headers = {
43 | #'Accept': '*/*',
44 | #'Accept-Encoding': 'gzip, deflate, br',
45 | #'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
46 | #'Connection': 'keep-alive',
47 | #'Cookie': 'aliyungf_tc=AQAAALoQF3p02gsAUhVFebQ3uBBNZn+H; xq_a_token=584d0cf8d5a5a9809761f2244d8d272bac729ed4; xq_a_token.sig=x0gT9jm6qnwd-ddLu66T3A8KiVA; xq_r_token=98f278457fc4e1e5eb0846e36a7296e642b8138a; xq_r_token.sig=2Uxv_DgYTcCjz7qx4j570JpNHIs; _ga=GA1.2.516718356.1534295265; _gid=GA1.2.1050085592.1534295265; u=301534295266356; device_id=f5c21e143ce8060c74a2de7cbcddf0b8; Hm_lvt_1db88642e346389874251b5a1eded6e3=1534295265,1534295722; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1534295722',
48 | #'Cookie':'Hm_lpvt_1db88642e346389874251b5a1eded6e3=1538206459;Hm_lvt_1db88642e346389874251b5a1eded6e3=1538120732,1538144989,1538206010,1538206459', '_ga': 'GA1.2.1715450264.1532421729', '_gid': 'GA1.2.1367684432.1537964115', 'bid': '9bff933477b6c75b2ff40032e613edb6_jjzgesh9', 'device_id': '61f5d2eff7db22470fda980ead33cda9', 'remember': '1', 'remember.sig': 'K4F3faYzmVuqC0iXIERCQf55g2Y', 's': 'er17v6p058', 'snbim_minify': 'true', 'u': '1781168269', 'u.sig': 'cMmZfQkGyfjC5lehGsI4jsHDp-w', 'xq_a_token': '8a8848e34abe1b04ab2fb720b9d124b2368ec1b4', 'xq_a_token.sig': 'gUjJ-JIAMsQ2dcAIqZKMZbpclYU', 'xq_is_login': '1', 'xq_is_login.sig': 'J3LxgPVPUzbBg3Kee_PquUfih7Q', 'xq_r_token': '2827c657061f1072f18dd4208a8e548799fdf31b', 'xq_r_token.sig': 'y3_9YXXKVvXnZeppIJoOCI923S4'}'
49 | 'Host': 'xueqiu.com',
50 | 'Referer': 'https://xueqiu.com/hq',
51 | 'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
52 |
53 | #'X-Requested-With': 'XMLHttpRequest',
54 | }
55 |
56 |
57 | url = 'https://xueqiu.com/stock/quote_order.json?page={page}&size=90&order=asc&exchange=CN&stockType={stocktype}&column=symbol%2Cname%2Ccurrent%2Cchg%2Cpercent%2Clast_close%2Copen%2Chigh%2Clow%2Cvolume%2Camount%2Cmarket_capital%2Cpe_ttm%2Chigh52w%2Clow52w%2Chasexist&orderBy=symbol&_={timestamp}'
58 |
59 | #https://xueqiu.com/stock/quote_order.json?page=1&size=90&order=desc&exchange=CN&stockType=sza&column=symbol%2Cname%2Ccurrent%2Cchg%2Cpercent%2Clast_close%2Copen%2Chigh%2Clow%2Cvolume%2Camount%2Cmarket_capital%2Cpe_ttm%2Chigh52w%2Clow52w%2Chasexist&orderBy=percent&_=1538830252423
60 |
61 | #fetch stock symbols and real-time quotes
62 |
63 | data={}
64 | stock_count=0 #counter
65 | mc = mysql_conn() #database connection
66 | for stock_type in ('sha','sza'):
67 | timestp=int(round(time.time() * 1000))
68 | response = requests.get(url.format(page=1,stocktype=stock_type,timestamp=timestp), headers=headers,cookies=getcookiefromchrome('.xueqiu.com'))
69 | res_dict = json.loads(response.text)
70 |
71 | stock_count=0 #counter
72 | #total number of stocks of this exchange type
73 | count_val = int(res_dict['count'])
74 | pagemax=count_val//90
75 | #use the line below when the count in the JSON is already numeric
76 | #count_val = res_dict['count']
77 |
78 |
79 | for p in range(1,pagemax+2):
80 | timestp=int(round(time.time() * 1000))
81 | response = requests.get(url.format(page=p,stocktype=stock_type,timestamp=timestp), headers=headers,cookies=getcookiefromchrome('.xueqiu.com'))
82 | while response.status_code != requests.codes.ok:
83 | print('retrying page %d'%(p))
84 | time.sleep(0.3)
85 | timestp=int(round(time.time() * 1000))
86 | response = requests.get(url.format(page=p,stocktype=stock_type,timestamp=timestp), headers=headers,cookies=getcookiefromchrome('.xueqiu.com'))
87 |
88 | else:
89 | pass
90 |
91 | print('page--%d %s ' %(p,stock_type))
92 | #replace null with 0 to avoid errors
93 | res_dict = json.loads(response.text.replace('null', '0'))
94 | #print(res_dict)
95 | stock_list = res_dict['data']
96 | for stock_list_item in stock_list:
97 | #fields are picked out of the list by position
98 | data['symbol']=stock_list_item[0]
99 | data['code']=stock_list_item[0]
100 | data['name'] = stock_list_item[1]
101 | data['current']=stock_list_item[2]
102 | data['percent']=stock_list_item[4]
103 | data['high52w'] = stock_list_item[13]
104 | data['low52w'] = stock_list_item[14]
105 | data['marketcapital']=stock_list_item[11]
106 | data['amount']=stock_list_item[10]
107 | data['volume']=stock_list_item[9]
108 | data['pe_ttm']=stock_list_item[12]
109 |
110 | try:
111 | sql = 'insert into stocks(symbol,compcode,compsname,current,percent,high52w,low52w,marketcapital,amount,volume,pe_ttm) \
112 | values("{symbol}","{code}","{name}","{current}","{percent}","{high52w}","{low52w}","{marketcapital}","{amount}","{volume}","{pe_ttm}")\
113 | on duplicate key update current="{current}",percent="{percent}",high52w="{high52w}",low52w="{low52w}",marketcapital="{marketcapital}",amount="{amount}",volume="{volume}",pe_ttm="{pe_ttm}",timestamp=CURRENT_TIMESTAMP'.format(**data)
114 |
115 | mc.execute_modify_mysql(sql)
116 | stock_count=stock_count+1
117 | print('%s: %d in total, #%d fetched: %s*%s scraped successfully' %(stock_type,count_val,stock_count,data['code'],data['name']))
118 | #print('-' * 50)
119 | except Exception as e :
120 | print('the record above failed and was not saved to the database')
121 | print('-' * 50)
122 | print(e)
123 | #close the database connection
124 | del mc
125 |
--------------------------------------------------------------------------------
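quote_order.json returns each stock as a flat list whose element order matches the column parameter in the request URL, which is why hq.py picks values out of stock_list_item by position. A short sketch (column names copied from the URL above, helper name hypothetical) that zips a row into a dict so the positional bookkeeping is explicit:

# Column order exactly as requested in the column= parameter of the quote_order URL.
COLUMNS = ['symbol', 'name', 'current', 'chg', 'percent', 'last_close', 'open',
           'high', 'low', 'volume', 'amount', 'market_capital', 'pe_ttm',
           'high52w', 'low52w', 'hasexist']

def row_to_dict(row):
    """Map one quote_order data row onto the requested column names."""
    return dict(zip(COLUMNS, row))

# e.g. item = row_to_dict(stock_list_item); item['symbol'], item['pe_ttm'], ...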
/src/kline.py:
--------------------------------------------------------------------------------
1 | import json
2 | import requests
3 | import pymysql
4 | from http.cookiejar import Cookie
5 | #the next three imports were added for getcookiefromchrome
6 | import os
7 | import sqlite3
8 | from win32.win32crypt import CryptUnprotectData
9 | import time
10 | from pymysql.times import Timestamp
11 |
12 |
13 | def getcookiefromchrome(host='.xueqiu.com'):
14 | cookiepath=os.environ['LOCALAPPDATA']+r"\Google\Chrome\User Data\Default\Cookies"
15 | sql="select host_key,name,encrypted_value from cookies where host_key='%s'" % host
16 | with sqlite3.connect(cookiepath) as conn:
17 | cu=conn.cursor()
18 | cookies={name:CryptUnprotectData(encrypted_value)[1].decode() for host_key,name,encrypted_value in cu.execute(sql).fetchall()}
19 | #print(cookies)
20 | return cookies
21 |
22 |
23 | # mysql_conn wraps the database access so the connection is opened only once
24 | #
25 | class mysql_conn(object):
26 | # magic method: constructor
27 | def __init__(self):
28 | self.db = pymysql.connect(host="localhost",user="test",password="test",db="stock",port=3311, charset="utf8mb4" )
29 | self.cursor = self.db.cursor()
30 | # execute modification (INSERT/UPDATE) statements; commit separately via execute_commit
31 | def execute_modify_mysql(self, sql):
32 | self.cursor.execute(sql)
33 | def execute_commit(self):
34 | self.db.commit()
35 | # run a SELECT and return the full result set
36 | def execute_select(self,sql):
37 | result=[]
38 | self.cursor.execute(sql)
39 | result = self.cursor.fetchall()
40 | return result
41 | # magic method: destructor
42 | def __del__(self):
43 | self.cursor.close()
44 | self.db.close()
45 |
46 |
47 |
48 | headers = {
49 | 'Accept': 'application/json, text/javascript, */*; q=0.01',
50 | 'Accept-Encoding': 'gzip, deflate, br',
51 | 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
52 | 'cache-control': 'no-cache',
53 | 'Connection': 'keep-alive',
54 | 'Host': 'stock.xueqiu.com',
55 | 'Referer': 'https://xueqiu.com/S',
56 | 'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
57 | 'X-Requested-With': 'XMLHttpRequest'
58 | }
59 |
60 |
61 | #daily K-line (candlestick) data
62 | #symbol=stock code
63 | #begin=start time
64 | #end=end time
65 | #period=day, week, month, quarter or year
66 | #type=before, after or normal (forward-adjusted, backward-adjusted, unadjusted)
67 | #indicator=kline, ma, macd, kdj, boll, rsi, wr, bias, cci, psy (kline=candles, ma=moving averages)
68 |
69 |
70 | url = 'https://stock.xueqiu.com/v5/stock/chart/kline.json?symbol={symbol}&begin=600000000000&end={timestamp}&period=day&type=before&indicator=kline'
71 |
72 |
73 | #fetch daily K-line history for every stock
74 |
75 | #connect to the database
76 | mc = mysql_conn()
77 | stock_symbol_list = mc.execute_select('select a.symbol from stocks a LEFT JOIN kline_log b on a.symbol=b.symbol where b.symbol is null order by a.symbol asc')
78 | stock_count=0
79 |
80 |
81 | #fetch daily K-line data for each stock
82 | for (stock_symbol,) in stock_symbol_list:
83 |
84 | stock_count=stock_count+1
85 | timestp=int(round(time.time() * 1000))
86 | #throttle the crawl if needed
87 | #time.sleep(0.3)
88 |
89 | print('stock #%d, %s - fetching daily K-line data...'%(stock_count,stock_symbol))
90 |
91 | response = requests.get(url.format(symbol=stock_symbol,timestamp=timestp), headers=headers,cookies=getcookiefromchrome('.xueqiu.com'))
92 | #print(response.status_code)
93 | #retry until the request succeeds
94 | while response.status_code != requests.codes.ok:
95 | print('retrying stock #%d, %s'%(stock_count,stock_symbol))
96 | time.sleep(0.3)
97 | timestp=int(round(time.time() * 1000))
98 | response = requests.get(url.format(symbol=stock_symbol,timestamp=timestp), headers=headers,cookies=getcookiefromchrome('.xueqiu.com'))
99 | else:
100 | #store the data
101 | pass
102 |
103 | res_dict = json.loads(response.text)
104 | kline_json = res_dict['data']
105 | error_code=res_dict['error_code']
106 | error_description=res_dict['error_description']
107 |
108 | #print(kline_json['symbol'])
109 | #print(kline_json['column'])
110 | #print(kline_json['item'][0])
111 |
112 | for kline_item in kline_json['item']:
113 | #print(kline_item)
114 | data={}
115 | data['symbol']=stock_symbol
116 | data['timestamp']=kline_item[0]/1000
117 | data['volume']=kline_item[1]
118 | data['open']=round(kline_item[2],2)
119 | data['high']=round(kline_item[3],2)
120 | data['low']=round(kline_item[4],2)
121 | data['close']=round(kline_item[5],2)
122 | data['chg']=round(kline_item[6],2)
123 | data['percent']=round(kline_item[7],2)
124 | data['turnoverrate']=round(kline_item[8],2) #turnover rate
125 | data['period']='day' #daily bars
126 | data['type']='before' #forward-adjusted
127 |
128 | try:
129 | sql = 'insert into kline(symbol,timestamp,volume,open,high,low,close,chg,percent,turnoverrate,period,type) \
130 | values("{symbol}", from_unixtime("{timestamp}"),"{volume}","{open}","{high}","{low}","{close}","{chg}","{percent}","{turnoverrate}","{period}","{type}") \
131 | on duplicate key update volume="{volume}",open="{open}",high="{high}",low="{low}",close="{close}",chg="{chg}",percent="{percent}",turnoverrate="{turnoverrate}",period="{period}",type="{type}"'.format(**data)
132 | #print(sql)
133 | mc.execute_modify_mysql(sql)
134 | #print('stock #%d: %s %s daily bar scraped' %(stock_count,stock_symbol,time.strftime('%Y-%m-%d',time.localtime(data['timestamp']))))
135 | except Exception as e :
136 | print('the record above failed and was not saved to the database')
137 | print('-' * 50)
138 | print(e)
139 | #log that this stock's K-line history has been collected
140 | sql = 'insert into kline_log (symbol,timestamp) values("%s",CURRENT_TIMESTAMP)'%(stock_symbol)
141 | mc.execute_modify_mysql(sql)
142 | mc.execute_commit() #commit once per fully collected stock
143 | #close the database connection
144 | print("OVER")
145 | del mc
146 |
--------------------------------------------------------------------------------
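The first element of every kline item is an epoch timestamp in milliseconds; kline.py divides it by 1000 and lets MySQL's from_unixtime turn it into a datetime, and the commented-out print inside the loop does the same conversion with time.strftime. A tiny sketch of that conversion on its own, assuming the millisecond convention holds:

import time

def kline_date(epoch_ms):
    """Convert the millisecond timestamp in kline_item[0] to 'YYYY-MM-DD'."""
    return time.strftime('%Y-%m-%d', time.localtime(epoch_ms / 1000))

# e.g. kline_date(1538236800000) -> '2018-09-30' on a machine in UTC+8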
/src/test_tushare.py:
--------------------------------------------------------------------------------
1 | from pylab import *
2 | import matplotlib.gridspec as gridspec
3 |
4 | G = gridspec.GridSpec(3, 3)
5 |
6 | axes_1 = subplot(G[0, :])
7 | xticks([]), yticks([])
8 | text(0.5,0.5, 'Axes 1',ha='center',va='center',size=24,alpha=.5)
9 |
10 | axes_2 = subplot(G[1,:-1])
11 | xticks([]), yticks([])
12 | text(0.5,0.5, 'Axes 2',ha='center',va='center',size=24,alpha=.5)
13 |
14 | axes_3 = subplot(G[1:, -1])
15 | xticks([]), yticks([])
16 | text(0.5,0.5, 'Axes 3',ha='center',va='center',size=24,alpha=.5)
17 |
18 | axes_4 = subplot(G[-1,0])
19 | xticks([]), yticks([])
20 | text(0.5,0.5, 'Axes 4',ha='center',va='center',size=24,alpha=.5)
21 |
22 | axes_5 = subplot(G[-1,-2])
23 | xticks([]), yticks([])
24 | text(0.5,0.5, 'Axes 5',ha='center',va='center',size=24,alpha=.5)
25 |
26 | #plt.savefig('../figures/gridspec.png', dpi=64)
27 | show()
--------------------------------------------------------------------------------
/src/xueqiu_test.py:
--------------------------------------------------------------------------------
1 | import json
2 | import requests
3 | import pymysql
4 | from http.cookiejar import Cookie
5 | #the next three imports were added for getcookiefromchrome
6 | import os
7 | import sqlite3
8 | from win32.win32crypt import CryptUnprotectData
9 | #import win32crypt
10 |
11 | def getcookiefromchrome(host='.xueqiu.com'):
12 | cookiepath=os.environ['LOCALAPPDATA']+r"\Google\Chrome\User Data\Default\Cookies"
13 | sql="select host_key,name,encrypted_value from cookies where host_key='%s'" % host
14 | with sqlite3.connect(cookiepath) as conn:
15 | cu=conn.cursor()
16 | cookies={name:CryptUnprotectData(encrypted_value)[1].decode() for host_key,name,encrypted_value in cu.execute(sql).fetchall()}
17 | #print(cookies)
18 | return cookies
19 |
20 |
21 | # mysql_conn wraps the database access so the connection is opened only once
22 | #
23 | class mysql_conn(object):
24 | # magic method: constructor
25 | def __init__(self):
26 | self.db = pymysql.connect(host="localhost",user="test",password="test",db="stocks",port=3311, charset="utf8mb4" )
27 | self.cursor = self.db.cursor()
28 | # execute modification (INSERT/UPDATE) statements and commit
29 | def execute_modify_mysql(self, sql):
30 | self.cursor.execute(sql)
31 | self.db.commit()
32 | # magic method: destructor
33 | def __del__(self):
34 | self.cursor.close()
35 | self.db.close()
36 |
37 |
38 |
39 |
40 | # the site rejects bare requests, so send browser-like headers
41 | headers = {
42 | #'Accept': '*/*',
43 | #'Accept-Encoding': 'gzip, deflate, br',
44 | #'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
45 | #'Connection': 'keep-alive',
46 | #'Cookie': 'aliyungf_tc=AQAAALoQF3p02gsAUhVFebQ3uBBNZn+H; xq_a_token=584d0cf8d5a5a9809761f2244d8d272bac729ed4; xq_a_token.sig=x0gT9jm6qnwd-ddLu66T3A8KiVA; xq_r_token=98f278457fc4e1e5eb0846e36a7296e642b8138a; xq_r_token.sig=2Uxv_DgYTcCjz7qx4j570JpNHIs; _ga=GA1.2.516718356.1534295265; _gid=GA1.2.1050085592.1534295265; u=301534295266356; device_id=f5c21e143ce8060c74a2de7cbcddf0b8; Hm_lvt_1db88642e346389874251b5a1eded6e3=1534295265,1534295722; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1534295722',
47 | #'Cookie':'Hm_lpvt_1db88642e346389874251b5a1eded6e3=1538206459;Hm_lvt_1db88642e346389874251b5a1eded6e3=1538120732,1538144989,1538206010,1538206459', '_ga': 'GA1.2.1715450264.1532421729', '_gid': 'GA1.2.1367684432.1537964115', 'bid': '9bff933477b6c75b2ff40032e613edb6_jjzgesh9', 'device_id': '61f5d2eff7db22470fda980ead33cda9', 'remember': '1', 'remember.sig': 'K4F3faYzmVuqC0iXIERCQf55g2Y', 's': 'er17v6p058', 'snbim_minify': 'true', 'u': '1781168269', 'u.sig': 'cMmZfQkGyfjC5lehGsI4jsHDp-w', 'xq_a_token': '8a8848e34abe1b04ab2fb720b9d124b2368ec1b4', 'xq_a_token.sig': 'gUjJ-JIAMsQ2dcAIqZKMZbpclYU', 'xq_is_login': '1', 'xq_is_login.sig': 'J3LxgPVPUzbBg3Kee_PquUfih7Q', 'xq_r_token': '2827c657061f1072f18dd4208a8e548799fdf31b', 'xq_r_token.sig': 'y3_9YXXKVvXnZeppIJoOCI923S4'}'
48 | #'Host': 'xueqiu.com',
49 | #'Referer': 'https://xueqiu.com/',
50 | 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
51 | #'X-Requested-With': 'XMLHttpRequest',
52 | #'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
53 | }
54 |
55 | # request URL used below
56 | url = 'https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id=-1&count=10&category=111'
57 | #"https://xueqiu.com/stock/f10/compinfo.json?symbol=SZ000001"
58 |
59 |
60 | response = requests.get(url, headers=headers,cookies=getcookiefromchrome('.xueqiu.com'))
61 | res_dict = json.loads(response.text)
62 |
63 | print(res_dict)
64 |
65 | list_list = res_dict['list']
66 | #print(list_list)
67 | # iterate over list_list
68 | data = {}
69 | for list_item_dict in list_list:
70 | # each item in the list is a dict whose 'data' field is itself a JSON string
71 | data_str = list_item_dict['data']
72 | data_dict = json.loads(data_str)
73 | data['ids'] = data_dict['id']
74 | data['title'] = data_dict['title']
75 | data['description'] = data_dict['description']
76 | data['target'] = data_dict['target']
77 | print(data_dict['id'])
78 | print(data_dict['title'])
79 | print(data_dict['description'])
80 | print(data_dict['target'])
81 |
82 | # print(list_item_dict)
83 | try:
84 | sql = 'insert into xueqiu(ids,title,description,target) values("{ids}","{title}","{description}","{target}")'.format(**data)
85 | mc = mysql_conn()
86 | mc.execute_modify_mysql(sql)
87 | print('the record above was scraped successfully')
88 | print('-' * 50)
89 | except:
90 | print('the record above failed and was not saved to the database')
91 | print('-' * 50)
92 |
--------------------------------------------------------------------------------