├── .project
├── .pydevproject
├── .settings
│   └── org.eclipse.core.resources.prefs
├── README.md
└── src
    ├── base_info.py
    ├── finmain.py
    ├── getcookies.py
    ├── hq.py
    ├── kline.py
    ├── test_tushare.py
    └── xueqiu_test.py

/.project:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <projectDescription>
3 | 	<name>stock</name>
4 | 	<comment></comment>
5 | 	<projects>
6 | 	</projects>
7 | 	<buildSpec>
8 | 		<buildCommand>
9 | 			<name>org.python.pydev.PyDevBuilder</name>
10 | 			<arguments>
11 | 			</arguments>
12 | 		</buildCommand>
13 | 	</buildSpec>
14 | 	<natures>
15 | 		<nature>org.python.pydev.pythonNature</nature>
16 | 	</natures>
17 | </projectDescription>
18 | 
--------------------------------------------------------------------------------
/.pydevproject:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="no"?>
2 | <?eclipse-pydev version="1.0"?><pydev_project>
3 | <pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
4 | <pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python interpreter</pydev_property>
5 | <pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
6 | <path>/${PROJECT_DIR_NAME}/src</path>
7 | </pydev_pathproperty>
8 | </pydev_project>
9 | 
--------------------------------------------------------------------------------
/.settings/org.eclipse.core.resources.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | encoding//src/base_info.py=UTF-8
3 | encoding//src/finmain.py=UTF-8
4 | encoding//src/getcookies.py=UTF-8
5 | encoding//src/hello.py=UTF-8
6 | encoding//src/hq.py=UTF-8
7 | encoding//src/test_tushare.py=utf-8
8 | encoding//src/xueqiu_test.py=UTF-8
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # stock
2 | Use Python to scrape stock data from the Xueqiu website (stock symbols, company fundamentals, financial data, daily K-line data, ex-rights information, and so on) and store it in a local MySQL database for convenient analysis.
--------------------------------------------------------------------------------
/src/base_info.py:
--------------------------------------------------------------------------------
1 | import json
2 | import requests
3 | import pymysql
4 | from http.cookiejar import Cookie
5 | # the next three imports were added for getcookiefromchrome
6 | import os
7 | import sqlite3
8 | from win32.win32crypt import CryptUnprotectData
9 | # used to throttle the crawl speed
10 | import time
11 | from pymysql.times import Timestamp
12 | from goto import with_goto
13 | #import win32crypt
14 | 
15 | 
16 | 
17 | 
18 | 
19 | def getcookiefromchrome(host='.xueqiu.com'):
20 |     cookiepath=os.environ['LOCALAPPDATA']+r"\Google\Chrome\User Data\Default\Cookies"
21 |     sql="select host_key,name,encrypted_value from cookies where host_key='%s'" % host
22 |     with sqlite3.connect(cookiepath) as conn:
23 |         cu=conn.cursor()
24 |         cookies={name:CryptUnprotectData(encrypted_value)[1].decode() for host_key,name,encrypted_value in cu.execute(sql).fetchall()}
25 |         #print(cookies)
26 |         return cookies
27 | 
28 | 
29 | # mysql_conn wraps the database connection so that it is opened only once
30 | #
31 | class mysql_conn(object):
32 |     # constructor: open the connection and create a cursor
33 |     def __init__(self):
34 |         self.db = pymysql.connect(host="localhost",user="test",password="test",db="stock",port=3311, charset="utf8mb4" )
35 |         self.cursor = self.db.cursor()
36 |     # execute INSERT/UPDATE statements and commit
37 |     def execute_modify_mysql(self, sql):
38 |         self.cursor.execute(sql)
39 |         self.db.commit()
40 |     # run a SELECT and return the full result set
41 |     def execute_select(self,sql):
42 |         result=[]
43 |         self.cursor.execute(sql)
44 |         result = self.cursor.fetchall()
45 |         return result
46 | 
47 |     # destructor: close the cursor and the connection
48 |     def __del__(self):
49 |         self.cursor.close()
50 |         self.db.close()
51 | 
52 | 
53 | 
54 | 
55 | # Xueqiu rejects anonymous requests, so send browser-like headers
56 | headers = {
57 |     'Accept': 'application/json, text/javascript, */*; q=0.01',
58 |     'Accept-Encoding': 'gzip, deflate, br',
59 |     'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
60 |     'cache-control': 'no-cache',
61 |     'Connection': 'keep-alive',
62 |     #'Cookie': 'aliyungf_tc=AQAAALoQF3p02gsAUhVFebQ3uBBNZn+H; 
xq_a_token=584d0cf8d5a5a9809761f2244d8d272bac729ed4; xq_a_token.sig=x0gT9jm6qnwd-ddLu66T3A8KiVA; xq_r_token=98f278457fc4e1e5eb0846e36a7296e642b8138a; xq_r_token.sig=2Uxv_DgYTcCjz7qx4j570JpNHIs; _ga=GA1.2.516718356.1534295265; _gid=GA1.2.1050085592.1534295265; u=301534295266356; device_id=f5c21e143ce8060c74a2de7cbcddf0b8; Hm_lvt_1db88642e346389874251b5a1eded6e3=1534295265,1534295722; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1534295722', 63 | #'Cookie':'Hm_lpvt_1db88642e346389874251b5a1eded6e3=1538206459;Hm_lvt_1db88642e346389874251b5a1eded6e3=1538120732,1538144989,1538206010,1538206459', '_ga': 'GA1.2.1715450264.1532421729', '_gid': 'GA1.2.1367684432.1537964115', 'bid': '9bff933477b6c75b2ff40032e613edb6_jjzgesh9', 'device_id': '61f5d2eff7db22470fda980ead33cda9', 'remember': '1', 'remember.sig': 'K4F3faYzmVuqC0iXIERCQf55g2Y', 's': 'er17v6p058', 'snbim_minify': 'true', 'u': '1781168269', 'u.sig': 'cMmZfQkGyfjC5lehGsI4jsHDp-w', 'xq_a_token': '8a8848e34abe1b04ab2fb720b9d124b2368ec1b4', 'xq_a_token.sig': 'gUjJ-JIAMsQ2dcAIqZKMZbpclYU', 'xq_is_login': '1', 'xq_is_login.sig': 'J3LxgPVPUzbBg3Kee_PquUfih7Q', 'xq_r_token': '2827c657061f1072f18dd4208a8e548799fdf31b', 'xq_r_token.sig': 'y3_9YXXKVvXnZeppIJoOCI923S4'}' 64 | 'Cookie':'device_id=61f5d2eff7db22470fda980ead33cda9; _ga=GA1.2.1715450264.1532421729; s=er17v6p058; bid=9bff933477b6c75b2ff40032e613edb6_jjzgesh9; remember=1; remember.sig=K4F3faYzmVuqC0iXIERCQf55g2Y; xq_a_token=8a8848e34abe1b04ab2fb720b9d124b2368ec1b4; xq_a_token.sig=gUjJ-JIAMsQ2dcAIqZKMZbpclYU; xq_r_token=2827c657061f1072f18dd4208a8e548799fdf31b; xq_r_token.sig=y3_9YXXKVvXnZeppIJoOCI923S4; xq_is_login=1; xq_is_login.sig=J3LxgPVPUzbBg3Kee_PquUfih7Q; u=1781168269; u.sig=cMmZfQkGyfjC5lehGsI4jsHDp-w; _gid=GA1.2.1367684432.1537964115; aliyungf_tc=AQAAAHiOFxzQvAkAnHQGcEZiz5DUijt0; snbim_minify=true; __utmc=1; __utmz=1.1538233687.73.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; Hm_lvt_1db88642e346389874251b5a1eded6e3=1538233575,1538233656,1538233685,1538276572; __utma=1.1715450264.1532421729.1538270997.1538281139.76; __utmt=1; __utmb=1.23.10.1538281139; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1538283036', 65 | 'Host': 'xueqiu.com', 66 | 'Referer': 'https://xueqiu.com/S', 67 | 'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36', 68 | 'X-Requested-With': 'XMLHttpRequest' 69 | } 70 | 71 | # urllib 的相关操作如下 72 | url = 'https://xueqiu.com/stock/f10/compinfo.json?symbol={symbol}&page=1&size=4&_={timestamp}' 73 | 74 | #股票列表 75 | #https://xueqiu.com/stock/cata/stocklist.json?page=1&size=30&order=desc&orderby=percent&type=11%2C12&_=1538234389909 76 | 77 | #股票基础信息 78 | #"https://xueqiu.com/stock/f10/compinfo.json?symbol=SZ000001" 79 | 80 | #连接数据库 81 | mc = mysql_conn() 82 | stock_symbol_list = mc.execute_select('select symbol from stocks order by symbol asc') 83 | stock_count=0 84 | 85 | 86 | #获取股票基础信息 87 | for (stock_symbol,) in stock_symbol_list: 88 | 89 | stock_count=stock_count+1 90 | timestp=int(round(time.time() * 1000)) 91 | #降低速度 92 | #time.sleep(0.3) 93 | 94 | print('第%d支股票,%s'%(stock_count,stock_symbol)) 95 | #print(url.format(symbol=stock_symbol,timestamp=timestp)) 96 | response = requests.get(url.format(symbol=stock_symbol,timestamp=timestp), headers=headers) 97 | #response = requests.get(url.format(symbol=stock_symbol,timestamp=timestp), headers=headers,cookies=getcookiefromchrome('.xueqiu.com')) 98 | #print(response.status_code) 99 | #重复读取,直到成功 100 | while response.status_code != requests.codes.ok: 101 | 
print('重复读取第%d支股票,%s'%(stock_count,stock_symbol)) 102 | time.sleep(0.3) 103 | timestp=int(round(time.time() * 1000)) 104 | response = requests.get(url.format(symbol=stock_symbol,timestamp=timestp), headers=headers) 105 | else: 106 | #存储数据 107 | res_dict = json.loads(response.text) 108 | #continue 109 | #print(res_dict) 110 | 111 | compinfo = res_dict['tqCompInfo'] 112 | #print(compinfo) 113 | # 114 | data = {} 115 | 116 | #data_dict = json.loads(data_str) 117 | data['compcode'] = stock_symbol 118 | data['compname'] = compinfo['compname'] 119 | data['engname'] = compinfo['engname'] 120 | data['founddate'] = compinfo['founddate'] 121 | data['regcapital'] = compinfo['regcapital'] 122 | data['chairman'] = compinfo['chairman'] 123 | data['manager'] = compinfo['manager'] 124 | data['leconstant'] = compinfo['leconstant'] 125 | data['accfirm'] = compinfo['accfirm'] 126 | data['regaddr'] = compinfo['regaddr'] 127 | data['officeaddr'] = compinfo['officeaddr'] 128 | data['compintro'] = compinfo['compintro'].replace('"',' ') 129 | data['bizscope'] = compinfo['bizscope'].replace('"',' ') 130 | data['majorbiz'] = compinfo['majorbiz'].replace('"',' ') 131 | data['compsname'] = compinfo['compsname'] 132 | data['region'] = compinfo['region'] 133 | #print(data) 134 | try: 135 | sql = 'insert into comp(compname,engname,founddate,regcapital,chairman,manager,leconstant,accfirm,regaddr,officeaddr,compintro,bizscope,majorbiz,compcode,compsname,region) \ 136 | values("{compname}","{engname}","{founddate}","{regcapital}","{chairman}","{manager}","{leconstant}","{accfirm}","{regaddr}","{officeaddr}","{compintro}","{bizscope}","{majorbiz}","{compcode}","{compsname}","{region}")\ 137 | on duplicate key update compname="{compname}",engname="{engname}",regcapital="{regcapital}",chairman="{chairman}",manager="{manager}",leconstant="{leconstant}",accfirm="{accfirm}",regaddr="{regaddr}",\ 138 | officeaddr="{officeaddr}",compintro="{compintro}",bizscope="{bizscope}",majorbiz="{majorbiz}",compsname="{compsname}",region="{region}",timestamp=CURRENT_TIMESTAMP'.format(**data) 139 | mc.execute_modify_mysql(sql) 140 | print('%s*%s 爬取成功' %(data['compcode'],data['compsname'])) 141 | #print('-' * 50) 142 | except Exception as e : 143 | print('以上内容出错,没有存到数据库') 144 | print('-' * 50) 145 | print(e) 146 | -------------------------------------------------------------------------------- /src/finmain.py: -------------------------------------------------------------------------------- 1 | import json 2 | import requests 3 | import pymysql 4 | from http.cookiejar import Cookie 5 | #下面三行为getcookie添加 6 | import os 7 | import sqlite3 8 | from win32.win32crypt import CryptUnprotectData 9 | #降低爬网速度 10 | import time 11 | from pymysql.times import Timestamp 12 | 13 | def getcookiefromchrome(host='.xueqiu.com'): 14 | cookiepath=os.environ['LOCALAPPDATA']+r"\Google\Chrome\User Data\Default\Cookies" 15 | sql="select host_key,name,encrypted_value from cookies where host_key='%s'" % host 16 | with sqlite3.connect(cookiepath) as conn: 17 | cu=conn.cursor() 18 | cookies={name:CryptUnprotectData(encrypted_value)[1].decode() for host_key,name,encrypted_value in cu.execute(sql).fetchall()} 19 | #print(cookies) 20 | return cookies 21 | 22 | 23 | # mysql_coon 主要的功能就是, 将链接数据库的操作变成只连接一次 24 | # 25 | class mysql_conn(object): 26 | # 魔术方法, 初始化, 构造函数 27 | def __init__(self): 28 | self.db = pymysql.connect(host="localhost",user="test",password="test",db="stock",port=3311, charset="utf8mb4" ) 29 | self.cursor = self.db.cursor() 30 | # 执行modify(修改)相关的操作 31 | def 
execute_modify_mysql(self, sql): 32 | self.cursor.execute(sql) 33 | self.db.commit() 34 | # 获取查询结果集 35 | def execute_select(self,sql): 36 | result=[] 37 | self.cursor.execute(sql) 38 | result = self.cursor.fetchall() 39 | return result 40 | 41 | # 魔术方法, 析构化 ,析构函数 42 | def __del__(self): 43 | self.cursor.close() 44 | self.db.close() 45 | 46 | # 因为不能访问, 所以我们加个头试试 47 | headers = { 48 | 'Accept': 'application/json, text/javascript, */*; q=0.01', 49 | 'Accept-Encoding': 'gzip, deflate, br', 50 | 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8', 51 | 'cache-control': 'no-cache', 52 | 'Connection': 'keep-alive', 53 | 'Host': 'xueqiu.com', 54 | 'Referer': 'https://xueqiu.com/S', 55 | 'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36', 56 | 'X-Requested-With': 'XMLHttpRequest' 57 | } 58 | 59 | # urllib 的相关操作如下 60 | url = 'https://xueqiu.com/stock/f10/finmainindex.json?symbol={symbol}&page=1&size=100_={timestamp}' 61 | 62 | ##股票基础信息 63 | #'https://xueqiu.com/stock/f10/compinfo.json?symbol={symbol}&page=1&size=4&_={timestamp}' 64 | 65 | #股票列表 66 | #https://xueqiu.com/stock/cata/stocklist.json?page=1&size=30&order=desc&orderby=percent&type=11%2C12&_=1538234389909 67 | 68 | #连接数据库 69 | mc = mysql_conn() 70 | stock_symbol_list = mc.execute_select('select a.symbol from stocks a LEFT JOIN finmain_log b on a.symbol=b.compcode where b.compcode is null order by a.symbol asc') 71 | stock_count=0 72 | 73 | #获取股票主要财务信息 74 | for (stock_symbol,) in stock_symbol_list: 75 | 76 | stock_count=stock_count+1 77 | timestp=int(round(time.time() * 1000)) 78 | #降低速度 79 | #time.sleep(0.3) 80 | 81 | print('第%d支股票,%s'%(stock_count,stock_symbol)) 82 | 83 | response = requests.get(url.format(symbol=stock_symbol,timestamp=timestp), headers=headers,cookies=getcookiefromchrome('.xueqiu.com')) 84 | 85 | #重复读取,直到成功 86 | while response.status_code != requests.codes.ok: 87 | print('重复读取第%d支股票,%s'%(stock_count,stock_symbol)) 88 | time.sleep(0.3) 89 | timestp=int(round(time.time() * 1000)) 90 | response = requests.get(url.format(symbol=stock_symbol,timestamp=timestp), headers=headers,cookies=getcookiefromchrome('.xueqiu.com')) 91 | 92 | else: 93 | pass 94 | #存储数据 95 | res_dict = json.loads(response.text.replace('null', '0')) 96 | reps = res_dict['list'] 97 | #print(reps) 98 | # 99 | data = {} 100 | for reps_item in reps: 101 | try: 102 | #用股票代码替换雪球内部编号 103 | reps_item['compcode']=stock_symbol 104 | sql = 'insert into finmain(compcode,reportdate,basiceps,epsdiluted,epsweighted,naps,opercashpershare,peropecashpershare,netassgrowrate,dilutedroe,weightedroe,mainbusincgrowrate,netincgrowrate,totassgrowrate,salegrossprofitrto,mainbusiincome,mainbusiprofit,totprofit,netprofit,totalassets,totalliab,totsharequi,operrevenue,invnetcashflow,finnetcflow,chgexchgchgs,cashnetr,cashequfinbal)\ 105 | values("{compcode}","{reportdate}","{basiceps}","{epsdiluted}","{epsweighted}","{naps}","{opercashpershare}","{peropecashpershare}","{netassgrowrate}","{dilutedroe}","{weightedroe}","{mainbusincgrowrate}","{netincgrowrate}","{totassgrowrate}","{salegrossprofitrto}","{mainbusiincome}","{mainbusiprofit}","{totprofit}","{netprofit}","{totalassets}","{totalliab}","{totsharequi}","{operrevenue}","{invnetcashflow}","{finnetcflow}","{chgexchgchgs}","{cashnetr}","{cashequfinbal}")\ 106 | on duplicate key update 
basiceps="{basiceps}",epsdiluted="{epsdiluted}",epsweighted="{epsweighted}",naps="{naps}",opercashpershare="{opercashpershare}",peropecashpershare="{peropecashpershare}",netassgrowrate="{netassgrowrate}",dilutedroe="{dilutedroe}",weightedroe="{weightedroe}",mainbusincgrowrate="{mainbusincgrowrate}",netincgrowrate="{netincgrowrate}",totassgrowrate="{totassgrowrate}",salegrossprofitrto="{salegrossprofitrto}",mainbusiincome="{mainbusiincome}",mainbusiprofit="{mainbusiprofit}",totprofit="{totprofit}",netprofit="{netprofit}",totalassets="{totalassets}",totalliab="{totalliab}",totsharequi="{totsharequi}",operrevenue="{operrevenue}",invnetcashflow="{invnetcashflow}",finnetcflow="{finnetcflow}",chgexchgchgs="{chgexchgchgs}",cashnetr="{cashnetr}",cashequfinbal="{cashequfinbal}"'.format(**reps_item) 107 | #print(sql) 108 | mc.execute_modify_mysql(sql) 109 | print('%s*%s*财报 爬取成功' %(stock_symbol,reps_item['reportdate'])) 110 | sql = 'insert into finmain_log (compcode,reportdate,timestamp) \ 111 | values("{compcode}","{reportdate}",CURRENT_TIMESTAMP)'.format(**reps_item) 112 | mc.execute_modify_mysql(sql) 113 | except Exception as e : 114 | print('以上内容出错,没有存到数据库') 115 | print(e) 116 | print('-' * 50) 117 | print('-' * 50) 118 | -------------------------------------------------------------------------------- /src/getcookies.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sqlite3 3 | import requests 4 | from win32.win32crypt import CryptUnprotectData 5 | #import win32crypt 6 | 7 | def getcookiefromchrome(host='.xueqiu.com'): 8 | cookiepath=os.environ['LOCALAPPDATA']+r"\Google\Chrome\User Data\Default\Cookies" 9 | sql="select host_key,name,encrypted_value from cookies where host_key='%s'" % host 10 | with sqlite3.connect(cookiepath) as conn: 11 | cu=conn.cursor() 12 | cookies={name:CryptUnprotectData(encrypted_value)[1].decode() for host_key,name,encrypted_value in cu.execute(sql).fetchall()} 13 | print(cookies) 14 | return cookies 15 | 16 | 17 | #getcookiefromchrome() 18 | #getcookiefromchrome('.baidu.com') 19 | 20 | url='http://www.xueqiu.com/' 21 | 22 | httphead={'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',} 23 | 24 | 25 | r=requests.get(url,headers=httphead,cookies=getcookiefromchrome('.xueqiu.com'),allow_redirects=1) 26 | print(r.text) -------------------------------------------------------------------------------- /src/hq.py: -------------------------------------------------------------------------------- 1 | import json 2 | import requests 3 | import pymysql 4 | from http.cookiejar import Cookie 5 | #下面三行为getcookie添加 6 | import os 7 | import sqlite3 8 | from win32.win32crypt import CryptUnprotectData 9 | import time 10 | from pymysql.times import Timestamp 11 | 12 | def getcookiefromchrome(host='.xueqiu.com'): 13 | cookiepath=os.environ['LOCALAPPDATA']+r"\Google\Chrome\User Data\Default\Cookies" 14 | sql="select host_key,name,encrypted_value from cookies where host_key='%s'" % host 15 | with sqlite3.connect(cookiepath) as conn: 16 | cu=conn.cursor() 17 | cookies={name:CryptUnprotectData(encrypted_value)[1].decode() for host_key,name,encrypted_value in cu.execute(sql).fetchall()} 18 | #print(cookies) 19 | return cookies 20 | 21 | 22 | # mysql_coon 主要的功能就是, 将链接数据库的操作变成只连接一次 23 | # 24 | class mysql_conn(object): 25 | # 魔术方法, 初始化, 构造函数 26 | def __init__(self): 27 | self.db = pymysql.connect(host="localhost",user="test",password="test",db="stock",port=3311, 
charset="utf8mb4" ) 28 | self.cursor = self.db.cursor() 29 | # 执行modify(修改)相关的操作 30 | def execute_modify_mysql(self, sql): 31 | self.cursor.execute(sql) 32 | self.db.commit() 33 | # 魔术方法, 析构化 ,析构函数 34 | def __del__(self): 35 | self.cursor.close() 36 | self.db.close() 37 | 38 | 39 | 40 | 41 | # 因为不能访问, 所以我们加个头试试 42 | headers = { 43 | #'Accept': '*/*', 44 | #'Accept-Encoding': 'gzip, deflate, br', 45 | #'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8', 46 | #'Connection': 'keep-alive', 47 | #'Cookie': 'aliyungf_tc=AQAAALoQF3p02gsAUhVFebQ3uBBNZn+H; xq_a_token=584d0cf8d5a5a9809761f2244d8d272bac729ed4; xq_a_token.sig=x0gT9jm6qnwd-ddLu66T3A8KiVA; xq_r_token=98f278457fc4e1e5eb0846e36a7296e642b8138a; xq_r_token.sig=2Uxv_DgYTcCjz7qx4j570JpNHIs; _ga=GA1.2.516718356.1534295265; _gid=GA1.2.1050085592.1534295265; u=301534295266356; device_id=f5c21e143ce8060c74a2de7cbcddf0b8; Hm_lvt_1db88642e346389874251b5a1eded6e3=1534295265,1534295722; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1534295722', 48 | #'Cookie':'Hm_lpvt_1db88642e346389874251b5a1eded6e3=1538206459;Hm_lvt_1db88642e346389874251b5a1eded6e3=1538120732,1538144989,1538206010,1538206459', '_ga': 'GA1.2.1715450264.1532421729', '_gid': 'GA1.2.1367684432.1537964115', 'bid': '9bff933477b6c75b2ff40032e613edb6_jjzgesh9', 'device_id': '61f5d2eff7db22470fda980ead33cda9', 'remember': '1', 'remember.sig': 'K4F3faYzmVuqC0iXIERCQf55g2Y', 's': 'er17v6p058', 'snbim_minify': 'true', 'u': '1781168269', 'u.sig': 'cMmZfQkGyfjC5lehGsI4jsHDp-w', 'xq_a_token': '8a8848e34abe1b04ab2fb720b9d124b2368ec1b4', 'xq_a_token.sig': 'gUjJ-JIAMsQ2dcAIqZKMZbpclYU', 'xq_is_login': '1', 'xq_is_login.sig': 'J3LxgPVPUzbBg3Kee_PquUfih7Q', 'xq_r_token': '2827c657061f1072f18dd4208a8e548799fdf31b', 'xq_r_token.sig': 'y3_9YXXKVvXnZeppIJoOCI923S4'}' 49 | 'Host': 'xueqiu.com', 50 | 'Referer': 'https://xueqiu.com/hq', 51 | 'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36' 52 | 53 | #'X-Requested-With': 'XMLHttpRequest', 54 | } 55 | 56 | 57 | url = 'https://xueqiu.com/stock/quote_order.json?page={page}&size=90&order=asc&exchange=CN&stockType={stocktype}&column=symbol%2Cname%2Ccurrent%2Cchg%2Cpercent%2Clast_close%2Copen%2Chigh%2Clow%2Cvolume%2Camount%2Cmarket_capital%2Cpe_ttm%2Chigh52w%2Clow52w%2Chasexist&orderBy=symbol&_={timestamp}' 58 | 59 | #https://xueqiu.com/stock/quote_order.json?page=1&size=90&order=desc&exchange=CN&stockType=sza&column=symbol%2Cname%2Ccurrent%2Cchg%2Cpercent%2Clast_close%2Copen%2Chigh%2Clow%2Cvolume%2Camount%2Cmarket_capital%2Cpe_ttm%2Chigh52w%2Clow52w%2Chasexist&orderBy=percent&_=1538830252423 60 | 61 | #获取股票代码、实时行情 62 | 63 | data={} 64 | stock_count=0 #计数器 65 | mc = mysql_conn() #数据库连接 66 | for stock_type in ('sha','sza'): 67 | timestp=int(round(time.time() * 1000)) 68 | response = requests.get(url.format(page=1,stocktype=stock_type,timestamp=timestp), headers=headers,cookies=getcookiefromchrome('.xueqiu.com')) 69 | res_dict = json.loads(response.text) 70 | 71 | stock_count=0 #计数器 72 | #股票数量计数 73 | count_val = int(res_dict['count']) 74 | pagemax=count_val//90 75 | #count计数为json时使用 76 | #count_val = res_dict['count'] 77 | 78 | 79 | for p in range(1,pagemax+2): 80 | timestp=int(round(time.time() * 1000)) 81 | response = requests.get(url.format(page=p,stocktype=stock_type,timestamp=timestp), headers=headers,cookies=getcookiefromchrome('.xueqiu.com')) 82 | while response.status_code != requests.codes.ok: 83 | print('重复读取第%d页数据'%(p)) 84 | time.sleep(0.3) 85 | timestp=int(round(time.time() * 1000)) 86 | 
response = requests.get(url.format(page=p,stocktype=stock_type,timestamp=timestp), headers=headers,cookies=getcookiefromchrome('.xueqiu.com'))
87 | 
88 |         else:
89 |             pass
90 | 
91 |         print('page--%d %s ' %(p,stock_type))
92 |         # replace null with 0 so the insert does not fail on empty fields
93 |         res_dict = json.loads(response.text.replace('null', '0'))
94 |         #print(res_dict)
95 |         stock_list = res_dict['data']
96 |         for stock_list_item in stock_list:
97 |             # each row comes back as a positional list of fields
98 |             data['symbol']=stock_list_item[0]
99 |             data['code']=stock_list_item[0]
100 |             data['name'] = stock_list_item[1]
101 |             data['current']=stock_list_item[2]
102 |             data['percent']=stock_list_item[4]
103 |             data['high52w'] = stock_list_item[13]
104 |             data['low52w'] = stock_list_item[14]
105 |             data['marketcapital']=stock_list_item[11]
106 |             data['amount']=stock_list_item[10]
107 |             data['volume']=stock_list_item[9]
108 |             data['pe_ttm']=stock_list_item[12]
109 | 
110 |             try:
111 |                 sql = 'insert into stocks(symbol,compcode,compsname,current,percent,high52w,low52w,marketcapital,amount,volume,pe_ttm) \
112 |                 values("{symbol}","{code}","{name}","{current}","{percent}","{high52w}","{low52w}","{marketcapital}","{amount}","{volume}","{pe_ttm}")\
113 |                 on duplicate key update current="{current}",percent="{percent}",high52w="{high52w}",low52w="{low52w}",marketcapital="{marketcapital}",amount="{amount}",volume="{volume}",pe_ttm="{pe_ttm}",timestamp=CURRENT_TIMESTAMP'.format(**data)
114 | 
115 |                 mc.execute_modify_mysql(sql)
116 |                 stock_count=stock_count+1
117 |                 print('%s: %d stocks in total, #%d %s %s scraped successfully' %(stock_type,count_val,stock_count,data['code'],data['name']))
118 |                 #print('-' * 50)
119 |             except Exception as e :
120 |                 print('The record above failed and was not saved to the database')
121 |                 print('-' * 50)
122 |                 print(e)
123 | # close the database connection
124 | del mc
125 | 
--------------------------------------------------------------------------------
/src/kline.py:
--------------------------------------------------------------------------------
1 | import json
2 | import requests
3 | import pymysql
4 | from http.cookiejar import Cookie
5 | # the next three imports were added for getcookiefromchrome
6 | import os
7 | import sqlite3
8 | from win32.win32crypt import CryptUnprotectData
9 | import time
10 | from pymysql.times import Timestamp
11 | 
12 | 
13 | def getcookiefromchrome(host='.xueqiu.com'):
14 |     cookiepath=os.environ['LOCALAPPDATA']+r"\Google\Chrome\User Data\Default\Cookies"
15 |     sql="select host_key,name,encrypted_value from cookies where host_key='%s'" % host
16 |     with sqlite3.connect(cookiepath) as conn:
17 |         cu=conn.cursor()
18 |         cookies={name:CryptUnprotectData(encrypted_value)[1].decode() for host_key,name,encrypted_value in cu.execute(sql).fetchall()}
19 |         #print(cookies)
20 |         return cookies
21 | 
22 | 
23 | # mysql_conn wraps the database connection so that it is opened only once
24 | #
25 | class mysql_conn(object):
26 |     # constructor: open the connection and create a cursor
27 |     def __init__(self):
28 |         self.db = pymysql.connect(host="localhost",user="test",password="test",db="stock",port=3311, charset="utf8mb4" )
29 |         self.cursor = self.db.cursor()
30 |     # execute INSERT/UPDATE statements (committed separately via execute_commit)
31 |     def execute_modify_mysql(self, sql):
32 |         self.cursor.execute(sql)
33 |     def execute_commit(self):
34 |         self.db.commit()
35 |     # run a SELECT and return the full result set
36 |     def execute_select(self,sql):
37 |         result=[]
38 |         self.cursor.execute(sql)
39 |         result = self.cursor.fetchall()
40 |         return result
41 |     # destructor: close the cursor and the connection
42 |     def __del__(self):
43 |         self.cursor.close()
44 |         self.db.close()
45 | 
46 | 
47 | 
48 | headers = {
49 |     'Accept': 'application/json, text/javascript, */*; q=0.01',
50 |     'Accept-Encoding': 'gzip, deflate, br',
51 |     'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
52 |     'cache-control': 'no-cache',
53 |     'Connection': 'keep-alive',
54 |     'Host': 'stock.xueqiu.com',
55 |     'Referer': 'https://xueqiu.com/S',
56 |     'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
57 |     'X-Requested-With': 'XMLHttpRequest'
58 | }
59 | 
60 | 
61 | # daily K-line data for one stock
62 | # symbol = stock code
63 | # begin = start time (millisecond timestamp)
64 | # end = end time (millisecond timestamp)
65 | # period = day, week, month, quarter, year
66 | # type = before, after, normal (forward-adjusted, backward-adjusted, unadjusted)
67 | # indicator = kline (K-line data), ma (moving averages), macd, kdj, boll, rsi, wr, bias, cci, psy
68 | 
69 | 
70 | url = 'https://stock.xueqiu.com/v5/stock/chart/kline.json?symbol={symbol}&begin=600000000000&end={timestamp}&period=day&type=before&indicator=kline'
71 | 
72 | 
73 | # get stock codes and daily quotes
74 | 
75 | # connect to the database
76 | mc = mysql_conn()
77 | stock_symbol_list = mc.execute_select('select a.symbol from stocks a LEFT JOIN kline_log b on a.symbol=b.symbol where b.symbol is null order by a.symbol asc')
78 | stock_count=0
79 | 
80 | 
81 | # fetch daily K-line data for every stock not yet logged
82 | for (stock_symbol,) in stock_symbol_list:
83 | 
84 |     stock_count=stock_count+1
85 |     timestp=int(round(time.time() * 1000))
86 |     # throttle the crawl speed
87 |     #time.sleep(0.3)
88 | 
89 |     print('Stock #%d, %s: fetching daily K-line data...'%(stock_count,stock_symbol))
90 | 
91 |     response = requests.get(url.format(symbol=stock_symbol,timestamp=timestp), headers=headers,cookies=getcookiefromchrome('.xueqiu.com'))
92 |     #print(response.status_code)
93 |     # retry until the request succeeds
94 |     while response.status_code != requests.codes.ok:
95 |         print('Retrying stock #%d, %s'%(stock_count,stock_symbol))
96 |         time.sleep(0.3)
97 |         timestp=int(round(time.time() * 1000))
98 |         response = requests.get(url.format(symbol=stock_symbol,timestamp=timestp), headers=headers,cookies=getcookiefromchrome('.xueqiu.com'))
99 |     else:
100 |         # store the data
101 |         pass
102 | 
103 |     res_dict = json.loads(response.text)
104 |     kline_json = res_dict['data']
105 |     error_code=res_dict['error_code']
106 |     error_description=res_dict['error_description']
107 | 
108 |     #print(kline_json['symbol'])
109 |     #print(kline_json['column'])
110 |     #print(kline_json['item'][0])
111 | 
112 |     for kline_item in kline_json['item']:
113 |         #print(kline_item)
114 |         data={}
115 |         data['symbol']=stock_symbol
116 |         data['timestamp']=kline_item[0]/1000
117 |         data['volume']=kline_item[1]
118 |         data['open']=round(kline_item[2],2)
119 |         data['high']=round(kline_item[3],2)
120 |         data['low']=round(kline_item[4],2)
121 |         data['close']=round(kline_item[5],2)
122 |         data['chg']=round(kline_item[6],2)
123 |         data['percent']=round(kline_item[7],2)
124 |         data['turnoverrate']=round(kline_item[8],2) # turnover rate
125 |         data['period']='day'      # daily bars
126 |         data['type']='before'     # forward-adjusted prices
127 | 
128 |         try:
129 |             sql = 'insert into kline(symbol,timestamp,volume,open,high,low,close,chg,percent,turnoverrate,period,type) \
130 |             values("{symbol}", from_unixtime("{timestamp}"),"{volume}","{open}","{high}","{low}","{close}","{chg}","{percent}","{turnoverrate}","{period}","{type}") \
131 |             on duplicate key update volume="{volume}",open="{open}",high="{high}",low="{low}",close="{close}",chg="{chg}",percent="{percent}",turnoverrate="{turnoverrate}",period="{period}",type="{type}"'.format(**data)
132 |             #print(sql)
133 |             mc.execute_modify_mysql(sql)
134 |             #print('Stock #%d: %s %s daily bar scraped' %(stock_count,stock_symbol,time.strftime('%Y-%m-%d',time.localtime(data['timestamp']))))
135 |         except Exception as e :
136 |             print('The record above failed and was not saved to the database')
137 |             print('-' * 50)
138 |             print(e)
139 |     # log that this symbol has been fetched
140 |     sql = 'insert into kline_log (symbol,timestamp) values("%s",CURRENT_TIMESTAMP)'%(stock_symbol)
141 
| mc.execute_modify_mysql(sql) 142 | mc.execute_commit() #一支股票数据采集完整一次提交 143 | #关闭数据库连接 144 | print("OVER") 145 | del mc 146 | -------------------------------------------------------------------------------- /src/test_tushare.py: -------------------------------------------------------------------------------- 1 | from pylab import * 2 | import matplotlib.gridspec as gridspec 3 | 4 | G = gridspec.GridSpec(3, 3) 5 | 6 | axes_1 = subplot(G[0, :]) 7 | xticks([]), yticks([]) 8 | text(0.5,0.5, 'Axes 1',ha='center',va='center',size=24,alpha=.5) 9 | 10 | axes_2 = subplot(G[1,:-1]) 11 | xticks([]), yticks([]) 12 | text(0.5,0.5, 'Axes 2',ha='center',va='center',size=24,alpha=.5) 13 | 14 | axes_3 = subplot(G[1:, -1]) 15 | xticks([]), yticks([]) 16 | text(0.5,0.5, 'Axes 3',ha='center',va='center',size=24,alpha=.5) 17 | 18 | axes_4 = subplot(G[-1,0]) 19 | xticks([]), yticks([]) 20 | text(0.5,0.5, 'Axes 4',ha='center',va='center',size=24,alpha=.5) 21 | 22 | axes_5 = subplot(G[-1,-2]) 23 | xticks([]), yticks([]) 24 | text(0.5,0.5, 'Axes 5',ha='center',va='center',size=24,alpha=.5) 25 | 26 | #plt.savefig('../figures/gridspec.png', dpi=64) 27 | show() -------------------------------------------------------------------------------- /src/xueqiu_test.py: -------------------------------------------------------------------------------- 1 | import json 2 | import requests 3 | import pymysql 4 | from http.cookiejar import Cookie 5 | #下面三行为getcookie添加 6 | import os 7 | import sqlite3 8 | from win32.win32crypt import CryptUnprotectData 9 | #import win32crypt 10 | 11 | def getcookiefromchrome(host='.xueqiu.com'): 12 | cookiepath=os.environ['LOCALAPPDATA']+r"\Google\Chrome\User Data\Default\Cookies" 13 | sql="select host_key,name,encrypted_value from cookies where host_key='%s'" % host 14 | with sqlite3.connect(cookiepath) as conn: 15 | cu=conn.cursor() 16 | cookies={name:CryptUnprotectData(encrypted_value)[1].decode() for host_key,name,encrypted_value in cu.execute(sql).fetchall()} 17 | #print(cookies) 18 | return cookies 19 | 20 | 21 | # mysql_coon 主要的功能就是, 将链接数据库的操作变成只连接一次 22 | # 23 | class mysql_conn(object): 24 | # 魔术方法, 初始化, 构造函数 25 | def __init__(self): 26 | self.db = pymysql.connect(host="localhost",user="test",password="test",db="stocks",port=3311, charset="utf8mb4" ) 27 | self.cursor = self.db.cursor() 28 | # 执行modify(修改)相关的操作 29 | def execute_modify_mysql(self, sql): 30 | self.cursor.execute(sql) 31 | self.db.commit() 32 | # 魔术方法, 析构化 ,析构函数 33 | def __del__(self): 34 | self.cursor.close() 35 | self.db.close() 36 | 37 | 38 | 39 | 40 | # 因为不能访问, 所以我们加个头试试 41 | headers = { 42 | #'Accept': '*/*', 43 | #'Accept-Encoding': 'gzip, deflate, br', 44 | #'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8', 45 | #'Connection': 'keep-alive', 46 | #'Cookie': 'aliyungf_tc=AQAAALoQF3p02gsAUhVFebQ3uBBNZn+H; xq_a_token=584d0cf8d5a5a9809761f2244d8d272bac729ed4; xq_a_token.sig=x0gT9jm6qnwd-ddLu66T3A8KiVA; xq_r_token=98f278457fc4e1e5eb0846e36a7296e642b8138a; xq_r_token.sig=2Uxv_DgYTcCjz7qx4j570JpNHIs; _ga=GA1.2.516718356.1534295265; _gid=GA1.2.1050085592.1534295265; u=301534295266356; device_id=f5c21e143ce8060c74a2de7cbcddf0b8; Hm_lvt_1db88642e346389874251b5a1eded6e3=1534295265,1534295722; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1534295722', 47 | #'Cookie':'Hm_lpvt_1db88642e346389874251b5a1eded6e3=1538206459;Hm_lvt_1db88642e346389874251b5a1eded6e3=1538120732,1538144989,1538206010,1538206459', '_ga': 'GA1.2.1715450264.1532421729', '_gid': 'GA1.2.1367684432.1537964115', 'bid': '9bff933477b6c75b2ff40032e613edb6_jjzgesh9', 'device_id': 
'61f5d2eff7db22470fda980ead33cda9', 'remember': '1', 'remember.sig': 'K4F3faYzmVuqC0iXIERCQf55g2Y', 's': 'er17v6p058', 'snbim_minify': 'true', 'u': '1781168269', 'u.sig': 'cMmZfQkGyfjC5lehGsI4jsHDp-w', 'xq_a_token': '8a8848e34abe1b04ab2fb720b9d124b2368ec1b4', 'xq_a_token.sig': 'gUjJ-JIAMsQ2dcAIqZKMZbpclYU', 'xq_is_login': '1', 'xq_is_login.sig': 'J3LxgPVPUzbBg3Kee_PquUfih7Q', 'xq_r_token': '2827c657061f1072f18dd4208a8e548799fdf31b', 'xq_r_token.sig': 'y3_9YXXKVvXnZeppIJoOCI923S4'}' 48 | #'Host': 'xueqiu.com', 49 | #'Referer': 'https://xueqiu.com/', 50 | 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36', 51 | #'X-Requested-With': 'XMLHttpRequest', 52 | #'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36' 53 | } 54 | 55 | # urllib 的相关操作如下 56 | url = 'https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id=-1&count=10&category=111' 57 | #"https://xueqiu.com/stock/f10/compinfo.json?symbol=SZ000001" 58 | 59 | 60 | response = requests.get(url, headers=headers,cookies=getcookiefromchrome('.xueqiu.com')) 61 | res_dict = json.loads(response.text) 62 | 63 | print(res_dict) 64 | 65 | list_list = res_dict['list'] 66 | #print(list_list) 67 | # 遍历 list_list 68 | data = {} 69 | for list_item_dict in list_list: 70 | # list 列表内的一个item, 他是一个dict 71 | data_str = list_item_dict['data'] 72 | data_dict = json.loads(data_str) 73 | data['ids'] = data_dict['id'] 74 | data['title'] = data_dict['title'] 75 | data['description'] = data_dict['description'] 76 | data['target'] = data_dict['target'] 77 | print(data_dict['id']) 78 | print(data_dict['title']) 79 | print(data_dict['description']) 80 | print(data_dict['target']) 81 | 82 | # print(list_item_dict) 83 | try: 84 | sql = 'insert into xueqiu(ids,title,description,target) values("{ids}","{title}","{description}","{target}")'.format(**data) 85 | mc = mysql_conn() 86 | mc.execute_modify_mysql(sql) 87 | print('以上内容爬取成功') 88 | print('-' * 50) 89 | except: 90 | print('以上内容出错,没有存到数据库') 91 | print('-' * 50) 92 | --------------------------------------------------------------------------------
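
A note on the request pattern: base_info.py, finmain.py, hq.py and kline.py all repeat the same loop of refreshing the millisecond timestamp, calling requests.get, and sleeping 0.3 s until the status code is 200. A minimal sketch of that pattern as a reusable helper is below; `headers` and `getcookiefromchrome` are the objects already defined in those scripts, while `fetch_json` and `max_tries` are additions for illustration (the original scripts retry forever).

```python
import time
import requests

def fetch_json(url_template, headers, cookies, max_tries=20, delay=0.3, **params):
    """Fetch a Xueqiu JSON endpoint, retrying until HTTP 200 or max_tries."""
    for _ in range(max_tries):
        # refresh the millisecond timestamp on every attempt, as the scripts do
        timestp = int(round(time.time() * 1000))
        response = requests.get(url_template.format(timestamp=timestp, **params),
                                headers=headers, cookies=cookies)
        if response.status_code == requests.codes.ok:
            return response.json()
        time.sleep(delay)
    raise RuntimeError('no 200 response after %d attempts: %s' % (max_tries, url_template))
```

A call such as `fetch_json(url, headers, getcookiefromchrome('.xueqiu.com'), symbol='SH600000')` would cover the kline URL template, since that template only needs `symbol` and `timestamp` to be filled in.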
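
All of the INSERT ... ON DUPLICATE KEY UPDATE statements are built with str.format, which is why base_info.py has to strip double quotes out of compintro, bizscope and majorbiz by hand. A hedged sketch of the stocks upsert from hq.py rewritten with pymysql parameter binding is shown below; the table and column names are taken from hq.py, while the connection settings and the sample row are placeholders, not values from the original code.

```python
import pymysql

# Same upsert as hq.py, but with %(name)s placeholders so quotes in scraped
# values cannot break the statement or inject SQL.
UPSERT_STOCK = (
    "INSERT INTO stocks (symbol, compcode, compsname, current, percent, high52w, low52w,"
    " marketcapital, amount, volume, pe_ttm)"
    " VALUES (%(symbol)s, %(code)s, %(name)s, %(current)s, %(percent)s, %(high52w)s,"
    " %(low52w)s, %(marketcapital)s, %(amount)s, %(volume)s, %(pe_ttm)s)"
    " ON DUPLICATE KEY UPDATE current=VALUES(current), percent=VALUES(percent),"
    " high52w=VALUES(high52w), low52w=VALUES(low52w), marketcapital=VALUES(marketcapital),"
    " amount=VALUES(amount), volume=VALUES(volume), pe_ttm=VALUES(pe_ttm),"
    " timestamp=CURRENT_TIMESTAMP"
)

def save_stock(db, row):
    # row is the dict built in hq.py (keys: symbol, code, name, current, ...)
    with db.cursor() as cursor:
        cursor.execute(UPSERT_STOCK, row)
    db.commit()

if __name__ == "__main__":
    db = pymysql.connect(host="localhost", user="test", password="test",
                         db="stock", port=3311, charset="utf8mb4")
    save_stock(db, {
        "symbol": "SH600000", "code": "SH600000", "name": "浦发银行",
        "current": 10.0, "percent": 0.5, "high52w": 12.0, "low52w": 8.0,
        "marketcapital": 2.9e11, "amount": 1.0e8, "volume": 1.0e7, "pe_ttm": 5.0,
    })
```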
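
hq.py and kline.py read response rows by numeric index (stock_list_item[0], kline_item[5], and so on). The kline payload also carries a column list (kline.py prints kline_json['column']), so rows could be mapped to names instead of positions. The sketch below assumes only that 'column' and 'item' are parallel lists; the field name 'close' in the usage comment is an assumption about what that list contains.

```python
def rows_to_dicts(kline_json):
    """Pair each kline row with the column names reported by the API."""
    columns = kline_json['column']          # e.g. ['timestamp', 'volume', 'open', ...]
    for item in kline_json['item']:
        yield dict(zip(columns, item))

# usage sketch: for row in rows_to_dicts(res_dict['data']): print(row['close'])
```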