├── 10jqk_gn.py ├── 10jqkserver.js ├── AI图谱.emmx ├── AI图谱.png ├── Harmonic_N.py ├── README.md ├── TomDeMark9.py ├── all_zig.py ├── apiserver.py ├── base.html ├── bs_get_industry_check.py ├── bs_hs300s.py ├── bs_industry_klang.py ├── bs_kdata.py ├── bs_sz50s.py ├── bs_to_csv.py ├── bs_to_db.py ├── bs_zz500s.py ├── btr1.py ├── btr2.py ├── btr28.py ├── btrboll.py ├── btrmacd.py ├── btrmrk.py ├── btrrsi.py ├── btrstoch.py ├── butterfly.html ├── calc.py ├── common ├── __init__.py ├── common.py └── framework.py ├── config.py ├── doctorxiong_fund.py ├── dragonphoenix.py ├── etf.html ├── features_lstm.py ├── features_tree.py ├── fibonacci.py ├── fund_stock_count.py ├── get_day_all_stock_info.py ├── get_day_all_stock_infov2.py ├── get_day_all_stock_infov3.py ├── get_day_all_stock_infov4.py ├── get_industry_sort.py ├── gn.html ├── gn_dict.py ├── hk_eastmoney.py ├── hk_qq.py ├── incon.dat ├── joeng.py ├── jqfinance.py ├── klang_bt.py ├── klang_msg.py ├── kline.html ├── klinebk.html ├── lstm_attention_predict.py ├── lstm_bin_predict.py ├── macd1.py ├── macd2.py ├── proxy_flask.py ├── proxy_server.py ├── pserver.js ├── requirements.txt ├── sendrequest.py ├── sendrequest_task.py ├── set_dayma.py ├── sina_min_kline.py ├── start.py ├── start.sh ├── stock-prediction ├── demo │ ├── demo1.py │ ├── demo2.py │ ├── demo3.py │ ├── demo4.py │ ├── demo5.py │ ├── demo6_svm.py │ ├── demo7_lstm.py │ ├── demo8_lstm.py │ └── demo9_lstm.py ├── parameters.py ├── stock_prediction.py ├── test.py └── train.py ├── stock_prediction_lstmV1.py ├── stock_prediction_lstmV2.py ├── swingtrade.py ├── talib_docs.py ├── tcn_predict.py ├── tdx.py ├── tdx_block.py ├── tdx_day.py ├── tdx_features.py ├── tdx_info.py ├── tdxbk.py ├── tdxhy.py ├── test_fm.py ├── testtdx.py ├── tf-lstm-stock.py ├── transverse_tree.py ├── ts_to_csv.py ├── wave_abc.py ├── xgb_class_predict.py ├── zigzag.py ├── zigzag_lib.py ├── zigzag_plt.py ├── zigzag_stock.py └── zx.html /10jqk_gn.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import re 3 | import time 4 | import gn_dict 5 | 6 | gndict = gn_dict.gndict 7 | headers = { 8 | 'Connection': 'keep-alive', 9 | 'Accept': 'text/html, */*; q=0.01', 10 | 'X-Requested-With': 'XMLHttpRequest', 11 | 'hexin-v': 'A1jUmtUwjTrX5KG-XkGeHIBdL43pQbw-HqaQ75JJpetEv_a7OlGMW261YNfh', 12 | 'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4573.0 Mobile Safari/537.36', 13 | 'Referer': 'http://q.10jqka.com.cn/gn/', 14 | 'Accept-Language': 'zh-CN,zh;q=0.9', 15 | } 16 | 17 | urlcode = 'http://q.10jqka.com.cn/gn/detail/code/%s/' 18 | 19 | url8 = urlcode % '300008' # 新能源 20 | url7 = urlcode % '300777' 21 | url1 = urlcode % '300382' # 稀土永磁,赚钱的板块 22 | 23 | page = 1 24 | def get_gnpage(page): 25 | time.sleep(1) 26 | 27 | response = requests.get('http://q.10jqka.com.cn/gn/index/field/addtime/order/desc/page/%s/ajax/1/' % page, headers=headers, verify=False) 28 | print(response.text.split("",response.text,re.S|re.M)) 39 | 40 | time.sleep(0.5) 41 | response = session.get(url) 42 | print(response.text) 43 | print(re.findall("",response.text,re.S|re.M)) 44 | 45 | def get_gndict_home(): 46 | time.sleep(1) 47 | resp = requests.get('http://q.10jqka.com.cn/gn/',headers=headers) 48 | all_gn = re.findall('''(.*?)''',resp.text,re.S|re.M) 49 | gn_dict1 = {} 50 | for i in all_gn: 51 | gn_dict1[i[0]]=i[1] 52 | print(gn_dict1) 53 | 54 | # 55 | # gndict save to gn_dict.py file 56 | # 57 | 58 | #get_gndict_home() 59 | 60 | 
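# Added note: the 'hexin-v' value in the headers above is 10jqka's anti-crawler
# token. It is generated by the site's JavaScript and expires, so if the
# requests in this file start returning empty or redirected pages, capture a
# fresh token from a real browser session and update the header.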
get_gnsort(1) 61 | 62 | #get_gnpage(1) 63 | #get_gnpage(2) 64 | 65 | -------------------------------------------------------------------------------- /10jqkserver.js: -------------------------------------------------------------------------------- 1 | httpProxy = require('http-proxy'); 2 | 3 | var URL = 'http://data.10jqka.com.cn'; 4 | 5 | 6 | server = httpProxy.createServer({ secure: false, target: URL }, function (req, res, proxy) { 7 | 8 | 9 | proxy.proxyRequest(req, res, { secure: false, target: URL }); 10 | 11 | }) 12 | 13 | server.on('proxyReq', function(proxyReq, req, res, options) { 14 | proxyReq.setHeader('Host','data.10jqka.com.cn'); 15 | }); 16 | 17 | 18 | 19 | server.on('proxyRes', function(proxyRes, req, res, options) { 20 | proxyRes.on('data', function () { 21 | // 同花顺有CROS 权限 22 | //res.setHeader('Access-Control-Allow-Origin', '*'); 23 | //res.setHeader('Access-Control-Allow-Methods', 'POST, GET, OPTIONS'); 24 | }); 25 | 26 | }); 27 | 28 | 29 | 30 | console.log("Listening on port 8008") 31 | 32 | server.listen(8008); 33 | -------------------------------------------------------------------------------- /AI图谱.emmx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asmcos/quantrader/0a6dad21b4225cefc8cd0633b578346bb49ef6fe/AI图谱.emmx -------------------------------------------------------------------------------- /AI图谱.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asmcos/quantrader/0a6dad21b4225cefc8cd0633b578346bb49ef6fe/AI图谱.png -------------------------------------------------------------------------------- /Harmonic_N.py: -------------------------------------------------------------------------------- 1 | #谐波理论之N字战法 harmonic 2 | 3 | from common.framework import * 4 | import pandas as pd 5 | from fibonacci import * 6 | 7 | filename = './datas/stock_harmonic_n.html' 8 | 9 | #金叉 10 | def CrossUp(a,b): 11 | if a[-1] >= b[-1] and a[-2] b[-2]: 18 | return True 19 | 20 | return False 21 | 22 | 23 | """ 24 | RSV:=(CLOSE-LLV(LOW,N))/(HHV(HIGH,N)-LLV(LOW,N))*100; 25 | K:SMA(RSV,M1,1); 26 | D:SMA(K,M2,1); 27 | """ 28 | 29 | period = 6 30 | # 最大最小的列表 31 | 32 | Nlist = [] 33 | #d,code,name,skip1,skip2 34 | Nstocklist = [] 35 | def mn(datas,code,name): 36 | 37 | if len(datas) < 10: 38 | return [] 39 | closes = datas['close'] 40 | dates = datas['date'] 41 | distance = 0 42 | prev_close = -1 43 | 44 | mnlist = [] 45 | for i in range(period,len(dates)-period): 46 | m = talib.MAX(closes[i-period:i+period],len(closes[i-period:i+period])) 47 | n = talib.MIN(closes[i-period:i+period],len(closes[i-period:i+period])) #d 是最近时间,所以D不能往后太多 48 | m1 = m.values[-1] 49 | n1 = n.values[-1] 50 | if float(m1) == float(closes[i]): 51 | print("max",dates[i],closes[i],i-distance) 52 | mnlist.append([1,dates.values[i],float(closes.values[i]),i]) 53 | distance = i 54 | prev_close = closes[i] 55 | if float(n1) == float(closes[i]): 56 | print("min",dates[i],closes[i],i-distance) 57 | mnlist.append([0,dates.values[i],float(closes.values[i]),i]) 58 | distance = i 59 | prev_close = closes[i] 60 | 61 | return mnlist 62 | 63 | # a > b 64 | def scope(a,b): 65 | return (a-b) / a * 100 66 | 67 | 68 | Nlist=[] 69 | 70 | #搜索 X,A,B 71 | def N(mnlist,code,name): 72 | 73 | X = None 74 | A = None 75 | B = None 76 | status = 0 #反转状态 X->A->B 77 | distance = 0 #周期 > 10天? 
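# Added note: the loop below is a three-state scan over the pivot list
# (each entry is [max/min flag, date, close, bar index]):
#   status 0 -> take the next local low as X
#   status 1 -> take a local high A more than 10 bars after X and roughly 10%+ above it
#   status 2 -> take a local low B more than 10 bars after A and roughly 10%+ below it
# B is then compared (via approx from fibonacci.py) against the 0.786 and
# 0.618 retracements of the X->A leg computed by downN().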
78 | for i in mnlist: 79 | if i[0] == 0 and status == 0: 80 | X = i 81 | status = 1 82 | distance = i[3] 83 | if i[0] == 1 and status == 1 and i[3] - distance > 10 and scope(i[2],X[2]) > 10: 84 | status = 2 85 | A = i 86 | distance = i[3] 87 | if i[0] == 0 and status == 2 and i[3] - distance > 10 and scope(A[2],i[2]) > 10: 88 | status = 0 89 | distance = 0 90 | B = i 91 | b1 = downN(A[2],X[2],0.786) #b 92 | b2 = downN(A[2],X[2],0.618) #b 93 | if approx(B[2],b1): 94 | print("N 0.786",(X[2],X[1]),(A[2],A[1]),(B[2],B[1]),b1) 95 | Nlist.append([code,name,B[1]]) 96 | if approx(B[2],b2): 97 | print("N 0.618",(X[2],X[1]),(A[2],A[1]),(B[2],B[1]),b2) 98 | Nlist.append([code,name,B[1]]) 99 | 100 | # 搜索最大最小值,统计日K 101 | def waterfall(code,name,datas): 102 | 103 | try: 104 | df = datas 105 | turn = df.turn[df.index[-1]] 106 | volume = df.volume[df.index[-1]] 107 | close = df.close[df.index[-1]] 108 | hqltsz = volume / turn / 1000000 109 | if hqltsz*close < 300: 110 | return 111 | except: 112 | return 113 | 114 | mnlist = mn(datas,code,name) 115 | N(mnlist,code,name) 116 | 117 | def create_clickable_code(code): 118 | code = code.replace(".","") 119 | url_template= '''{code}'''.format(code=code) 120 | return url_template 121 | 122 | def save(): 123 | 124 | df = pd.DataFrame(Nlist,columns=['code','name','date']) 125 | df['code'] = df['code'].apply(create_clickable_code) 126 | content ='\n谐波理论之N字形态\n' 127 | content += df.to_html(escape=False,float_format='%.2f') 128 | 129 | print("save file",filename) 130 | save_file(filename,content) 131 | 132 | 133 | if __name__ == "__main__": 134 | init_stock_list() 135 | loop_all(waterfall) 136 | save() 137 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 学习量化交易记录, 2 | quantrader是基于backtrack框架的回测系统,包含常见的macd,二八动量等。 3 | 希望收集各种常见的交易策略算法,供各网友学习,交流。 4 | 5 | 6 | # 安装依赖库 7 | 8 | pip3 install -r requirements.txt 9 | 10 | 我使用的是python3,所以都选用pip3 11 | 12 | 文档会记录在 http://www.zhanluejia.net.cn 13 | ``` 14 | . 15 | ├── README.md 16 | ├── bs_to_csv.py # baostock 内容存到csv文件 17 | ├── btr1.py #backtrader 代码例子 18 | ├── btr2.py #backtrader 代码的sma例子 19 | ├── btrmacd.py # btr macd 例子,4.13调试第一版 20 | ├── btrmrk.py # btr MACD,KDJ,RSI三个维度判断买卖 21 | ├── start.py # 执行脚本 22 | ├── datas 23 | │   ├── bs_sh.600600.csv # 通过baostock网站存储的数据 24 | │   └── orcl-1995-2014.txt #数据来自backtrader 源代码 25 | ├── macd1.py # baostock + talib + macd 例子, 26 | ├── macd2.py # tushare + talib + macd 例子 27 | ├── bs_get_industry.py #获取沪市、深市数据 28 | ├── bs_get_industry_check.py #获取沪市、深市股票,并且有在线数据 29 | ├── dbmongo.py # 数据 存储 30 | ├── requirements.txt # 安装一些python3依赖库 31 | └── ts_to_csv.py # tushare to csv 32 | ``` 33 | 34 | ## 运行其中一个例子 35 | 36 | 37 | ``` 38 | # 获取青岛啤酒的数据 39 | python3 ts_to_csv.py --code 600600 40 | # 使用macd策略 41 | python3 btrmacd.py --datafile ./datas/ts_600600.csv 42 | ``` 43 | 44 | ### 结果如下: 45 | ``` 46 | Starting Portfolio Value: 50000.00 47 | 2018-03-26, BUY CREATE, 39.81 48 | 2018-03-27, BUY EXECUTED, Price: 39.95, Cost: 47661.54, Comm 157.28 49 | 2018-07-31, SELL CREATE, 43.92 50 | 2018-08-01, SELL EXECUTED, Price: 43.92, Cost: 47661.54, Comm 172.91 51 | 2019-01-16, BUY CREATE, 36.10 52 | 2019-01-17, BUY EXECUTED, Price: 36.50, Cost: 52231.50, Comm 172.36 53 | 2020-01-06, SELL CREATE, 50.01 54 | 2020-01-07, SELL EXECUTED, Price: 50.20, Cost: 52231.50, Comm 237.06 55 | Final Portfolio Value: 73601.29 56 | ``` 57 | 58 | 结果是5万的启动资金2年后7万3. 
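
下面是一个最小示意(a minimal sketch, not code from this repo)of the MACD golden/dead-cross rule that `btrmacd.py` trades, written with the same `talib.MACD` call style used in `macd1.py`/`macd2.py`. It assumes the CSV fetched above by `ts_to_csv.py` has `date` and `close` columns:

```python
import pandas as pd
import talib

df = pd.read_csv("./datas/ts_600600.csv")
# talib.MACD returns (DIF, DEA, histogram)
dif, dea, hist = talib.MACD(df["close"], fastperiod=12, slowperiod=26, signalperiod=9)
for i in range(1, len(df)):
    if dif[i - 1] < dea[i - 1] and dif[i] >= dea[i]:
        print(df["date"][i], "golden cross -> BUY signal")
    elif dif[i - 1] > dea[i - 1] and dif[i] <= dea[i]:
        print(df["date"][i], "dead cross -> SELL signal")
```

`btrmacd.py` wraps the same crossover in a backtrader strategy with order handling and commissions; this sketch only prints the raw signals.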
59 | 60 | 61 | ### 数据的获取 62 | 63 | * common/* #公共部分 64 | * bs_get_industry_check.py #获取所有股票列表 包含板块信息 65 | * bs_industry_klang.py #获取板块信息,并且提交到 klang.org.cn 66 | * tdxhy.py #获取 通达信板块信息 67 | 68 | #使用LSTM 预测股票 69 | 70 | ```bash 71 | python3 stock_prediction_lstmV1.py 600600 72 | ``` 73 | 最后一行数据就是未来5天的预测。 74 | 75 | 代码说明 76 | * 使用的是 tensorflow 的keras LSTM算法,tf是2.x版本 77 | * 股票数据接口来自tushare,建议切换成tushare pro版本 78 | * 例子代码是30为一个数据周期预测未来5天。 79 | * 股票预测仅仅是一个趋势,不是很准,大家不能用来炒股,仅仅用来学习 80 | 81 | # 二八轮动例子 82 | 83 | ```bash 84 | python3 btr28.py 85 | ``` 86 | 87 | 代码说明 88 | * 采用沪深300,中证500作为轮动 89 | * 交易框架使用backtrader 90 | * 采用聚宽获取数据接口,其中切换了几个其他的数据接口没有完整的中证500周数据 91 | * 在原始数据里增加了计算好的4周增长率数据 92 | -------------------------------------------------------------------------------- /TomDeMark9.py: -------------------------------------------------------------------------------- 1 | from common.framework import * 2 | 3 | resultgt_list = [] 4 | resultlt_list = [] 5 | #greater than 大于 6 | def isgt(dts): 7 | i1 = dts.index[-1] 8 | #i2 = dts.index[-2] 9 | #i3 = dts.index[-3] 10 | #i4 = dts.index[-4] 11 | i5 = dts.index[-5] 12 | 13 | if (dts.close[i1] > dts.close[i5] 14 | #and 15 | #dts.close[i1] > dts.close[i3] 16 | # and 17 | #dts.close[i1] > dts.close[i4] 18 | # and 19 | #dts.close[i1] > dts.close[i5] 20 | ): 21 | return True 22 | else: 23 | return False 24 | 25 | #less than 小于 26 | def islt(dts): 27 | i1 = dts.index[-1] 28 | #i2 = dts.index[-2] 29 | #i3 = dts.index[-3] 30 | #i4 = dts.index[-4] 31 | i5 = dts.index[-5] 32 | 33 | if (dts.close[i1] < dts.close[i5] 34 | # and 35 | #dts.close[i1] < dts.close[i3] 36 | # and 37 | #dts.close[i1] < dts.close[i4] 38 | # and 39 | #dts.close[i1] < dts.close[i5]): 40 | ): 41 | return True 42 | else: 43 | return False 44 | 45 | 46 | def td9(code,name,datas): 47 | print(code,name) 48 | gtstatus = 0 49 | ltstatus = 0 50 | if len(datas)<7: 51 | return 52 | for i in range(5,len(datas)): 53 | if isgt(datas[i-4:i+1]): 54 | gtstatus += 1 55 | if gtstatus > 3 and i == (len(datas)-1): 56 | turn = datas.turn[datas.index[i]] 57 | volume = datas.volume[datas.index[i]] 58 | if volume < 1 or float(turn) == 0 : 59 | continue 60 | hqltsz = float(datas.close[datas.index[i]]) * float(volume) / float(turn) / 1000000 61 | hqltsz = float('%.2f' % hqltsz) 62 | if hqltsz < 50.0: 63 | continue 64 | print(OKRED,datas.date[datas.index[i]],gtstatus,turn,volume,hqltsz,ENDC) 65 | resultgt_list.append([name,code,datas.date[datas.index[i]],gtstatus,hqltsz]) 66 | else: 67 | gtstatus = 0 68 | 69 | if islt(datas[i-4:i+1]): 70 | ltstatus += 1 71 | if ltstatus > 3 and i == (len(datas)-1): 72 | turn = datas.turn[datas.index[i]] 73 | volume = datas.volume[datas.index[i]] 74 | if volume < 1 or float(turn) ==0 : 75 | continue 76 | hqltsz = float(datas.close[datas.index[i]]) * float(volume) / float(turn) / 1000000 77 | hqltsz = float('%.2f' % hqltsz) 78 | if hqltsz < 50.0: 79 | continue 80 | print(OKGREEN,datas.date[datas.index[i]],ltstatus,turn,volume,hqltsz,ENDC) 81 | resultlt_list.append([name,code,datas.date[datas.index[i]],ltstatus,hqltsz]) 82 | 83 | else: 84 | ltstatus = 0 85 | def display(): 86 | for i in resultgt_list + resultlt_list: 87 | print(i) 88 | 89 | def save(): 90 | df = pd.DataFrame(resultgt_list +[['0','sh.0000','0',1,10.0]]+ resultlt_list, columns = ['name','code','date','9转第N天','流通股值']) 91 | print("保存在",'./datas/stock_'+endday+"9dt.html") 92 | save_df_tohtml('./datas/stock_'+endday+"9dt.html",df) 93 | 94 | if __name__ == "__main__": 95 | init_stock_list() 96 | loop_all(td9) 97 | display() 98 | save() 
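# Added note: td9() above is a loosened TD Sequential "setup" count. The
# classic Tom DeMark rule needs 9 consecutive bars whose close is above
# (sell setup) or below (buy setup) the close 4 bars earlier; here the
# comparison is close[-1] vs close[-5] of each 5-bar window (4 bars apart),
# and a run is reported once it exceeds 3 bars and ends on the latest bar.
# A strict 9-count sketch, for comparison:
#
#   count = 0
#   for i in range(4, len(closes)):
#       count = count + 1 if closes[i] > closes[i - 4] else 0
#       if count == 9:
#           print("TD9 sell setup completes at bar", i)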
--------------------------------------------------------------------------------
/all_zig.py:
--------------------------------------------------------------------------------
import os,sys

filename = "zigzag_plt.py"

if len(sys.argv) > 1:
    filename = sys.argv[1]


from Klang import Kl,Klang
Klang.Klang_init()

from threading import Thread

def th_task(code):
    os.system('python3 ' + filename + " " + code + " 0")

def do_task(tasklist):
    # Start one worker per stock, then wait for the whole batch to finish;
    # joining each thread right after starting it would run the workers
    # one at a time and defeat the 12-way batching below.
    threads = []
    for stock in tasklist:
        new_thread = Thread(target=th_task,args=(stock["code"],))
        new_thread.start()
        threads.append(new_thread)
    for t in threads:
        t.join()  # block until every worker in this batch has finished

count = 12
for index in range(0,len(Kl.stocklist),count):
    do_task(Kl.stocklist[index:index+count])


--------------------------------------------------------------------------------
/apiserver.py:
--------------------------------------------------------------------------------
from flask import Flask,jsonify,request,Response
import json
from hk_eastmoney import get_stock_price_bylist
app = Flask(__name__)

# /list endpoint: comma-separated stock codes -> latest prices as JSON
@app.route('/list')
def codelist():
    codelist = request.args.get("code")
    data = get_stock_price_bylist(codelist.split(","))
    return Response(
        json.dumps(data, ensure_ascii=False),
        content_type="application/json; charset=utf-8"
    )


# Start the Flask app
if __name__ == '__main__':
    app.run(debug=True)
--------------------------------------------------------------------------------
/base.html:
--------------------------------------------------------------------------------
[HTML template for the "Klang(金浪)板块实时数据" (Klang real-time sector data) page.
The markup and inline script were lost in extraction; only the page title and
the "%s" template placeholders the page content is substituted into survive.]
--------------------------------------------------------------------------------
/bs_get_industry_check.py:
--------------------------------------------------------------------------------
import baostock as bs
import pandas as pd
import os
# Log in
lg = bs.login()
# Show the login response
print('login respond error_code:'+lg.error_code)
print('login respond error_msg:'+lg.error_msg)

# Fetch the industry classification data
rs = bs.query_stock_industry()
# rs = bs.query_stock_basic(code_name="浦发银行")
print('query_stock_industry error_code:'+rs.error_code)
print('query_stock_industry respond error_msg:'+rs.error_msg)

filename_sl = os.path.expanduser("~/.klang_stock_list.csv")

if not os.path.exists(filename_sl):
    # Collect the result set; the tdx board/concept columns are left empty
    industry_list = []

    while (rs.error_code == '0') & rs.next():
        # Fetch one record and merge it into the list
        row = rs.get_row_data()
        kdata = bs.query_history_k_data_plus(row[1], 'date,open,high,low,close,volume', start_date='2020-12-01',
                                             frequency='d')
        if len(kdata.get_row_data()) == 0:
            continue
        tdxbk = ""#tdxhy.gettdxbk(row[1])
        tdxgn = ""#tdxhy.gettdxgn(row[1])
        row.append(tdxbk)
        row.append(tdxgn)
        print(row)
        industry_list.append(row)

    fields = rs.fields
    fields.append('tdxbk')
    fields.append('tdxgn')

    result = pd.DataFrame(industry_list, columns=rs.fields)
    # Write the result set to a csv file
    result.to_csv(filename_sl, index=False)
    print(result)
else:

    import tdxhy
    # Collect the result set, filling the tdx board/concept columns via tdxhy
    industry_list = []

    while (rs.error_code == '0') & rs.next():
        # Fetch one record and merge it into the list
        row = rs.get_row_data()
        kdata = bs.query_history_k_data_plus(row[1], 'date,open,high,low,close,volume', start_date='2020-12-01',
                                             frequency='d')
        if len(kdata.get_row_data()) == 0:
            continue
        tdxbk = tdxhy.gettdxbk(row[1])
        tdxgn = tdxhy.gettdxgn(row[1])
        row.append(tdxbk)
        row.append(tdxgn)
        print(row)
        industry_list.append(row)

    fields = rs.fields
    fields.append('tdxbk')
    fields.append('tdxgn')

    result = pd.DataFrame(industry_list, columns=rs.fields)
    # Write the result set to a csv file
    result.to_csv(filename_sl, index=False)
    print(result)


# Log out
bs.logout()
--------------------------------------------------------------------------------
/bs_hs300s.py:
--------------------------------------------------------------------------------
import baostock as bs
import pandas as pd

# Log in
lg = bs.login()
# Show the login response
print('login respond error_code:'+lg.error_code)
print('login respond error_msg:'+lg.error_msg)

# Fetch the CSI 300 (沪深300) constituents
rs = bs.query_hs300_stocks()
print('query_hs300 error_code:'+rs.error_code)
print('query_hs300 error_msg:'+rs.error_msg)

# Collect the result set
hs300_stocks = []
while (rs.error_code == '0') & rs.next():
    # Fetch one record and merge it into the list
    hs300_stocks.append(rs.get_row_data())
result = pd.DataFrame(hs300_stocks, columns=rs.fields)
# Write the result set to a csv file
result.to_csv("./datas/bs_hs300_stocks.csv", encoding="utf-8", index=False)
print(result)

# Log out
bs.logout()
--------------------------------------------------------------------------------
/bs_industry_klang.py:
--------------------------------------------------------------------------------
import baostock as bs
import pandas as pd
import requests
import json
import tdxhy 6 | import time 7 | import os 8 | from common.common import * 9 | 10 | # 登录系统 11 | lg = bs.login() 12 | # 显示登陆返回信息 13 | print('login respond error_code:'+lg.error_code) 14 | print('login respond error_msg:'+lg.error_msg) 15 | 16 | # 获取行业分类数据 17 | rs = bs.query_stock_industry() 18 | # rs = bs.query_stock_basic(code_name="浦发银行") 19 | print('query_stock_industry error_code:'+rs.error_code) 20 | print('query_stock_industry respond error_msg:'+rs.error_msg) 21 | 22 | 23 | filename_cm = os.path.expanduser("~/.klang_stock_cm.csv") 24 | if not os.path.exists(filename_cm): 25 | cm = 0 26 | else: 27 | cm = 1 28 | cmdict = {} 29 | cm_list = open(filename_cm).readlines() 30 | cm_list = cm_list[1+int(offset):] #删除第一行 31 | 32 | for i in cm_list: 33 | ilist = i.split(',') 34 | code = ilist[0].split('.')[1].lower() + '.' + ilist[0].split('.')[0] 35 | cmdict[code] = ilist[2] 36 | # 打印结果集 37 | industry_list = [] 38 | while (rs.error_code == '0') & rs.next() : 39 | # 获取一条记录,将记录合并在一起 40 | row = rs.get_row_data() 41 | kdata = bs.query_history_k_data_plus(row[1], 'date,open,high,low,close,volume', start_date='2020-12-01', 42 | frequency='d') 43 | if len(kdata.get_row_data()) == 0: 44 | continue 45 | tdxbk = tdxhy.gettdxbk(row[1]) 46 | tdxgn = tdxhy.gettdxgn(row[1]) 47 | 48 | row.append(tdxbk) 49 | row.append(tdxgn) 50 | if cm == 1: 51 | code = row[1] 52 | chouma = cmdict.get(code,"50") 53 | row.append(chouma) 54 | print(row) 55 | industry_list.append(row) 56 | 57 | fields = rs.fields 58 | fields.append('tdxbk') 59 | fields.append('tdxgn') 60 | 61 | if cm == 1: 62 | fields.append('chouma') 63 | 64 | datas = pd.DataFrame(industry_list, columns=fields) 65 | 66 | datas = datas.to_json(orient='table') 67 | jsondatas = json.loads(datas)['data'] 68 | 69 | hostname = "https://klang.org.cn" 70 | #hostname = "http://klang.zhanluejia.net.cn" 71 | resp = requests.post(hostname+"/industries/drop") 72 | print(resp.content) 73 | try: 74 | resp = requests.post(hostname+"/industries/updates",json=jsondatas,timeout=2000) 75 | print(resp.content) 76 | except: 77 | time.sleep(2) 78 | requests.post(hostname+"/industries/updates",json=jsondatas,timeout=2000) 79 | 80 | # 登出系统 81 | bs.logout() 82 | -------------------------------------------------------------------------------- /bs_kdata.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8; py-indent-offset:4 -*- 3 | 4 | """ 从baostock获取daily数据到datas目录下的csv文件当中,文件名如:bs_sh.000001.csv """ 5 | """ python3 bs_to_csv.py --code sh.600600 """ 6 | import baostock as bs 7 | import pandas as pd 8 | from datetime import datetime 9 | import os 10 | import requests 11 | import talib 12 | # 判断是否 是显示,还是重新下载数据计算 13 | # 数据每天只需要下载一次 14 | from common.common import * 15 | from fibonacci import search_pattern 16 | from fibonacci import Displaybutterfly 17 | 18 | today = datetime.now().strftime('%Y-%m-%d') 19 | 20 | lg = bs.login() 21 | 22 | period = 8 23 | 24 | def get_data(name,code,start,end,adj): 25 | mnlist = [] 26 | rs = bs.query_history_k_data_plus(code, 'date,open,high,low,close,volume,code,turn', start_date=start, 27 | frequency='d' ) 28 | datas = rs.get_data() 29 | if len(datas) < 2: 30 | return 31 | print(len(datas),datas.date[datas.index[-1]]) 32 | closes = datas['close'] 33 | dates = datas['date'] 34 | 35 | for i in range(period,len(dates)-period): 36 | m = talib.MAX(closes[i-period:i+period],len(closes[i-period:i+period])) 37 | n = talib.MIN(closes[i-period:i+period],len(closes[i-period:i+period])) 
#d 是最近时间,所以D不能往后太多 38 | m1 = m.values[-1] 39 | n1 = n.values[-1] 40 | if float(m1) == float(closes[i]): 41 | #print("max",dates[i],closes[i]) 42 | mnlist.append([1,datas.values[i],float(closes.values[i])]) 43 | if float(n1) == float(closes[i]): 44 | #print("min",dates[i],closes[i],i,closes[i-period:i+5]) 45 | mnlist.append([0,datas.values[i],float(closes.values[i])]) 46 | 47 | 48 | # 追加D发现最近的D 49 | for i in range(len(dates)-period,len(datas)-1): 50 | try: 51 | n = talib.MIN(closes[i-period:i+2],len(closes[i-period:i+2])) #d 是最近时间,所以D不能往后太多 52 | n1 = n.values[-1] 53 | if float(n1) == float(closes[i]): 54 | #print("min",dates[i],closes[i]) 55 | mnlist.append([0,datas.values[i],float(closes.values[i])]) 56 | except: 57 | pass 58 | search_pattern(name,code,mnlist)#fibonacci.py 59 | 60 | 61 | def getstockinfo(stock): 62 | #2019-12-09,sz.002094,青岛金王,化工,申万一级行业 63 | # 时间,股票代码,名称,类别 64 | d,code,name,skip1,skip2,HQLTSZ= stock.split(',') 65 | return code,name 66 | 67 | 68 | 69 | 70 | # 判断是否已经下载了股票分类代码 71 | 72 | if not os.path.exists('./datas/stock_industry_check.csv'): 73 | print('正在下载股票库列表....') 74 | os.system('python3 bs_get_industry_check.py') 75 | 76 | stocklist = open('./datas/stock_industry_check.csv').readlines() 77 | stocklist = stocklist[1+int(offset):] #删除第一行 78 | 79 | 80 | def LoopOne(): 81 | for stock in stocklist: 82 | code ,name = getstockinfo(stock) 83 | print('正在获取',name,'代码',code) 84 | get_data(name,code,"2020-12-01",today,"3") 85 | 86 | 87 | if __name__ == "__main__": 88 | LoopOne() 89 | 90 | period = 15 91 | LoopOne() #big butterfly 92 | Displaybutterfly() #fibonacci.py 93 | -------------------------------------------------------------------------------- /bs_sz50s.py: -------------------------------------------------------------------------------- 1 | import baostock as bs 2 | import pandas as pd 3 | 4 | # 登陆系统 5 | lg = bs.login() 6 | # 显示登陆返回信息 7 | print('login respond error_code:'+lg.error_code) 8 | print('login respond error_msg:'+lg.error_msg) 9 | 10 | # 获取上证50成分股 11 | rs = bs.query_sz50_stocks() 12 | print('query_sz50 error_code:'+rs.error_code) 13 | print('query_sz50 error_msg:'+rs.error_msg) 14 | 15 | # 打印结果集 16 | sz50_stocks = [] 17 | while (rs.error_code == '0') & rs.next(): 18 | # 获取一条记录,将记录合并在一起 19 | sz50_stocks.append(rs.get_row_data()) 20 | result = pd.DataFrame(sz50_stocks, columns=rs.fields) 21 | # 结果集输出到csv文件 22 | result.to_csv("./datas/bs_sz50_stocks.csv", encoding="utf-8", index=False) 23 | print(result) 24 | 25 | # 登出系统 26 | bs.logout() 27 | -------------------------------------------------------------------------------- /bs_to_csv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8; py-indent-offset:4 -*- 3 | 4 | """ 从baostock获取daily数据到datas目录下的csv文件当中,文件名如:bs_sh.000001.csv """ 5 | """ python3 bs_to_csv.py --code sh.600600 """ 6 | import baostock as bs 7 | import pandas as pd 8 | import click 9 | from datetime import datetime 10 | 11 | today = datetime.now().strftime('%Y-%m-%d') 12 | 13 | 14 | """ 函数参数装饰器 """ 15 | @click.command() 16 | @click.option("--code", default="sh.600000", help="baostock股票/指数代码,如sh.600000") 17 | @click.option("--start", default="2010-01-01", help="开始日期, 格式如:2010-01-01") 18 | @click.option("--end", default=today, help="结束日期, 格式如:2010-01-01") 19 | @click.option("--adj", default="3", help="复权类型(只针对股票):3: 未复权 2:前复权 1:后复权 , 默认1") 20 | def get_data(code, start, end, adj): 21 | lg = bs.login() 22 | print('login respond error_code:' + lg.error_code) 23 | 
print('login respond error_msg:' + lg.error_msg) 24 | 25 | rs = bs.query_history_k_data_plus(code, 'date,open,high,low,close,volume', start_date=start, end_date=end, 26 | frequency='d', adjustflag=adj) 27 | print('query_history_k_data_plus respond error_code:' + rs.error_code) 28 | print('query_history_k_data_plus respond error_msg:' + rs.error_msg) 29 | # 打印结果集 30 | data_list = [] 31 | while (rs.error_code == '0') & rs.next(): 32 | # 获取一条记录,将记录合并在一起 33 | data_list.append(rs.get_row_data()) 34 | 35 | data = pd.DataFrame(data_list, columns=rs.fields) 36 | 37 | columns = ['date', 'open', 'high', 'low', 'close', 'volume'] 38 | data.to_csv("./datas/bs_{0}.csv".format(code), 39 | sep=',', index=False, columns=columns) 40 | 41 | 42 | if __name__ == "__main__": 43 | get_data() 44 | -------------------------------------------------------------------------------- /bs_to_db.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8; py-indent-offset:4 -*- 3 | 4 | """ 从baostock获取daily数据到datas目录下的csv文件当中,文件名如:bs_sh.000001.csv """ 5 | """ python3 bs_to_csv.py --code sh.600600 """ 6 | import baostock as bs 7 | import pandas as pd 8 | import click 9 | from datetime import datetime 10 | import os 11 | import json 12 | import argparse 13 | import requests 14 | import time 15 | import tdxhy 16 | 17 | # 判断是否 是显示,还是重新下载数据计算 18 | # 数据每天只需要下载一次 19 | 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument("--offset", help="开始执行的位置",default='0') 22 | args = parser.parse_args() 23 | 24 | offset = args.offset 25 | 26 | 27 | 28 | today = datetime.now().strftime('%Y-%m-%d') 29 | 30 | lg = bs.login() 31 | """ 函数参数装饰器 32 | @click.command() 33 | @click.option("--name", default="浦发银行", help="baostock股票/指数代码,如浦发银行") 34 | @click.option("--code", default="sh.600000", help="baostock股票/指数代码,如sh.600000") 35 | @click.option("--start", default="2010-01-01", help="开始日期, 格式如:2010-01-01") 36 | @click.option("--end", default=today, help="结束日期, 格式如:2010-01-01") 37 | @click.option("--adj", default="3", help="复权类型(只针对股票):3: 未复权 2:前复权 1:后复权 , 默认1") 38 | """ 39 | def get_data(name,code, start, end, adj): 40 | 41 | rs = bs.query_history_k_data_plus(code, 'date,open,high,low,close,volume,turn', start_date=start, end_date=end, 42 | frequency='d', adjustflag=adj) 43 | 44 | #print('query_history_k_data_plus respond error_code:' + rs.error_code) 45 | #print('query_history_k_data_plus respond error_msg:' + rs.error_msg) 46 | # 打印结果集 47 | columns = ['date', 'open', 'high', 'low', 'close', 'volume','turn'] 48 | data_list = [] 49 | while (rs.error_code == '0') & rs.next(): 50 | # 获取一条记录,将记录合并在一起 51 | data_list.append(rs.get_row_data()) 52 | 53 | 54 | def get_data_post_server(name,code,start,end,adj): 55 | rs = bs.query_history_k_data_plus(code, 'date,open,high,low,close,volume,code,turn', start_date=start, 56 | frequency='d' ) 57 | datas = rs.get_data() 58 | if len(datas) < 2: 59 | return 60 | print(len(datas),datas.date[datas.index[-1]]) 61 | #datas['name'] = name 62 | #datas = datas.set_index('date') 63 | datas = datas.to_json(orient='table') 64 | jsondatas = json.loads(datas)['data'] 65 | for d in jsondatas: 66 | d['name'] = name 67 | del d['index'] 68 | #print(jsondatas) 69 | #resp = requests.post("http://127.0.0.1:1337/dayks/updates",json=jsondatas) 70 | #print(resp.content) 71 | try: 72 | requests.post("http://klang.zhanluejia.net.cn/dayks/updates",json=jsondatas,timeout=2000) 73 | except: 74 | time.sleep(2) 75 | 
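# Added note: single retry after a 2-second pause; this second post is not
# wrapped in try/except, so a repeated failure raises and aborts the run.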
requests.post("http://klang.zhanluejia.net.cn/dayks/updates",json=jsondatas,timeout=2000) 76 | 77 | #获取股票的名字和代码号 78 | def getstockinfo(stock): 79 | #2019-12-09,sz.002094,青岛金王,化工,申万一级行业 80 | # 时间,股票代码,名称,类别 81 | d,code,name,skip1,skip2,tdxbk,tdxgn= stock.split(',') 82 | return code,name,tdxbk,tdxgn 83 | 84 | 85 | 86 | 87 | # 判断是否已经下载了股票分类代码 88 | filename_sl = os.path.expanduser("~/.klang_stock_list.csv") 89 | 90 | if not os.path.exists(filename_sl): 91 | print('正在下载股票库列表....') 92 | os.system('python3 bs_get_industry_check.py') 93 | 94 | stocklist = open(filename_sl).readlines() 95 | stocklist = stocklist[1+int(offset):] #删除第一行 96 | 97 | 98 | if __name__ == "__main__": 99 | for stock in stocklist: 100 | code ,name,tdxbk,tdxgn = getstockinfo(stock) 101 | print('正在获取',name,'代码',code) 102 | get_data_post_server(name,code,"2021-06-01",today,"3") 103 | -------------------------------------------------------------------------------- /bs_zz500s.py: -------------------------------------------------------------------------------- 1 | import baostock as bs 2 | import pandas as pd 3 | 4 | # 登陆系统 5 | lg = bs.login() 6 | # 显示登陆返回信息 7 | print('login respond error_code:'+lg.error_code) 8 | print('login respond error_msg:'+lg.error_msg) 9 | 10 | # 获取中证500成分股 11 | rs = bs.query_zz500_stocks() 12 | print('query_zz500 error_code:'+rs.error_code) 13 | print('query_zz500 error_msg:'+rs.error_msg) 14 | 15 | # 打印结果集 16 | zz500_stocks = [] 17 | while (rs.error_code == '0') & rs.next(): 18 | # 获取一条记录,将记录合并在一起 19 | zz500_stocks.append(rs.get_row_data()) 20 | result = pd.DataFrame(zz500_stocks, columns=rs.fields) 21 | # 结果集输出到csv文件 22 | result.to_csv("./datas/bs_zz500_stocks.csv", encoding="utf-8", index=False) 23 | print(result) 24 | 25 | # 登出系统 26 | bs.logout() 27 | 28 | -------------------------------------------------------------------------------- /btr1.py: -------------------------------------------------------------------------------- 1 | from __future__ import (absolute_import, division, print_function, 2 | unicode_literals) 3 | 4 | import datetime # For datetime objects 5 | import os.path # To manage paths 6 | import sys # To find out the script name (in argv[0]) 7 | 8 | # Import the backtrader platform 9 | import backtrader as bt 10 | 11 | 12 | # Create a Stratey 13 | class TestStrategy(bt.Strategy): 14 | 15 | def log(self, txt, dt=None): 16 | ''' Logging function fot this strategy''' 17 | dt = dt or self.datas[0].datetime.date(0) 18 | print('%s, %s' % (dt.isoformat(), txt)) 19 | 20 | def __init__(self): 21 | # Keep a reference to the "close" line in the data[0] dataseries 22 | self.dataclose = self.datas[0].close 23 | 24 | # To keep track of pending orders 25 | self.order = None 26 | 27 | def notify_order(self, order): 28 | if order.status in [order.Submitted, order.Accepted]: 29 | # Buy/Sell order submitted/accepted to/by broker - Nothing to do 30 | return 31 | 32 | # Check if an order has been completed 33 | # Attention: broker could reject order if not enough cash 34 | if order.status in [order.Completed]: 35 | if order.isbuy(): 36 | self.log('BUY EXECUTED, %.2f' % order.executed.price) 37 | elif order.issell(): 38 | self.log('SELL EXECUTED, %.2f' % order.executed.price) 39 | 40 | self.bar_executed = len(self) 41 | 42 | elif order.status in [order.Canceled, order.Margin, order.Rejected]: 43 | self.log('Order Canceled/Margin/Rejected') 44 | 45 | # Write down: no pending order 46 | self.order = None 47 | 48 | def next(self): 49 | # Simply log the closing price of the series from the reference 50 | 
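# Added note: backtrader indexes data lines relative to "now":
# [0] is the current bar, [-1] the previous bar, [-2] two bars back.
# The buy rule below fires after two consecutive lower closes.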
self.log('Close, %.2f' % self.dataclose[0]) 51 | 52 | # Check if an order is pending ... if yes, we cannot send a 2nd one 53 | if self.order: 54 | return 55 | 56 | # Check if we are in the market 57 | if not self.position: 58 | 59 | # Not yet ... we MIGHT BUY if ... 60 | if self.dataclose[0] < self.dataclose[-1]: 61 | # current close less than previous close 62 | 63 | if self.dataclose[-1] < self.dataclose[-2]: 64 | # previous close less than the previous close 65 | 66 | # BUY, BUY, BUY!!! (with default parameters) 67 | self.log('BUY CREATE, %.2f' % self.dataclose[0]) 68 | 69 | # Keep track of the created order to avoid a 2nd order 70 | self.order = self.buy() 71 | 72 | else: 73 | 74 | # Already in the market ... we might sell 75 | if len(self) >= (self.bar_executed + 5): 76 | # SELL, SELL, SELL!!! (with all possible default parameters) 77 | self.log('SELL CREATE, %.2f' % self.dataclose[0]) 78 | 79 | # Keep track of the created order to avoid a 2nd order 80 | self.order = self.sell() 81 | 82 | 83 | if __name__ == '__main__': 84 | # Create a cerebro entity 85 | cerebro = bt.Cerebro() 86 | 87 | # Add a strategy 88 | cerebro.addstrategy(TestStrategy) 89 | 90 | # Datas are in a subfolder of the samples. Need to find where the script is 91 | # because it could have been called from anywhere 92 | modpath = os.path.dirname(os.path.abspath(sys.argv[0])) 93 | datapath = os.path.join(modpath, './datas/orcl-1995-2014.txt') 94 | 95 | # Create a Data Feed 96 | data = bt.feeds.YahooFinanceCSVData( 97 | dataname=datapath, 98 | # Do not pass values before this date 99 | fromdate=datetime.datetime(2000, 1, 1), 100 | # Do not pass values before this date 101 | todate=datetime.datetime(2000, 12, 31), 102 | # Do not pass values after this date 103 | reverse=False) 104 | 105 | # Add the Data Feed to Cerebro 106 | cerebro.adddata(data) 107 | 108 | # Set our desired cash start 109 | cerebro.broker.setcash(100000.0) 110 | 111 | # Print out the starting conditions 112 | print('Starting Portfolio Value: %.2f' % cerebro.broker.getvalue()) 113 | 114 | # Run over everything 115 | cerebro.run() 116 | 117 | # Print out the final result 118 | print('Final Portfolio Value: %.2f' % cerebro.broker.getvalue()) 119 | 120 | -------------------------------------------------------------------------------- /calc.py: -------------------------------------------------------------------------------- 1 | import dbmongo 2 | import numpy as np 3 | 4 | 5 | startlist=[] 6 | finallist=[] 7 | 8 | all = dbmongo.get_all_backtest('2019-10-08','2020-04-22') 9 | for a in all: 10 | startlist.append(a['startvalue']) 11 | finallist.append(a['finalvalue']) 12 | 13 | print(np.array(startlist).mean(),np.array(finallist).mean()) 14 | print(np.array(startlist).sum(),np.array(finallist).sum()) 15 | -------------------------------------------------------------------------------- /common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asmcos/quantrader/0a6dad21b4225cefc8cd0633b578346bb49ef6fe/common/__init__.py -------------------------------------------------------------------------------- /common/common.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import re 3 | import os 4 | import pandas as pd 5 | 6 | import argparse 7 | import time 8 | from datetime import datetime 9 | 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument("--offset", help="开始执行的位置",default='0') 12 | parser.add_argument("--display", 
help="显示",default='0') 13 | parser.add_argument("--endday", help="日期",default='0') 14 | parser.add_argument("--start", help="日期",default='2021-01-01') 15 | parser.add_argument("--download", help="下载",default='1') 16 | 17 | args = parser.parse_known_args() 18 | if len(args) > 1: 19 | args = args[0] 20 | offset = args.offset 21 | download = args.download 22 | endday = args.endday 23 | start = args.start 24 | 25 | today = datetime.now() 26 | if endday== '0': 27 | endday = str(today.year) + str(today.month) + str(today.day) 28 | 29 | end = endday 30 | 31 | def save_file(filename,content): 32 | f = open(filename,"w+") 33 | f.write(content) 34 | f.close() 35 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | jqauth = { 2 | 'name':'', 3 | 'passwd':'', 4 | } 5 | 6 | tspro = { 7 | 'passwd':'' 8 | } 9 | -------------------------------------------------------------------------------- /doctorxiong_fund.py: -------------------------------------------------------------------------------- 1 | # 基金接口 https://www.doctorxiong.club/api 2 | 3 | import requests 4 | import pandas as pd 5 | import numpy as np 6 | 7 | def get_all_fund(): 8 | resp = requests.get("https://api.doctorxiong.club/v1/fund/all") 9 | df = pd.DataFrame(resp.json()['data']) 10 | print(len(df)) 11 | print(df) 12 | df.to_csv("./datas/fund_list.csv") 13 | 14 | def get_hot_fund(): 15 | resp = requests.get("https://api.doctorxiong.club/v1/fund/hot") 16 | df = pd.DataFrame(resp.json()['data']) 17 | print(len(df)) 18 | print(df) 19 | df.to_csv("./datas/fund_hot.csv") 20 | 21 | def get_fund_stock(code): 22 | url = "https://api.doctorxiong.club/v1/fund/position?code=%d"%code 23 | resp = requests.get(url) 24 | print(resp.json()) 25 | 26 | #get_fund_stock(970016) 27 | 28 | get_all_fund() 29 | 30 | -------------------------------------------------------------------------------- /dragonphoenix.py: -------------------------------------------------------------------------------- 1 | #Dragon phoenix 龙凤呈祥 2 | 3 | from common.framework import * 4 | import pandas as pd 5 | 6 | 7 | filename = './datas/stock_dragon.html' 8 | 9 | #金叉 10 | def CrossUp(a,b): 11 | if a[-1] >= b[-1] and a[-2] b[-2]: 18 | return True 19 | 20 | return False 21 | 22 | 23 | """ 24 | RSV:=(CLOSE-LLV(LOW,N))/(HHV(HIGH,N)-LLV(LOW,N))*100; 25 | K:SMA(RSV,M1,1); 26 | D:SMA(K,M2,1); 27 | """ 28 | 29 | period = 5 30 | #大瀑布列表 31 | wflist = [] 32 | #d,code,name,skip1,skip2 33 | wfstocklist = [] 34 | def mn(datas,code,name): 35 | 36 | if len(datas) < 10: 37 | return [] 38 | mnlist = [] 39 | closes = datas['close'] 40 | dates = datas['date'] 41 | distance = 0 42 | prev_close = -1 43 | for i in range(period,len(dates)-period): 44 | m = talib.MAX(closes[i-period:i+period],len(closes[i-period:i+period])) 45 | n = talib.MIN(closes[i-period:i+period],len(closes[i-period:i+period])) #d 是最近时间,所以D不能往后太多 46 | m1 = m.values[-1] 47 | n1 = n.values[-1] 48 | if float(m1) == float(closes[i]): 49 | print("max",dates[i],closes[i],i-distance) 50 | mnlist.append([1,datas.values[i],float(closes.values[i]),i]) 51 | distance = i 52 | prev_close = closes[i] 53 | if float(n1) == float(closes[i]): 54 | print("min",dates[i],closes[i],i-distance) 55 | if (i - distance) > 20 and closes[i] < prev_close: 56 | print(OKBLUE,"bigwaterfall",code,name,dates[i],i-distance,ENDC) 57 | wflist.append([code,name,dates[i]]) 58 | wfstocklist.append("1,"+code+","+name+",skip1,skip2") 59 | 
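# Added note: a local low arriving more than 20 bars after the previous
# pivot and below its close is flagged as a "waterfall" (prolonged slide);
# those codes are then re-scanned on 60-minute bars by dp() for MACD/KD
# golden crosses before being reported as "dragon" candidates.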
mnlist.append([0,datas.values[i],float(closes.values[i]),i]) 60 | distance = i 61 | prev_close = closes[i] 62 | 63 | 64 | return mnlist 65 | 66 | def search_wf(code): 67 | for i in wflist: 68 | if i[0] == code: 69 | return True 70 | return False 71 | 72 | # 搜索瀑布,统计日K 73 | def waterfall(code,name,datas): 74 | mn(datas,code,name) 75 | 76 | dragonlist=[] 77 | #搜索 macd金叉,kd 金叉,统计60分钟线 78 | def dp(code,name,datas): 79 | print(code,name) 80 | 81 | 82 | df1 = datas #股票数据 83 | # 数据太少 macd 计算不准确 84 | if len(datas) < 50: 85 | return 86 | 87 | # macd = macd * 2 88 | # 21,89,13 89 | df1['diff'], df1['dea'], df1['macd'] = talib.MACD(df1['close'], fastperiod=21, slowperiod=89, signalperiod=13) 90 | 91 | 92 | # 55,13,8 93 | df1['K'],df1['D'] = KD(df1['high'], df1['low'], df1['close'], fastk=55, slowk=13, slowd=8) 94 | 95 | distance = 0 96 | for i in range(10,len(datas)): 97 | ma = CrossUp(df1['diff'].values[:i],df1['dea'].values[:i]) 98 | kd = CrossUp(df1['K'].values[:i],df1['D'].values[:i]) 99 | if ma or kd: 100 | if ma and kd : distance = 0 101 | if distance < 10: 102 | print(OKGREEN,ma,kd,datas['time'].iloc[i],distance,ENDC) 103 | if search_wf(code) and df1['K'].values[i] <= 60: 104 | print(OKGREEN,"dragon",name,code,datas['time'].iloc[i],ENDC) 105 | dragonlist.append([code,name,datas['time'].iloc[i]]) 106 | distance = 0 107 | 108 | distance += 1 109 | 110 | def create_clickable_code(code): 111 | code = code.replace(".","") 112 | url_template= '''{code}'''.format(code=code) 113 | return url_template 114 | 115 | def save(): 116 | 117 | df = pd.DataFrame(dragonlist,columns=['code','name','date']) 118 | df['code'] = df['code'].apply(create_clickable_code) 119 | content ='\n龙凤呈祥\n' 120 | content += df.to_html(escape=False,float_format='%.2f') 121 | 122 | print("save file",filename) 123 | save_file(filename,content) 124 | 125 | 126 | if __name__ == "__main__": 127 | init_stock_list() 128 | loop_all(waterfall) 129 | loop_60all(dp,wfstocklist) 130 | save() 131 | -------------------------------------------------------------------------------- /features_lstm.py: -------------------------------------------------------------------------------- 1 | from Klang import (Kl,Klang_init, 2 | C,O,V,H,L, CLOSE,HIGH,DATETIME, 3 | MA,CROSS,BARSLAST,HHV,LLV,COUNT,BARSLASTFIND, 4 | MAX,MIN,MACD) 5 | from Klang.common import end as today 6 | import talib 7 | 8 | import sys 9 | import linecache 10 | import pandas as pd 11 | import requests,time 12 | import json 13 | 14 | def PrintException(): 15 | exc_type, exc_obj, tb = sys.exc_info() 16 | f = tb.tb_frame 17 | lineno = tb.tb_lineno 18 | filename = f.f_code.co_filename 19 | linecache.checkcache(filename) 20 | line = linecache.getline(filename, lineno, f.f_globals) 21 | print ('EXCEPTION IN ({}, LINE {} "{}"): {}'.format(filename, lineno, line.strip(), exc_obj)) 22 | 23 | all_dict = {} 24 | target_day = 10 #收盘价之后的几天内最高价格,判断是否有涨价空间 25 | hostname = "http://klang.org.cn" 26 | hostname = "http://klang.zhanluejia.net.cn" 27 | 28 | 29 | def save_dict_tofile(datas,filename): 30 | content = json.dumps(datas) 31 | f = open(filename,"w+") 32 | f.write(content) 33 | f.close() 34 | 35 | def get_features(code,end): 36 | try: 37 | json = requests.get(hostname+"/features", 38 | params={"code":code,"end":end,"limit":200},timeout=1000).json() 39 | except: 40 | time.sleep(2) 41 | json = requests.get(hostname+"/features", 42 | params={"code":code,"end":end,"limit":200},timeout=1000).json() 43 | 44 | df = pd.json_normalize(json) 45 | if len(df) < 1: 46 | return [] 47 | df = 
df.drop(columns=['_id','codedate','id']) 48 | datas = df.sort_values(by="date",ascending=True) 49 | 50 | return datas 51 | 52 | def main_loop(start,endday): 53 | global all_dict 54 | 55 | #for df in Kl.df_all[:100]: 56 | for df in Kl.df_all: 57 | 58 | 59 | Kl.code(df["code"]) 60 | 61 | if start is None: 62 | Kl.date(end=endday) 63 | else: 64 | Kl.date(start=start,end=endday) 65 | try: 66 | if len(Kl.currentdf['df']) <= target_day: 67 | continue 68 | 69 | allDate = DATETIME.data 70 | # 如果target_day = N,表示,最后的N 天数据不能作为训练或者测试数据 71 | # 我们会计算这个 N 天的最大值作为目标值,计算涨幅空间 72 | 73 | featureday = allDate[-target_day] 74 | datas = get_features(df['code'],featureday) 75 | print(df['code'],df['name']) 76 | datas = datas[(datas['date'] >= allDate[0]) & (datas['date'] < featureday)] 77 | 78 | max_target = talib.MAX(C.data,target_day) 79 | rise_target = (max_target[target_day:].values / C.data[:-target_day].values - 1 ) * 100 80 | 81 | datas['oc'] = (O.data[:-target_day].values / C.data[:-target_day].values - 1)*100 82 | datas['close'] = (C.data[:-target_day] / C.data[0]).values 83 | 84 | datas['target'] = rise_target 85 | all_dict[df['code']] = datas.to_json() 86 | except KeyboardInterrupt: 87 | break 88 | except: 89 | PrintException() 90 | 91 | 92 | fields = [ 93 | 'code', 'date', 'dea', 'diff', 'ma10', 'ma120', 'ma20', 'ma30', 'ma5', 94 | 'ma60', 'macd', 'name', 'rise', 'risevol','oc','close', 95 | 'target'] 96 | 97 | Klang_init() 98 | 99 | main_loop(start=None,endday='2021-12-01') 100 | 101 | save_dict_tofile(all_dict,'lstm_train'+today+'.csv') 102 | 103 | -------------------------------------------------------------------------------- /features_tree.py: -------------------------------------------------------------------------------- 1 | from Klang import (Kl,Klang_init, 2 | C,O,V,H,L, CLOSE,HIGH,DATETIME, 3 | MA,CROSS,BARSLAST,HHV,LLV,COUNT,BARSLASTFIND, 4 | MAX,MIN,MACD) 5 | from Klang.common import end as today 6 | import talib 7 | 8 | import sys 9 | import linecache 10 | import pandas as pd 11 | import requests,time 12 | 13 | def PrintException(): 14 | exc_type, exc_obj, tb = sys.exc_info() 15 | f = tb.tb_frame 16 | lineno = tb.tb_lineno 17 | filename = f.f_code.co_filename 18 | linecache.checkcache(filename) 19 | line = linecache.getline(filename, lineno, f.f_globals) 20 | print ('EXCEPTION IN ({}, LINE {} "{}"): {}'.format(filename, lineno, line.strip(), exc_obj)) 21 | 22 | all_list = [] 23 | target_day = 10 #收盘价之后的几天内最高价格,判断是否有涨价空间 24 | hostname = "http://klang.org.cn" 25 | hostname = "http://klang.zhanluejia.net.cn" 26 | 27 | def get_features(code,end): 28 | try: 29 | json = requests.get(hostname+"/features", 30 | params={"code":code,"end":end,"limit":200},timeout=1000).json() 31 | except: 32 | time.sleep(2) 33 | json = requests.get(hostname+"/features", 34 | params={"code":code,"end":end,"limit":200},timeout=1000).json() 35 | 36 | df = pd.json_normalize(json) 37 | if len(df) < 1: 38 | return [] 39 | df = df.drop(columns=['_id','codedate','id']) 40 | datas = df.sort_values(by="date",ascending=True) 41 | 42 | return datas 43 | 44 | def main_loop(start,endday): 45 | global all_list 46 | 47 | #for df in Kl.df_all[:1000]: 48 | for df in Kl.df_all: 49 | 50 | Kl.code(df["code"]) 51 | 52 | if start is None: 53 | Kl.date(end=endday) 54 | else: 55 | Kl.date(start=start,end=endday) 56 | try: 57 | if len(Kl.currentdf['df']) <= target_day: 58 | continue 59 | 60 | allDate = DATETIME.data 61 | # 如果target_day = N,表示,最后的N 天数据不能作为训练或者测试数据 62 | # 我们会计算这个 N 天的最大值作为目标值,计算涨幅空间 63 | 64 | featureday = 
allDate[-target_day] 65 | datas = get_features(df['code'],featureday) 66 | print(df['code'],df['name']) 67 | datas = datas[(datas['date'] >= allDate[0]) & (datas['date'] < featureday)] 68 | #print(datas.date,len(datas),C.data[:-target_day]) 69 | #print(pd.DataFrame({"max":talib.MAX(C.data,target_day)[target_day:].values,"close":C.data[:-target_day].values})) 70 | # 计算涨幅空间 71 | max_target = talib.MAX(C.data,target_day) 72 | rise_target = (max_target[target_day:].values / C.data[:-target_day].values - 1 ) * 100 73 | datas['oc'] = (O.data[:-target_day].values / C.data[:-target_day].values - 1)*100 74 | datas['target'] = rise_target 75 | for i in datas.values.tolist(): 76 | all_list.append(i) 77 | except KeyboardInterrupt: 78 | break 79 | except: 80 | PrintException() 81 | 82 | 83 | fields = [ 84 | 'code', 'date', 'dea', 'diff', 'ma10', 'ma120', 'ma20', 'ma30', 'ma5', 85 | 'ma60', 'macd', 'name', 'rise', 'risevol','oc', 86 | 'target'] 87 | 88 | Klang_init() 89 | 90 | main_loop(start=None,endday='2021-10-01') 91 | df = pd.DataFrame(all_list,columns=fields) 92 | df.to_csv('transverse_train'+today+'.csv',index=False) 93 | 94 | 95 | """ 96 | all_list = [] 97 | pred_data = 1 98 | main_loop(start='2021-07-15',endday=today) 99 | df = pd.DataFrame(all_list,columns=fields) 100 | df.to_csv('transverse_pred'+today+'.csv',index=False) 101 | """ 102 | -------------------------------------------------------------------------------- /fund_stock_count.py: -------------------------------------------------------------------------------- 1 | #统计每只股票的基金数据 2 | import os 3 | import pandas as pd 4 | 5 | offset = 0 6 | result_list = [] 7 | def create_clickable_code(code): 8 | code = code.split(".")[1] 9 | url_template= '''{code}'''.format(code=code) 10 | return url_template 11 | 12 | 13 | def get_stock_count(code,name,industry): 14 | p = os.popen("cd datas/fund ; grep '"+name+"' * | wc -l") 15 | count = p.read() 16 | #print(code,name,count.strip('" \n')) 17 | result_list.append([code,name,int(count.strip('" \n')),industry]) 18 | #获取股票的名字和代码号 19 | def getstockinfo(stock): 20 | #2019-12-09,sz.002094,青岛金王,化工,申万一级行业 21 | # 时间,股票代码,名称,类别 22 | d,code,name,skip1,skip2 = stock.split(',') 23 | return code,name,skip1 24 | 25 | 26 | # 判断是否已经下载了股票分类代码 27 | 28 | if not os.path.exists('./datas/stock_industry_check.csv'): 29 | print('正在下载股票库列表....') 30 | os.system('python3 bs_get_industry_check.py') 31 | 32 | stocklist = open('./datas/stock_industry_check.csv').readlines() 33 | stocklist = stocklist[1+int(offset):] #删除第一行 34 | 35 | for stock in stocklist: 36 | code,name,industry = getstockinfo(stock) 37 | get_stock_count(code,name,industry) 38 | 39 | #print(result_list) 40 | 41 | df = pd.DataFrame(result_list,columns=['code','name','fund','行业']) 42 | df['code']=df['code'].apply(create_clickable_code) 43 | df = df.sort_values(by="fund",ascending=False) 44 | print(df.to_html(escape=False)) 45 | 46 | #df = pd.DataFrame(result_list) 47 | #df.to_csv("./datas/stockcount.csv") 48 | 49 | -------------------------------------------------------------------------------- /get_day_all_stock_info.py: -------------------------------------------------------------------------------- 1 | # 2 | # exec script 3 | import os 4 | import sys 5 | import signal 6 | import threading,time 7 | import queue 8 | import pandas as pd 9 | from datetime import datetime 10 | 11 | #################### 12 | #1. 
获取股票数据 13 | #################### 14 | today = datetime.now() 15 | endday = str(today.year) + str(today.month) + str(today.day) 16 | 17 | code = 'sh.600600' 18 | 19 | HEADER = '\033[95m' 20 | OKBLUE = '\033[94m' 21 | OKGREEN = '\033[92m' 22 | WARNING = '\033[93m' 23 | FAIL = '\033[91m' 24 | ENDC = '\033[0m' 25 | BOLD = '\033[1m' 26 | UNDERLINE = '\033[4m' 27 | 28 | all_up_down_list=[] 29 | 30 | def handler(signum, frame): 31 | print("是不是想让我退出啊") 32 | make_save_data() 33 | sys.exit() 34 | 35 | def make_save_data(): 36 | df = pd.DataFrame(all_up_down_list, columns = ['昨日收盘','前日收盘','差价','百分比','名称','代码']) 37 | df.to_csv("./datas/stock_up_down_{0}.csv".format(endday),) 38 | print(df) 39 | 40 | 41 | 42 | 43 | def upordown(code,name): 44 | filename = './datas/bs_'+code+'.csv' 45 | df = pd.read_csv(filename) 46 | if len(df) < 2: 47 | return 48 | lastday = df.index[-1] 49 | lastday2 = df.index[-2] 50 | closeld = df.close[lastday] 51 | closeld2 = df.close[lastday2] 52 | print(OKBLUE) 53 | print("%.2f %.2f %.2f %.3f %s %s" %(closeld,closeld2, 54 | closeld-closeld2, 55 | (closeld-closeld2)/closeld2, 56 | name,code) 57 | ) 58 | print(ENDC) 59 | all_up_down_list.append([ 60 | closeld,closeld2, 61 | closeld-closeld2, 62 | (closeld-closeld2)/closeld2, 63 | name,code 64 | ]) 65 | 66 | #获取最新数据 67 | def get_code_cvs(code): 68 | os.system('rm -f ./datas/bs_' + code+'.csv') 69 | y1 = os.system('python3 bs_to_csv.py --code '+code+' --start 2020-10-01') 70 | if y1 == 2 : #ctrl+c 71 | print(y1) 72 | handler("1","get_code_cvs") 73 | #sys.exit() 74 | 75 | #获取股票的名字和代码号 76 | def getstockinfo(stock): 77 | #2019-12-09,sz.002094,青岛金王,化工,申万一级行业 78 | # 时间,股票代码,名称,类别 79 | d,code,name,skip1,skip2 = stock.split(',') 80 | #code = code.split('.')[1] bs not need the line 81 | return code,name 82 | 83 | #获取所有的股票并下载数据 84 | def get_data_thread(n): 85 | for stock in stocklist: 86 | code ,name = getstockinfo(stock) 87 | print('正在获取',name,'代码',code) 88 | get_code_cvs(code) 89 | q.put((code,name)) 90 | q.task_done() 91 | 92 | signal.signal(signal.SIGINT, handler) 93 | signal.signal(signal.SIGHUP, handler) 94 | signal.signal(signal.SIGTERM, handler) 95 | q = queue.Queue() 96 | 97 | if len(sys.argv) > 1: 98 | code = sys.argv[1] 99 | 100 | if not os.path.exists('./datas/stock_industry_check.csv'): 101 | print('正在下载股票库列表....') 102 | os.system('python3 bs_get_industry_check.py') 103 | 104 | stocklist = open('./datas/stock_industry_check.csv').readlines() 105 | stocklist = stocklist[1:] #删除第一行 106 | 107 | 108 | threading.Thread(target=get_data_thread,args=(1,)).start() 109 | 110 | 111 | while True: 112 | code,name = q.get() 113 | print('正在分析',name,'代码',code) 114 | upordown(code,name) 115 | 116 | make_save_data() 117 | 118 | -------------------------------------------------------------------------------- /get_day_all_stock_infov3.py: -------------------------------------------------------------------------------- 1 | # 2 | # exec script 3 | # 计算股票昨日涨跌 前50,和100日之前的涨跌对比 4 | # tushare 接口 5 | import os 6 | import sys 7 | import signal 8 | import threading,time 9 | import queue 10 | import pandas as pd 11 | from datetime import datetime 12 | import tushare as ts 13 | import config 14 | import argparse 15 | 16 | # 判断是否 是显示,还是重新下载数据计算 17 | # 数据每天只需要下载一次 18 | 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument("--display", help="显示本地数据",default='0') 21 | args = parser.parse_args() 22 | 23 | display = args.display 24 | 25 | #################### 26 | #1. 
获取股票数据 27 | #################### 28 | tspro = ts.pro_api(config.tspro['passwd']) 29 | today = datetime.now() 30 | endday = str(today.year) + str(today.month) + str(today.day) 31 | 32 | # print 打印color 表 33 | HEADER = '\033[95m' 34 | OKBLUE = '\033[94m' 35 | OKGREEN = '\033[92m' 36 | WARNING = '\033[93m' 37 | FAIL = '\033[91m' 38 | ENDC = '\033[0m' 39 | BOLD = '\033[1m' 40 | UNDERLINE = '\033[4m' 41 | 42 | #所有的股票表计算后的数据表 43 | all_up_down_list=[] 44 | 45 | # 处理异常,在出现异常的时候存盘 46 | def handler(signum, frame): 47 | print("是不是想让我退出啊") 48 | make_save_data() 49 | sys.exit() 50 | 51 | #存盘并且打印 52 | def make_save_data(): 53 | df = pd.DataFrame(all_up_down_list, columns = ['昨日收盘','前日收盘','百日收盘','昨日涨跌','百日涨跌','名称','代码']) 54 | df.to_csv("./datas/stock_up_down_{0}.csv".format(endday),float_format='%.2f',index_label="序号") 55 | 56 | 57 | #仅仅显示 58 | def display_save_data(): 59 | df= pd.read_csv("./datas/stock_up_down_{0}.csv".format(endday)) 60 | 61 | df = df.sort_values(by="昨日涨跌",ascending=False) 62 | print(df.iloc[0:50]) 63 | 64 | df = df.sort_values(by="百日涨跌",ascending=False) 65 | print(df.iloc[0:50]) 66 | 67 | 68 | def upordown(code,name): 69 | #df = ts.get_hist_data(code,start='2020-05-01') 70 | df = tspro.daily(ts_code=code, start_date='20200501') 71 | if len(df) < 2: 72 | return 73 | lastday = df.index[0] 74 | lastday2 = df.index[1] 75 | closeld = float(df.close[lastday]) 76 | closeld2 = float(df.close[lastday2]) 77 | 78 | closeld100 = 0.0 79 | delta100 = 0.0 80 | if len(df) > 99: 81 | closeld100 = float(df.close[df.index[99]]) 82 | delta100 = float((closeld-closeld100) / closeld100 ) * 100.0 83 | 84 | print(OKBLUE) 85 | print("%.2f %.2f %.2f %.2f %.2f %s %s" %(closeld,closeld2, 86 | closeld100, 87 | (closeld-closeld2)/closeld2 * 100.0, 88 | delta100, 89 | name,code) 90 | ) 91 | print(ENDC) 92 | 93 | all_up_down_list.append([ 94 | closeld,closeld2, 95 | closeld100, 96 | (closeld-closeld2)/closeld2 * 100.0, 97 | delta100, 98 | name,code 99 | ]) 100 | 101 | 102 | #获取股票的名字和代码号 103 | def getstockinfo(stock): 104 | #2019-12-09,sz.002094,青岛金王,化工,申万一级行业 105 | # 时间,股票代码,名称,类别 106 | d,code,name,skip1,skip2 = stock.split(',') 107 | 108 | shsz = code.split('.')[0] 109 | code = code.split('.')[1] 110 | if shsz == 'sh': 111 | shsz = '.SH' 112 | if shsz == 'sz': 113 | shsz = '.SZ' 114 | return code+shsz,name 115 | 116 | #获取所有的股票并下载数据 117 | def get_data_thread(n): 118 | for stock in stocklist: 119 | code ,name = getstockinfo(stock) 120 | print('正在获取',name,'代码',code) 121 | q.put((code,name)) 122 | q.task_done() 123 | 124 | 125 | # 126 | # 程序开始,监听信号 127 | # 128 | signal.signal(signal.SIGINT, handler) 129 | signal.signal(signal.SIGHUP, handler) 130 | signal.signal(signal.SIGTERM, handler) 131 | q = queue.Queue() 132 | 133 | # 判断是否已经下载了股票分类代码 134 | 135 | if not os.path.exists('./datas/stock_industry_check.csv'): 136 | print('正在下载股票库列表....') 137 | os.system('python3 bs_get_industry_check.py') 138 | 139 | stocklist = open('./datas/stock_industry_check.csv').readlines() 140 | stocklist = stocklist[1:] #删除第一行 141 | 142 | # 判断是仅仅显示,还是需要下载数据计算 143 | if display == '1': 144 | display_save_data() 145 | else : 146 | threading.Thread(target=get_data_thread,args=(1,)).start() 147 | while True: 148 | code,name = q.get() 149 | print('正在分析',name,'代码',code) 150 | upordown(code,name) 151 | 152 | make_save_data() 153 | 154 | -------------------------------------------------------------------------------- /get_industry_sort.py: -------------------------------------------------------------------------------- 1 | import time 2 | import 
requests 3 | import demjson 4 | import re 5 | import pandas as pd 6 | 7 | url = "http://q.jrjimg.cn/?q=cn|bk|17&n=hqa&c=l&o=pl,d&p=1020&_dc=%d" % int(time.time()*1000) 8 | urlbk = "http://q.jrjimg.cn/?q=cn|s|bk%s&c=m&n=hqa&o=pl,d&p=1020&_dc=%d" 9 | 10 | def getbktop20(): 11 | resp = requests.get(url) 12 | 13 | data = re.findall("var hqa=(.+);",resp.text,re.M|re.S) 14 | if(len(data) > 0): 15 | data = data[0] 16 | jsondata = demjson.decode(data) 17 | 18 | df = pd.DataFrame(jsondata['HqData']) 19 | 20 | bkdf = df.loc[:,[1,2,6,7,8,10,14,16]] 21 | return bkdf 22 | 23 | def create_clickable_code(code): 24 | code = code.replace(".","") 25 | url_template= '''{code}'''.format(code=code) 26 | return url_template 27 | def create_clickable_name(name): 28 | url_template= '''{name}'''.format(name=name) 29 | return url_template 30 | 31 | def create_color_rise(rise): 32 | url_template= '''{rise}'''.format(rise=rise) 33 | return url_template 34 | 35 | 36 | def getonebkinfo(bkcode,bkname): 37 | resp = requests.get(urlbk%(bkcode,int(time.time()*1000))) 38 | data = re.findall("var hqa=(.+);",resp.text,re.M|re.S) 39 | if(len(data) > 0): 40 | data = data[0] 41 | jsondata = demjson.decode(data) 42 | 43 | df = pd.DataFrame(jsondata['HqData']) 44 | df[0] = df[0].apply(create_clickable_code) 45 | df[12] = df[12].apply(create_color_rise) 46 | 47 | print(df.loc[:,[0,1,2,8,12]].to_html(escape=False)) 48 | 49 | bkdf = getbktop20() 50 | print(bkdf.to_html(escape=False)) 51 | 52 | for i in range(len(bkdf)): 53 | bkcode = bkdf[1][i] 54 | bkname = bkdf[2][i] 55 | rise = bkdf[10][i] 56 | print(bkname,rise) 57 | getonebkinfo(bkcode,bkname) 58 | -------------------------------------------------------------------------------- /hk_eastmoney.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import re 3 | import json 4 | import time 5 | 6 | t = str(time.time()) 7 | 8 | def maket(mc): 9 | return {"1":"sh","0":"sz","116":"hk","128":"hk"}.get(str(mc)) 10 | 11 | def divisor1(mc): 12 | return {"1":100,"0":100,"116":1000,"128":1000}.get(str(mc)) 13 | 14 | def divisor(f1): 15 | return {2:100,3:1000}.get(f1,100) 16 | 17 | def replace_market_code(code): 18 | code = code.lower() 19 | code = code.replace("sh", "1.").replace("sz", "0.").replace("hk", "116.").replace("..",".") 20 | return code 21 | 22 | def replace_market_result(result): 23 | div = divisor(result['f1']) 24 | code = maket(result['f13']) + result['f12'] 25 | name = result['f14'] 26 | 27 | price = f"{result['f2'] / div:.2f}" 28 | rise = f"{result['f3'] / div:.2f}" 29 | 30 | return [code,name,price,rise] 31 | 32 | def remake_code(codes): 33 | 34 | # 处理所有代码 35 | result = [replace_market_code(code) for code in codes] 36 | return result 37 | 38 | def remake_result(codes): 39 | 40 | 41 | # 处理所有代码 42 | result = [replace_market_result(code) for code in codes] 43 | return result 44 | 45 | 46 | 47 | 48 | def get_timeline(code): 49 | #code = "116.01822" 50 | #code = "0.300059" 51 | code = replace_market_code(code) 52 | url = "https://push2his.eastmoney.com/api/qt/stock/trends2/get?fields1=f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13&fields2=f51,f52,f53,f54,f55,f56,f57,f58&ut=fa5fd1943c7b386f172d6893dbfba10b&iscr=0&ndays=1&secid=%s&_=%s26" % (code,t) 53 | 54 | resp = requests.get(url) 55 | return resp.text 56 | 57 | 58 | def get_dayk(code): 59 | code = replace_market_code(code) 60 | 61 | url = 
"https://push2his.eastmoney.com/api/qt/stock/kline/get?fields1=f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13&fields2=f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61&beg=20240101&end=20500101&ut=fa5fd1943c7b386f172d6893dbfba10b&rtntype=6&secid=%s&klt=101&fqt=1" %(code) 62 | 63 | try: 64 | resp = requests.get(url,timeout=(5, 30)) 65 | except requests.exceptions.Timeout: 66 | return get_dayk(code) 67 | 68 | data_json = resp.json() 69 | dayks = [] 70 | for i in data_json["data"]['klines']: 71 | d = i.split(",") 72 | dayks.append({"day":d[0],"open":d[1],"close":d[2],"high":d[3], 73 | "low":d[4],"volume":d[5],"rise":d[8]}) 74 | data_json["data"]["dayks"] = dayks 75 | del data_json['data']['klines'] 76 | return data_json 77 | 78 | def get_stock_price_bylist(codelist): 79 | #http://qt.gtimg.cn/r=0.8409869808238q=s_sz000559,s_sz000913,s_sz002048,s_sz002085,s_sz002126,s_sz002284,s_sz002434,s_sz002472,s_sz002488 80 | url = "https://push2.eastmoney.com/api/qt/ulist.np/get" 81 | 82 | codelist = remake_code(codelist) 83 | params = { 84 | # 这里选择了一些常用字段,可根据需求调整 85 | "fields": "f12,f13,f14,f2,f3,f1", 86 | "secids": ",".join(codelist) 87 | } 88 | 89 | response = requests.get(url, params=params) 90 | try : 91 | ret = response.json()['data']['diff'] 92 | except: 93 | ret = [] 94 | 95 | return remake_result(ret) 96 | 97 | def get_stock_code_market(page=1,market=1): 98 | # market 1 上证券 99 | # 0 深圳 m:0+t:6,m:0+t:80 100 | # 128 港股通票 m:128+t:3,m:128+t:4 101 | # 116 普通港股票 m:116+t:3,m:116+t:4 102 | if market == 1: 103 | fs = "m:1+t:2,m:1+t:23" 104 | if market == 0: 105 | fs = "m:0+t:6,m:0+t:80" 106 | if market == 128: 107 | fs = "m:128+t:3,m:128+t:4" 108 | if market == 116: 109 | fs = "m:116+t:3,m:116+t:4" 110 | 111 | url = "https://push2.eastmoney.com/api/qt/clist/get" 112 | params = { 113 | "np": 1, 114 | "fltt": 1, 115 | "invt": 2, 116 | "fs": fs, 117 | "fields": "f12,f13,f14,f1,f2,f4,f3,f152,f5,f6,f7,f15,f18,f16,f17,f10,f8,f9,f23", 118 | "fid": "f12", 119 | "pn": page, # 第1页 120 | "pz": 100, # 每页100条 121 | "po": 0, 122 | "dect": 1, 123 | "ut": "fa5fd1943c7b386f172d6893dbfba10b", 124 | "wbp2u": "|0|0|0|web" 125 | } 126 | 127 | response = requests.get(url, params=params) 128 | data = response.json() 129 | print(data) 130 | 131 | #get_timeline("0.300059") 132 | #get_dayk("0.300059") 133 | #get_stock_price_bylist(["1.600519","0.300059","116.00354"]) 134 | #get_stock_code_market(page=1,market=0) 135 | -------------------------------------------------------------------------------- /hk_qq.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import re 3 | 4 | def remake_code(code): 5 | code = code.lower() 6 | code = code.replace(".","") 7 | return code 8 | def remake_codelist(codelist): 9 | if isinstance(codelist,str): 10 | codelist = codelist.split(",") 11 | 12 | return [remake_code(c) for c in codelist] 13 | 14 | def remake_result(data): 15 | # 定义正则表达式模式 16 | pattern = r'v_s_([a-z]{2}\d+)="\d+~([^~]+)~\d+~([\d.]+)~[-\d.]+~([-\d.]+)~' 17 | 18 | # 使用 findall 方法查找所有匹配项 19 | matches = re.findall(pattern, data) 20 | 21 | results = [] 22 | # 遍历匹配结果并输出 23 | for match in matches: 24 | code = match[0] 25 | name = match[1] 26 | price = match[2] 27 | rise = match[3] 28 | #print(f"代码: {code}, 名称: {name}, 价格: {price},涨跌: {rise}") 29 | results.append([code,name,price,rise]) 30 | return results 31 | def search(keyword): 32 | url = "https://proxy.finance.qq.com/cgi/cgi-bin/smartbox/search?stockFlag=1&fundFlag=1&app=official_website&c=1&query=" + keyword 33 | resp = requests.get(url) 
34 | results=[] 35 | data = resp.json()['stock'][:10] 36 | for d in data: 37 | results.append({"name":d['name'],"code":d['code']}) 38 | return results 39 | 40 | def qqlist(codelist): 41 | codelist = remake_codelist(codelist) 42 | url = "https://qt.gtimg.cn/?q=s_" + ",s_".join(codelist) 43 | 44 | resp = requests.get(url) 45 | result = remake_result(resp.text) 46 | return result 47 | 48 | #qqlist("sh600769,hk00354,sz002714") 49 | #print(search("0001")) 50 | -------------------------------------------------------------------------------- /incon.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asmcos/quantrader/0a6dad21b4225cefc8cd0633b578346bb49ef6fe/incon.dat -------------------------------------------------------------------------------- /joeng.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from datetime import timedelta,date 3 | import numpy as np 4 | from scipy.signal import argrelextrema 5 | import matplotlib.pyplot as plt 6 | import time 7 | 8 | data=requests.get('https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=600104.SHH&outputsize=full&apikey=demo') 9 | data=data.json() 10 | prices_low,prices_high,prices_close=[],[],[] 11 | for i in range(200,1,-1): 12 | d=date.today()-timedelta(i) 13 | d=d.strftime("%Y-%m-%d") 14 | try: 15 | prices_high.append(float(data["Time Series (Daily)"][d]["2. high"])) 16 | prices_low.append(float(data["Time Series (Daily)"][d]["3. low"])) 17 | prices_close.append(float(data["Time Series (Daily)"][d]["4. close"])) 18 | except: 19 | continue 20 | 21 | prices_low=np.array(prices_low) 22 | prices_high=np.array(prices_high) 23 | 24 | 25 | local_min_idx=argrelextrema(prices_low,np.less)[0] 26 | local_max_idx=argrelextrema(prices_high,np.greater)[0] 27 | local_min_idx=np.array(local_min_idx) 28 | local_max_idx=np.array(local_max_idx) 29 | 30 | local_min=[] 31 | local_max=[] 32 | for loc in local_min_idx: 33 | local_min.append(prices_low[loc]) 34 | for loc in local_max_idx: 35 | local_max.append(prices_high[loc]) 36 | local_min=np.array(local_min) 37 | local_max=np.array(local_max) 38 | 39 | for i in range(0,len(local_max)-3): 40 | (m,c),r,_,_,_= np.polyfit(local_max_idx[i:i+3],local_max[i:i+3],1,full=True) 41 | if(m<=3 and m>=-3 and (r[0]<20 and r[0]>-20)): 42 | start=local_max_idx[i+2] 43 | for k in range(start,start+7): 44 | if(k<len(prices_close) and prices_close[k]>(k*m+c)): 45 | plt.figure(figsize=(10,5)) 46 | plt.plot(local_max_idx,m*local_max_idx+c,'m') 47 | plt.plot(prices_close) 48 | plt.plot(k,prices_close[k],'bo') 49 | break 50 | 51 | plt.show() 52 | 53 | -------------------------------------------------------------------------------- /jqfinance.py: -------------------------------------------------------------------------------- 1 | ### python3 btr28.py 2 | ### 源自二八轮动策略 张翼轸 3 | 4 | from __future__ import (absolute_import, division, print_function, 5 | unicode_literals) 6 | 7 | 8 | import datetime 9 | import random 10 | import config 11 | import jqdatasdk as jq 12 | import pandas as pd 13 | import sys 14 | 15 | #默认结束日期是今天 16 | today = datetime.datetime.now() 17 | default_end = "-".join([str(today.year) , str(today.month) , str(today.day)]) 18 | 19 | # 茅台600519,青岛啤酒600600 ,格力 XSHE: 000651.XSHE 20 | code = '600519.XSHG' 21 | if len(sys.argv) > 1: 22 | code = sys.argv[1] 23 | 24 | 25 | def add_roe(df): 26 | 27 | df['roe'] = 0.0 28 | 29 | for i in range(0,len(df)): 30 | df['roe'][i] = df['pb_ratio'][i] / df['pe_ratio'][i] 31 | return df 32 | 
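# Aside: add_roe above fills ROE row by row via chained indexing, which newer
# pandas versions flag with SettingWithCopyWarning. A minimal vectorized sketch
# of the same ROE = PB/PE calculation (illustrative alternative only, nothing
# below depends on it):
def add_roe_vectorized(df):
    df = df.copy()                                # avoid mutating the caller's frame
    df['roe'] = df['pb_ratio'] / df['pe_ratio']   # elementwise PB/PE
    return df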
#通过聚宽网络获取 指数的周数据,并计算 本周和4周前的增长比率 34 | class jqData(): 35 | def __init__(self): 36 | jq.auth(config.jqauth['name'],config.jqauth['passwd']) 37 | 38 | def week(self,stock_code,count=380,end=default_end): 39 | fields=['date','open','high','low','close','volume'] 40 | df = jq.get_bars(stock_code,count,end_dt=end,unit='1w',fields=fields) 41 | df.index=pd.to_datetime(df.date) 42 | df['openinterest']=0 43 | df= df[['open','high','low','close','volume','openinterest']] 44 | return df 45 | 46 | def financeData(self,stock_code): 47 | q = jq.query(jq.valuation.turnover_ratio, 48 | jq.valuation.market_cap, 49 | jq.valuation.pb_ratio, 50 | jq.valuation.pe_ratio, 51 | jq.valuation.pcf_ratio, 52 | jq.indicator.eps 53 | ).filter(jq.valuation.code.in_([stock_code])) 54 | 55 | #ROE = PB/PE 56 | 57 | df = jq.get_fundamentals_continuously(q, end_date=default_end, count=20) 58 | df = add_roe(df) 59 | return df 60 | 61 | def day(self,stock_code,end=default_end): 62 | fields=['open','high','low','close','volume'] 63 | df = jq.get_price(stock_code, count = 200, end_date=end, frequency='daily', fields=fields) 64 | return df 65 | 66 | def remain_count(self): 67 | count=jq.get_query_count() 68 | return count 69 | 70 | def main(): 71 | # if dataset is None, args.data has been given 72 | # 获取数据 73 | data = jqData() 74 | 75 | df = data.financeData( code ) 76 | print(df) 77 | df = data.day( code ) 78 | print(df) 79 | 80 | print(data.remain_count()) 81 | 82 | if __name__ == '__main__': 83 | main() 84 | -------------------------------------------------------------------------------- /klang_bt.py: -------------------------------------------------------------------------------- 1 | from __future__ import (absolute_import, division, print_function, 2 | unicode_literals) 3 | 4 | from Klang import Kl, Klang 5 | 6 | import backtrader as bt 7 | import pandas as pd 8 | import math 9 | 10 | 11 | class LongOnly(bt.Sizer): 12 | params = (('stake', 1),) 13 | 14 | def _getsizing(self, comminfo, cash, data, isbuy): 15 | # buy 1/2 16 | cash = math.floor(cash * 95 / 100) 17 | 18 | if isbuy: 19 | divide = math.floor(cash/data.close[0]) 20 | self.p.stake = divide 21 | return self.p.stake 22 | # Sell situation 23 | position = self.broker.getposition(data) 24 | if not position.size: 25 | return 0 # do not sell if nothing is open 26 | return self.p.stake 27 | 28 | 29 | def PandasData(columns): 30 | lines = () 31 | params = ( 32 | ('datetime', None), 33 | ('open', 'open'), 34 | ('high', 'high'), 35 | ('low', 'low'), 36 | ('close', 'close'), 37 | ('volume', 'vol'), 38 | ('openinterest', None), 39 | ) 40 | 41 | for c in columns: 42 | lines = lines + (c,) 43 | params = params + ((c, -1), ) 44 | 45 | return type('PandasDataFeed', (bt.feeds.PandasData, ), {'lines': lines, 'params': params}) 46 | 47 | # Create a Stratey 48 | 49 | 50 | class KStrategy(bt.Strategy): 51 | 52 | def log(self, txt, dt=None): 53 | ''' Logging function for this strategy''' 54 | dt = dt or self.datas[0].datetime.date(0) 55 | print('%s, %s' % (dt.isoformat(), txt)) 56 | 57 | def __init__(self): 58 | # Keep a reference to the "close" line in the data[0] dataseries 59 | self.dataclose = self.datas[0].close 60 | self.order = None 61 | self.macdhist = bt.ind.MACDHisto(self.data) 62 | print(self.data) 63 | 64 | def notify_order(self, order): 65 | if order.status == order.Completed: 66 | pass 67 | 68 | if not order.alive(): 69 | self.order = None # indicate no order is pending 70 | 71 | if order.status in [order.Submitted, order.Accepted]: 72 | # Buy/Sell order submitted/accepted 
to/by broker - Nothing to do 73 | return 74 | 75 | if order.status in [order.Completed, order.Canceled, order.Margin]: 76 | if order.isbuy(): 77 | self.log( 78 | 'BUY EXECUTED, Price: %.2f, Cost: %.2f, Comm %.2f,value %.2f' % 79 | (order.executed.price, 80 | order.executed.value, 81 | order.executed.comm, self.broker.getvalue())) 82 | 83 | self.buyprice = order.executed.price 84 | self.buycomm = order.executed.comm 85 | else: # Sell 86 | self.log('SELL EXECUTED, Price: %.2f, Cost: %.2f, Comm %.2f,value %.2f' % 87 | (order.executed.price, 88 | order.executed.value, 89 | order.executed.comm, self.broker.getvalue())) 90 | 91 | self.order = None 92 | 93 | def next(self): 94 | # Simply log the closing price of the series from the reference 95 | 96 | d = eval("self.datas[0]."+"digit"+"[0]") 97 | print(d) 98 | 99 | if not self.position: 100 | if self.macdhist > 0: 101 | self.order = self.buy() 102 | else: 103 | if self.macdhist < 0: 104 | self.order = self.sell() 105 | 106 | 107 | def init_btr(): 108 | cerebro = bt.Cerebro(stdstats=False) 109 | 110 | # Add a strategy 111 | cerebro.addstrategy(KStrategy) 112 | 113 | Kl.code("sh.600062") 114 | df = Kl.currentdf['df'] 115 | 116 | df.index = pd.to_datetime(df.datetime) 117 | df['openinterest'] = 0 118 | df = df[['open', 'high', 'low', 'close', 'vol', 'openinterest']] 119 | 120 | df.insert(6, "digit", [x+5.0 for x in range(200)]) 121 | 122 | PandasField = PandasData(["digit"]) 123 | data = PandasField(dataname=df) 124 | 125 | cerebro.adddata(data) 126 | 127 | cerebro.addsizer(LongOnly) 128 | cerebro.broker.setcash(100000.0) 129 | 130 | # 回撤 & 收益率 & 年化收益率 131 | cerebro.addanalyzer(bt.analyzers.DrawDown, _name='drawDown') 132 | cerebro.addanalyzer(bt.analyzers.Returns, _name='returns') 133 | cerebro.addanalyzer(bt.analyzers.AnnualReturn, _name='annualReturn') 134 | 135 | print('成本: %.2f' % cerebro.broker.getvalue()) 136 | # Run over everything 137 | result = cerebro.run() 138 | 139 | print('总剩余: %.2f' % cerebro.broker.getvalue()) 140 | 141 | dfAnnualReturn = pd.DataFrame( 142 | [result[0].analyzers.annualReturn.get_analysis()]).T 143 | dfAnnualReturn.columns = ['年化'] 144 | rnorm100 = result[0].analyzers.returns.get_analysis()['rnorm100'], # 收益率 145 | maxDrawDown = result[0].analyzers.drawDown.get_analysis()[ 146 | 'max']['drawdown'], # 最大回撤 147 | print(f'收益率:{rnorm100}') 148 | print(f'最大回撤:{maxDrawDown}') 149 | print(f'年化收益率:\n{dfAnnualReturn}') 150 | 151 | # Plot the result 152 | cerebro.plot(style='bar') 153 | 154 | 155 | if __name__ == '__main__': 156 | Klang.Klang_init() # 加载所有股票列表 157 | 158 | init_btr() 159 | -------------------------------------------------------------------------------- /klang_msg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import socketio 4 | import json 5 | import os 6 | 7 | uri = "https://klang.org.cn:8099/user" 8 | sio = socketio.Client() 9 | 10 | @sio.event 11 | def connect(): 12 | print("connected! 
") 13 | sio.emit("u_cmd_event",{"content":"UPDATEALL"}) 14 | os._exit(0) 15 | 16 | sio.connect(uri) 17 | 18 | -------------------------------------------------------------------------------- /lstm_attention_predict.py: -------------------------------------------------------------------------------- 1 | # 2 | # keras-2.7.0,tensorflow 2.7.0 3 | # 使用lstm做股票二分类验证 4 | # 5 | 6 | import os 7 | import numpy as np 8 | from matplotlib import pyplot as plt 9 | import requests 10 | import pandas as pd 11 | import talib 12 | import datetime 13 | 14 | from common.framework import save_df_tohtml 15 | 16 | 17 | from tensorflow.keras import Input 18 | from tensorflow.keras.models import Sequential,Model,load_model 19 | from tensorflow.keras.layers import Dense, Dropout, Activation,LSTM,Bidirectional 20 | import tensorflow as tf 21 | import json 22 | from tensorflow.keras.layers import Attention,GlobalMaxPooling1D,Concatenate 23 | 24 | def DisplayOriginalLabel(values): 25 | cnt1 = 0 26 | cnt2 = 0 27 | for i in range(len(values)): 28 | if 1 == values[i] : 29 | cnt1 += 1 30 | else: 31 | cnt2 += 1 32 | 33 | print("origin: %.2f %% " % (100 * cnt1 / (cnt1 + cnt2)),len(values)) 34 | 35 | 36 | 37 | df_all = [] 38 | # 1. 获取数据 39 | def load_data_fromfile(filename): 40 | global df_all 41 | 42 | content = open(filename).read() 43 | df_dict = json.loads(content) 44 | for k in df_dict.keys(): 45 | df = pd.read_json(df_dict.get(k)) 46 | df = df[~df.isin([np.nan, np.inf, -np.inf]).any(1)] 47 | df_all.append(df) 48 | 49 | 50 | load_data_fromfile('lstm_train2021-12-21.csv') 51 | 52 | print(df_all[0].columns) 53 | 54 | 55 | 56 | # 准备预测的数据 57 | # 58 | 59 | sequence_len = 40 60 | prec = 10 #target 百分比 61 | fields = [ 62 | 'ma10', 63 | 'ma120', 'ma20', 'ma30', 'ma5', 'ma60', 'rise', 'risevol', 64 | 'dea', 'diff', 'macd' ,'oc','close'] 65 | 66 | X_train = [] 67 | y_train = [] 68 | X_test = [] 69 | y_test = [] 70 | 71 | def load_data(df, seq_len, ratio=0.9): 72 | 73 | df1 = df[df['date']<'2021-07-15'] 74 | df2 = df[df['date']>'2021-07-16'] 75 | 76 | label1 = df1['target'].values > prec 77 | label2 = df2['target'].values > prec 78 | 79 | datas1 = df1.loc[:,fields] 80 | datas2 = df2.loc[:,fields] 81 | 82 | sequence_length = seq_len 83 | 84 | if len(datas1) <= sequence_length or len(datas2) <= sequence_length: 85 | return 86 | 87 | for index in range(len(datas1) - sequence_length): 88 | X_train.append(datas1[index: index + sequence_length].values) 89 | y_train.append(label1[index+sequence_length-1]) 90 | 91 | for index in range(len(datas2) - sequence_length): 92 | X_test.append(datas2[index: index + sequence_length].values) 93 | y_test.append(label2[index+sequence_length-1]) 94 | 95 | 96 | 97 | for df in df_all[:100]: 98 | load_data(df,sequence_len) 99 | 100 | X_train = np.array(X_train) 101 | X_train = np.reshape(X_train,(X_train.shape[0],X_train.shape[1],len(fields))) 102 | y_train = np.array(y_train) 103 | 104 | X_test = np.array(X_test) 105 | X_test = np.reshape(X_test,(X_test.shape[0],X_test.shape[1],len(fields))) 106 | 107 | 108 | def build_model(): 109 | d = 0.2 110 | model = Sequential() 111 | 112 | # inputs: A 3D tensor with shape `[batch, timesteps, feature]`. 
113 | # 输入的数据格式 是 总尺寸,时间步长,这里是 sequence_len, feature,特征维度 114 | # now model.output_shape == (None, 128) 115 | model.add(LSTM(128, return_sequences=True)) 116 | model.add(Dropout(d)) 117 | 118 | # for subsequent layers, no need to specify the input size: 119 | model.add(LSTM(64, return_sequences=False)) 120 | model.add(Dropout(d)) 121 | 122 | # fully connected layer 123 | model.add(Dense(16,activation='relu')) 124 | # 输入 1 维度 0,1 125 | model.add(Dense(1,activation='sigmoid')) 126 | 127 | lossfn = tf.keras.losses.BinaryCrossentropy( 128 | from_logits=False, 129 | label_smoothing=0.0, 130 | axis=-1, 131 | reduction="auto", 132 | name="binary_crossentropy", 133 | ) 134 | # 二分类 135 | model.compile(optimizer='rmsprop', 136 | loss=lossfn, metrics=['accuracy']) 137 | return model 138 | 139 | time_steps = X_train.shape[1] 140 | input_dim = X_train.shape[2] 141 | 142 | print(time_steps,input_dim) 143 | 144 | def build_model2(): 145 | d = 0.2 146 | 147 | model_input = Input(shape=(time_steps, input_dim)) 148 | x = Bidirectional(LSTM(128, return_sequences=True))(model_input) 149 | x = Dropout(d)(x) 150 | #x = Bidirectional(LSTM(64, return_sequences=False))(x) 151 | #x = Dropout(d)(x) 152 | a = Attention()([x,x]) 153 | out1 = GlobalMaxPooling1D()(x) 154 | out2 = GlobalMaxPooling1D()(a) 155 | merge = Concatenate()([out1,out2]) 156 | x = Dense(16,activation='relu')(merge) 157 | x = Dense(1,activation='sigmoid')(x) 158 | 159 | model = Model(model_input, x) 160 | 161 | lossfn = tf.keras.losses.BinaryCrossentropy( 162 | from_logits=False, 163 | label_smoothing=0.0, 164 | axis=-1, 165 | reduction="auto", 166 | name="binary_crossentropy", 167 | ) 168 | # 二分类 169 | model.compile(optimizer='rmsprop', 170 | loss=lossfn, metrics=['accuracy']) 171 | return model 172 | 173 | model = build_model2() 174 | 175 | log_dir="logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S") 176 | tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1) 177 | 178 | #X = pd.DataFrame(data = X_train, columns = fields) 179 | 180 | model.fit(X_train,y_train,batch_size=200, 181 | epochs=2,callbacks=[tensorboard_callback]) 182 | 183 | y_pred = model.predict(X_test) 184 | 185 | # 对测试集进行预测 186 | # print(tf.greater(y_pred, .5)) 187 | print(y_pred) 188 | 189 | pcnt1 = 0 190 | pcnt2 = 0 191 | for i in range(len(y_pred)): 192 | if y_pred[i][0] < 0.6 : 193 | continue 194 | 195 | if y_test[i] == True : 196 | pcnt1 += 1 197 | else: 198 | pcnt2 += 1 199 | 200 | DisplayOriginalLabel(y_test) 201 | if pcnt1+pcnt2 > 0: 202 | print("Accuracy: %.2f %% " % (100 * pcnt1 / (pcnt1 + pcnt2)),pcnt1 + pcnt2) 203 | 204 | 205 | 206 | -------------------------------------------------------------------------------- /lstm_bin_predict.py: -------------------------------------------------------------------------------- 1 | # 2 | # keras-2.7.0,tensorflow 2.7.0 3 | # 使用lstm做股票二分类验证 4 | # 5 | 6 | 7 | 8 | import os 9 | import numpy as np 10 | from matplotlib import pyplot as plt 11 | import requests 12 | import pandas as pd 13 | import talib 14 | import datetime 15 | 16 | from common.framework import save_df_tohtml 17 | 18 | 19 | 20 | from tensorflow.keras.models import Sequential 21 | from tensorflow.keras.layers import Dense, Dropout, Activation,LSTM 22 | import tensorflow as tf 23 | import json 24 | 25 | def DisplayOriginalLabel(values): 26 | cnt1 = 0 27 | cnt2 = 0 28 | for i in range(len(values)): 29 | if 1 == values[i] : 30 | cnt1 += 1 31 | else: 32 | cnt2 += 1 33 | 34 | print("origin: %.2f %% " % (100 * cnt1 / (cnt1 + 
cnt2)),len(values)) 35 | 36 | 37 | 38 | df_all = [] 39 | # 1. 获取数据 40 | def load_data_fromfile(filename): 41 | global df_all 42 | 43 | content = open(filename).read() 44 | df_dict = json.loads(content) 45 | for k in df_dict.keys(): 46 | df = pd.read_json(df_dict.get(k)) 47 | df = df[~df.isin([np.nan, np.inf, -np.inf]).any(1)] 48 | df_all.append(df) 49 | 50 | 51 | load_data_fromfile('lstm_train2021-12-20.csv') 52 | 53 | print(df_all[0].columns) 54 | 55 | 56 | 57 | # 准备预测的数据 58 | # 59 | 60 | sequence_len = 40 61 | prec = 10 #target 百分比 62 | fields = [ 63 | 'ma10', 64 | 'ma120', 'ma20', 'ma30', 'ma5', 'ma60', 'rise', 'risevol', 65 | 'dea', 'diff', 'macd' ,'oc','close'] 66 | 67 | X_train = [] 68 | y_train = [] 69 | X_test = [] 70 | y_test = [] 71 | 72 | def load_data(df, seq_len, ratio=0.9): 73 | 74 | df1 = df[df['date']<'2021-07-15'] 75 | df2 = df[df['date']>'2021-07-16'] 76 | 77 | label1 = df1['target'].values > prec 78 | label2 = df2['target'].values > prec 79 | 80 | datas1 = df1.loc[:,fields] 81 | datas2 = df2.loc[:,fields] 82 | 83 | sequence_length = seq_len 84 | 85 | if len(datas1) <= sequence_length or len(datas2) <= sequence_length: 86 | return 87 | 88 | for index in range(len(datas1) - sequence_length): 89 | X_train.append(datas1[index: index + sequence_length].values) 90 | y_train.append(label1[index+sequence_length-1]) 91 | 92 | for index in range(len(datas2) - sequence_length): 93 | X_test.append(datas2[index: index + sequence_length].values) 94 | y_test.append(label2[index+sequence_length-1]) 95 | 96 | 97 | 98 | for df in df_all[:100]: 99 | load_data(df,sequence_len) 100 | 101 | X_train = np.array(X_train) 102 | X_train = np.reshape(X_train,(X_train.shape[0],X_train.shape[1],len(fields))) 103 | y_train = np.array(y_train) 104 | 105 | X_test = np.array(X_test) 106 | X_test = np.reshape(X_test,(X_test.shape[0],X_test.shape[1],len(fields))) 107 | 108 | 109 | def build_model(): 110 | d = 0.2 111 | model = Sequential() 112 | 113 | # inputs: A 3D tensor with shape `[batch, timesteps, feature]`. 
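    # e.g. X_train prepared above has shape (samples, sequence_len=40, len(fields)=13)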
114 | # 输入的数据格式 是 总尺寸,时间步长,这里是 sequence_len, feature,特征维度 115 | # now model.output_shape == (None, 128) 116 | model.add(LSTM(128, return_sequences=True)) 117 | model.add(Dropout(d)) 118 | 119 | # for subsequent layers, no need to specify the input size: 120 | model.add(LSTM(64, return_sequences=False)) 121 | model.add(Dropout(d)) 122 | 123 | # fully connected layer 124 | model.add(Dense(16,activation='relu')) 125 | # 输入 1 维度 0,1 126 | model.add(Dense(1,activation='sigmoid')) 127 | 128 | lossfn = tf.keras.losses.BinaryCrossentropy( 129 | from_logits=False, 130 | label_smoothing=0.0, 131 | axis=-1, 132 | reduction="auto", 133 | name="binary_crossentropy", 134 | ) 135 | # 二分类 136 | model.compile(optimizer='rmsprop', 137 | loss=lossfn, metrics=['accuracy']) 138 | return model 139 | 140 | 141 | model = build_model() 142 | 143 | log_dir="logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S") 144 | tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1) 145 | 146 | #X = pd.DataFrame(data = X_train, columns = fields) 147 | 148 | model.fit(X_train,y_train,batch_size=200, 149 | epochs=2,callbacks=[tensorboard_callback]) 150 | 151 | 152 | """ 153 | history = model.fit( 154 | X_train,y_train, 155 | batch_size=200, 156 | epochs=2,callbacks=[tensorboard_callback]) 157 | """ 158 | y_pred = model.predict(X_test) 159 | 160 | # 对测试集进行预测 161 | # print(tf.greater(y_pred, .5)) 162 | print(y_pred) 163 | 164 | pcnt1 = 0 165 | pcnt2 = 0 166 | for i in range(len(y_pred)): 167 | if y_pred[i][0] < 0.6 : 168 | continue 169 | 170 | if y_test[i] == True : 171 | pcnt1 += 1 172 | else: 173 | pcnt2 += 1 174 | 175 | DisplayOriginalLabel(y_test) 176 | if pcnt1+pcnt2 > 0: 177 | print("Accuracy: %.2f %% " % (100 * pcnt1 / (pcnt1 + pcnt2)),pcnt1 + pcnt2) 178 | 179 | 180 | 181 | -------------------------------------------------------------------------------- /macd1.py: -------------------------------------------------------------------------------- 1 | import baostock as bs 2 | import pandas as pd 3 | import talib as ta 4 | import matplotlib.pyplot as plt 5 | 6 | def computeMACD(code,startdate,enddate): 7 | 8 | login_result = bs.login(user_id='anonymous', password='123456') 9 | print(login_result) 10 | 11 | ###获取股票日K线数据### 12 | rs = bs.query_history_k_data(code, 13 | "date,code,close,tradeStatus", 14 | start_date=startdate, end_date=enddate, 15 | frequency="d", adjustflag="3") 16 | #### 打印结果集 #### 17 | result_list = [] 18 | while (rs.error_code == '0') & rs.next(): 19 | # 获取一条记录,将记录合并在一起 20 | result_list.append(rs.get_row_data()) 21 | df = pd.DataFrame(result_list, columns=rs.fields) 22 | #剔除停盘数据 23 | df2 = df[df['tradeStatus']=='1'] 24 | #获取dif,dea,hist,它们的数据类似是tuple,且跟df2的date日期一一对应 25 | #记住了dif,dea,hist前33个为Nan,所以推荐用于计算的数据量一般为你所求日期之间数据量的3倍 26 | #这里计算的hist就是dif-dea,而很多证券商计算的MACD=hist*2=(dif-dea)*2 27 | dif, dea, hist = ta.MACD(df2['close'].astype(float).values, fastperiod=12, slowperiod=26, signalperiod=9) 28 | df3 = pd.DataFrame({'dif':dif[33:],'dea':dea[33:],'hist':hist[33:]}, 29 | index=df2['date'][33:],columns=['dif','dea','hist']) 30 | df3.plot(title='MACD') 31 | plt.show() 32 | #寻找MACD金叉和死叉 33 | datenumber = int(df3.shape[0]) 34 | for i in range(datenumber-1): 35 | if ((df3.iloc[i,0]<=df3.iloc[i,1]) & (df3.iloc[i+1,0]>=df3.iloc[i+1,1])): 36 | print("MACD金叉的日期:"+df3.index[i+1]) 37 | if ((df3.iloc[i,0]>=df3.iloc[i,1]) & (df3.iloc[i+1,0]<=df3.iloc[i+1,1])): 38 | print("MACD死叉的日期:"+df3.index[i+1]) 39 | 40 | bs.logout() 41 | return(dif,dea,hist) 42 | 43 | 44 | if __name__ == 
'__main__': 45 | code = 'sh.600004' 46 | startdate = '2017-03-01' 47 | enddate = '2020-04-13' 48 | (dif,dea,hist) = computeMACD(code,startdate,enddate) 49 | -------------------------------------------------------------------------------- /macd2.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import talib as talib 4 | import tushare as ts 5 | from matplotlib import rc 6 | rc('mathtext', default='regular') 7 | import sys 8 | 9 | code = "600519" 10 | if len(sys.argv) > 1: 11 | code = sys.argv[1] 12 | 13 | dw = ts.get_k_data(code) 14 | close = dw.close.values 15 | dw['macd'], dw['macdsignal'], dw['macdhist'] = talib.MACD(close, fastperiod=12, slowperiod=26, signalperiod=9) 16 | 17 | #dw[['close','macd','macdsignal','macdhist']].plot() 18 | for i in range(0,len(dw['open'])): 19 | print(i,dw['date'][i],dw['open'][i],dw['macd'][i],dw['macdsignal'][i],dw['macdhist'][i]) 20 | -------------------------------------------------------------------------------- /pserver.js: -------------------------------------------------------------------------------- 1 | httpProxy = require('http-proxy'); 2 | 3 | var URL = 'https://hq.sinajs.cn'; 4 | 5 | 6 | server = httpProxy.createServer({ secure: false, target: URL }, function (req, res, proxy) { 7 | 8 | 9 | proxy.proxyRequest(req, res, { secure: false, target: URL }); 10 | 11 | }) 12 | 13 | 14 | server.on('proxyRes', function(proxyRes, req, res, options) { 15 | proxyRes.on('data', function () { 16 | 17 | res.setHeader('Access-Control-Allow-Origin', '*'); 18 | res.setHeader('Access-Control-Allow-Methods', 'POST, GET, OPTIONS'); 19 | }); 20 | 21 | }); 22 | 23 | 24 | 25 | console.log("Listening on port 8000") 26 | 27 | server.listen(8000); 28 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | TA-Lib 2 | baostock 3 | backtrader 4 | numpy 5 | pandas 6 | matplotlib 7 | tushare 8 | sh 9 | backtrader_plotting 10 | sklearn 11 | tensorflow 12 | yahoo_fin 13 | demjson 14 | click 15 | pytdx 16 | scipy 17 | bokeh 18 | -------------------------------------------------------------------------------- /sendrequest.py: -------------------------------------------------------------------------------- 1 | import sys 2 | TaskQ5_ROOT= "/home/jsh/TaskQ5-py" 3 | sys.path.insert(0,TaskQ5_ROOT) 4 | from TaskQ5.do_requests import send_request 5 | from nostrclient.log import log 6 | import json 7 | import threading 8 | 9 | condition = threading.Condition() 10 | 11 | def handle_task(request,response): 12 | def finish_task(data): 13 | nonlocal response 14 | res = json.loads(data["content"]) 15 | response['data'] = res['data'] 16 | response['status'] = res['status'] 17 | response['headers'] = res['headers'] 18 | with condition: 19 | condition.notify() 20 | 21 | send_request(request,finish_task) 22 | with condition: 23 | ret = condition.wait(timeout=20) 24 | if ret: 25 | return True 26 | log.red("Timeout") 27 | print(request) 28 | return False 29 | -------------------------------------------------------------------------------- /sendrequest_task.py: -------------------------------------------------------------------------------- 1 | import websocket 2 | import json 3 | 4 | req_task_content = { 5 | 'type':'requests', 6 | 'url':'https://www.google.com', 7 | 'headers' : {'Host':'www.google.com', 8 | 'User-Agent':'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like 
Gecko) Chrome/130.0.0.0 Safari/537.36', 9 | 'Referer': 'https://www.google.com', 10 | }, 11 | 'Bridge':'wss://bridge.duozhutuan.com', 12 | 'clientId':'' 13 | } 14 | 15 | 16 | def handle_task(taskcontent,response): 17 | server="ws://localhost:8081/" 18 | def on_message(ws, message): 19 | 20 | message = json.loads(message) 21 | if (message['type']=='response'): #先判断消息类型,非 response 消息没有 'response' 字段 22 | response['data'] = message['response']['data'] 23 | response['status'] = message['response']['status'] 24 | response['headers'] = message['response']['headers'] 25 | print(len(response['data'])) 26 | ws.close() 27 | def on_open(ws): 28 | print("connect ok,send a new task") 29 | print(taskcontent) 30 | ws.send(json.dumps(taskcontent)) 31 | 32 | ws = websocket.WebSocketApp(server, 33 | on_open=on_open, 34 | on_message=on_message 35 | ) 36 | 37 | ws.run_forever() 38 | 39 | 40 | if __name__ == "__main__": 41 | response = {} 42 | handle_task(req_task_content,response) 43 | print(len(response['data']),response['status']) 44 | -------------------------------------------------------------------------------- /set_dayma.py: -------------------------------------------------------------------------------- 1 | # 2 | # exec script 3 | # 计算股票日线均线 (ma5/ma10/ma20/ma30/ma60) 并上传到服务器 4 | 5 | 6 | import os 7 | import sys 8 | import signal 9 | import threading,time 10 | import queue 11 | import pandas as pd 12 | import numpy as np 13 | from datetime import datetime 14 | import json 15 | import argparse 16 | import requests 17 | import talib 18 | # 可通过 --endday 指定计算截止日期,默认为今天 19 | # 数据每天只需要下载一次 20 | 21 | parser = argparse.ArgumentParser() 22 | parser.add_argument("--endday", help="日期",default='0') 23 | args = parser.parse_args() 24 | 25 | endday = args.endday 26 | 27 | requests.adapters.DEFAULT_RETRIES = 5 28 | #################### 29 | #1. 
获取股票数据 30 | #################### 31 | 32 | today = datetime.now() 33 | if endday== '0': 34 | endday = str(today.year) + str(today.month) + str(today.day) 35 | 36 | # print 打印color 表 37 | HEADER = '\033[95m' 38 | OKBLUE = '\033[94m' 39 | OKGREEN = '\033[92m' 40 | WARNING = '\033[93m' 41 | FAIL = '\033[91m' 42 | ENDC = '\033[0m' 43 | BOLD = '\033[1m' 44 | UNDERLINE = '\033[4m' 45 | 46 | 47 | # 处理异常,在出现异常的时候存盘 48 | def handler(signum, frame): 49 | print("是不是想让我退出啊") 50 | sys.exit() 51 | 52 | 53 | server="http://zhanluejia.net.cn" 54 | 55 | 56 | def save_db_server(df): 57 | 58 | df = df.set_index('date') 59 | df = df.to_json(orient='table') 60 | jsondatas = json.loads(df)['data'] 61 | 62 | requests.post(server+"/stock/updatedayMa",json=jsondatas,timeout=1000) 63 | 64 | 65 | def get_day_ma(code,name): 66 | json = requests.get(server+"/stock/getdayMa", 67 | params={"code":code,"end":endday,"limit":150},timeout=1000).json() 68 | 69 | print(json[0]) 70 | 71 | def get_day_data(code,name): 72 | try: 73 | json = requests.get("http://zhanluejia.net.cn/stock/getdayK", 74 | params={"code":code,"end":endday,"limit":150},timeout=1000).json() 75 | except: 76 | time.sleep(2) 77 | json = requests.get("http://zhanluejia.net.cn/stock/getdayK", 78 | params={"code":code,"end":endday,"limit":150},timeout=1000).json() 79 | 80 | df = pd.io.json.json_normalize(json) 81 | 82 | if len(df) < 2: 83 | return df 84 | df = df.drop(columns=['_id','codedate']) 85 | df = df.sort_values(by="date",ascending=True) 86 | 87 | 88 | return df 89 | 90 | 91 | #获取股票的名字和代码号 92 | def getstockinfo(stock): 93 | #2019-12-09,sz.002094,青岛金王,化工,申万一级行业 94 | # 时间,股票代码,名称,类别 95 | d,code,name,industry,skip2,hqltsz = stock.split(',') 96 | return code,name,industry 97 | 98 | 99 | 100 | def ma(df): 101 | #通过tushare获取股票信息 102 | #提取收盘价 103 | closed=df['close'].values 104 | #获取均线的数据,通过timeperiod参数来分别获取 5,10,20 日均线的数据。 105 | ma5=talib.SMA(closed,timeperiod=5) 106 | ma10=talib.SMA(closed,timeperiod=10) 107 | ma20=talib.SMA(closed,timeperiod=20) 108 | ma30=talib.SMA(closed,timeperiod=30) 109 | ma60=talib.SMA(closed,timeperiod=60) 110 | 111 | ma5[np.isnan(ma5)] = 0 112 | ma10[np.isnan(ma10)] = 0 113 | ma20[np.isnan(ma20)] = 0 114 | ma30[np.isnan(ma30)] = 0 115 | ma60[np.isnan(ma60)] = 0 116 | 117 | df['ma5'] = ma5 118 | df['ma10'] = ma10 119 | df['ma20'] = ma20 120 | df['ma30'] = ma30 121 | df['ma60'] = ma60 122 | save_db_server(df) 123 | 124 | def get_data(): 125 | for stock in stocklist: 126 | code ,name,industry = getstockinfo(stock) 127 | print('正在获取',name,'代码',code) 128 | df = get_day_data(code,name) 129 | if len(df) > 10: 130 | ma(df) 131 | else: 132 | continue 133 | #get_day_ma(code,name) 134 | 135 | # 136 | # 程序开始,监听信号 137 | # 138 | signal.signal(signal.SIGINT, handler) 139 | signal.signal(signal.SIGHUP, handler) 140 | signal.signal(signal.SIGTERM, handler) 141 | q = queue.Queue() 142 | 143 | # 判断是否已经下载了股票分类代码 144 | 145 | if not os.path.exists('./datas/stock_industry_check.csv'): 146 | print('正在下载股票库列表....') 147 | os.system('python3 bs_get_industry_check.py') 148 | 149 | stocklist = open('./datas/stock_industry_check.csv').readlines() 150 | stocklist = stocklist[1:] #删除第一行 151 | 152 | # 判断是仅仅显示,还是需要下载数据计算 153 | if True: 154 | 155 | get_data() 156 | 157 | -------------------------------------------------------------------------------- /sina_min_kline.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8; py-indent-offset:4 -*- 3 | 4 | """ 从 sina 获取分钟k数据 """ 5 | import 
pandas as pd 6 | import click 7 | from datetime import datetime 8 | import os 9 | import json 10 | import argparse 11 | import requests 12 | import re 13 | # 判断是否 是显示,还是重新下载数据计算 14 | # 数据每天只需要下载一次 15 | 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument("--offset", help="开始执行的位置",default='0') 18 | parser.add_argument("--listlen", help="每次获取的股票数",default=200) 19 | parser.add_argument("--resave", help="每次获取的股票数",default='0') 20 | args = parser.parse_args() 21 | 22 | offset = args.offset 23 | resave = args.resave 24 | listlen = int(args.listlen) 25 | 26 | float2 = lambda a:float('%.2f' % a) 27 | 28 | today = datetime.now().strftime('%Y-%m-%d') 29 | 30 | code_list = [] 31 | csv_data = [] 32 | industrydict = {} 33 | 34 | def create_clickable_code(code): 35 | code = code.replace(".","") 36 | url_template= '''{code}'''.format(code=code) 37 | return url_template 38 | def create_color_rise1(rise): 39 | url_template= '''{rise}'''.format(rise=rise) 40 | return url_template 41 | 42 | def get_data_fromjs(text): 43 | datas = [] 44 | text_list = re.findall("var hq_str_(.+?);",text,re.S|re.M) 45 | for i in text_list: 46 | code,data = i.split("=") 47 | data = data.strip('"').split(",") 48 | rise = 0 49 | if float(data[2]) != 0: 50 | rise = (float(data[3]) - float(data[2])) * 100 / float(data[2]) 51 | rise = float2(rise) 52 | datas.append([code,data[0],data[2],data[3],rise,industrydict[code]]) 53 | 54 | df = pd.DataFrame(datas,columns=['code','name','昨日收盘','当前价','涨跌','行业']) 55 | df = df.sort_values(by="涨跌",ascending=False) 56 | df['code'] = df['code'].apply(create_clickable_code) 57 | df['涨跌'] = df['涨跌'].apply(create_color_rise1) 58 | print(df.iloc[:].reset_index(drop=True).to_html(escape=False)) 59 | 60 | def get_min_kdata(code,end=0): 61 | global code_list 62 | global csv_data 63 | 64 | code_list.append(code) 65 | if len(code_list) >= listlen: 66 | codes = ",".join(code_list) 67 | resp = requests.get('https://hq.sinajs.cn/?list=%s'%codes) 68 | csv_data.append(resp.text) 69 | code_list = [] 70 | 71 | def get_min_kdata_tail( ): 72 | global code_list 73 | global csv_data 74 | if len(code_list) > 0: 75 | codes = ",".join(code_list) 76 | resp = requests.get('https://hq.sinajs.cn/?list=%s'%codes) 77 | csv_data.append(resp.text) 78 | code_list = [] 79 | 80 | #获取股票的名字和代码号 81 | def getstockinfo(stock): 82 | #2019-12-09,sz.002094,青岛金王,化工,申万一级行业 83 | # 时间,股票代码,名称,类别 84 | d,code,name,industry,skip2 = stock.split(',') 85 | code=code.replace(".","") 86 | 87 | industrydict[code]=industry 88 | return code,name,industry 89 | 90 | 91 | 92 | 93 | # 判断是否已经下载了股票分类代码 94 | 95 | if not os.path.exists('./datas/stock_industry_check.csv'): 96 | print('正在下载股票库列表....') 97 | os.system('python3 bs_get_industry_check.py') 98 | 99 | stocklist = open('./datas/stock_industry_check.csv').readlines() 100 | stocklist = stocklist[1+int(offset):] #删除第一行 101 | 102 | for stock in stocklist: 103 | code ,name,industry = getstockinfo(stock) 104 | 105 | if __name__ == "__main__": 106 | 107 | if not os.path.exists('./datas/stock_min_kdata.csv') or resave == '1': 108 | 109 | 110 | for stock in stocklist: 111 | code ,name,industry = getstockinfo(stock) 112 | print('正在获取',name,'代码',code) 113 | get_min_kdata(code) 114 | get_min_kdata_tail( ) 115 | f = open('./datas/stock_min_kdata.csv','w') 116 | f.write('\n'.join(csv_data)) 117 | f.close() 118 | get_data_fromjs(open("./datas/stock_min_kdata.csv").read()) 119 | -------------------------------------------------------------------------------- /start.py: 
-------------------------------------------------------------------------------- 1 | # 2 | # exec script 3 | import os 4 | import sys 5 | import signal 6 | import threading,time 7 | import queue 8 | 9 | code = '600600' 10 | 11 | def handler(signum, frame): 12 | print("是不是想让我退出啊") 13 | sys.exit() 14 | 15 | 16 | def macd(code,name): 17 | 18 | os.system('rm -f ./datas/ts_' + code+'.csv') 19 | y1 = os.system('python3 ts_to_csv.py --code '+code+' --start 2019-10-01') 20 | y2 = os.system('python3 btrmacd.py --datafile ./datas/ts_'+code+'.csv' + ' --code ' + code 21 | +' --name ' + name + ' --savedb 1') 22 | 23 | 24 | 25 | def mrk(code,name): 26 | y1 = os.system('python3 btrmrk.py --datafile ./datas/ts_'+code+'.csv' + ' --code ' + code 27 | +' --name ' + name + ' --savedb 1') 28 | if y1 == 2: #ctrl+c 29 | print(y1) 30 | sys.exit() 31 | 32 | def atr(code,name): 33 | y1 = os.system('python3 btratr.py --datafile ./datas/ts_'+code+'.csv' + ' --code ' + code 34 | +' --name ' + name + ' --savedb 1') 35 | if y1 == 2: #ctrl+c 36 | print(y1) 37 | sys.exit() 38 | 39 | def get_code_cvs(code): 40 | os.system('rm -f ./datas/ts_' + code+'.csv') 41 | y1 = os.system('python3 ts_to_csv.py --code '+code+' --start 2019-10-01') 42 | if y1 == 2 : #ctrl+c 43 | print(y1) 44 | sys.exit() 45 | 46 | 47 | def getstockinfo(stock): 48 | #2019-12-09,sz.002094,青岛金王,化工,申万一级行业 49 | # 时间,股票代码,名称,类别 50 | d,code,name,skip1,skip2 = stock.split(',') 51 | code = code.split('.')[1] 52 | return code,name 53 | 54 | def get_data_thread(n): 55 | for stock in stocklist: 56 | code ,name = getstockinfo(stock) 57 | print('正在获取',name,'代码',code) 58 | get_code_cvs(code) 59 | q.put((code,name)) 60 | q.task_done() 61 | 62 | signal.signal(signal.SIGINT, handler) 63 | signal.signal(signal.SIGHUP, handler) 64 | signal.signal(signal.SIGTERM, handler) 65 | q = queue.Queue() 66 | 67 | if len(sys.argv) > 1: 68 | code = sys.argv[1] 69 | 70 | if not os.path.exists('./datas/stock_industry_check.csv'): 71 | print('正在下载股票库列表....') 72 | os.system('python3 bs_get_industry_check.py') 73 | 74 | stocklist = open('./datas/stock_industry_check.csv').readlines() 75 | stocklist = stocklist[1:] #删除第一行 76 | 77 | 78 | threading.Thread(target=get_data_thread,args=(1,)).start() 79 | 80 | 81 | while True: 82 | code,name = q.get() 83 | print('正在分析',name,'代码',code) 84 | atr(code,name) 85 | -------------------------------------------------------------------------------- /start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ "x$1" = "x" ] 4 | then 5 | code=600500 6 | else 7 | code=$1 8 | fi 9 | 10 | rm ./datas/ts_$code.csv 11 | python3 ts_to_csv.py --code $code 12 | python3 btrmacd.py --datafile ./datas/ts_$code.csv 13 | #python3 btrstoch.py --datafile ./datas/ts_$code.csv 14 | #python3 btrrsi.py --datafile ./datas/ts_$code.csv 15 | python3 btrboll.py --datafile ./datas/ts_$code.csv 16 | 17 | -------------------------------------------------------------------------------- /stock-prediction/demo/demo1.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from keras.models import Sequential 4 | from keras.layers import LSTM,Dense 5 | import matplotlib.pyplot as plt 6 | 7 | from keras.models import load_model 8 | 9 | look_back = 40 10 | forward_days = 10 11 | num_periods = 2 12 | 13 | 14 | ################## 15 | # 1. 
加载股票数据 16 | ################## 17 | 18 | import tushare as ts 19 | 20 | #青岛啤酒 21 | code = '600600' 22 | start = '2019-04-14' 23 | 24 | df = ts.get_k_data(code,start=start,index=False,ktype='D') 25 | 26 | df = df.set_index('date').sort_index(ascending=True) 27 | 28 | df = df['close'] 29 | 30 | #df.head() 31 | 32 | """ 33 | plt.figure(figsize = (15,10)) 34 | plt.plot(df, label='Company stock') 35 | plt.legend(loc='best') 36 | plt.show() 37 | """ 38 | 39 | 40 | ############# 41 | # 2. 处理数据 42 | ############# 43 | 44 | array = df.values.reshape(df.shape[0],1) 45 | from sklearn.preprocessing import MinMaxScaler 46 | scl = MinMaxScaler() 47 | array = scl.fit_transform(array) 48 | 49 | 50 | #split in Train and Test 51 | 52 | division = len(array) - num_periods*forward_days 53 | 54 | #look_back 40 55 | array_test = array[division-look_back:] 56 | array_train = array[:division] 57 | 58 | 59 | #Get the data and splits in input X and output Y, by spliting in `n` past days as input X 60 | #and `m` coming days as Y. 61 | def processData(data, look_back, forward_days,jump=1): 62 | X,Y = [],[] 63 | for i in range(0,len(data) -look_back -forward_days +1, jump): 64 | X.append(data[i:(i+look_back)]) 65 | Y.append(data[(i+look_back):(i+look_back+forward_days)]) 66 | return np.array(X),np.array(Y) 67 | 68 | 69 | X,y = processData(array_train,look_back,forward_days) 70 | y = np.array([list(a.ravel()) for a in y]) 71 | 72 | 73 | from sklearn.model_selection import train_test_split 74 | X_train, X_validate, y_train, y_validate = train_test_split(X, y, test_size=0.20, random_state=42) 75 | 76 | 77 | """ 78 | print(X_train.shape) 79 | print(X_validate.shape) 80 | print(X_test.shape) 81 | print(y_train.shape) 82 | print(y_validate.shape) 83 | print(y_test.shape) 84 | """ 85 | 86 | 87 | ###################### 88 | # 建立模型,训练数据 89 | ###################### 90 | 91 | NUM_NEURONS_FirstLayer = 50 92 | NUM_NEURONS_SecondLayer = 30 93 | EPOCHS = 50 94 | 95 | #Build the model 96 | model = Sequential() 97 | model.add(LSTM(NUM_NEURONS_FirstLayer,input_shape=(look_back,1), return_sequences=True)) 98 | model.add(LSTM(NUM_NEURONS_SecondLayer,input_shape=(NUM_NEURONS_FirstLayer,1))) 99 | model.add(Dense(forward_days)) 100 | model.compile(loss='mean_squared_error', optimizer='adam') 101 | 102 | history = model.fit(X_train,y_train,epochs=EPOCHS,validation_data=(X_validate,y_validate),shuffle=True,batch_size=2, verbose=2) 103 | 104 | 105 | ################## 106 | # 预测 107 | ################## 108 | 109 | 110 | division = len(array) - num_periods*forward_days 111 | 112 | leftover = division%forward_days+1 113 | 114 | array_test = array[division-look_back:] 115 | array_train = array[leftover:division] 116 | 117 | Xtrain,ytrain = processData(array_train,look_back,forward_days,forward_days) 118 | Xtest,ytest = processData(array_test,look_back,forward_days,forward_days) 119 | 120 | Xtrain = model.predict(Xtrain) 121 | Xtrain = Xtrain.ravel() 122 | 123 | Xtest = model.predict(Xtest) 124 | Xtest = Xtest.ravel() 125 | 126 | look_back +leftover+ len(Xtrain) 127 | y = np.concatenate((ytrain, ytest), axis=0) 128 | 129 | plt.figure(figsize = (15,10)) 130 | 131 | # Data in Train/Validation 132 | plt.plot([x for x in range(look_back+leftover, len(Xtrain)+look_back+leftover)], scl.inverse_transform(Xtrain.reshape(-1,1)), color='r', label='Train') 133 | # Data in Test 134 | plt.plot([x for x in range(look_back +leftover+ len(Xtrain), len(Xtrain)+len(Xtest)+look_back+leftover)], scl.inverse_transform(Xtest.reshape(-1,1)), color='y', label='Test') 
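# Note: both predicted curves are drawn with an x offset of look_back+leftover,
# which realigns the windowed predictions with their true positions in the
# original close series: the first prediction only exists after look_back input
# days, and leftover rows are trimmed from the front of the training slice.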
135 | 136 | #Data used 137 | plt.plot([x for x in range(look_back+leftover, look_back+leftover+len(Xtrain)+len(Xtest))], scl.inverse_transform(y.reshape(-1,1)), color='b', label='Target') 138 | 139 | 140 | plt.legend(loc='best') 141 | plt.show() 142 | -------------------------------------------------------------------------------- /stock-prediction/demo/demo2.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from keras.models import Sequential 4 | from keras.layers import LSTM,Dense 5 | import matplotlib.pyplot as plt 6 | 7 | from keras.models import load_model 8 | 9 | look_back = 40 10 | forward_days = 10 11 | num_periods = 2 12 | 13 | 14 | ################## 15 | # 1. 加载股票数据 16 | ################## 17 | 18 | import tushare as ts 19 | 20 | #青岛啤酒 21 | code = '600600' 22 | start = '2019-04-14' 23 | 24 | df = ts.get_k_data(code,start=start,index=False,ktype='D') 25 | 26 | df = df.set_index('date').sort_index(ascending=True) 27 | 28 | df = df['close'] 29 | 30 | #df.head() 31 | 32 | """ 33 | plt.figure(figsize = (15,10)) 34 | plt.plot(df, label='Company stock') 35 | plt.legend(loc='best') 36 | plt.show() 37 | """ 38 | 39 | 40 | ############# 41 | # 2. 处理数据 42 | ############# 43 | 44 | array = df.values.reshape(df.shape[0],1) 45 | from sklearn.preprocessing import MinMaxScaler 46 | scl = MinMaxScaler() 47 | array = scl.fit_transform(array) 48 | 49 | 50 | """ 51 | array = all 52 | lb = look_back 53 | 54 | array 55 | -----------|------|------------- 56 | lb num * forward 57 | array_test 58 | ...........|------|------------- 59 | lb num*forward 60 | 61 | array_train 62 | ------------------|............. 63 | [:division] num*forward 64 | """ 65 | 66 | #split in Train and Test 67 | 68 | division = len(array) - num_periods*forward_days 69 | 70 | #look_back 40 71 | array_test = array[division-look_back:] 72 | array_train = array[:division] 73 | 74 | 75 | #Get the data and splits in input X and output Y, by spliting in `n` past days as input X 76 | #and `m` coming days as Y. 
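# A worked example of the windowing below, assuming 260 rows with look_back=40
# and forward_days=10: processData returns
#   X.shape == (211, 40, 1)   # 260 - 40 - 10 + 1 sliding windows
#   Y.shape == (211, 10, 1)   # the 10 days following each window
# With jump=forward_days (used later for prediction) the windows do not overlap.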
77 | def processData(data, look_back, forward_days,jump=1): 78 | X,Y = [],[] 79 | for i in range(0,len(data) -look_back -forward_days +1, jump): 80 | X.append(data[i:(i+look_back)]) 81 | Y.append(data[(i+look_back):(i+look_back+forward_days)]) 82 | return np.array(X),np.array(Y) 83 | 84 | 85 | X,y = processData(array_train,look_back,forward_days) 86 | y = np.array([list(a.ravel()) for a in y]) 87 | 88 | 89 | from sklearn.model_selection import train_test_split 90 | X_train, X_validate, y_train, y_validate = train_test_split(X, y, test_size=0.20, random_state=42) 91 | 92 | 93 | """ 94 | print(X_train.shape) 95 | print(X_validate.shape) 96 | print(X_test.shape) 97 | print(y_train.shape) 98 | print(y_validate.shape) 99 | print(y_test.shape) 100 | """ 101 | 102 | 103 | ###################### 104 | # 建立模型,训练数据 105 | ###################### 106 | 107 | NUM_NEURONS_FirstLayer = 50 108 | NUM_NEURONS_SecondLayer = 30 109 | EPOCHS = 50 110 | 111 | #Build the model 112 | model = Sequential() 113 | model.add(LSTM(NUM_NEURONS_FirstLayer,input_shape=(look_back,1), return_sequences=True)) 114 | model.add(LSTM(NUM_NEURONS_SecondLayer,input_shape=(NUM_NEURONS_FirstLayer,1))) 115 | model.add(Dense(forward_days)) 116 | model.compile(loss='mean_squared_error', optimizer='adam') 117 | 118 | history = model.fit(X_train,y_train,epochs=EPOCHS,validation_data=(X_validate,y_validate),shuffle=True,batch_size=2, verbose=2) 119 | 120 | 121 | ################## 122 | # 预测 123 | ################## 124 | 125 | 126 | division = len(array) - num_periods*forward_days 127 | 128 | leftover = division%forward_days+1 129 | 130 | array_test = array[division-look_back:] 131 | array_train = array[leftover:division] 132 | 133 | Xtrain,ytrain = processData(array_train,look_back,forward_days,forward_days) 134 | Xtest,ytest = processData(array_test,look_back,forward_days,forward_days) 135 | 136 | Xtrain = model.predict(Xtrain) 137 | Xtrain = Xtrain.ravel() 138 | 139 | Xtest = model.predict(Xtest) 140 | Xtest = Xtest.ravel() 141 | 142 | y = np.concatenate((ytrain, ytest), axis=0) 143 | 144 | plt.figure(figsize = (15,10)) 145 | 146 | # Data in Train/Validation 147 | plt.plot([x for x in range(look_back+leftover, len(Xtrain)+look_back+leftover)], scl.inverse_transform(Xtrain.reshape(-1,1)), color='r', label='Train') 148 | # Data in Test 149 | plt.plot([x for x in range(look_back +leftover+ len(Xtrain), len(Xtrain)+len(Xtest)+look_back+leftover)], scl.inverse_transform(Xtest.reshape(-1,1)), color='y', label='Test') 150 | 151 | #Data used 152 | plt.plot([x for x in range(look_back+leftover, look_back+leftover+len(Xtrain)+len(Xtest))], scl.inverse_transform(y.reshape(-1,1)), color='b', label='Target') 153 | 154 | 155 | plt.legend(loc='best') 156 | plt.show() 157 | -------------------------------------------------------------------------------- /stock-prediction/demo/demo3.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from keras.models import Sequential 4 | from keras.layers import LSTM,Dense 5 | import matplotlib.pyplot as plt 6 | 7 | from keras.models import load_model 8 | 9 | look_back = 40 10 | forward_days = 10 11 | num_periods = 2 12 | predict_days = 5 13 | 14 | import tushare as ts 15 | 16 | #青岛啤酒 17 | code = '600600' 18 | start = '2019-04-14' 19 | 20 | df = ts.get_k_data(code,start=start,index=False,ktype='D') 21 | 22 | df = df.set_index('date').sort_index(ascending=True) 23 | 24 | df = df['close'] 25 | 26 | array = 
df.values.reshape(df.shape[0],1) 27 | 28 | 29 | from sklearn.preprocessing import MinMaxScaler 30 | scl = MinMaxScaler() 31 | array = scl.fit_transform(array) 32 | 33 | division = len(array) - num_periods*forward_days 34 | 35 | array_test = df.shift(-predict_days).values.reshape(df.shape[0],1)[division-look_back:] 36 | 37 | array_test = scl.fit_transform(array_test) 38 | 39 | print(array_test) 40 | 41 | 42 | array_train = array[:division] 43 | 44 | 45 | def processData(data, look_back, forward_days,jump=1): 46 | X,Y = [],[] 47 | for i in range(0,len(data) -look_back -forward_days +1, jump): 48 | X.append(data[i:(i+look_back)]) 49 | Y.append(data[(i+look_back):(i+look_back+forward_days)]) 50 | return np.array(X),np.array(Y) 51 | 52 | X,y = processData(array_train,look_back,forward_days) 53 | 54 | y = np.array([list(a.ravel()) for a in y]) 55 | 56 | from sklearn.model_selection import train_test_split 57 | X_train, X_validate, y_train, y_validate = train_test_split(X, y, test_size=0.20, random_state=42) 58 | 59 | NUM_NEURONS_FirstLayer = 50 60 | NUM_NEURONS_SecondLayer = 30 61 | EPOCHS = 50 62 | 63 | #Build the model 64 | model = Sequential() 65 | model.add(LSTM(NUM_NEURONS_FirstLayer,input_shape=(look_back,1), return_sequences=True)) 66 | model.add(LSTM(NUM_NEURONS_SecondLayer,input_shape=(NUM_NEURONS_FirstLayer,1))) 67 | model.add(Dense(forward_days)) 68 | model.compile(loss='mean_squared_error', optimizer='adam') 69 | 70 | history = model.fit(X_train,y_train,epochs=EPOCHS,validation_data=(X_validate,y_validate),shuffle=True,batch_size=2, verbose=2) 71 | 72 | 73 | 74 | Xtrain,ytrain = processData(array_train,look_back,forward_days,forward_days) 75 | Xtest,ytest = processData(array_test,look_back,forward_days,forward_days) 76 | 77 | Xtrain = model.predict(Xtrain) 78 | Xtrain = Xtrain.ravel() 79 | 80 | 81 | Xtest = model.predict(Xtest) 82 | Xtest = Xtest.ravel() 83 | 84 | 85 | y = np.concatenate((ytrain, ytest), axis=0) 86 | 87 | plt.figure(figsize = (15,10)) 88 | 89 | # Data in Train/Validation 90 | plt.plot([x for x in range(look_back, len(Xtrain)+look_back)], scl.inverse_transform(Xtrain.reshape(-1,1)), color='r', label='Train') 91 | # Data in Test 92 | plt.plot([x for x in range(look_back + len(Xtrain), len(Xtrain)+len(Xtest)+look_back)], scl.inverse_transform(Xtest.reshape(-1,1)), color='y', label='Test') 93 | 94 | #Data used 95 | #plt.plot([x for x in range(look_back , look_back+len(Xtrain)+len(Xtest))], scl.inverse_transform(y.reshape(-1,1)), color='b', label='Target') 96 | plt.plot([x for x in range(look_back , look_back+len(Xtrain)+len(Xtest))], scl.inverse_transform(array[look_back:]), color='b', label='Target') 97 | 98 | plt.legend(loc='best') 99 | plt.show() 100 | 101 | -------------------------------------------------------------------------------- /stock-prediction/demo/demo4.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import math, time, itertools, datetime 4 | import pandas as pd 5 | 6 | from operator import itemgetter 7 | from sklearn.metrics import mean_squared_error 8 | from math import sqrt 9 | 10 | from keras.models import Sequential 11 | from keras.layers.core import Dense, Dropout, Activation 12 | from keras.layers.recurrent import LSTM 13 | 14 | def get_stock_data(normalized=0): 15 | def get_ma_day(df, index, days): 16 | #return np.round(df[index].rolling(window = days, center = False).mean(), 2) 17 | # df need to be a DataFrame 18 | if not isinstance(df, 
pd.DataFrame): 19 | return None 20 | col = df[index] 21 | l = len(col) 22 | return [ col[i-days+1:i+1].mean() for i in range(l)] # first days-1 will be None because of the indexing handling 23 | 24 | def get_price_change(df): 25 | close_price = df['close'] 26 | return np.log(close_price) - np.log(close_price.shift(1)) 27 | 28 | import baostock as bs 29 | bs.login() 30 | 31 | #青岛啤酒 32 | code = 'sh.600600' 33 | start = '2019-04-14' 34 | 35 | rs = bs.query_history_k_data_plus(code, 'date,open,high,low,close,volume,code,turn', start_date=start, 36 | frequency='d' ) 37 | df = rs.get_data() 38 | if len(df) < 2: 39 | return 40 | 41 | df['close'] = df['close'].astype(np.float32) 42 | df['volume'] = df['volume'].astype(np.float32) 43 | print(df.columns) 44 | df = df.set_index('date').sort_index(ascending=True) 45 | 46 | #df = df['close'] 47 | 48 | 49 | # Get 50 | #stocks = pd.read_csv(url, header=0, names=col_names) 51 | # reverse cuz it was backward 52 | stocks = df 53 | stocks = stocks[::-1] 54 | 55 | stocks['MA5'] = get_ma_day(stocks,'close',5) 56 | stocks['MA10']= get_ma_day(stocks,'close',10) 57 | stocks['MA20']= get_ma_day(stocks,'close',20) 58 | 59 | stocks['VMA5'] = get_ma_day(stocks,'volume',5) 60 | stocks['VMA10'] = get_ma_day(stocks,'volume',10) 61 | stocks['VMA20'] = get_ma_day(stocks,'volume',20) 62 | 63 | stocks['price_change'] = get_price_change(stocks) 64 | #print(stocks.head(10)) 65 | 66 | # Drop 67 | #print(stocks) 68 | stocks = stocks.drop(columns=['code'],axis=1) 69 | 70 | # Normalize 71 | df = pd.DataFrame(stocks) 72 | if normalized: 73 | df = df/df.mean() -1 74 | 75 | # drop first 19 NaN rows caused by MA/VMA 76 | return df[20:] 77 | 78 | 79 | df = get_stock_data(normalized=1) 80 | 81 | 82 | def load_data(stock, seq_len, ratio=0.9): 83 | amount_of_features = len(stock.columns) 84 | data = stock.values 85 | sequence_length = seq_len + 1 86 | result = [] 87 | for index in range(len(data) - sequence_length): 88 | result.append(data[index: index + sequence_length]) 89 | 90 | result = np.array(result) # (len(), seq, cols) contains newest date 91 | 92 | row = round(0.9 * result.shape[0]) 93 | train = result[:int(row), :] 94 | #np.random.shuffle(train) 95 | 96 | x_train = train[:, :-1] # (len(), 10, 4) drop last row(), because last row contain the label 97 | y_train = train[:, -1][:,2] # with last row, and only keep "close" column @ [Open, High,"Close", Volume,...] 
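    # (added note, not in the original) the comment above assumes a column order
    # like [open, high, "close", volume, ...], but get_stock_data builds the
    # baostock frame as open,high,low,close,... once 'date' becomes the index,
    # which puts 'close' at position 3 rather than 2 — so [:,2] would label on
    # 'low'. Verify stock.columns before trusting this slice as the close price.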
98 | x_test = result[int(row):, :-1] 99 | y_test = result[int(row):, -1][:,2] 100 | 101 | x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], amount_of_features)) 102 | x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], amount_of_features)) 103 | 104 | x_train = x_train.astype('float64') 105 | y_train = y_train.astype('float64') 106 | x_test = x_test.astype('float64') 107 | y_test = y_test.astype('float64') 108 | return [x_train, y_train, x_test, y_test] 109 | 110 | sequence_len = 25 111 | X_train, y_train, X_test, y_test = load_data(df, sequence_len) 112 | 113 | print(X_train.shape) 114 | 115 | def build_model(layers): 116 | d = 0.2 117 | model = Sequential() 118 | 119 | # now model.output_shape == (None, 128) 120 | model.add(LSTM(128, input_shape=(layers[1], layers[0]), return_sequences=True)) 121 | model.add(Dropout(d)) 122 | 123 | # for subsequent layers, no need to specify the input size: 124 | model.add(LSTM(64, return_sequences=False)) 125 | model.add(Dropout(d)) 126 | 127 | # fully connected layer 128 | model.add(Dense(16,kernel_initializer='uniform',activation='relu')) 129 | model.add(Dense(1,kernel_initializer='uniform',activation='linear')) 130 | model.compile(loss='mse',optimizer='adam',metrics=['accuracy']) 131 | return model 132 | 133 | 134 | model = build_model([X_train.shape[-1],sequence_len]) 135 | 136 | history = model.fit( 137 | X_train, 138 | y_train, 139 | batch_size=512, 140 | epochs=500, 141 | validation_split=0.1, 142 | verbose=0) 143 | 144 | """ 145 | trainScore = model.evaluate(X_train, y_train, verbose=0) 146 | print('Train Score: %.2f MSE (%.2f RMSE)' % (trainScore[0], math.sqrt(trainScore[0]))) 147 | 148 | testScore = model.evaluate(X_test, y_test, verbose=0) 149 | print('Test Score: %.2f MSE (%.2f RMSE)' % (testScore[0], math.sqrt(testScore[0]))) 150 | """ 151 | 152 | diff=[] 153 | ratio=[] 154 | p = model.predict(X_test) 155 | for u in range(len(y_test)): 156 | pr = p[u][0] 157 | ratio.append((y_test[u]/pr)-1) 158 | diff.append(abs(y_test[u]- pr)) 159 | 160 | 161 | plt.plot(p,color='red', label='prediction') 162 | #plt.plot(ratio, color='black', label='ratio') 163 | #plt.plot(diff, color='purple', label='diff') 164 | plt.plot(y_test,color='blue', label='y_test') 165 | plt.legend(loc='best') 166 | plt.show() 167 | -------------------------------------------------------------------------------- /stock-prediction/demo/demo5.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from keras.models import Sequential 4 | from keras.layers import LSTM,Dense 5 | import matplotlib.pyplot as plt 6 | 7 | from keras.models import load_model 8 | 9 | nsteps = 30 10 | 11 | 12 | ################## 13 | # 1. 加载股票数据 14 | ################## 15 | 16 | import tushare as ts 17 | 18 | #青岛啤酒 19 | code = '600600' 20 | start = '2019-04-14' 21 | 22 | df = ts.get_k_data(code,start=start,index=False,ktype='D') 23 | 24 | df = df.set_index('date').sort_index(ascending=True) 25 | 26 | df = df['close'] 27 | 28 | ############# 29 | # 2. 
处理数据 30 | ############# 31 | 32 | array = df.values.reshape(df.shape[0],1) 33 | from sklearn.preprocessing import MinMaxScaler 34 | scl = MinMaxScaler() 35 | array = scl.fit_transform(array) 36 | 37 | #split in Train and Test 38 | division = len(array) - 2 * nsteps 39 | array_test = array[division:] 40 | array_train = array[:division] 41 | 42 | #Get the data and splits in input X and output Y, by spliting in `n` past days as input X 43 | #and `m` coming days as Y. 44 | def processData(data, nsteps,jump=1): 45 | X,Y = [],[] 46 | for i in range(0,len(data) - nsteps , jump): 47 | X.append(data[i:(i+nsteps)]) 48 | Y.append(data[(i+1):(i+1+nsteps)]) 49 | return np.array(X),np.array(Y) 50 | 51 | X,y = processData(array_train,nsteps) 52 | y = np.array([list(a.ravel()) for a in y]) 53 | 54 | from sklearn.model_selection import train_test_split 55 | X_train, X_validate, y_train, y_validate = train_test_split(X, y, test_size=0.20, random_state=42) 56 | 57 | 58 | 59 | ###################### 60 | # 建立模型,训练数据 61 | ###################### 62 | 63 | NUM_NEURONS_FirstLayer = 50 64 | NUM_NEURONS_SecondLayer = 30 65 | EPOCHS = 20 66 | 67 | #input_shape:(batch_size, timesteps, input_dim) 68 | #default None = batch_size = len(X) 69 | # input_shape(timesteps,dim) = (None,timesteps,input_dim) = (len(X),timesteps,input_dim) 70 | 71 | #Build the model 72 | model = Sequential() 73 | model.add(LSTM(NUM_NEURONS_FirstLayer,input_shape=(nsteps,1), return_sequences=True)) 74 | model.add(LSTM(NUM_NEURONS_SecondLayer,input_shape=(NUM_NEURONS_FirstLayer,1))) 75 | model.add(Dense(1)) 76 | model.compile(loss='mean_squared_error', optimizer='adam') 77 | 78 | history = model.fit(X_train,y_train,epochs=EPOCHS,validation_data=(X_validate,y_validate),shuffle=True,batch_size=2, verbose=2) 79 | 80 | 81 | ################## 82 | # 预测 83 | ################## 84 | 85 | 86 | Xtrain,ytrain = processData(array_train,nsteps) 87 | Xtest,ytest = processData(array_test,nsteps) 88 | 89 | Xtrain = model.predict(Xtrain) 90 | Xtrain = Xtrain.ravel() 91 | 92 | Xpred = model.predict(Xtest) 93 | Xpred = Xpred.ravel() 94 | print(scl.inverse_transform(Xtest[-1].reshape(-1, 1))) 95 | print(scl.inverse_transform(Xpred.reshape(-1, 1))) 96 | y = np.concatenate((ytrain, ytest), axis=0) 97 | 98 | plt.figure(figsize = (15,10)) 99 | 100 | """ 101 | # Data in Train/Validation 102 | plt.plot([x for x in range(nsteps+leftover, len(Xtrain)+nsteps+leftover)], scl.inverse_transform(Xtrain.reshape(-1,1)), color='r', label='Train') 103 | # Data in Test 104 | plt.plot([x for x in range(nsteps +leftover+ len(Xtrain), len(Xtrain)+len(Xtest)+nsteps+leftover)], scl.inverse_transform(Xtest.reshape(-1,1)), color='y', label='Test') 105 | 106 | #Data used 107 | plt.plot([x for x in range(nsteps+leftover, nsteps+leftover+len(Xtrain)+len(Xtest))], scl.inverse_transform(y.reshape(-1,1)), color='b', label='Target') 108 | 109 | """ 110 | plt.legend(loc='best') 111 | #plt.show() 112 | -------------------------------------------------------------------------------- /stock-prediction/demo/demo6_svm.py: -------------------------------------------------------------------------------- 1 | import tushare as ts 2 | import talib 3 | from sklearn import svm 4 | import sys 5 | # 获取上证指数数据 6 | #青岛啤酒 7 | 8 | code='600600' 9 | if len(sys.argv) > 1: 10 | code =sys.argv[1] 11 | #df=ts.get_hist_data(code,start='2018-07-09',end='2020-10-10') 12 | df=ts.get_hist_data(code,start='2018-07-09') 13 | close_pri=df['close'] 14 | close_pri = close_pri.sort_index(ascending=True) 15 | 16 | print(close_pri) 
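# (added sketch, not in the original) The training loop below recomputes
# SMA/WMA/MOM on a growing slice at every index, which is O(n^2). A vectorized
# pass over the full series yields the same rolling values in one shot; the
# exact off-by-one alignment with the loop's [-2] lookups should be checked
# before swapping this in.
def build_features_vectorized(prices, period=7):
    import numpy as np
    p = np.asarray(prices, dtype=float)
    sma = talib.SMA(p, timeperiod=period)
    wma = talib.WMA(p, timeperiod=period)
    mom = talib.MOM(p, timeperiod=period)
    feats = np.column_stack([sma, wma, mom])[:-1]  # features known before the label day
    labels = np.where(np.diff(p) > 0, 1, -1)       # +1 if the next close is higher
    return feats[period:], labels[period:]         # drop the NaN warm-up rows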
17 | # 定义训练数据 18 | x_train = [] 19 | y_train = [] 20 | 21 | for index in range(2,len(close_pri)): 22 | # 取数据[-2]表示使用的特征是由今天之前的数据计算得到的 23 | sma_data = talib.SMA(close_pri[:index],timeperiod=7)[-2] 24 | wma_data = talib.WMA(close_pri[:index],timeperiod=7)[-2] 25 | mom_data = talib.MOM(close_pri[:index],timeperiod=7)[-2] 26 | 27 | features = [] 28 | features.append(sma_data) 29 | features.append(wma_data) 30 | features.append(mom_data) 31 | x_train.append(features) 32 | 33 | # 对今天的交易进行打标签,涨则标记1,跌则标记-1 34 | if close_pri[index-1] < close_pri[index]: 35 | label = 1 36 | else: 37 | label = -1 38 | y_train.append(label) 39 | 40 | 41 | # 去除前7天的数据,因为部分sma/wma/mom数值为nan 42 | X_Train = x_train[7:] 43 | Y_Train = y_train[7:] 44 | 45 | # svm进行分类 46 | clf = svm.SVC() 47 | clf.fit(X_Train,Y_Train) 48 | 49 | # 数据仅仅使用了2到len(close_pri),所以最后一个数据没有参与分类,拿来试试 50 | sma_test = talib.SMA(close_pri,timeperiod=7)[-2] 51 | wma_test = talib.WMA(close_pri,timeperiod=7)[-2] 52 | mom_test = talib.MOM(close_pri,timeperiod=7)[-2] 53 | x_test = [[sma_test,wma_test,mom_test]] 54 | y_test = -1 55 | if close_pri[-2] < close_pri[-1] : 56 | y_test = 1 57 | 58 | prediction = clf.predict(x_test) 59 | print(prediction) 60 | print(prediction == y_test) 61 | 62 | # 数据仅仅使用了2到len(close_pri) 63 | sma_test = talib.SMA(close_pri,timeperiod=7)[-1] 64 | wma_test = talib.WMA(close_pri,timeperiod=7)[-1] 65 | mom_test = talib.MOM(close_pri,timeperiod=7)[-1] 66 | x_test = [[sma_test,wma_test,mom_test]] 67 | prediction = clf.predict(x_test) 68 | print(prediction) 69 | 70 | -------------------------------------------------------------------------------- /stock-prediction/demo/demo7_lstm.py: -------------------------------------------------------------------------------- 1 | #https://github.com/soms98/Stock-Price-Prediction-Time-Series-LSTM-Model-Keras-Tensorflow/blob/master/HDFC.ipynb 2 | import pandas as pd 3 | import numpy as np 4 | 5 | 6 | ##################### 7 | # 1. 获取股票数据 8 | ##################### 9 | import tushare as ts 10 | import sys 11 | # 获取上证指数数据 12 | #青岛啤酒 13 | 14 | code='600600' 15 | if len(sys.argv) > 1: 16 | code =sys.argv[1] 17 | #df=ts.get_hist_data(code,start='2018-07-09',end='2020-10-10') 18 | df=ts.get_hist_data(code,start='2018-07-09') 19 | df1 = df['close'] 20 | df1 = df1.sort_index(ascending=True) 21 | 22 | print(df1) 23 | 24 | ####################### 25 | # 2. 
处理数据 26 | ####################### 27 | 28 | from sklearn.preprocessing import MinMaxScaler 29 | scaler=MinMaxScaler(feature_range=(0,1)) 30 | df1=scaler.fit_transform(np.array(df1).reshape(-1,1)) 31 | 32 | 33 | ##splitting dataset into train and test split 34 | training_size=int(len(df1)*0.75) 35 | test_size=len(df1)-training_size 36 | train_data,test_data=df1[0:training_size,:],df1[training_size:len(df1),:1] 37 | 38 | # convert an array of values into a dataset matrix 39 | def create_dataset(dataset, time_step=1): 40 | dataX, dataY = [], [] 41 | for i in range(len(dataset)-time_step-1): 42 | a = dataset[i:(i+time_step), 0] ###i=0, 0,1,2,3-----99 100 43 | dataX.append(a) 44 | dataY.append(dataset[i + time_step, 0]) 45 | return np.array(dataX), np.array(dataY) 46 | 47 | # reshape into X=t,t+1,t+2,t+3 and Y=t+4 48 | time_step = 100 49 | X_train, y_train = create_dataset(train_data, time_step) 50 | X_test, ytest = create_dataset(test_data, time_step) 51 | 52 | 53 | print(X_train.shape), print(y_train.shape) 54 | 55 | print(X_test.shape), print(ytest.shape) 56 | 57 | # reshape input to be [samples, time steps, features] which is required for LSTM 58 | X_train =X_train.reshape(X_train.shape[0],X_train.shape[1] , 1) 59 | X_test = X_test.reshape(X_test.shape[0],X_test.shape[1] , 1) 60 | 61 | 62 | ### Create the Stacked LSTM model 63 | # 3. 建立LSTM 模型 64 | ########################### 65 | from tensorflow.keras.models import Sequential 66 | from tensorflow.keras.layers import Dense 67 | from tensorflow.keras.layers import LSTM 68 | 69 | 70 | model=Sequential() 71 | model.add(LSTM(50,return_sequences=True,input_shape=(100,1))) 72 | model.add(LSTM(50,return_sequences=True)) 73 | model.add(LSTM(50)) 74 | model.add(Dense(1)) 75 | model.compile(loss='mean_squared_error',optimizer='adam') 76 | 77 | #model.summary() 78 | 79 | ############# 80 | # 4. 训练 81 | ############# 82 | model.fit(X_train,y_train,validation_data=(X_test,ytest),epochs=50,batch_size=64,verbose=1) 83 | 84 | 85 | ### Lets Do the prediction and check performance metrics 86 | # 87 | # 5. 预测 88 | ############## 89 | 90 | train_predict=model.predict(X_train) 91 | test_predict=model.predict(X_test) 92 | 93 | ##Transformback to original form 94 | train_predict=scaler.inverse_transform(train_predict) 95 | test_predict=scaler.inverse_transform(test_predict) 96 | 97 | print(train_predict,test_predict) 98 | 99 | ########### 100 | # 6. 
显示 101 | ########### 102 | 103 | import matplotlib.pyplot as plt 104 | 105 | ### Plotting 106 | # shift train predictions for plotting 107 | look_back=100 108 | trainPredictPlot = np.empty_like(df1) 109 | trainPredictPlot[:, :] = np.nan 110 | trainPredictPlot[look_back:len(train_predict)+look_back, :] = train_predict 111 | # shift test predictions for plotting 112 | testPredictPlot = np.empty_like(df1) 113 | testPredictPlot[:, :] = np.nan 114 | testPredictPlot[len(train_predict)+(look_back*2)+1:len(df1)-1, :] = test_predict 115 | # plot baseline and predictions 116 | plt.plot(scaler.inverse_transform(df1)) 117 | plt.plot(trainPredictPlot) 118 | plt.plot(testPredictPlot) 119 | plt.show() 120 | 121 | -------------------------------------------------------------------------------- /stock-prediction/demo/demo8_lstm.py: -------------------------------------------------------------------------------- 1 | #https://github.com/ICEJM1020/LSTM_Stock/blob/master/Code/LSTM_stock.ipynb 2 | import tushare as ts 3 | import pandas as pd 4 | from matplotlib import pyplot as plt 5 | from datetime import datetime 6 | import sys 7 | 8 | today = datetime.now() 9 | end = str(today.year) + str(today.month) + str(today.day) 10 | # 茅台 11 | code = '600519' 12 | if len(sys.argv) > 1: 13 | code = sys.argv[1] 14 | 15 | class StockData(object): 16 | def __init__(self): 17 | self.pro = ts.pro_api('191f98ec62b6953e19200384e71e983c113f8bd1ac12d5e787323844') 18 | 19 | def get_data(self,code, start='19900101', end='20190901'): 20 | stock_code = self.tran_code(code) 21 | return self.pro.query('daily', ts_code=stock_code, start_date=start, end_date=end) 22 | 23 | def tran_code(self,code): 24 | if code[0:1] == '6': 25 | return code + '.SH' 26 | else: 27 | return code + '.SZ' 28 | 29 | 30 | stock = StockData() 31 | data = stock.get_data(code,start="20100101",end=end) 32 | 33 | # 从 34 | data_test = stock.get_data(code, start = '20190901',end = '20191201') 35 | 36 | # 按照时间进行排序 37 | data.sort_values("trade_date", inplace=True) 38 | data = data.reset_index() 39 | print(data.shape) 40 | data.tail() 41 | 42 | from sklearn import preprocessing as process 43 | # 在数据分析之前先对所有的数据进行分析 44 | # 后两项特征的数量级远大于其他项 45 | 46 | X = data.loc[:,'open':'amount'] 47 | # X = data.loc[:,'close':'vol'] 48 | # X = X.drop(columns = ['pct_chg','pre_close']) 49 | X = X.values 50 | # y = data["close"].values 51 | print(X.shape) 52 | 53 | 54 | # 训练集数据处理 55 | # _max = data['close'].max() 56 | # _min = data['close'].min() 57 | # scaler = process.MinMaxScaler(feature_range=(_min, _max)) 58 | # scaler = process.MinMaxScaler(feature_range=(-1, 1)) 59 | scaler = process.StandardScaler() 60 | scaler.fit(X) 61 | X_scalerd = scaler.transform(X) 62 | y = pd.DataFrame(X_scalerd)[3].values 63 | 64 | temp_data = pd.DataFrame(X_scalerd) 65 | temp_data = temp_data.iloc[-30:] 66 | 67 | 68 | print(X_scalerd.shape, y.shape) 69 | 70 | from keras.models import Sequential 71 | from keras.layers.core import Dense, Dropout, Activation 72 | from keras.layers.recurrent import LSTM 73 | from keras.models import load_model 74 | from keras.layers import RepeatVector 75 | import keras 76 | 77 | # 用t天的数据预测t+1天的,所以把y前移 78 | # X有一个会多出来,所以删掉X的最后一个和y的第一个 79 | import numpy as np 80 | 81 | # X_train = X_pca 82 | X_train = pd.DataFrame(X_scalerd)[[3,5,7]].values 83 | 84 | X_train = np.delete(X_train, -1, axis=0) 85 | y_train = np.delete(y, [1]) 86 | 87 | 88 | X_train = X_train.reshape(X_train.shape[0],1, X_train.shape[1]) 89 | y_train = y_train.reshape(y_train.shape[0],1, 1) 90 | print(X_train.shape, 
y_train.shape) 91 | 92 | model = Sequential() 93 | 94 | model.add(LSTM(128, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True)) 95 | 96 | model.add(Dense(16,kernel_initializer="uniform",activation='relu')) 97 | model.add(Dense(1,kernel_initializer="uniform",activation='linear')) 98 | 99 | adam = keras.optimizers.Adam(decay=0.2) 100 | model.compile(loss='mae', optimizer='adam', metrics=['accuracy']) 101 | model.summary() 102 | 103 | # 训练模型 104 | print(X_train.shape, y_train.shape) 105 | history = model.fit(X_train, y_train, epochs=100, verbose=2, shuffle=False) 106 | 107 | model.save("1-1.h5") 108 | 109 | 110 | model = load_model('1-1.h5') 111 | 112 | predictes_stock_price = model.predict(X_train) 113 | 114 | predictes_stock_price = predictes_stock_price.reshape(predictes_stock_price.shape[0]) 115 | y_train = y_train.reshape(y_train.shape[0]) 116 | 117 | plt.plot(predictes_stock_price[-30:], label='pre', color='red') 118 | plt.plot(y_train[-30:], label='ori', color='blue') 119 | plt.legend() 120 | 121 | 122 | # 测试集数据处理 123 | X_test = data_test.loc[:,'close':'vol'] 124 | X_test = X_test.drop(columns = ['change','pct_chg']) 125 | X_test = X_test.values 126 | 127 | scaler = process.StandardScaler() 128 | scaler.fit(X_test) 129 | X_test_scalerd = scaler.transform(X_test) 130 | y_test = pd.DataFrame(X_test_scalerd)[0].values 131 | 132 | X_test_scalerd = X_test_scalerd.reshape(X_test_scalerd.shape[0],1, X_test_scalerd.shape[1]) 133 | 134 | pre_test = model.predict(X_test_scalerd) 135 | 136 | pre_test = pre_test.reshape(pre_test.shape[0]) 137 | 138 | plt.plot(pre_test, label='pre', color='red') 139 | plt.plot(y_test, label='ori', color='blue') 140 | plt.legend() 141 | plt.show() 142 | -------------------------------------------------------------------------------- /stock-prediction/demo/demo9_lstm.py: -------------------------------------------------------------------------------- 1 | import tushare as ts 2 | import pandas as pd 3 | import numpy as np 4 | from matplotlib import pyplot as plt 5 | from datetime import datetime 6 | import sys 7 | 8 | #################### 9 | #1. 获取股票数据 10 | #################### 11 | today = datetime.now() 12 | end = str(today.year) + str(today.month) + str(today.day) 13 | 14 | # 茅台,青岛啤酒600600 15 | code = '600519' 16 | if len(sys.argv) > 1: 17 | code = sys.argv[1] 18 | 19 | df=ts.get_hist_data(code,start='2018-07-09') 20 | df1 = df['close'] 21 | df1 = df1.sort_index(ascending=True) 22 | 23 | print(df1) 24 | #for i in range(0,len(df1)): 25 | # df1[i] = i 26 | 27 | ####################### 28 | # 2. 
处理数据 29 | ####################### 30 | time_step = 30 31 | epochs = 200 32 | pred_days = 5 33 | 34 | from sklearn.preprocessing import MinMaxScaler 35 | scaler=MinMaxScaler(feature_range=(0,1)) 36 | df1=scaler.fit_transform(np.array(df1).reshape(-1,1)) 37 | 38 | 39 | ##splitting dataset into train and test split 40 | training_size=int(len(df1)*0.75) 41 | test_size=len(df1)-training_size 42 | train_data,test_data=df1[0:training_size,:],df1[training_size:,:1] 43 | 44 | # convert an array of values into a dataset matrix 45 | def create_dataset(dataset, time_step=1): 46 | dataX, dataY = [], [] 47 | for i in range(len(dataset)-time_step-pred_days): 48 | a = dataset[i:(i+time_step), 0] ###i=0, 0,1,2,3-----99 100 49 | dataX.append(a) 50 | dataY.append(dataset[i + time_step:i+time_step+pred_days, 0]) 51 | return np.array(dataX), np.array(dataY) 52 | 53 | # reshape into X=t,t+1,t+2,t+3 and Y=t+4 54 | X, y = create_dataset(df1, time_step) 55 | X_test, ytest = create_dataset(test_data, time_step) 56 | 57 | # 乱序 58 | # 59 | from sklearn.model_selection import train_test_split 60 | X_train, X_validate, y_train, y_validate = train_test_split(X, y, test_size=0.20, random_state=42) 61 | 62 | 63 | # reshape input to be [samples, time steps, features] which is required for LSTM 64 | X_test = X_test.reshape(X_test.shape[0],X_test.shape[1] , 1) 65 | X_train = X_train.reshape(X_train.shape[0],X_train.shape[1] , 1) 66 | 67 | ### Create the Stacked LSTM model 68 | # 3. 建立LSTM 模型 69 | ########################### 70 | from tensorflow.keras.models import Sequential 71 | from tensorflow.keras.layers import Dense 72 | from tensorflow.keras.layers import LSTM 73 | 74 | 75 | model=Sequential() 76 | model.add(LSTM(50,return_sequences=True,input_shape=(time_step,1))) 77 | model.add(LSTM(50,return_sequences=True)) 78 | model.add(LSTM(50)) 79 | model.add(Dense(pred_days)) 80 | model.compile(loss='mean_squared_error',optimizer='adam') 81 | 82 | #model.summary() 83 | 84 | ############# 85 | # 4. 训练 86 | ############# 87 | model.fit(X_train,y_train,validation_data=(X_test,ytest),epochs=epochs,batch_size=64,verbose=1) 88 | 89 | 90 | ### Lets Do the prediction and check performance metrics 91 | # 92 | # 5. 
预测 93 | ############## 94 | 95 | test_predict=model.predict(X_test) 96 | 97 | ##Transformback to original form 98 | test_predict=scaler.inverse_transform(test_predict) 99 | print("原始数据:") 100 | print(scaler.inverse_transform( test_data)) 101 | print("预测数据") 102 | print(test_predict) 103 | 104 | -------------------------------------------------------------------------------- /stock-prediction/parameters.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | from tensorflow.keras.layers import LSTM 4 | 5 | 6 | # Window size or the sequence length 7 | N_STEPS = 70 8 | # Lookup step, 1 is the next day 9 | LOOKUP_STEP = 1 10 | 11 | # test ratio size, 0.2 is 20% 12 | TEST_SIZE = 0.2 13 | # features to use 14 | #FEATURE_COLUMNS = ["adjclose", "volume", "open", "high", "low"] 15 | FEATURE_COLUMNS = ['open','high','low','close','volume'] 16 | # date now 17 | date_now = time.strftime("%Y-%m-%d") 18 | 19 | ### model parameters 20 | 21 | N_LAYERS = 2 22 | # LSTM cell 23 | CELL = LSTM 24 | # 256 LSTM neurons 25 | UNITS = 256 26 | # 40% dropout 27 | DROPOUT = 0.4 28 | # whether to use bidirectional RNNs 29 | BIDIRECTIONAL = False 30 | 31 | ### training parameters 32 | 33 | # mean absolute error loss 34 | # LOSS = "mae" 35 | # huber loss 36 | LOSS = "huber_loss" 37 | OPTIMIZER = "adam" 38 | BATCH_SIZE = 64 39 | EPOCHS = 400 40 | 41 | # Apple stock market 42 | ticker = "TSingtao" 43 | #青岛啤酒 44 | ticker_code = '600600' 45 | startdate = '2020-01-01' 46 | ticker_data_filename = os.path.join("data", f"{ticker}_{date_now}.csv") 47 | # model name to save, making it as unique as possible based on parameters 48 | model_name = f"{date_now}_{ticker}-{LOSS}-{OPTIMIZER}-{CELL.__name__}-seq-{N_STEPS}-step-{LOOKUP_STEP}-layers-{N_LAYERS}-units-{UNITS}" 49 | if BIDIRECTIONAL: 50 | model_name += "-b" 51 | -------------------------------------------------------------------------------- /stock-prediction/test.py: -------------------------------------------------------------------------------- 1 | from stock_prediction import create_model, load_data, np 2 | from parameters import * 3 | import matplotlib.pyplot as plt 4 | from sklearn.metrics import accuracy_score 5 | import sys 6 | 7 | def plot_graph(model, data): 8 | y_test = data["y_test"] 9 | X_test = data["X_test"] 10 | y_pred = model.predict(X_test) 11 | y_test = np.squeeze(data["column_scaler"]["close"].inverse_transform(np.expand_dims(y_test, axis=0))) 12 | y_pred = np.squeeze(data["column_scaler"]["close"].inverse_transform(y_pred)) 13 | plt.plot(y_test[-200:], c='b') 14 | plt.plot(y_pred[-200:], c='r') 15 | plt.xlabel("Days") 16 | plt.ylabel("Price") 17 | plt.legend(["Actual Price", "Predicted Price"]) 18 | plt.show() 19 | 20 | 21 | def get_accuracy(model, data): 22 | y_test = data["y_test"] 23 | X_test = data["X_test"] 24 | y_pred = model.predict(X_test) 25 | y_test = np.squeeze(data["column_scaler"]["close"].inverse_transform(np.expand_dims(y_test, axis=0))) 26 | y_pred = np.squeeze(data["column_scaler"]["close"].inverse_transform(y_pred)) 27 | y_pred = list(map(lambda current, future: int(float(future) > float(current)), y_test[:-LOOKUP_STEP], y_pred[LOOKUP_STEP:])) 28 | y_test = list(map(lambda current, future: int(float(future) > float(current)), y_test[:-LOOKUP_STEP], y_test[LOOKUP_STEP:])) 29 | return accuracy_score(y_test, y_pred) 30 | 31 | 32 | def predict(model, data, classification=False): 33 | # retrieve the last sequence from data 34 | last_sequence = data["last_sequence"][:N_STEPS] 35 | # 
retrieve the column scalers 36 | column_scaler = data["column_scaler"] 37 | # reshape the last sequence 38 | last_sequence = last_sequence.reshape((last_sequence.shape[1], last_sequence.shape[0])) 39 | # expand dimension 40 | last_sequence = np.expand_dims(last_sequence, axis=0) 41 | # get the prediction (scaled from 0 to 1) 42 | prediction = model.predict(last_sequence) 43 | # get the price (by inverting the scaling) 44 | predicted_price = column_scaler["close"].inverse_transform(prediction)[0][0] 45 | return predicted_price 46 | 47 | 48 | # load the data 49 | data = load_data(ticker_code, N_STEPS, lookup_step=LOOKUP_STEP, test_size=TEST_SIZE, 50 | feature_columns=FEATURE_COLUMNS, shuffle=False) 51 | 52 | # construct the model 53 | model = create_model(N_STEPS, loss=LOSS, units=UNITS, cell=CELL, n_layers=N_LAYERS, 54 | dropout=DROPOUT, optimizer=OPTIMIZER, bidirectional=BIDIRECTIONAL) 55 | 56 | 57 | model_path = os.path.join("results", model_name) + ".h5" 58 | if len(sys.argv) > 1: 59 | print (sys.argv[1]) 60 | model_path = sys.argv[1] 61 | model.load_weights(model_path) 62 | 63 | # evaluate the model 64 | mse, mae = model.evaluate(data["X_test"], data["y_test"], verbose=0) 65 | # calculate the mean absolute error (inverse scaling) 66 | mean_absolute_error = data["column_scaler"]["close"].inverse_transform([[mae]])[0][0] 67 | print("Mean Absolute Error:", mean_absolute_error) 68 | # predict the future price 69 | future_price = predict(model, data) 70 | print(f"Future price after {LOOKUP_STEP} days is {future_price:.2f}$") 71 | print("Accuracy Score:", get_accuracy(model, data)) 72 | plot_graph(model, data) 73 | -------------------------------------------------------------------------------- /stock-prediction/train.py: -------------------------------------------------------------------------------- 1 | from stock_prediction import create_model, load_data 2 | from tensorflow.keras.layers import LSTM 3 | from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard 4 | import os 5 | import pandas as pd 6 | from parameters import * 7 | 8 | 9 | # create these folders if they does not exist 10 | if not os.path.isdir("results"): 11 | os.mkdir("results") 12 | 13 | if not os.path.isdir("logs"): 14 | os.mkdir("logs") 15 | 16 | if not os.path.isdir("data"): 17 | os.mkdir("data") 18 | 19 | # load the data 20 | data = load_data(ticker_code, N_STEPS, lookup_step=LOOKUP_STEP, test_size=TEST_SIZE, feature_columns=FEATURE_COLUMNS) 21 | 22 | # save the dataframe 23 | data["df"].to_csv(ticker_data_filename) 24 | 25 | # construct the model 26 | model = create_model(N_STEPS, loss=LOSS, units=UNITS, cell=CELL, n_layers=N_LAYERS, 27 | dropout=DROPOUT, optimizer=OPTIMIZER, bidirectional=BIDIRECTIONAL) 28 | 29 | # some tensorflow callbacks 30 | checkpointer = ModelCheckpoint(os.path.join("results", model_name + ".h5"), save_weights_only=True, save_best_only=True, verbose=1) 31 | tensorboard = TensorBoard(log_dir=os.path.join("logs", model_name)) 32 | 33 | history = model.fit(data["X_train"], data["y_train"], 34 | batch_size=BATCH_SIZE, 35 | epochs=EPOCHS, 36 | validation_data=(data["X_test"], data["y_test"]), 37 | callbacks=[checkpointer, tensorboard], 38 | verbose=1) 39 | 40 | model.save(os.path.join("results", model_name) + ".h5") 41 | -------------------------------------------------------------------------------- /stock_prediction_lstmV1.py: -------------------------------------------------------------------------------- 1 | import tushare as ts 2 | import pandas as pd 3 | import numpy as np 4 | from 
matplotlib import pyplot as plt 5 | from datetime import datetime 6 | import sys 7 | 8 | #################### 9 | #1. 获取股票数据 10 | #################### 11 | #today = datetime.now() 12 | #end = str(today.year) + str(today.month) + str(today.day) 13 | 14 | # 茅台600519,青岛啤酒600600 15 | code = '600519' 16 | if len(sys.argv) > 1: 17 | code = sys.argv[1] 18 | 19 | #建议使用pro版本 20 | #通过tushare接口获取股票数据 从2018年1月1日到今天 21 | df=ts.get_hist_data(code,start='2018-01-01') 22 | df1 = df['close'] 23 | df1 = df1.sort_index(ascending=True) 24 | #for i in range(0,len(df1)): #test data 25 | # df1[i] = i 26 | print(df1) 27 | 28 | ####################### 29 | # 2. 处理数据 30 | ####################### 31 | time_step = 30 #每个数据段为30天,这样数据打乱也没有关系 32 | epochs = 200 #200次重复训练 33 | pred_days = 5 #每次预测未来的天数,例如预测未来5天的。 34 | 35 | #已知 前time_step(30)天的数据,预测pred_days(5)天的数据 36 | 37 | #归化数据到0-1之间 38 | from sklearn.preprocessing import MinMaxScaler 39 | scaler=MinMaxScaler(feature_range=(0,1)) 40 | df1=scaler.fit_transform(np.array(df1).reshape(-1,1)) 41 | 42 | #取 25%的数据做最后的预测test_data 43 | ##splitting dataset into train and test split 44 | training_size=int(len(df1)*0.75) 45 | test_size=len(df1)-training_size 46 | train_data,test_data=df1[0:training_size,:],df1[training_size:,:1] 47 | 48 | # convert an array of values into a dataset matrix 49 | #dataX 是训练的数据,dataY是目标预测的数据 50 | #基本是time_step(30)天,预测pred_days(5)天 51 | def create_dataset(dataset, time_step=1): 52 | dataX, dataY = [], [] 53 | for i in range(len(dataset)-time_step-pred_days): 54 | a = dataset[i:(i+time_step),0] ###i=0, 0,1,2,3-----99 100 55 | dataX.append(a) 56 | dataY.append(dataset[i + time_step:i+time_step+pred_days,0]) 57 | return np.array(dataX), np.array(dataY) 58 | 59 | def create_dataset_pred(dataset, time_step=1): 60 | dataX = [] 61 | for i in range(len(dataset)-time_step): 62 | a = dataset[i:(i+time_step), 0] ###i=0, 0,1,2,3-----99 100 63 | dataX.append(a) 64 | return np.array(dataX) 65 | 66 | 67 | # reshape into X=t,t+1,t+2,t+3 and Y=t+4 68 | X, y = create_dataset(df1, time_step) 69 | 70 | X_test = create_dataset_pred(test_data, time_step) 71 | 72 | # 数据乱序 73 | # 74 | from sklearn.model_selection import train_test_split 75 | X_train, X_validate, y_train, y_validate = train_test_split(X, y, test_size=0.20, random_state=42) 76 | 77 | 78 | # reshape input to be [samples, time steps, features] which is required for LSTM 79 | X_test = X_test.reshape(X_test.shape[0],X_test.shape[1] , 1) 80 | X_train = X_train.reshape(X_train.shape[0],X_train.shape[1] , 1) 81 | X_validate = X_validate.reshape(X_validate.shape[0],X_validate.shape[1] , 1) 82 | ### Create the Stacked LSTM model 83 | # 3. 建立LSTM 模型 84 | ########################### 85 | from tensorflow.keras.models import Sequential 86 | from tensorflow.keras.layers import Dense 87 | from tensorflow.keras.layers import LSTM 88 | 89 | #采用了50个cell单元做hidden,3层lstm计算, 90 | #最后输出pred_days(5)天的结果 91 | model=Sequential() 92 | model.add(LSTM(50,return_sequences=True,input_shape=(time_step,1))) 93 | model.add(LSTM(50,return_sequences=True)) 94 | model.add(LSTM(50)) 95 | model.add(Dense(pred_days)) 96 | model.compile(loss='mean_squared_error',optimizer='adam') 97 | 98 | #model.summary() 99 | 100 | ############# 101 | # 4. 训练 102 | ############# 103 | model.fit(X_train,y_train,validation_data=(X_validate,y_validate),epochs=epochs,batch_size=64,verbose=1) 104 | 105 | 106 | ### Lets Do the prediction and check performance metrics 107 | # 108 | # 5. 
预测 109 | ############## 110 | 111 | test_predict=model.predict(X_test) 112 | 113 | ##Transformback to original form 114 | #inverse_transform 还原数据 115 | test_predict=scaler.inverse_transform(test_predict) 116 | print("原始数据:") 117 | print(scaler.inverse_transform( test_data)) 118 | print("预测数据") 119 | print(test_predict) 120 | 121 | -------------------------------------------------------------------------------- /stock_prediction_lstmV2.py: -------------------------------------------------------------------------------- 1 | import tushare as ts 2 | import pandas as pd 3 | import numpy as np 4 | from matplotlib import pyplot as plt 5 | from datetime import datetime 6 | import sys 7 | 8 | #################### 9 | #1. 获取股票数据 10 | #################### 11 | #today = datetime.now() 12 | #end = str(today.year) + str(today.month) + str(today.day) 13 | 14 | # 茅台600519,青岛啤酒600600 15 | code = '600519' 16 | if len(sys.argv) > 1: 17 | code = sys.argv[1] 18 | 19 | #建议使用pro版本 20 | #通过tushare接口获取股票数据 从2018年1月1日到今天 21 | df=ts.get_hist_data(code,start='2018-01-01') 22 | df1 = df['close'] 23 | df1 = df1.sort_index(ascending=True) 24 | df2 = df1 25 | #for i in range(0,len(df1)): #test data 26 | # df1[i] = i 27 | print(df1) 28 | 29 | ####################### 30 | # 2. 处理数据 31 | ####################### 32 | time_step = 30 #每个数据段为30天,这样数据打乱也没有关系 33 | epochs = 50 #200次重复训练 34 | pred_days = 5 #每次预测未来的天数,例如预测未来5天的。 35 | 36 | #已知 前time_step(30)天的数据,预测pred_days(5)天的数据 37 | 38 | #归化数据到0-1之间 39 | from sklearn.preprocessing import MinMaxScaler 40 | scaler=MinMaxScaler(feature_range=(0,1)) 41 | df1=scaler.fit_transform(np.array(df1).reshape(-1,1)) 42 | 43 | #取 25%的数据做最后的预测test_data 44 | ##splitting dataset into train and test split 45 | training_size=int(len(df1)*0.75) 46 | test_size=len(df1)-training_size 47 | train_data,test_data=df1[0:training_size,:],df1[training_size:,:1] 48 | 49 | # convert an array of values into a dataset matrix 50 | #dataX 是训练的数据,dataY是目标预测的数据 51 | #基本是time_step(30)天,预测pred_days(5)天 52 | def create_dataset(dataset, time_step=1): 53 | dataX, dataY = [], [] 54 | for i in range(len(dataset)-time_step-pred_days): 55 | a = dataset[i:(i+time_step),0] ###i=0, 0,1,2,3-----99 100 56 | dataX.append(a) 57 | dataY.append(dataset[i + time_step:i+time_step+pred_days,0]) 58 | return np.array(dataX), np.array(dataY) 59 | 60 | def create_dataset_pred(dataset, time_step=1): 61 | dataX = [] 62 | for i in range(len(dataset)-time_step): 63 | a = dataset[i:(i+time_step), 0] ###i=0, 0,1,2,3-----99 100 64 | dataX.append(a) 65 | return np.array(dataX) 66 | 67 | 68 | # reshape into X=t,t+1,t+2,t+3 and Y=t+4 69 | X, y = create_dataset(df1, time_step) 70 | 71 | X_test = create_dataset_pred(test_data, time_step) 72 | 73 | # 数据乱序 74 | # 75 | from sklearn.model_selection import train_test_split 76 | X_train, X_validate, y_train, y_validate = train_test_split(X, y, test_size=0.20, random_state=42) 77 | 78 | 79 | # reshape input to be [samples, time steps, features] which is required for LSTM 80 | X_test = X_test.reshape(X_test.shape[0],X_test.shape[1] , 1) 81 | X_train = X_train.reshape(X_train.shape[0],X_train.shape[1] , 1) 82 | X_validate = X_validate.reshape(X_validate.shape[0],X_validate.shape[1] , 1) 83 | ### Create the Stacked LSTM model 84 | # 3. 
建立LSTM 模型 85 | ########################### 86 | from tensorflow.keras.models import Sequential 87 | from tensorflow.keras.layers import Dense 88 | from tensorflow.keras.layers import LSTM 89 | 90 | #采用了50个cell单元做hidden,3层lstm计算, 91 | #最后输出pred_days(5)天的结果 92 | model=Sequential() 93 | model.add(LSTM(50,return_sequences=True,input_shape=(time_step,1))) 94 | model.add(LSTM(50,return_sequences=True)) 95 | model.add(LSTM(50)) 96 | model.add(Dense(pred_days)) 97 | model.compile(loss='mean_squared_error',optimizer='adam') 98 | 99 | #model.summary() 100 | 101 | ############# 102 | # 4. 训练 103 | ############# 104 | 105 | model.fit(X_train,y_train,validation_data=(X_validate,y_validate),epochs=epochs,batch_size=64,verbose=1) 106 | 107 | 108 | ### Lets Do the prediction and check performance metrics 109 | # 110 | # 5. 预测 111 | ############## 112 | 113 | test_predict=model.predict(X_test) 114 | 115 | ##Transformback to original form 116 | #inverse_transform 还原数据 117 | test_predict=scaler.inverse_transform(test_predict) 118 | print("原始数据:") 119 | print(scaler.inverse_transform( test_data)) 120 | print("预测数据") 121 | print(test_predict) 122 | 123 | 124 | def mouse_move(event): 125 | try: 126 | line_y.set_xdata(event.xdata) 127 | line_y.figure.canvas.draw() 128 | if 0 < int(event.xdata) < len(df): 129 | print(df.index[int(event.xdata)],df2[int(event.xdata)]) 130 | annotate.set_position ((event.xdata,df2[int(event.xdata)])) 131 | annotate.set_text("%s, %.1f" % (df2.index[int(event.xdata)],df2[int(event.xdata)])) 132 | annotate.figure.canvas.draw() 133 | except Exception as e: 134 | print(e) 135 | pass 136 | 137 | fig = plt.figure() 138 | ax = fig.add_subplot(111) 139 | line_y = "" 140 | annotate = "" 141 | fig.canvas.mpl_connect('motion_notify_event', mouse_move) 142 | 143 | 144 | def show(): 145 | global line_y,annotate 146 | bbox_props = dict(boxstyle="round", fc="w", ec="0.5", alpha=0.9) 147 | 148 | lines = ax.plot(df2.values, 'g',label="stock data") 149 | ax.plot(range(training_size+time_step,len(test_predict)+training_size+time_step), #x 150 | [i[0] for i in test_predict], #y 151 | 'r',label="predict") 152 | 153 | line_y = ax.axvline(x=10,color='skyblue',linestyle=':') 154 | 155 | annotate = ax.annotate('(%s, %.1f)'%("date", 0.0), 156 | (1,1), xycoords='figure pixels',xytext=(1,1), 157 | textcoords='offset pixels', 158 | bbox=bbox_props) 159 | 160 | ax.legend(loc='upper left') 161 | plt.gcf().autofmt_xdate() 162 | plt.show() 163 | 164 | show() 165 | -------------------------------------------------------------------------------- /swingtrade.py: -------------------------------------------------------------------------------- 1 | #波段交易 2 | 3 | from common.framework import * 4 | 5 | result_list = [] 6 | 7 | #近似值,考虑有一点浮动 8 | approx = 0.0 9 | 10 | def up(datas): 11 | a = datas[-1][2] 12 | b = datas[-2][2] 13 | c = datas[-3][2] 14 | if (a > b and b > c): 15 | return True 16 | else : 17 | return False 18 | 19 | def swing(code,name,datas): 20 | print(code,name) 21 | mlist = [] #max 22 | nlist = [] #min 23 | 24 | if len(datas)<7: 25 | return 26 | mnlist = get_mnlist(datas,7) 27 | mnlist1 = [] 28 | length = len(mnlist) 29 | 30 | # 寻找一个强上升趋势 上升空间 20% 31 | for i in range(0,len(mnlist)-1): 32 | if mnlist[i][0] == 0 and mnlist[i+1][0] == 1: 33 | a = mnlist[i][2] #close value 34 | b = mnlist[i+1][2] # close value 35 | if (b / a) > 1.4: 36 | print(OKRED,b / a ,a,b,ENDC) 37 | mnlist1 = mnlist[i:] 38 | break 39 | 40 | if len(mnlist1) < 3: 41 | return 42 | 43 | #将列表分离 最大 和最小分开 44 | for i in mnlist1: 45 | if i[0] == 1: 
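            # (added note) entry[0] is the pivot flag from get_mnlist —
            # apparently 1 for a local high, 0 for a local low, matching the
            # [flag, date, close, idx] rows that mn() in Harmonic_N.py emits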
46 | mlist.append(i) 47 | if i[0] == 0: 48 | nlist.append(i) 49 | 50 | if len(nlist) < 3: 51 | return 52 | 53 | 54 | #查找最小值是不是趋势向上的 55 | count = 0 56 | for i in range(2,len(nlist)): 57 | if up(nlist[i-2:i+1]): 58 | count +=1 59 | else: 60 | count = 0 61 | if count > 1: 62 | print(OKBLUE,name,code,nlist[i-2][2],nlist[i-1][2],nlist[i][2],ENDC) 63 | turn = nlist[i][3].turn 64 | volume = nlist[i][3].volume 65 | date = nlist[i][3].date 66 | close = nlist[i][2] 67 | hqltsz = float(close) * float(volume) / float(turn) / 1000000 68 | hqltsz = float('%.2f' % hqltsz) 69 | result_list.append([name,code,date,hqltsz]) 70 | def display(): 71 | for i in result_list: 72 | print(i) 73 | 74 | def save(): 75 | df = pd.DataFrame(result_list ,columns = ['name','code','date','流通股值']) 76 | save_df_tohtml('./datas/stock_'+endday+"swing.html",df) 77 | 78 | if __name__ == "__main__": 79 | init_stock_list() 80 | loop_all(swing) 81 | display() 82 | save() 83 | -------------------------------------------------------------------------------- /talib_docs.py: -------------------------------------------------------------------------------- 1 | # 2 | # python3 talib demo使用用例 3 | # 4 | # talib 是非常有价值的股票计算分析工具 5 | # 有很多写好的公式可以直接使用 6 | # 本例子源代码在github上 7 | # https://github.com/asmcos/quantrader 8 | 9 | import talib 10 | from common.framework import * 11 | 12 | # 1. 获取股票的K线,代码和日期输入正确就行 13 | # 例如:sz.000100 TCL 14 | # 大家可以用常见的 python库就可以,baostock,tushare 15 | # 例子中是我自己的网站的数据 16 | #df = get_day_data("TCL",'sz.000100','2021-04-18','') 17 | 18 | df = get_day_data("隆基",'sh.601012','2021-04-18','') 19 | ######### 20 | """ 21 | 5 2021-04-23 22 | close code date high low name open turn volume 23 | 0 89.61 sh.601012 2021-04-19 90.16 84.77 隆基股份 85.90 1.9453 75210468 24 | 1 92.40 sh.601012 2021-04-20 94.50 88.86 隆基股份 89.60 2.1712 83943905 25 | 2 92.44 sh.601012 2021-04-21 95.57 90.66 隆基股份 94.00 2.2525 87086859 26 | 3 90.61 sh.601012 2021-04-22 93.88 88.38 隆基股份 93.51 1.5086 58328043 27 | 4 91.99 sh.601012 2021-04-23 93.30 90.70 隆基股份 90.70 1.1907 46036028 28 | """ 29 | ######### 30 | print(df) 31 | 32 | closes = df['close'] 33 | 34 | # 2. max value 35 | # max,min 使用方法类似 36 | # 所以我写了一个例子 37 | max1 = talib.MAX(closes,len(closes)) 38 | """ 39 | 0 NaN 40 | 1 NaN 41 | 2 NaN 42 | 3 NaN 43 | 4 92.44 44 | 含义就是 第5天的收盘价最高(从0开始计数的) 45 | """ 46 | print(max1) 47 | 48 | # 3. SMA 49 | # N日平均值 50 | # MA 国际上SMA=MA 51 | # 国内SMA有自己的算法不一样 52 | 53 | sma = talib.SMA(closes.values,3) 54 | print("3日平均:\n",sma) 55 | #ma = talib.MA(closes.values,3) 56 | # 结果sma = ma 不再重复执行 57 | 58 | 59 | # 4. 
EMA 60 | # 指数移动平均线 61 | ema3 = talib.EMA(closes,3) 62 | print("ema3:\n",ema3) 63 | 64 | # 双均 65 | # talib.DEMA(closes, timeperiod = 30) 66 | # 考夫曼 67 | # talib.KAMA(closes, timeperiod = 30) 68 | # 三重指数移动平均线 69 | # talib.TEMA(closes, timeperiod=30) 70 | 71 | # 阶段中点价格 72 | # 73 | #midpoint = talib.MIDPOINT(closes,3) 74 | 75 | # 移动加权平均 76 | # talib.WMA(closes, timeperiod = 30) 77 | 78 | # 5.布林线BBANDS 79 | # 参数说明:talib.BBANDS(close, timeperiod, matype) 80 | # close:收盘价;timeperiod:周期;matype:平均方法(bolling线的middle线 = MA,用于设定哪种类型的MA) 81 | # MA_Type: 0=SMA, 1=EMA, 2=WMA, 3=DEMA, 4=TEMA, 5=TRIMA, 6=KAMA, 7=MAMA, 8=T3 (Default=SMA) 82 | upper, middle, lower = talib.BBANDS(closes,5,matype = talib.MA_Type.EMA) 83 | print("布林线: ",upper,middle,lower) 84 | 85 | # 6 .macd 86 | # 注意这个日线周期要长,大家可以调整获取k线数据的周期 例如:2021-01-01 87 | # 另外:macd是相对的值,周期不一样,结果也不一样 88 | # 例子,我使用了常见的参数12,26,9 89 | # diff, dea, macd=df1['macd']*2 90 | # talib name: macd, macdsignal,macdhist 91 | df1 = get_day_data("隆基",'sh.601012','2020-02-18','') 92 | df1['diff'], df1['dea'], df1['macd'] = talib.MACD(df1['close'], fastperiod=12, slowperiod=26, signalperiod=9) 93 | print("MACD 数据必须大于26天:\n",df1) 94 | 95 | print("################\n# 波动量指标\n################") 96 | 97 | # 7. ATR:真实波动幅度均值 98 | # 99 | atr = talib.ATR(df1['high'], df1['low'], df1['close'], timeperiod=14) 100 | print("ATR\n",atr) 101 | 102 | # 8. AD 量价指标 103 | ad = talib.AD(df1['high'],df1['low'],df1['close'],df1['volume']) 104 | print("AD\n",ad) 105 | 106 | # 9. OBV:能量潮 107 | obv = talib.OBV(df1['close'],df1['volume']) 108 | print("OBV\n",obv) 109 | 110 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /tcn_predict.py: -------------------------------------------------------------------------------- 1 | # 2 | # keras-2.7.0,tensorflow 2.7.0 3 | # 使用lstm做股票二分类验证 4 | # 5 | 6 | import os 7 | import numpy as np 8 | from matplotlib import pyplot as plt 9 | import requests 10 | import pandas as pd 11 | import talib 12 | import datetime 13 | 14 | from common.framework import save_df_tohtml 15 | 16 | 17 | from tensorflow.keras import Input 18 | from tensorflow.keras.models import Sequential,Model,load_model 19 | from tensorflow.keras.layers import Dense, Dropout, Activation,LSTM,Bidirectional 20 | import tensorflow as tf 21 | import json 22 | from tensorflow.keras.layers import Attention,GlobalMaxPooling1D,Concatenate 23 | 24 | from tcn import TCN 25 | 26 | def DisplayOriginalLabel(values): 27 | cnt1 = 0 28 | cnt2 = 0 29 | for i in range(len(values)): 30 | if 1 == values[i] : 31 | cnt1 += 1 32 | else: 33 | cnt2 += 1 34 | 35 | print("origin: %.2f %% " % (100 * cnt1 / (cnt1 + cnt2)),len(values)) 36 | 37 | 38 | 39 | df_all = [] 40 | # 1. 
获取数据 41 | def load_data_fromfile(filename): 42 | global df_all 43 | 44 | content = open(filename).read() 45 | df_dict = json.loads(content) 46 | for k in df_dict.keys(): 47 | df = pd.read_json(df_dict.get(k)) 48 | df = df[~df.isin([np.nan, np.inf, -np.inf]).any(1)] 49 | df_all.append(df) 50 | 51 | 52 | load_data_fromfile('lstm_train2021-12-20.csv') 53 | 54 | print(df_all[0].columns) 55 | 56 | 57 | 58 | # 准备预测的数据 59 | # 60 | 61 | sequence_len = 40 62 | prec = 10 #target 百分比 63 | fields = [ 64 | 'ma10', 65 | 'ma120', 'ma20', 'ma30', 'ma5', 'ma60', 'rise', 'risevol', 66 | 'dea', 'diff', 'macd' ,'oc','close'] 67 | 68 | X_train = [] 69 | y_train = [] 70 | X_test = [] 71 | y_test = [] 72 | 73 | def load_data(df, seq_len, ratio=0.9): 74 | 75 | df1 = df[df['date']<'2021-07-15'] 76 | df2 = df[df['date']>'2021-07-16'] 77 | 78 | label1 = df1['target'].values > prec 79 | label2 = df2['target'].values > prec 80 | 81 | datas1 = df1.loc[:,fields] 82 | datas2 = df2.loc[:,fields] 83 | 84 | sequence_length = seq_len 85 | 86 | if len(datas1) <= sequence_length or len(datas2) <= sequence_length: 87 | return 88 | 89 | for index in range(len(datas1) - sequence_length): 90 | X_train.append(datas1[index: index + sequence_length].values) 91 | y_train.append(label1[index+sequence_length-1]) 92 | 93 | for index in range(len(datas2) - sequence_length): 94 | X_test.append(datas2[index: index + sequence_length].values) 95 | y_test.append(label2[index+sequence_length-1]) 96 | 97 | 98 | 99 | for df in df_all[:100]: 100 | load_data(df,sequence_len) 101 | 102 | X_train = np.array(X_train) 103 | X_train = np.reshape(X_train,(X_train.shape[0],X_train.shape[1],len(fields))) 104 | y_train = np.array(y_train) 105 | 106 | X_test = np.array(X_test) 107 | X_test = np.reshape(X_test,(X_test.shape[0],X_test.shape[1],len(fields))) 108 | 109 | 110 | def build_model(): 111 | d = 0.2 112 | model = Sequential() 113 | 114 | # inputs: A 3D tensor with shape `[batch, timesteps, feature]`. 
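    # (added note) no input_shape/Input layer is given to this first LSTM, so
    # Keras defers building the model until the first fit/predict call and
    # infers (timesteps, features) from the batch; passing
    # input_shape=(sequence_len, len(fields)) explicitly would pin it down.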
115 | # 输入的数据格式 是 总尺寸,时间步长,这里是 sequence_len, feature,特征维度 116 | # now model.output_shape == (None, 128) 117 | model.add(LSTM(128, return_sequences=True)) 118 | model.add(Dropout(d)) 119 | 120 | # for subsequent layers, no need to specify the input size: 121 | model.add(LSTM(64, return_sequences=False)) 122 | model.add(Dropout(d)) 123 | 124 | # fully connected layer 125 | model.add(Dense(16,activation='relu')) 126 | # 输入 1 维度 0,1 127 | model.add(Dense(1,activation='sigmoid')) 128 | 129 | lossfn = tf.keras.losses.BinaryCrossentropy( 130 | from_logits=False, 131 | label_smoothing=0.0, 132 | axis=-1, 133 | reduction="auto", 134 | name="binary_crossentropy", 135 | ) 136 | # 二分类 137 | model.compile(optimizer='rmsprop', 138 | loss=lossfn, metrics=['accuracy']) 139 | return model 140 | 141 | time_steps = X_train.shape[1] 142 | input_dim = X_train.shape[2] 143 | 144 | print(time_steps,input_dim) 145 | 146 | def build_model2(): 147 | d = 0.2 148 | 149 | model_input = Input(shape=(time_steps, input_dim)) 150 | 151 | x = TCN(input_shape=(time_steps, input_dim),return_sequences=False)(model_input) 152 | 153 | x = Dense(1,activation='sigmoid')(x) 154 | 155 | model = Model(model_input, x) 156 | 157 | lossfn = tf.keras.losses.BinaryCrossentropy( 158 | from_logits=False, 159 | label_smoothing=0.0, 160 | axis=-1, 161 | reduction="auto", 162 | name="binary_crossentropy", 163 | ) 164 | # 二分类 165 | model.compile(optimizer='rmsprop', 166 | loss=lossfn, metrics=['accuracy']) 167 | return model 168 | 169 | model = build_model2() 170 | 171 | log_dir="logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S") 172 | tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1) 173 | 174 | #X = pd.DataFrame(data = X_train, columns = fields) 175 | 176 | model.fit(X_train,y_train,batch_size=200, 177 | epochs=2,callbacks=[tensorboard_callback]) 178 | 179 | y_pred = model.predict(X_test) 180 | 181 | # 对测试集进行预测 182 | # print(tf.greater(y_pred, .5)) 183 | print(y_pred) 184 | 185 | pcnt1 = 0 186 | pcnt2 = 0 187 | for i in range(len(y_pred)): 188 | if y_pred[i][0] < 0.6 : 189 | continue 190 | 191 | if y_test[i] == True : 192 | pcnt1 += 1 193 | else: 194 | pcnt2 += 1 195 | 196 | DisplayOriginalLabel(y_test) 197 | if pcnt1+pcnt2 > 0: 198 | print("Accuracy: %.2f %% " % (100 * pcnt1 / (pcnt1 + pcnt2)),pcnt1 + pcnt2) 199 | 200 | 201 | 202 | -------------------------------------------------------------------------------- /tdx.py: -------------------------------------------------------------------------------- 1 | #pytdx 2 | 3 | from pytdx.hq import TdxHq_API 4 | import pandas as pd 5 | from common.common import * 6 | 7 | api = TdxHq_API() 8 | 9 | block_list = [] 10 | 11 | filename = './datas/stock_tdx_block'+endday+'.html' 12 | 13 | def get_block(): 14 | all_list = api.get_security_list(1, 0) 15 | for i in all_list: 16 | code = int (i['code']) 17 | if (code >= 880300) and (code <=880999) and (code != 880650): 18 | print(i['code'],i['name']) 19 | block_list.append([i['code'],i['name']]) 20 | dayK_list = [] 21 | def get_bar(): 22 | for i in block_list: 23 | code = i[0] 24 | name = i[1] 25 | datas = api.get_index_bars(9,1, code, 0, 20) 26 | if datas == None or len(datas)<20: 27 | continue 28 | c1 = datas[-1]['close'] 29 | d1 = datas[-1]['datetime'] 30 | c2 = datas[-2]['close'] 31 | c5 = datas[-5]['close'] 32 | c10 = datas[-10]['close'] 33 | c20 = datas[-20]['close'] 34 | print(name,code,d1,c1,(c1-c2)*100/c2,(c1-c5)*100/c5,(c1-c10)*100/c10,(c1-c20)*100/c20) 35 | 
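        # (added note) each (c1 - cN) * 100 / cN term is the percent change of
        # the latest close versus N-1 bars back; with the 20 daily bars fetched
        # above, the last column approximates a one-month move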
dayK_list.append([name,code,d1,c1,(c1-c2)*100/c2,(c1-c5)*100/c5,(c1-c10)*100/c10,(c1-c20)*100/c20]) 36 | 37 | df = pd.DataFrame(dayK_list,columns=['name','code','date','close','今日涨幅','周涨幅','半月涨幅','月涨幅']) 38 | df = df.sort_values(by='今日涨幅',ascending=False).reset_index() 39 | del df['index'] 40 | 41 | content = df.to_html(escape=False,float_format='%.2f') 42 | 43 | content +='周涨幅排序:\n' 44 | 45 | df = df.sort_values(by='周涨幅',ascending=False).reset_index() 46 | del df['index'] 47 | 48 | content += df.to_html(escape=False,float_format='%.2f') 49 | print("save file",filename) 50 | save_file(filename,content) 51 | 52 | 53 | 54 | tdxblockdf = '' 55 | 56 | def QA_fetch_get_tdx_industry() -> pd.DataFrame: 57 | import random 58 | import tempfile 59 | import shutil 60 | import os 61 | from urllib.request import urlopen 62 | global tdxblockdf 63 | 64 | def gettempdir(): 65 | tmpdir_root = tempfile.gettempdir() 66 | subdir_name = 'tdx_base' #+ str(random.randint(0, 1000000)) 67 | tmpdir = os.path.join(tmpdir_root, subdir_name) 68 | if not os.path.exists(tmpdir): 69 | os.makedirs(tmpdir) 70 | 71 | return tmpdir 72 | 73 | def download_tdx_file(tmpdir) -> str: 74 | url = 'http://www.tdx.com.cn/products/data/data/dbf/base.zip' 75 | try: 76 | file = tmpdir + '/' + 'base.zip' 77 | f = urlopen(url) 78 | data = f.read() 79 | with open(file, 'wb') as code: 80 | code.write(data) 81 | f.close() 82 | shutil.unpack_archive(file, extract_dir=tmpdir) 83 | os.remove(file) 84 | except: 85 | pass 86 | return tmpdir 87 | 88 | def read_industry(folder:str) -> pd.DataFrame: 89 | incon = folder + '/incon.dat' # tdx industry file 90 | hy = folder + '/tdxhy.cfg' # tdx stock file 91 | 92 | # tdx industry file 93 | with open(incon, encoding='GB18030', mode='r') as f: 94 | incon = f.readlines() 95 | incon_dict = {} 96 | for i in incon: 97 | if i[0] == '#' and i[1] != '#': 98 | j = i.replace('\n', '').replace('#', '') 99 | incon_dict[j] = [] 100 | else: 101 | if i[1] != '#': 102 | incon_dict[j].append(i.replace('\n', '').split(' ')[0].split('|')) 103 | 104 | incon = pd.concat([pd.DataFrame.from_dict(v).assign(type=k) for k,v in incon_dict.items()]) \ 105 | .rename({0: 'code', 1: 'name'}, axis=1).reset_index(drop=True) 106 | 107 | with open(hy, encoding='GB18030', mode='r') as f: 108 | hy = f.readlines() 109 | hy = [line.replace('\n', '') for line in hy] 110 | hy = pd.DataFrame(line.split('|') for line in hy) 111 | # filter codes 112 | hy = hy[~hy[1].str.startswith('9')] 113 | hy = hy[~hy[1].str.startswith('2')] 114 | 115 | hy1 = hy[[1, 2]].set_index(2).join(incon.set_index('code')).set_index(1)[['name', 'type']] 116 | hy2 = hy[[1, 3]].set_index(3).join(incon.set_index('code')).set_index(1)[['name', 'type']] 117 | # join tdxhy and swhy 118 | df = hy.set_index(1) \ 119 | .join(hy1.rename({'name': hy1.dropna()['type'].values[0], 'type': hy1.dropna()['type'].values[0]+'_type'}, axis=1)) \ 120 | .join(hy2.rename({'name': hy2.dropna()['type'].values[0], 'type': hy2.dropna()['type'].values[0]+'_type'}, axis=1)).reset_index() 121 | 122 | df.rename({0: 'sse', 1: 'code', 2: 'TDX_code', 3: 'SW_code'}, axis=1, inplace=True) 123 | df = df[[i for i in df.columns if not isinstance(i, int) and '_type' not in str(i)]] 124 | df.columns = [i.lower() for i in df.columns] 125 | 126 | #shutil.rmtree(folder, ignore_errors=True) 127 | return df 128 | folder = gettempdir() 129 | dirpath = folder 130 | print(os.path.exists(folder + '/incon.dat')) 131 | print(os.path.exists(folder + '/tdxhy.cfg')) 132 | if not os.path.exists(folder + '/incon.dat') or not 
os.path.exists(folder + '/tdxhy.cfg'): 133 | print("Save file to ",folder) 134 | download_tdx_file(folder) 135 | print("Read file from ",folder) 136 | df = read_industry(folder) 137 | 138 | tdxblockdf = df 139 | return df 140 | 141 | 142 | 143 | 144 | 145 | 146 | if api.connect('119.147.212.81', 7709): 147 | # 获取板块 148 | df = QA_fetch_get_tdx_industry() 149 | hy = df.loc[df['code'] == '601012',:] 150 | for i in range(0,len(hy)): 151 | print(hy.iloc[i]) 152 | 153 | #get_block() 154 | #get_bar() 155 | -------------------------------------------------------------------------------- /tdx_block.py: -------------------------------------------------------------------------------- 1 | from tdxhy import get_code_list 2 | from common.common import * 3 | import os 4 | 5 | parser.add_argument('--bkname', type=str, default="环境保护", help='板块名称') 6 | parser.add_argument('--bkcode', type=str, default="880465", help='板块代码') 7 | 8 | args = parser.parse_args() 9 | bkname = args.bkname 10 | bkcode = args.bkcode 11 | get_code_list(bkname,bkcode) 12 | 13 | filename = './datas/stock_tdx_block'+endday+ bkcode+'.html' 14 | 15 | from tdxhy import content 16 | print("save to ", 'file://'+os.getcwd()+ '/' + filename) 17 | save_file(filename,content) 18 | -------------------------------------------------------------------------------- /tdx_day.py: -------------------------------------------------------------------------------- 1 | #pytdx 2 | 3 | from pytdx.hq import TdxHq_API 4 | import pandas as pd 5 | from common.framework import * 6 | from common.common import endday 7 | import json 8 | 9 | api = TdxHq_API(auto_retry=True) 10 | 11 | hostname="https://klang.org.cn/api" 12 | #hostname="http://klang.zhanluejia.net.cn" 13 | 14 | filename_sl = os.path.expanduser("~/.klang_stock_list.csv") 15 | 16 | session = "" 17 | 18 | def updatestocklist(stname=filename_sl): 19 | 20 | json = requests.get(hostname+"/industries").json() 21 | for i in json: 22 | cm_dict[i['code']] = i.get('chouma','50') 23 | df = pd.json_normalize(json) 24 | df = df.drop(columns=['updatedAt','id','createdAt']) 25 | # 结果集输出到csv文件 26 | df.to_csv(stname, index=False,columns=['updateDate','code','code_name','industry','industryClassification','tdxbk','tdxgn']) 27 | 28 | 29 | def get_bar(name,code): 30 | zone,code1 = code.split('.') 31 | 32 | if zone == "sz": 33 | zone = 0 34 | if zone == "sh": 35 | zone = 1 36 | 37 | print(name,code1) 38 | datas = api.get_security_bars(9,zone,code1, 0, 5) 39 | info = api.get_finance_info(zone, code1) 40 | datas = api.to_df(datas) 41 | if len(datas) < 2: 42 | return 43 | 44 | liutonggu = float(info['liutongguben']) 45 | datas = datas.assign(date=datas['datetime'].apply(lambda x: str(x)[0:10])).drop(['year', 'month', 'day', 'hour', 'minute', 'datetime'], axis=1) 46 | datas.rename(columns={'vol':'volume'},inplace = True) 47 | 48 | print(len(datas),datas.iloc[-1].date) 49 | datas = datas [datas['volume'] > 0] 50 | df = datas.to_json(orient='table') 51 | jsondatas = json.loads(df)['data'] 52 | for d in jsondatas: 53 | d['name'] = name 54 | d['code'] = code 55 | d['volume'] = float("%.4f" % (d['volume'] * 100)) #股 = 手*100 56 | d['turn'] = float("%.4f" %(d['volume']*100 / liutonggu)) 57 | del d['index'] 58 | #print(jsondatas) 59 | #print(datas.iloc[-1],liutonggu,d) 60 | try: 61 | resp = session.post(hostname+"/dayks/updates",json=jsondatas,timeout=2000) 62 | except: 63 | time.sleep(2) 64 | session.post(hostname+"/dayks/updates",json=jsondatas,timeout=2000) 65 | 66 | if api.connect('119.147.212.81', 7709): 67 | 68 | 
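    # (added note) 119.147.212.81:7709 is one of the public TDX quote hosts
    # used throughout this repo; these endpoints rotate over time, so keeping
    # a fallback host list alongside auto_retry=True is prudent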
updatestocklist() 69 | 70 | init_stock_list() 71 | 72 | from common.framework import stocklist 73 | 74 | session = requests.Session() 75 | for stock in stocklist[:]: 76 | code ,name ,tdxbk,tdxgn = getstockinfo(stock) 77 | get_bar(name,code) 78 | -------------------------------------------------------------------------------- /tdx_features.py: -------------------------------------------------------------------------------- 1 | #pytdx 2 | 3 | from pytdx.hq import TdxHq_API 4 | import pandas as pd 5 | from common.framework import * 6 | from common.common import endday 7 | import json 8 | import talib 9 | import numpy as np 10 | 11 | api = TdxHq_API(auto_retry=True) 12 | 13 | hostname="http://klang.org.cn" 14 | hostname="http://klang.zhanluejia.net.cn" 15 | #hostname="http://127.0.0.1:1337" 16 | 17 | filename_sl = os.path.expanduser("~/.klang_stock_list.csv") 18 | filename_st = os.path.expanduser("~/.klang_stock_trader.csv") 19 | 20 | def updatestocklist(stname=filename_sl): 21 | 22 | json = requests.get(hostname+"/industries").json() 23 | #json = requests.get('http://klang.org.cn'+"/industries").json() 24 | for i in json: 25 | cm_dict[i['code']] = i.get('chouma','50') 26 | df = pd.json_normalize(json) 27 | df = df.drop(columns=['_id','updatedAt','id','createdAt']) 28 | # 结果集输出到csv文件 29 | df.to_csv(stname, index=False,columns=['updateDate','code','code_name','industry','industryClassification','tdxbk','tdxgn']) 30 | 31 | 32 | def get_bar(name,code): 33 | zone,code1 = code.split('.') 34 | 35 | if zone == "sz": 36 | zone = 0 37 | if zone == "sh": 38 | zone = 1 39 | 40 | print(name,code1) 41 | datas = api.get_security_bars(9,zone,code1, 0, 300) 42 | datas = api.to_df(datas) 43 | if len(datas) < 2: 44 | return 45 | 46 | datas = datas.assign(date=datas['datetime'].apply(lambda x: str(x)[0:10])).drop(['year', 'month', 'day', 'hour', 'minute', 'datetime'], axis=1) 47 | 48 | C = datas.close 49 | ma5 = talib.MA(datas.close,5) / C 50 | ma10 = talib.MA(datas.close,10) / C 51 | ma20 = talib.MA(datas.close,20) / C 52 | ma30 = talib.MA(datas.close,30) / C 53 | ma60 = talib.MA(datas.close,60) / C 54 | ma120 = talib.MA(datas.close,120) / C 55 | rise = (datas['close'].values[1:]/datas['close'].values[:-1] - 1) * 100 56 | rise = np.insert(rise,0,np.NaN) 57 | 58 | mavol10 = talib.MA(datas.vol,10) 59 | risevol = datas.vol / mavol10 60 | 61 | diff,dea,macd = talib.MACD(datas.close,fastperiod=12, slowperiod=26, signalperiod=9) 62 | macd = macd * 2 63 | 64 | func = lambda name :getattr(talib,name)(datas.open, datas.high, datas.low, datas.close) 65 | """ 66 | talibdict = {i:func(i) for i in ['CDL2CROWS','CDL3BLACKCROWS','CDL3INSIDE','CDL3LINESTRIKE','CDL3OUTSIDE','CDL3STARSINSOUTH','CDL3WHITESOLDIERS', 67 | 'CDLABANDONEDBABY','CDLADVANCEBLOCK','CDLBELTHOLD','CDLBREAKAWAY','CDLCLOSINGMARUBOZU','CDLCONCEALBABYSWALL', 68 | 'CDLCOUNTERATTACK','CDLDARKCLOUDCOVER','CDLDOJI','CDLDOJISTAR','CDLDRAGONFLYDOJI','CDLENGULFING','CDLEVENINGDOJISTAR', 69 | 'CDLEVENINGSTAR','CDLGAPSIDESIDEWHITE','CDLGRAVESTONEDOJI','CDLHAMMER','CDLHANGINGMAN','CDLHARAMI', 70 | 'CDLHARAMICROSS','CDLHIGHWAVE','CDLHIKKAKE','CDLHIKKAKEMOD','CDLHOMINGPIGEON','CDLIDENTICAL3CROWS', 71 | 'CDLINNECK','CDLINVERTEDHAMMER','CDLKICKING','CDLKICKINGBYLENGTH','CDLLADDERBOTTOM','CDLLONGLEGGEDDOJI', 72 | 'CDLLONGLINE','CDLMARUBOZU','CDLMATCHINGLOW','CDLMATHOLD','CDLMORNINGDOJISTAR','CDLMORNINGSTAR', 73 | 'CDLONNECK','CDLPIERCING','CDLRICKSHAWMAN','CDLRISEFALL3METHODS','CDLSEPARATINGLINES','CDLSHOOTINGSTAR', 74 | 
'CDLSHORTLINE','CDLSPINNINGTOP','CDLSTALLEDPATTERN','CDLSTICKSANDWICH','CDLTAKURI','CDLTASUKIGAP', 75 | 'CDLTHRUSTING','CDLTRISTAR','CDLUNIQUE3RIVER','CDLUPSIDEGAP2CROWS','CDLXSIDEGAP3METHODS', 76 | ]} 77 | 78 | """ 79 | #个性化的 80 | talibdict = { 81 | 'ma5':ma5, 82 | 'ma10':ma10, 83 | 'ma20':ma20, 84 | 'ma30':ma30, 85 | 'ma60':ma60, 86 | 'ma120':ma120, 87 | 'rise':rise, 88 | 'risevol':risevol, 89 | 'diff':diff, 90 | 'dea':dea, 91 | 'macd':macd, 92 | 'date':datas.date} 93 | 94 | print(len(datas),datas.iloc[-1].date) 95 | 96 | datas1 = pd.DataFrame(talibdict) 97 | print(datas1) 98 | df = datas1.to_json(orient='table') 99 | jsondatas = json.loads(df)['data'] 100 | for d in jsondatas: 101 | d['name'] = name 102 | d['code'] = code 103 | del d['index'] 104 | #print(jsondatas) 105 | try: 106 | requests.post(hostname+"/features/updates",json=jsondatas,timeout=2000) 107 | except: 108 | time.sleep(2) 109 | requests.post(hostname+"/features/updates",json=jsondatas,timeout=2000) 110 | 111 | if api.connect('119.147.212.81', 7709): 112 | 113 | updatestocklist() 114 | 115 | init_stock_list() 116 | 117 | from common.framework import stocklist 118 | 119 | for stock in stocklist : 120 | code ,name ,tdxbk,tdxgn = getstockinfo(stock) 121 | get_bar(name,code) 122 | -------------------------------------------------------------------------------- /tdx_info.py: -------------------------------------------------------------------------------- 1 | from pytdx.hq import TdxHq_API 2 | import struct 3 | import sys 4 | 5 | code = '601012' 6 | market = 1 7 | 8 | if len(sys.argv) > 1: 9 | zone, code = sys.argv[1].split('.') 10 | if zone == 'sz': 11 | market = 0 12 | 13 | api = TdxHq_API() 14 | 15 | api.connect('119.147.212.81', 7709) 16 | 17 | info = api.get_company_info_category(market,code) 18 | 19 | for i in info: 20 | #for k in i.keys(): 21 | #print(k,i[k]) 22 | #name,filename,start,length 23 | info2 = api.get_company_info_content(market,code,i['filename'],i['start'],i['length']) 24 | print(info2) 25 | 26 | def get_data(fname): 27 | content = fname.decode('utf-8') 28 | print(content) 29 | 30 | 31 | def get_and_parse_block_info(client, blockfile): 32 | try: 33 | meta = client.get_block_info_meta(blockfile) 34 | except Exception as e: 35 | return None 36 | 37 | if not meta: 38 | return None 39 | 40 | print(meta) 41 | 42 | size = meta['size'] 43 | one_chunk = 0x7530 44 | 45 | 46 | chuncks = size // one_chunk 47 | if size % one_chunk != 0: 48 | chuncks += 1 49 | 50 | file_content = bytearray() 51 | for seg in range(chuncks): 52 | start = seg * one_chunk 53 | piece_data = client.get_block_info(blockfile, start, size) 54 | file_content.extend(piece_data) 55 | 56 | #print(len(file_content)) 57 | 58 | #print(file_content) 59 | get_data(file_content) 60 | 61 | def block(filename): 62 | ret = api.get_and_parse_block_info(filename) 63 | for i in ret: 64 | #if i['blockname'] == '多晶硅': 65 | #print(i) 66 | pass 67 | ''' 68 | block("block_fg.dat") 69 | block("block_zs.dat") 70 | block("block_gn.dat") 71 | block("block.dat") 72 | block("incon.dat") 73 | block("tdxhy.cfg") 74 | ''' 75 | -------------------------------------------------------------------------------- /tdxbk.py: -------------------------------------------------------------------------------- 1 | #pytdx 2 | 3 | from pytdx.hq import TdxHq_API 4 | import pandas as pd 5 | from common.common import * 6 | import json 7 | import sys 8 | 9 | api = TdxHq_API() 10 | 11 | block_list = [] 12 | 13 | 14 | def get_block(): 15 | all_list = api.get_security_list(1, 0) 16 | for i in 
all_list: 17 | code = int (i['code']) 18 | if (code >= 880300) and (code <=880999) and (code != 880650): 19 | print(i['code'],i['name']) 20 | block_list.append([i['code'],i['name']]) 21 | 22 | dayK_list = [] 23 | block_list= [ 24 | 25 | ['电气设备', '880446'], ['食品饮料', '880372'], ['汽车类', '880390'], ['农林牧渔', '880360'], ['日用化工', '880355'], ['矿物制品', '880351'], ['航空', '880430'], ['工业机械', '880440'], ['酿酒', '880380'], ['酒店餐饮', '880423'], ['通用机械', '880437'], ['家用电器', '880387'], ['水务', '880454'], ['电力', '880305'], ['有色', '880324'], ['化工', '880335'], ['环境保护', '880456'], ['石油', '880310'], ['医疗保健', '880398'], ['钢铁', '880318'], ['家居用品', '880399'], ['煤炭', '880301'], ['造纸', '880350'], ['旅游', '880424'], ['化纤', '880330'], ['文教休闲', '880422'], ['建材', '880344'], ['医药', '880400'], ['船舶', '880431'], ['传媒娱乐', '880418'], ['纺织服饰', '880367'], ['商业连锁', '880406'], ['运输设备', '880432'], ['工程机械', '880447'], ['电器仪表', '880448'], ['公共交通', '880453'], ['半导体', '880491'], ['元器件', '880492'], ['商贸代理', '880414'], ['供气供热', '880455'], ['广告包装', '880421'], ['电信运营', '880452'], ['交通设施', '880465'], ['运输服务', '880459'], ['通信设备', '880490'], ['IT设备', '880489'], ['仓储物流', '880464'], ['银行', '880471'], ['保险', '880473'], ['软件服务', '880493'], ['互联网', '880494'], ['综合类', '880497'], ['证券', '880472'], ['多元金融', '880474'], ['房地产', '880482'], ['建筑', '880476'] 26 | 27 | ] 28 | def get_bar(): 29 | global dayK_list 30 | 31 | content = "" 32 | dayK_list = [] 33 | for line in block_list: 34 | name = line[0] 35 | code = line[1] 36 | print(code,name) 37 | jsondatas = get_block_bar(code,name) 38 | if jsondatas is None: 39 | continue 40 | 41 | dayK_list.append({code:jsondatas}) 42 | 43 | save_file(sys.path[0]+"/bk.json",json.dumps(dayK_list)) 44 | 45 | def _get_block_bar (code,name): 46 | 47 | datas = api.get_index_bars(9,1, code, 0, 400) 48 | 49 | datas = api.to_df(datas) 50 | 51 | if len(datas)<20: 52 | return None 53 | datas = datas.assign(date=datas['datetime'].apply(lambda x: str(x)[0:10])).drop(['year', 'month', 'day', 'hour', 'minute', 'datetime'], axis=1) 54 | datas.rename(columns={'vol':'volume'},inplace = True) 55 | 56 | print(len(datas),datas.iloc[-1].date) 57 | df = datas.to_json(orient='table') 58 | jsondatas = json.loads(df)['data'] 59 | for d in jsondatas: 60 | d['name'] = name 61 | d['code'] = code 62 | d['volume'] = float("%.4f" % (d['volume'] * 100)) #股 = 手*100 63 | del d['index'] 64 | 65 | return jsondatas 66 | 67 | def connect(): 68 | api.connect('119.147.212.81', 7709) 69 | get_bar() 70 | 71 | def get_block_bar(code,name): 72 | try : 73 | return _get_block_bar(code,name) 74 | except: 75 | return None; 76 | api.connect('119.147.212.81', 7709) 77 | if __name__ == "__main__": 78 | #get_block() 79 | get_bar() 80 | 81 | -------------------------------------------------------------------------------- /test_fm.py: -------------------------------------------------------------------------------- 1 | from common.framework import * 2 | 3 | 4 | def testcb(code,name,datas): 5 | print(code,name,datas) 6 | 7 | if __name__ == "__main__": 8 | init_stock_list() 9 | loop_all(testcb) 10 | -------------------------------------------------------------------------------- /testtdx.py: -------------------------------------------------------------------------------- 1 | from pytdx.hq import TdxHq_API 2 | import struct 3 | 4 | 5 | 6 | api = TdxHq_API() 7 | 8 | api.connect('119.147.212.81', 7709) 9 | 10 | ''' 11 | code = '601012' 12 | market = 1 13 | info = api.get_company_info_category(market,code) 14 | 15 | for i in info: 16 | #for k in i.keys(): 17 | #print(k,i[k]) 18 | 
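#
# [editor's note] tdx_info.py above and the helper below both fetch block files in
# 0x7530-byte (30000-byte) pieces, apparently the largest chunk the TDX server
# returns per request. The chunk arithmetic, worked through on a hypothetical size:
#
#   size = 100000                          # bytes reported by get_block_info_meta()
#   one_chunk = 0x7530                     # 30000
#   chunks = size // one_chunk             # 3
#   if size % one_chunk != 0:
#       chunks += 1                        # 4 requests in total
#   offsets = [seg * one_chunk for seg in range(chunks)]
#   # -> [0, 30000, 60000, 90000]; the last request covers the final 10000 bytes
#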
#name,filename,start,length
19 |     info2 = api.get_company_info_content(market,code,i['filename'],i['start'],i['length'])
20 |     print(info2)
21 | '''
22 | 
23 | def get_data(fname):
24 |     # block files are GB18030-encoded, like incon.dat read in tdxhy.py
25 |     content = fname.decode('GB18030', errors='replace')
26 |     print(content)
27 |     return content
28 | 
29 | 
30 | def get_and_parse_block_info(client, blockfile):
31 |     try:
32 |         meta = client.get_block_info_meta(blockfile)
33 |     except Exception as e:
34 |         return None
35 | 
36 |     if not meta:
37 |         return None
38 | 
39 |     size = meta['size']
40 |     one_chunk = 0x7530
41 | 
42 |     chunks = size // one_chunk
43 |     if size % one_chunk != 0:
44 |         chunks += 1
45 | 
46 |     file_content = bytearray()
47 |     for seg in range(chunks):
48 |         start = seg * one_chunk
49 |         piece_data = client.get_block_info(blockfile, start, size)
50 |         file_content.extend(piece_data)
51 | 
52 |     return get_data(file_content)
53 | 
54 | def block(filename):
55 |     ret = get_and_parse_block_info(api, filename)
56 |     if ret is None:  # meta lookup failed; iterating None would raise TypeError
57 |         return
58 |     # ret is raw decoded text here; the dict-style access below only applies to
59 |     # pytdx's own get_and_parse_block_info, which parses entries into dicts
60 |     for i in ret:
61 |         #if i['blockname'] == '多晶硅':
62 |             #print(i)
63 |         pass
64 | '''
65 | block("block_fg.dat")
66 | block("block_zs.dat")
67 | block("block_gn.dat")
68 | block("block.dat")
69 | '''
70 | #block("incon.dat")
71 | block("tdxhy.cfg")
72 | 
--------------------------------------------------------------------------------
/tf-lstm-stock.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import matplotlib.pyplot as plt
4 | from sklearn.preprocessing import MinMaxScaler
5 | from tensorflow.keras.models import Sequential
6 | from tensorflow.keras.layers import LSTM, Dense, Activation
7 | import tushare as ts
8 | 
9 | df = ts.get_hist_data('601857', start='2016-06-15', end='2018-01-12')
10 | dd = df[['open','high','low','close']]
11 | 
12 | #print(dd.values.shape[0])
13 | 
14 | # get_hist_data returns newest bars first, so sort ascending by date
15 | dd1 = dd.sort_index()
16 | 
17 | dd2 = dd1.values.flatten()
18 | 
19 | dd3 = pd.DataFrame(dd1['close'])
20 | 
21 | def load_data(df, sequence_length=10, split=0.8):
22 | 
23 |     #df = pd.read_csv(file_name, sep=',', usecols=[1])
24 |     data_all = np.array(df).astype(float)
25 |     scaler = MinMaxScaler()
26 |     data_all = scaler.fit_transform(data_all)
27 |     data = []
28 |     for i in range(len(data_all) - sequence_length - 1):
29 |         data.append(data_all[i: i + sequence_length + 1])
30 |     reshaped_data = np.array(data).astype('float64')
31 |     #np.random.shuffle(reshaped_data)
32 |     # the whole series is scaled once above; y is the last step of each window
33 |     x = reshaped_data[:, :-1]
34 |     y = reshaped_data[:, -1]
35 |     split_boundary = int(reshaped_data.shape[0] * split)
36 |     train_x = x[: split_boundary]
37 |     test_x = x[split_boundary:]
38 | 
39 |     train_y = y[: split_boundary]
40 |     test_y = y[split_boundary:]
41 | 
42 |     return train_x, train_y, test_x, test_y, scaler
43 | 
44 | 
45 | def build_model():
46 |     # input_dim is the last axis of train_x, whose shape is (n_samples, time_steps, input_dim)
47 |     model = Sequential()
48 |     model.add(LSTM(100, return_sequences=False))
49 |     model.add(Dense(1))
50 |     model.add(Activation('linear'))
51 | 
52 |     model.compile(loss='mse', optimizer='rmsprop')
53 |     return model
54 | 
55 | 
56 | def train_model(train_x, train_y, test_x, test_y):
57 |     model = build_model()
58 |     predict = np.array([])  # so an early KeyboardInterrupt cannot leave the name unbound
59 | 
60 |     try:
61 |         model.fit(train_x, train_y, batch_size=512, epochs=300, validation_split=0.1)
62 |         predict = model.predict(test_x)
63 |         predict = np.reshape(predict, (predict.size, ))
64 |     except KeyboardInterrupt:
65 |         pass  # fall through and plot whatever was predicted so far
66 |     print(predict)
67 |     print(test_y)
68 |     try:
69 |         fig = plt.figure(1)
70 |         plt.plot(predict, 'r:')
71 |         plt.plot(test_y, 'g-')
72 |         plt.legend(['predict', 'true'])
73 |     except Exception as e:
74 |         print(e)
75 |     return predict, test_y
76 | 
77 | 
78 | if __name__ == '__main__':
79 |     #train_x, train_y, test_x, test_y, scaler = load_data('international-airline-passengers.csv')
80 |     train_x, train_y, test_x, test_y, scaler = load_data(dd3, sequence_length=10, split=0.8)
81 |     train_x = np.reshape(train_x, (train_x.shape[0], train_x.shape[1], 1))
82 |     test_x = np.reshape(test_x, (test_x.shape[0], test_x.shape[1], 1))
83 |     predict_y, test_y = train_model(train_x, train_y, test_x, test_y)
84 |     predict_y = scaler.inverse_transform([[i] for i in predict_y])
85 |     test_y = scaler.inverse_transform(test_y)
86 |     fig2 = plt.figure(2)
87 |     plt.plot(predict_y, 'g:')
88 |     plt.plot(test_y, 'r-')
89 |     plt.show()
90 | 
--------------------------------------------------------------------------------
/transverse_tree.py:
--------------------------------------------------------------------------------
1 | from Klang import (Kl,
2 |         C,O,V,H,L, CLOSE,HIGH,DATETIME,
3 |         MA,CROSS,BARSLAST,HHV,LLV,COUNT,BARSLASTFIND,
4 |         MAX,MIN,MACD,TRANSVERSE)
5 | from Klang.common import end as today
6 | import talib
7 | 
8 | import sys
9 | import linecache
10 | import pandas as pd
11 | 
12 | 
13 | def PrintException():
14 |     exc_type, exc_obj, tb = sys.exc_info()
15 |     f = tb.tb_frame
16 |     lineno = tb.tb_lineno
17 |     filename = f.f_code.co_filename
18 |     linecache.checkcache(filename)
19 |     line = linecache.getline(filename, lineno, f.f_globals)
20 |     print ('EXCEPTION IN ({}, LINE {} "{}"): {}'.format(filename, lineno, line.strip(), exc_obj))
21 | 
22 | all_list = []
23 | pred_data = 0
24 | def main_loop(start,endday):
25 |     offset = 60 # MA60 needs 60 bars, so the first 60 days cannot be used
26 |     check_day = 10 # the next 10 (future) days are used to judge whether price rose
27 |     # so train and test must leave the last 10 days of data untouched
28 | 
29 |     # for prediction the current day itself is computed, with no future comparison:
30 |     # the current day is the buy point, so no future window can be reserved
31 |     if pred_data == 1:
32 |         check_day = 0
33 | 
34 |     #for df in Kl.df_all[:500]:
35 |     for df in Kl.df_all:
36 | 
37 |         Kl.code(df["code"])
38 | 
39 |         if start is None:
40 |             Kl.date(end=endday)
41 |         else:
42 |             Kl.date(start=start,end=endday)
43 |         try:
44 |             # skip stocks whose history is too short or empty
45 |             if len(C) < 70 :
46 |                 continue
47 | 
48 |             allC = C.data
49 |             allV = V.data
50 |             allDate = DATETIME.data
51 | 
52 |             datelist = []
53 | 
54 |             for i in range(check_day,len(C)-offset):
55 |                 datelist.append(str(DATETIME[i]))
56 | 
57 |         except:
58 |             continue  # was `pass`, which silently reused the previous stock's datelist below
59 | 
60 |         for i in range(0,len(datelist)):
61 |             d = datelist[i]
62 |             Kl.date(end=d)
63 |             try:
64 |                 valc1 = ((C-C[1]) / C[1]) * 100
65 |                 ma60 = MA(C,60)
66 |                 if (valc1 < 8) or V[1] == 0 or V == 0 or C < ma60:
67 |                     continue
68 | 
69 |                 ma5 = MA(C,5)
70 |                 ma10 = MA(C,10)
71 |                 ma30 = MA(C,30)
72 |                 v40 = MA(V,40)
73 |                 valv40 = V / v40
74 |                 valo1 = ((O-C[1]) / C[1]) * 100
75 |                 valh1 = ((H-C[1]) / C[1]) * 100
76 |                 vall1 = ((L-C[1]) / C[1]) * 100
77 |                 valv1 = ((V-V[1]) / V[1]) * 100
78 |                 valc5 = C[1] / ma5
79 |                 valc10 = C[1] / ma10
80 |                 valc30 = C[1] / ma30
81 |                 valc60 = C[1] / ma60
82 | 
83 | 
84 | 
85 |                 r5 = (C[1] - LLV(C,5))/LLV(C,5) * 100
86 |                 diff,dea,macd = MACD(C)
87 |                 HDAY = BARSLASTFIND(C,HHV(C,45))
88 |                 if pred_data == 0:
89 |                     if i == 
0: 90 | maxc10 = talib.MAX(allC[-10:],10)[-1] 91 | else : 92 | maxc10 = talib.MAX(allC[-10-i:-i],10)[-1] 93 | target = ((maxc10- allC[-11-i] ) / allC[-i-11] )* 100 94 | else: 95 | target = 0 96 | 97 | tran = TRANSVERSE(C,O,60) #60日波动,<15判定为横盘震荡 98 | 99 | if target > 10: 100 | label = 1 101 | else: 102 | label = 0 103 | #print(C.data[-1],allC[-11-i],maxc10) 104 | print(Kl.currentdf['name'],Kl.currentdf['code'],d,valc5,valc10,valc30,valc60,valc1,valh1,valo1,vall1,valv1,valv40,tran,macd,r5,HDAY,label) 105 | all_list.append([Kl.currentdf['name'],Kl.currentdf['code'],d,valc5,valc10,valc30,valc60,valc1,valh1,valo1,vall1,valv1,valv40,tran,macd,r5,HDAY,label]) 106 | except : 107 | print("Klang ERROR",df['code'],df['name']) 108 | 109 | PrintException() 110 | 111 | fields = ['name','code','日期','5日均线比','10日均线比','30日均线比','60日均线比','C涨幅','H涨幅','O涨幅','L涨幅','V涨幅','40日量比','60日震荡','macd','5日涨幅','45日新高','是否涨幅10%'] 112 | 113 | 114 | main_loop(start=None,endday='2021-07-01') 115 | df = pd.DataFrame(all_list,columns=fields) 116 | df.to_csv('transverse_train'+today+'.csv',index=False) 117 | 118 | 119 | all_list = [] 120 | main_loop(start='2021-07-15',endday=today) 121 | df = pd.DataFrame(all_list,columns=fields) 122 | df.to_csv('transverse_test'+today+'.csv',index=False) 123 | 124 | all_list = [] 125 | pred_data = 1 126 | main_loop(start='2021-07-15',endday=today) 127 | df = pd.DataFrame(all_list,columns=fields) 128 | df.to_csv('transverse_pred'+today+'.csv',index=False) 129 | 130 | -------------------------------------------------------------------------------- /ts_to_csv.py: -------------------------------------------------------------------------------- 1 | import tushare as ts 2 | import argparse 3 | 4 | parser = argparse.ArgumentParser() 5 | parser.add_argument("--code", help="股票代码",default='000856') 6 | parser.add_argument("--start", help="开始日期",default='2019-06-01') 7 | 8 | args = parser.parse_args() 9 | 10 | code = args.code 11 | start = args.start 12 | 13 | df = ts.get_k_data(code,start=start,index=False,ktype='D') 14 | 15 | #选择保存 default first is date 16 | #按照事件升序排序 17 | df = df.set_index('date').sort_index(ascending=True) 18 | 19 | 20 | df.to_csv('./datas/ts_'+code+'.csv',columns=['open', 'high', 'low', 'close', 'volume']) 21 | -------------------------------------------------------------------------------- /wave_abc.py: -------------------------------------------------------------------------------- 1 | # 2 | # python3 talib demo使用用例 3 | # 4 | # talib 是非常有价值的股票计算分析工具 5 | # 有很多写好的公式可以直接使用 6 | # 本例子源代码在github上 7 | # https://github.com/asmcos/quantrader 8 | # python3 wave_abc.py --start="2020-02-18" 9 | import talib 10 | from common.framework import * 11 | import numpy as np 12 | 13 | #金叉 14 | def CrossUp(a,b): 15 | if a[-1] >= b[-1] and a[-2] < b[-2]: 16 | return True 17 | return False 18 | 19 | resultlist = [] 20 | def wave_abc(code,name,datas): 21 | 22 | print(code,name) 23 | 24 | df1 = datas #股票数据 25 | # 数据太少 macd 计算不准确 26 | if len(datas) < 50: 27 | return 28 | 29 | df1['diff'], df1['dea'], df1['macd'] = talib.MACD(df1['close'], fastperiod=12, slowperiod=26, signalperiod=9) 30 | 31 | #计算 0 轴附近,40个周期内的macd最大值 * 0.2 32 | macd0_approx = talib.MAX(np.abs(df1['macd'].values),30)*0.2 33 | #计算 MACD 高点附近 34 | macd1_approx = talib.MAX(np.abs(df1['macd'].values),30)*0.7 35 | 36 | status = 0 37 | for i in range(1,len(df1)-1): 38 | dk = df1.iloc[i-1:i+1] #交叉后一个交易周期 39 | if CrossUp(dk['diff'].values,dk['dea'].values): 40 | if abs(df1['diff'].iloc[i]) < abs(macd0_approx[i]*2) and status == 4: 41 | 
print(OKBLUE,"0Cross轴金叉",code,name,df1.date.iloc[i],abs(df1['diff'].iloc[i]),ENDC) 42 | resultlist.append([code,name,df1.date.iloc[i]]) 43 | else: 44 | status = 1 45 | #死叉 46 | if CrossUp(dk['dea'].values,dk['diff'].values): 47 | if abs(df1['diff'].iloc[i]) > abs(macd1_approx[i]*2) : 48 | print("高位死叉",df1.date.iloc[i],abs(df1['diff'].iloc[i])) 49 | status = 4 50 | else: 51 | status = 1 52 | 53 | def display(): 54 | for i in resultlist: 55 | print(i) 56 | 57 | if __name__ == "__main__": 58 | init_stock_list() 59 | loop_all(wave_abc) 60 | display() 61 | -------------------------------------------------------------------------------- /xgb_class_predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import xgboost as xgb 3 | from xgboost import XGBClassifier 4 | import numpy as np 5 | from xgboost import plot_importance 6 | from matplotlib import pyplot as plt 7 | import requests 8 | import pandas as pd 9 | from sklearn.model_selection import train_test_split 10 | from sklearn.model_selection import GridSearchCV 11 | from sklearn.metrics import accuracy_score 12 | import talib 13 | 14 | from common.framework import save_df_tohtml 15 | 16 | def DisplayOriginalLabel(values): 17 | cnt1 = 0 18 | cnt2 = 0 19 | for i in range(len(values)): 20 | if 1 == values[i] : 21 | cnt1 += 1 22 | else: 23 | cnt2 += 1 24 | 25 | print("origin: %.2f %% " % (100 * cnt1 / (cnt1 + cnt2)),len(values)) 26 | 27 | 28 | 29 | 30 | # 1. 获取数据 31 | 32 | df = pd.read_csv('transverse_train2021-12-14.csv') 33 | df = df[~df.isin([np.nan, np.inf, -np.inf]).any(1)] 34 | print(df.columns) 35 | 36 | df1 = df[df['date']<'2021-07-15'] 37 | df2 = df[df['date']>'2021-07-30'] 38 | 39 | 40 | datas = df1 41 | prec = 10 #target 百分比 42 | label = datas['target'].values > prec 43 | label2 = df2['target'].values > prec 44 | print(label) 45 | 46 | DisplayOriginalLabel(label) 47 | 48 | 49 | fields = [ 50 | 'ma10', 51 | 'ma120', 'ma20', 'ma30', 'ma5', 'ma60', 'rise', 'risevol', 52 | 'dea', 'diff', 'macd' ,'oc' ] 53 | 54 | datas = datas.loc[:,fields] 55 | print(datas) 56 | 57 | # 准备预测的数据 58 | # 59 | 60 | #使用sklearn数据 61 | X_train, X_test, y_train, y_test = train_test_split(datas, label, test_size=0.2, random_state=0) 62 | X2_train, X2_test, y2_train, y2_test = train_test_split(df2, label2, test_size=0.4, random_state=0) 63 | 64 | ### fit model for train data 65 | model = XGBClassifier(learning_rate=0.01, 66 | use_label_encoder=False, 67 | booster='gbtree', # 分类树 68 | n_estimators=300, # 树的个数--1000棵树建立xgboost 69 | max_depth= 6, # 树的深度 70 | min_child_weight = 1, # 叶子节点最小权重 71 | gamma=0., # 惩罚项中叶子结点个数前的参数 72 | subsample=0.8, # 随机选择80%样本建立决策树 73 | objective='reg:squarederror', # 指定损失函数 74 | scale_pos_weight=2, # 解决样本个数不平衡的问题 75 | random_state=27, # 随机数 76 | colsample_bytree=0.7, 77 | ) 78 | model.fit(X_train, 79 | y_train, 80 | eval_set = [(X_test,y_test)], 81 | eval_metric=['rmse'], 82 | early_stopping_rounds = 10, 83 | verbose = False) 84 | 85 | # 对测试集进行预测 86 | 87 | ans = model.predict_proba(X2_test.loc[:,fields]) 88 | y_pred = model.predict(X2_test.loc[:,fields]) 89 | accuracy = accuracy_score(y2_test, y_pred) 90 | print("Accuracy: %.2f%%" % (accuracy * 100.0)) 91 | print(y_pred) 92 | print(ans) 93 | 94 | pcnt1 = 0 95 | pcnt2 = 0 96 | for i in range(len(y_pred)): 97 | 98 | if y_pred[i] == 0 or ans[i][1] < 0.5 : 99 | continue 100 | 101 | print(ans[i][1],X2_test['date'].values[i],X2_test['code'].values[i]) 102 | if y_pred[i] == y2_test[i]: 103 | pcnt1 += 1 104 | else: 105 | pcnt2 += 1 106 | 
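#
# [editor's sketch] The loop above only counts predictions whose positive-class
# probability clears 0.5. Sweeping that threshold makes the usual precision vs.
# coverage trade-off visible. A self-contained sketch with hypothetical arrays
# (proba stands in for predict_proba's positive column, y_true for the labels):
#
#   import numpy as np
#   proba = np.array([0.91, 0.55, 0.48, 0.72, 0.60])   # P(label == 1)
#   y_true = np.array([1, 0, 0, 1, 1])
#   for th in (0.5, 0.6, 0.7):
#       picked = proba >= th
#       if picked.sum():
#           print(th, "precision %.2f" % y_true[picked].mean(), "picked", picked.sum())
#   # at th=0.5: 4 picks, precision 0.75; raising th trades picks for precision
#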
DisplayOriginalLabel(y2_test) 107 | print("Accuracy: %.2f %% " % (100 * pcnt1 / (pcnt1 + pcnt2))) 108 | 109 | plot_importance(model) 110 | plt.show() 111 | 112 | 113 | """ 114 | png = xgb.to_graphviz(model,num_trees=0) 115 | png.view("stock.png") 116 | 117 | 118 | preds = pd.read_csv('transverse_pred'+end+'.csv') 119 | preds1 = preds.loc[:,fields] 120 | y_pred = model.predict(preds1) 121 | ans = model.predict_proba(preds1) 122 | pred_list = [] 123 | for i in range(0,len(y_pred)): 124 | if y_pred[i] == 1 and ans[i][1] > 0.6: #and preds['日期'].values[i] > '2021-11-01': 125 | print(preds['name'].values[i],preds['code'].values[i],preds['日期'].values[i],y_pred[i]) 126 | pred_list.append([preds['name'].values[i],preds['code'].values[i],preds['日期'].values[i]]) 127 | 128 | df_pred = pd.DataFrame(pred_list,columns=['name','code','日期']) 129 | 130 | print('file://'+os.getcwd()+ '/' + './datas/tree_pred'+end+'.html' ) 131 | save_df_tohtml('./datas/tree_pred'+end+'.html',df_pred) 132 | 133 | 134 | 135 | #显示 136 | #plot_importance(model) 137 | #plt.show() 138 | """ 139 | -------------------------------------------------------------------------------- /zigzag.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | from scipy import signal 4 | import numpy as np 5 | import pandas as pd 6 | 7 | ## Klang 8 | from Klang import Kl,Klang 9 | Klang.Klang_init() 10 | ## 11 | 12 | # 13 | # load stock data by code 14 | # 15 | import sys 16 | 17 | codename = "sh.600010" 18 | 19 | if len(sys.argv)>1: 20 | codename = sys.argv[1] 21 | display = 1 22 | if len(sys.argv)>2: 23 | display = int(sys.argv[2]) 24 | 25 | Kl.code(codename) 26 | print(codename,Kl.cur_name) 27 | loaded_data = Kl.day_df 28 | 29 | #datetime types 30 | loaded_data['datetime'] = pd.to_datetime(loaded_data['datetime']) 31 | 32 | 33 | # Create zigzag trendline. 34 | ######################################## 35 | # Find peaks(max). 36 | data_x = loaded_data['datetime'].values 37 | data_y = loaded_data['close'].values 38 | peak_indexes = signal.argrelextrema(data_y, np.greater) 39 | peak_indexes = peak_indexes[0] 40 | 41 | # Find valleys(min). 42 | valley_indexes = signal.argrelextrema(data_y, np.less) 43 | valley_indexes = valley_indexes[0] 44 | 45 | 46 | # Merge peaks and valleys data points using pandas. 
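#
# [editor's sketch] signal.argrelextrema returns a tuple of index arrays, which is
# why the code above takes element [0]. On a toy series, assuming nothing beyond
# scipy and numpy (endpoints are never counted as extrema):
#
#   import numpy as np
#   from scipy import signal
#   y = np.array([1.0, 3.0, 2.0, 5.0, 4.0])
#   signal.argrelextrema(y, np.greater)[0]   # -> array([1, 3])  local peaks
#   signal.argrelextrema(y, np.less)[0]      # -> array([2])     local valley
#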
47 | df_peaks = pd.DataFrame({'datetime': data_x[peak_indexes], 'zigzag_y': data_y[peak_indexes]}) 48 | 49 | df_valleys = pd.DataFrame({'datetime': data_x[valley_indexes], 'zigzag_y': data_y[valley_indexes]}) 50 | 51 | 52 | from bokeh.plotting import figure, output_file, show 53 | 54 | 55 | TOOLTIPS = [ 56 | ("index", "$index"), 57 | ("(y)", "($y)"), 58 | ] 59 | 60 | output_file("images/" + codename+"_zigzag.html",title="Klang zigzag") 61 | graph = figure(title = codename + "-" + Kl.cur_name + ' Prices - ZigZag trendline', 62 | x_axis_type="datetime",width=1200, height=400,toolbar_location="above",tooltips = TOOLTIPS) 63 | 64 | # name of the x-axis 65 | graph.xaxis.axis_label = "日期" 66 | 67 | # name of the y-axis 68 | graph.yaxis.axis_label = "价格" 69 | 70 | graph.line(data_x, data_y,line_dash = "dotdash",line_color="black") 71 | 72 | graph.line(df_valleys['datetime'].values, df_valleys['zigzag_y'].values,line_color="blue") 73 | 74 | graph.line(df_peaks['datetime'].values, df_peaks['zigzag_y'].values,line_color="red") 75 | 76 | show(graph) 77 | 78 | #from bokeh.io import export_png 79 | 80 | #export_png(graph, filename="images/" + codename+"_zigzag.png") 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /zigzag_lib.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # Numba can be a pain to install. If you do not have numba, the functions 4 | # will be left in their original, uncompiled form. 5 | try: 6 | from numba import jit 7 | except ImportError: 8 | def jit(_): 9 | def _f(f): 10 | return f 11 | return _f 12 | 13 | PEAK, VALLEY = 1, -1 14 | 15 | 16 | @jit('i8(f8[:],f8,f8)') 17 | def _identify_initial_pivot(X, up_thresh, down_thresh): 18 | """Quickly identify the X[0] as a peak or valley.""" 19 | x_0 = X[0] 20 | max_x = x_0 21 | max_t = 0 22 | min_x = x_0 23 | min_t = 0 24 | up_thresh += 1 25 | down_thresh += 1 26 | 27 | for t in range(1, len(X)): 28 | x_t = X[t] 29 | 30 | if x_t / min_x >= up_thresh: 31 | return VALLEY if min_t == 0 else PEAK 32 | 33 | if x_t / max_x <= down_thresh: 34 | return PEAK if max_t == 0 else VALLEY 35 | 36 | if x_t > max_x: 37 | max_x = x_t 38 | max_t = t 39 | 40 | if x_t < min_x: 41 | min_x = x_t 42 | min_t = t 43 | 44 | t_n = len(X)-1 45 | return VALLEY if x_0 < X[t_n] else PEAK 46 | 47 | 48 | @jit('i1[:](f8[:],f8,f8)') 49 | def peak_valley_pivots(X, up_thresh, down_thresh): 50 | """ 51 | Finds the peaks and valleys of a series. 52 | 53 | Parameters 54 | ---------- 55 | X : This is your series. 56 | up_thresh : The minimum relative change necessary to define a peak. 57 | down_thesh : The minimum relative change necessary to define a valley. 58 | 59 | Returns 60 | ------- 61 | an array with 0 indicating no pivot and -1 and 1 indicating valley and peak 62 | respectively 63 | 64 | Using Pandas 65 | ------------ 66 | For the most part, X may be a pandas series. However, the index must 67 | either be [0,n) or a DateTimeIndex. Why? This function does X[t] to access 68 | each element where t is in [0,n). 69 | 70 | The First and Last Elements 71 | --------------------------- 72 | The first and last elements are guaranteed to be annotated as peak or 73 | valley even if the segments formed do not have the necessary relative 74 | changes. This is a tradeoff between technical correctness and the 75 | propensity to make mistakes in data analysis. The possible mistake is 76 | ignoring data outside the fully realized segments, which may bias analysis. 
77 | """ 78 | if down_thresh > 0: 79 | raise ValueError('The down_thresh must be negative.') 80 | 81 | initial_pivot = _identify_initial_pivot(X, up_thresh, down_thresh) 82 | 83 | t_n = len(X) 84 | pivots = np.zeros(t_n, dtype='i1') 85 | pivots[0] = initial_pivot 86 | 87 | # Adding one to the relative change thresholds saves operations. Instead 88 | # of computing relative change at each point as x_j / x_i - 1, it is 89 | # computed as x_j / x_1. Then, this value is compared to the threshold + 1. 90 | # This saves (t_n - 1) subtractions. 91 | up_thresh += 1 92 | down_thresh += 1 93 | 94 | trend = -initial_pivot 95 | last_pivot_t = 0 96 | last_pivot_x = X[0] 97 | for t in range(1, len(X)): 98 | x = X[t] 99 | r = x / last_pivot_x 100 | 101 | if trend == -1: 102 | if r >= up_thresh: 103 | pivots[last_pivot_t] = trend 104 | trend = 1 105 | last_pivot_x = x 106 | last_pivot_t = t 107 | elif x < last_pivot_x: 108 | last_pivot_x = x 109 | last_pivot_t = t 110 | else: 111 | if r <= down_thresh: 112 | pivots[last_pivot_t] = trend 113 | trend = -1 114 | last_pivot_x = x 115 | last_pivot_t = t 116 | elif x > last_pivot_x: 117 | last_pivot_x = x 118 | last_pivot_t = t 119 | 120 | if last_pivot_t == t_n-1: 121 | pivots[last_pivot_t] = trend 122 | elif pivots[t_n-1] == 0: 123 | pivots[t_n-1] = -trend 124 | 125 | return pivots 126 | 127 | 128 | def compute_segment_returns(X, pivots): 129 | """Return a numpy array of the pivot-to-pivot returns for each segment.""" 130 | pivot_points = X[pivots != 0] 131 | return pivot_points[1:] / pivot_points[:-1] - 1.0 132 | 133 | 134 | @jit('f8(f8[:])') 135 | def max_drawdown(X): 136 | """ 137 | Return the absolute value of the maximum drawdown of sequence X. 138 | 139 | Note 140 | ---- 141 | If the sequence is strictly increasing, 0 is returned. 142 | """ 143 | mdd = 0 144 | peak = X[0] 145 | for x in X: 146 | if x > peak: 147 | peak = x 148 | dd = (peak - x) / peak 149 | if dd > mdd: 150 | mdd = dd 151 | return mdd 152 | 153 | 154 | @jit('i1[:](i1[:])') 155 | def pivots_to_modes(pivots): 156 | """ 157 | Translate pivots into trend modes. 158 | 159 | Parameters 160 | ---------- 161 | pivots : the result of calling peak_valley_pivots 162 | 163 | Returns 164 | ------- 165 | A numpy array of trend modes. That is, between (VALLEY, PEAK] it is 1 and 166 | between (PEAK, VALLEY] it is -1. 167 | """ 168 | modes = np.zeros(len(pivots), dtype='i1') 169 | modes[0] = pivots[0] 170 | mode = -modes[0] 171 | for t in range(1, len(pivots)): 172 | x = pivots[t] 173 | if x != 0: 174 | modes[t] = mode 175 | mode = -x 176 | else: 177 | modes[t] = mode 178 | return modes 179 | -------------------------------------------------------------------------------- /zigzag_plt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | import matplotlib 3 | 4 | # Avoid FutureWarning: Pandas will require you to explicitly register matplotlib converters. 
5 | from pandas.plotting import register_matplotlib_converters 6 | register_matplotlib_converters() 7 | 8 | 9 | from scipy import signal 10 | import numpy as np 11 | import matplotlib.pyplot as plt 12 | import matplotlib.dates as mdates 13 | 14 | import pandas as pd 15 | 16 | ## Klang 17 | from Klang import Kl,Klang 18 | Klang.Klang_init() 19 | ## 20 | 21 | #设置字体 ,显示股票中文名称 22 | matplotlib.rcParams["font.sans-serif"] = ['AR PL UKai CN'] 23 | 24 | 25 | # 26 | # load stock data by code 27 | # 28 | import sys 29 | 30 | codename = "sh.600010" 31 | 32 | if len(sys.argv)>1: 33 | codename = sys.argv[1] 34 | show = 1 35 | if len(sys.argv)>2: 36 | show = int(sys.argv[2]) 37 | 38 | Kl.code(codename) 39 | print(codename,Kl.cur_name) 40 | loaded_data = Kl.day_df 41 | #print(loaded_data) 42 | 43 | # Convert 'Timestamp' to 'float'. 44 | # Need time be in float days format - see date2num. 45 | loaded_data['datetime'] = [mdates.date2num(np.datetime64(d)) for d in loaded_data['datetime']] 46 | 47 | 48 | 49 | # Create zigzag trendline. 50 | ######################################## 51 | # Find peaks(max). 52 | data_x = loaded_data['datetime'].values 53 | data_y = loaded_data['close'].values 54 | peak_indexes = signal.argrelextrema(data_y, np.greater) 55 | peak_indexes = peak_indexes[0] 56 | 57 | # Find valleys(min). 58 | valley_indexes = signal.argrelextrema(data_y, np.less) 59 | valley_indexes = valley_indexes[0] 60 | 61 | # Instantiate axes. 62 | (fig, ax) = plt.subplots( figsize=(21, 7) ) 63 | 64 | # Merge peaks and valleys data points using pandas. 65 | df_peaks = pd.DataFrame({'datetime': data_x[peak_indexes], 'zigzag_y': data_y[peak_indexes]}) 66 | 67 | df_valleys = pd.DataFrame({'datetime': data_x[valley_indexes], 'zigzag_y': data_y[valley_indexes]}) 68 | 69 | # Plot zigzag trendline. 70 | ax.plot(df_peaks['datetime'].values, df_peaks['zigzag_y'].values, 71 | color='red', label="zigzag_peak") 72 | 73 | ax.plot(df_valleys['datetime'].values, df_valleys['zigzag_y'].values, 74 | color='blue', label="zigzag_valley") 75 | # Plot close price line. 76 | ax.plot(data_x, data_y, linestyle='dashed', color='black', label="Close Price", linewidth=1) 77 | 78 | 79 | 80 | # Customize graph. 81 | ########################## 82 | plt.xlabel('Date') 83 | plt.ylabel('Price') 84 | plt.title( codename + "-" + Kl.cur_name + ' Prices - ZigZag trendline') 85 | 86 | # Format time. 
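#
# [editor's sketch] date2num used above maps a datetime to a float day count
# (relative to matplotlib's date epoch), which is what lets the x-values be treated
# as plain numbers until the formatter below turns them back into date labels.
# Round trip, assuming only matplotlib and numpy:
#
#   import numpy as np
#   import matplotlib.dates as mdates
#   x = mdates.date2num(np.datetime64('2021-07-01'))   # float days
#   mdates.num2date(x).strftime('%Y-%m-%d')            # -> '2021-07-01'
#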
87 | ax.xaxis_date() 88 | ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d")) 89 | 90 | plt.gcf().autofmt_xdate() # Beautify the x-labels 91 | plt.autoscale(tight=True) 92 | 93 | plt.legend(loc='best') 94 | plt.grid(True, linestyle='dashed') 95 | 96 | plt.savefig("images/" + codename+"_zigzag.png",dpi=200,bbox_inches='tight') 97 | 98 | if show == 1: 99 | plt.show() 100 | 101 | display = 0 102 | if display == 1: 103 | from bokeh.plotting import figure, output_file, show 104 | 105 | output_file("images/" + codename+"_zigzag.html") 106 | graph = figure(title = codename + "-" + Kl.cur_name + ' Prices - ZigZag trendline',width=1200,height=400) 107 | 108 | # name of the x-axis 109 | graph.xaxis.axis_label = "日期" 110 | 111 | # name of the y-axis 112 | graph.yaxis.axis_label = "价格" 113 | 114 | graph.line(data_x, data_y,line_dash = "dotdash",line_color="black") 115 | 116 | graph.line(df_valleys['datetime'].values, df_valleys['zigzag_y'].values,line_color="blue") 117 | 118 | graph.line(df_peaks['datetime'].values, df_peaks['zigzag_y'].values,line_color="red") 119 | show(graph) 120 | 121 | 122 | 123 | -------------------------------------------------------------------------------- /zigzag_stock.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from zigzag_lib import peak_valley_pivots, max_drawdown, compute_segment_returns, pivots_to_modes 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | import matplotlib 6 | import math 7 | #设置字体 ,显示股票中文名称 8 | matplotlib.rcParams["font.sans-serif"] = ['AR PL UKai CN'] 9 | 10 | ## Klang 11 | from Klang import Kl,Klang,APPROX 12 | Klang.Klang_init() 13 | ## 14 | 15 | # 16 | # load stock data by code 17 | # 18 | import sys 19 | 20 | codename = "sh.600010" 21 | 22 | if len(sys.argv)>1: 23 | codename = sys.argv[1] 24 | display = 1 25 | if len(sys.argv)>2: 26 | display = int(sys.argv[2]) 27 | 28 | offset = 100 29 | if len(sys.argv)>3: 30 | offset = int(sys.argv[3]) 31 | 32 | Kl.code(codename) 33 | print(codename,Kl.cur_name) 34 | loaded_data = Kl.day_df.iloc[offset:] 35 | 36 | 37 | # Instantiate axes. 
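#
# [editor's sketch] max_drawdown, imported from zigzag_lib above, can be
# sanity-checked by hand: with X = [1.0, 1.2, 0.9, 1.1] the running peak is 1.2
# and the deepest trough after it is 0.9, so the drawdown is (1.2 - 0.9) / 1.2 = 0.25:
#
#   import numpy as np
#   from zigzag_lib import max_drawdown
#   max_drawdown(np.array([1.0, 1.2, 0.9, 1.1]))   # -> 0.25
#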
38 | (fig, ax) = plt.subplots( figsize=(21, 7) )
39 | 
40 | def plot_pivots(X, pivots):
41 |     plt.xlim(0, len(X))
42 |     plt.ylim(X.min()*0.99, X.max()*1.01)
43 |     ax.plot(np.arange(len(X)), X, 'k:', alpha=0.5)
44 |     ax.plot(np.arange(len(X))[pivots != 0], X[pivots != 0], 'k-')
45 |     plt.scatter(np.arange(len(X))[pivots == 1], X[pivots == 1], color='r')
46 |     plt.scatter(np.arange(len(X))[pivots == -1], X[pivots == -1], color='g')
47 | 
48 | 
49 | def create_index(pivots):
50 |     index_list = []
51 |     for i in range(0,len(pivots)):
52 |         if pivots[i] != 0:
53 |             index_list.append(i)
54 |     return index_list
55 | 
56 | def calc_data(data_x):
57 |     pivots = peak_valley_pivots(data_x, 0.02, -0.02)
58 |     plot_pivots(data_x,pivots)
59 |     return pivots
60 | 
61 | # tail of the series is in an up swing
62 | def pattern_tail_rise():
63 |     if pivots[pv_index[-1]] == 1 and pivots[pv_index[-2]] == -1:
64 |         return 1
65 |     return 0
66 | 
67 | # cup-and-handle pattern
68 | def pattern_cup_handle():
69 |     if len(pv_index) < 6:
70 |         return 0
71 |     close = loaded_data['close'].values
72 |     ret = 0
73 |     for i in range(0,len(pv_index)-6):
74 |         x1 = pv_index[i]
75 |         a = pv_index[i+1]
76 |         b = pv_index[i+2]
77 |         c = pv_index[i+3]
78 |         d = pv_index[i+4]
79 |         e = pv_index[i+5]  # handle-side top, not tested below
80 |         # rims a and c are about equally high; cup bottom b sits below handle
81 |         # bottom d, and the handle pullback must stay within 1/3 of the cup depth
82 |         ab = close[a] - close[b]
83 |         cb = close[c] - close[b]
84 |         cd = close[c] - close[d]
85 |         if pivots[x1] == -1 and abs(ab-cb)/cb < 0.15 and \
86 |                 close[b] < close[d] and \
87 |                 cb / 3 > cd:
88 |             ax.text(a+1,close[a],"<-A")
89 |             ax.text(c+1,close[c],"<-C")
90 |             ax.annotate('', xy=(c, close[c]),xytext=(b, close[b]),\
91 |                 arrowprops=dict(color='blue', arrowstyle='-',connectionstyle="arc3,rad=0.4"))
92 |             ax.annotate('', xy=(b, close[b]),xytext=(a, close[a]),\
93 |                 arrowprops=dict(color='blue', arrowstyle='-',connectionstyle="arc3,rad=0.4"))
94 |             ret = 1
95 | 
96 |     return ret
97 | 
98 | # W (double) bottom
99 | def pattern_w_bottom():
100 |     if len(pv_index) < 5:
101 |         return 0
102 |     ret = 0
103 |     close = loaded_data['close'].values
104 |     for i in range(0,len(pv_index)-5):
105 |         a = pv_index[i]
106 |         b = pv_index[i+1]
107 |         c = pv_index[i+2]
108 |         d = pv_index[i+3]
109 |         e = pv_index[i+4]
110 |         ab = close[a] - close[b]
111 |         ad = close[a] - close[d]
112 | 
113 |         # b and d are the twin bottoms, a and e the tops
114 |         if pivots[a] == 1 and APPROX(ab,ad,0.05) and \
115 |                 ab / close[b] > 0.2 :
116 |             ax.text(b+1,close[b],"<-B")
117 |             ax.text(d+1,close[d],"<-D")
118 |             ret = 1
119 |     return ret
120 | 
121 | # triple bottom
122 | def pattern_triple_bottom():
123 |     if len(pv_index) < 6:
124 |         return 0
125 |     ret = 0
126 |     close = loaded_data['close'].values
127 |     for i in range(0,len(pv_index)-6):
128 |         a = pv_index[i]
129 |         b = pv_index[i+1]
130 |         c = pv_index[i+2]
131 |         d = pv_index[i+3]
132 |         e = pv_index[i+4]
133 |         f = pv_index[i+5]
134 |         ab = close[a] - close[b]
135 |         ad = close[a] - close[d]
136 |         af = close[a] - close[f]
137 | 
138 |         # b, d and f are the three bottoms, a and e the tops
139 |         if pivots[a] == 1 and APPROX(ab,ad,0.05) and \
140 |                 APPROX(ab,af,0.05) and ab / close[b] > 0.2 :
141 |             ax.text(b+1,close[b],"<-B")
142 |             ax.text(d+1,close[d],"<-D")
143 |             ax.text(f+1,close[f],"<-F")  # was close[d]: label F at its own price
144 |             ret = 1
145 |     return ret
146 | 
147 | pivots = calc_data(loaded_data['close'].values)
148 | pv_index = create_index(pivots)
149 | 
150 | if pattern_triple_bottom() == 1:
151 |     plt.title( codename + "-" + Kl.cur_name + ' Prices - ZigZag trendline')
152 |     plt.grid(True, linestyle='dashed')
153 |     plt.savefig("images/" + codename + "_" + str(len(loaded_data['close'].values))+ "_zigzag.png",dpi=100,bbox_inches='tight')
154 | 
155 | if display :
156 |     plt.show()
157 | 
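#
# [editor's note] A worked instance of the double-bottom test above, with
# hypothetical pivot prices: tops a=10.0 and e=10.2 with bottoms b=8.0 and d=8.1
# give ab = 2.0 and ad = 1.9. Assuming APPROX compares the relative difference,
# |2.0 - 1.9| / 2.0 = 0.05 is within the 0.05 tolerance, and
# ab / close[b] = 2.0 / 8.0 = 0.25 > 0.2, so the two dips are deep enough
# (at least roughly 20% below the first top) to count as a W bottom.
#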
--------------------------------------------------------------------------------
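A minimal end-to-end sketch of the zigzag_lib helpers above, runnable on its own
next to zigzag_lib.py; the price series here is synthetic, not repo data, and the
3% threshold is an arbitrary choice for illustration:

import numpy as np
from zigzag_lib import peak_valley_pivots, compute_segment_returns, max_drawdown, pivots_to_modes

# synthetic random-walk "close" series
rng = np.random.default_rng(7)
X = np.cumprod(1 + rng.normal(0, 0.01, 250))

pivots = peak_valley_pivots(X, 0.03, -0.03)   # mark 3% swings
print("pivot count:", (pivots != 0).sum())
print("segment returns:", np.round(compute_segment_returns(X, pivots), 4))
print("max drawdown: %.2f%%" % (100 * max_drawdown(X)))
print("trend modes head:", pivots_to_modes(pivots)[:10])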