├── # 验证小市值结合盈利指标过滤策略.ipynb ├── .gitignore ├── 002475.csv ├── 600256.csv ├── README.md ├── fft-filter.ipynb ├── getstockcsv.py ├── newfeature.py ├── singlelstm.ipynb ├── singlelstm2.ipynb ├── sklearn机器学习单票回测.ipynb ├── xmm.csv └── 保存K线数据到数据库.ipynb /# 验证小市值结合盈利指标过滤策略.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 验证小市值结合盈利指标过滤策略" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "ExecuteTime": { 15 | "end_time": "2019-10-07T01:19:15.137015Z", 16 | "start_time": "2019-10-07T01:19:13.865910Z" 17 | } 18 | }, 19 | "outputs": [], 20 | "source": [ 21 | "#常规库引用\n", 22 | "import datetime\n", 23 | "import tushare as ts\n", 24 | "# import rqalpha_data\n", 25 | "import pandas as pd\n", 26 | "import numpy as np\n", 27 | "import tushare as ts\n", 28 | "import baostock as bs\n", 29 | "import talib as talib\n", 30 | "import seaborn as sns\n", 31 | "import math\n", 32 | "import statsmodels.tsa.stattools as stattool\n", 33 | "import matplotlib.pyplot as plt \n", 34 | "plt.rcParams['figure.figsize'] = (18.0, 6.0)\n", 35 | "from sqlalchemy import create_engine\n", 36 | "\n", 37 | "engine = create_engine(\"mysql+pymysql://{}:{}@{}/{}?charset={}\".format('root', '12345678', '127.0.0.1:3306', 'finance','UTF8MB4'))\n", 38 | "conn = engine.connect()#创建连接" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 30, 44 | "metadata": { 45 | "ExecuteTime": { 46 | "end_time": "2019-10-07T02:01:50.459824Z", 47 | "start_time": "2019-10-07T02:00:38.800072Z" 48 | } 49 | }, 50 | "outputs": [ 51 | { 52 | "name": "stdout", 53 | "output_type": "stream", 54 | "text": [ 55 | "(1332913, 24)\n", 56 | "(228430, 24)\n" 57 | ] 58 | } 59 | ], 60 | "source": [ 61 | "datas=pd.DataFrame()\n", 62 | "#flowmkt<=40,大概是50%分位数,为了减少数据量提前过滤\n", 63 | "for sscode in ['000','001','002','300','600','601','603'][:]:\n", 64 | " 
datas=pd.concat([datas,pd.read_sql('select * from k'+sscode+' where date>\"2016-01-01\" \\\n", 65 | " AND isST=0 AND eprofit>1 AND flowmkt<=60 AND esale>0 \\\n", 66 | " AND pctChg>-9.7 AND pctChg<=9.7 AND turn>=0.5 order by date \\\n", 67 | " ',con=conn)],axis=0)\n", 68 | "print(datas.shape)\n", 69 | "datas=datas[datas.groupby('date')['flowmkt'].rank()<=250]\n", 70 | "print(datas.shape)\n" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 31, 76 | "metadata": { 77 | "ExecuteTime": { 78 | "end_time": "2019-10-07T02:01:51.961805Z", 79 | "start_time": "2019-10-07T02:01:51.635186Z" 80 | } 81 | }, 82 | "outputs": [ 83 | { 84 | "name": "stdout", 85 | "output_type": "stream", 86 | "text": [ 87 | "(228430, 15)\n", 88 | "(182782, 15) (228430, 15)\n" 89 | ] 90 | } 91 | ], 92 | "source": [ 93 | "n=200\n", 94 | "sdata=datas\n", 95 | "# sdata['amo']=sdata.close/sdata.emoney\n", 96 | "sdata['ape']=sdata.close/sdata.eprofit\n", 97 | "sdata['asa']=sdata.close/sdata.esale\n", 98 | "sdata['result2']=sdata.result\n", 99 | "sdata.drop(['close','open','preclose','high','low','amount','volume','isST','eb','emoney','esale','result'],axis=1,inplace=True)\n", 100 | "print(datas.shape)\n", 101 | "datas=datas[datas.groupby('date')['ape'].rank()<=n]\n", 102 | "print(datas.shape,sdata.shape)" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 39, 108 | "metadata": { 109 | "ExecuteTime": { 110 | "end_time": "2019-10-07T02:03:06.363597Z", 111 | "start_time": "2019-10-07T02:03:05.816182Z" 112 | } 113 | }, 114 | "outputs": [ 115 | { 116 | "name": "stdout", 117 | "output_type": "stream", 118 | "text": [ 119 | "[-193.11, -110.04, -40.03, 88.52]\n", 120 | "[343.01, 399.55, 122.52, 94.81]\n", 121 | "[396.47, 439.14, 184.96, 81.99]\n", 122 | "[419.43, 486.3, 126.51, 72.1]\n", 123 | "[249.88, 350.18, 53.26, 74.1]\n" 124 | ] 125 | }, 126 | { 127 | "data": { 128 | "image/png": "\n", 129 | "text/plain": [ 130 | "
" 131 | ] 132 | }, 133 | "metadata": { 134 | "needs_background": "light" 135 | }, 136 | "output_type": "display_data" 137 | } 138 | ], 139 | "source": [ 140 | "stock_data=datas[:-40].copy()\n", 141 | "\n", 142 | "output=pd.DataFrame()\n", 143 | "output['monthresult']=sdata.groupby('date')['result5'].mean()\n", 144 | "# print(stock_data.shape,stock_data.columns)\n", 145 | "i,j,k=int(n*0.95),0.3,2\n", 146 | "turnmin,turnmax,volrmin,volrmax=2,8,6,20\n", 147 | "stock_data=stock_data[stock_data['turn']>turnmin] \n", 148 | "stock_data=stock_data[stock_data['turn']volrmin*0.1] \n", 150 | "stock_data=stock_data[stock_data['volratio']\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0mstock_data\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mstock_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstock_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'turn'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m<\u001b[0m\u001b[0mturnmax\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0mstock_data\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mstock_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstock_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'volratio'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m>\u001b[0m\u001b[0mvolrmin\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0;36m0.1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 14\u001b[0;31m \u001b[0mstock_data\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mstock_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstock_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'volratio'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m<\u001b[0m\u001b[0mvolrmax\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0;36m0.1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 15\u001b[0m 
\u001b[0mstock_data\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mstock_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstock_data\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'date'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'asa'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrank\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m<\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0mstock_data\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mstock_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstock_data\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'date'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'ape'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrank\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m<\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 217 | "\u001b[0;31mKeyboardInterrupt\u001b[0m: " 218 | ] 219 | } 220 | ], 221 | "source": [ 222 | "for turnmin in range(0,4,1):#4\n", 223 | " for turnmax in range(8,20,3):#4\n", 224 | " for volrmin in range(5,10,1):#4\n", 225 | " for volrmax in range(20,28,2):#4\n", 226 | " results=[]\n", 227 | " for i in [0.5,0.6,0.7,0.8,0.9]:#8\n", 228 | " for j in [0.5,0.6,0.7,0.8,0.9]:#5\n", 229 | " for k in range(1,5,1): #4\n", 230 | "\n", 231 | " stock_data=datas[:-40].copy()\n", 232 | " stock_data=stock_data[stock_data['turn']>turnmin] \n", 233 | " stock_data=stock_data[stock_data['turn']volrmin*0.1] \n", 235 | " stock_data=stock_data[stock_data['volratio']\n", 117 | "\n", 130 | 
"\n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | "
datecodeopenhighlowclosepreclosevolumeamount
02015-03-02sz.00000114.030014.090013.870014.030013.99001018797001423603264.0000
12015-03-03sz.00000113.980013.980013.590013.600014.03001059476291457305040.0000
22015-03-04sz.00000113.620013.720013.500013.570013.6000814973041108145424.0000
32015-03-05sz.00000113.500013.530013.290013.380013.5700828604851109151968.0000
42015-03-06sz.00000113.370013.510013.350013.440013.380055704774748959120.0000
\n", 208 | "" 209 | ], 210 | "text/plain": [ 211 | " date code open high low close preclose \\\n", 212 | "0 2015-03-02 sz.000001 14.0300 14.0900 13.8700 14.0300 13.9900 \n", 213 | "1 2015-03-03 sz.000001 13.9800 13.9800 13.5900 13.6000 14.0300 \n", 214 | "2 2015-03-04 sz.000001 13.6200 13.7200 13.5000 13.5700 13.6000 \n", 215 | "3 2015-03-05 sz.000001 13.5000 13.5300 13.2900 13.3800 13.5700 \n", 216 | "4 2015-03-06 sz.000001 13.3700 13.5100 13.3500 13.4400 13.3800 \n", 217 | "\n", 218 | " volume amount \n", 219 | "0 101879700 1423603264.0000 \n", 220 | "1 105947629 1457305040.0000 \n", 221 | "2 81497304 1108145424.0000 \n", 222 | "3 82860485 1109151968.0000 \n", 223 | "4 55704774 748959120.0000 " 224 | ] 225 | }, 226 | "execution_count": 29, 227 | "metadata": {}, 228 | "output_type": "execute_result" 229 | } 230 | ], 231 | "source": [ 232 | "code='000001'\n", 233 | "start_date='2015-03-01'\n", 234 | "end_date='2019-03-01'\n", 235 | "price=getDataFromBaostock(code,start_date,end_date)\n", 236 | "price.head()" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": 73, 242 | "metadata": { 243 | "ExecuteTime": { 244 | "end_time": "2019-08-07T23:58:08.995969Z", 245 | "start_time": "2019-08-07T23:58:08.766082Z" 246 | } 247 | }, 248 | "outputs": [ 249 | { 250 | "data": { 251 | "text/plain": [ 252 | "" 253 | ] 254 | }, 255 | "execution_count": 73, 256 | "metadata": {}, 257 | "output_type": "execute_result" 258 | }, 259 | { 260 | "data": { 261 | "image/png": "\n", 262 | "text/plain": [ 263 | "
" 264 | ] 265 | }, 266 | "metadata": { 267 | "needs_background": "light" 268 | }, 269 | "output_type": "display_data" 270 | } 271 | ], 272 | "source": [ 273 | "from scipy import signal\n", 274 | "data=pd.to_numeric(price['close'])\n", 275 | "b, a = signal.butter(1, 0.1, 'lowpass') \n", 276 | "# b1, a1 = signal.butter(8, 0.1, 'lowpass') \n", 277 | "# wn=2*滤波频率/采样频率,如果是五日平均,则采用2*1/5=0.4\n", 278 | "meandata=data.rolling(10).mean().shift(-5)\n", 279 | "filtedData = signal.filtfilt(b, a, data) #data为要过滤的信号\n", 280 | "filtedData1 = signal.filtfilt(b, a, data[:-10]) #data为要过滤的信号\n", 281 | "plt.plot(filtedData[-100:],label='filter')\n", 282 | "plt.plot(filtedData1[-90:],label='filter')\n", 283 | "plt.plot(meandata.values[-100:],label='mean')\n", 284 | "plt.plot(data.values[-100:],label='orig')\n", 285 | "plt.legend()" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": null, 291 | "metadata": { 292 | "ExecuteTime": { 293 | "end_time": "2019-08-07T23:27:43.750018Z", 294 | "start_time": "2019-08-07T23:27:43.495314Z" 295 | } 296 | }, 297 | "outputs": [], 298 | "source": [] 299 | } 300 | ], 301 | "metadata": { 302 | "hide_input": false, 303 | "kernelspec": { 304 | "display_name": "Python 3", 305 | "language": "python", 306 | "name": "python3" 307 | }, 308 | "language_info": { 309 | "codemirror_mode": { 310 | "name": "ipython", 311 | "version": 3 312 | }, 313 | "file_extension": ".py", 314 | "mimetype": "text/x-python", 315 | "name": "python", 316 | "nbconvert_exporter": "python", 317 | "pygments_lexer": "ipython3", 318 | "version": "3.7.4" 319 | }, 320 | "latex_envs": { 321 | "LaTeX_envs_menu_present": true, 322 | "autoclose": false, 323 | "autocomplete": true, 324 | "bibliofile": "biblio.bib", 325 | "cite_by": "apalike", 326 | "current_citInitial": 1, 327 | "eqLabelWithNumbers": true, 328 | "eqNumInitial": 1, 329 | "hotkeys": { 330 | "equation": "Ctrl-E", 331 | "itemize": "Ctrl-I" 332 | }, 333 | "labels_anchors": false, 334 | "latex_user_defs": false, 
335 | "report_style_numbering": false, 336 | "user_envs_cfg": false 337 | }, 338 | "toc": { 339 | "base_numbering": 1, 340 | "nav_menu": {}, 341 | "number_sections": true, 342 | "sideBar": true, 343 | "skip_h1_title": false, 344 | "title_cell": "Table of Contents", 345 | "title_sidebar": "Contents", 346 | "toc_cell": false, 347 | "toc_position": {}, 348 | "toc_section_display": true, 349 | "toc_window_display": false 350 | }, 351 | "varInspector": { 352 | "cols": { 353 | "lenName": 16, 354 | "lenType": 16, 355 | "lenVar": 40 356 | }, 357 | "kernels_config": { 358 | "python": { 359 | "delete_cmd_postfix": "", 360 | "delete_cmd_prefix": "del ", 361 | "library": "var_list.py", 362 | "varRefreshCmd": "print(var_dic_list())" 363 | }, 364 | "r": { 365 | "delete_cmd_postfix": ") ", 366 | "delete_cmd_prefix": "rm(", 367 | "library": "var_list.r", 368 | "varRefreshCmd": "cat(var_dic_list()) " 369 | } 370 | }, 371 | "types_to_exclude": [ 372 | "module", 373 | "function", 374 | "builtin_function_or_method", 375 | "instance", 376 | "_Feature" 377 | ], 378 | "window_display": false 379 | } 380 | }, 381 | "nbformat": 4, 382 | "nbformat_minor": 2 383 | } 384 | -------------------------------------------------------------------------------- /getstockcsv.py: -------------------------------------------------------------------------------- 1 | import newfeature 2 | import numpy as np 3 | import pandas as pd 4 | import tushare as ts 5 | import baostock as bs 6 | import talib 7 | import time 8 | import bcolz 9 | 10 | today=time.strftime('%Y-%m-%d',time.localtime(time.time())) 11 | 12 | def get_quantiles(data,x): 13 | bins=np.r_[-1e100,[np.round(np.quantile(data, i/x),3) for i in range(1,x)],1e100] # [负无穷,data中result这列的1,1/2,1/3...,正无穷] 14 | return bins 15 | 16 | def precode(c): #在代码后载入sz,sh,6开头代表上证,加sh.前缀 17 | return 'sh.'+c if c[0:1]=='6' else 'sz.'+c 18 | 19 | 20 | def cut_to1(alpha,minx=-1,maxx=1): 21 | alpha=alpha.copy() 22 | alpha[alpha>maxx]=maxx # 涨幅超过多少,则截取, 应当是在对异常值进行处理 23 | 
alpha[alpha50: 38 | 39 | result=result.sort_values('date').set_index('date').fillna(0) 40 | result=result[result['volume']!='0'] # 剔除交易量为0数据(即停牌日期数据) 41 | result=result[result['volume']!=''] 42 | result=result.astype('float32') 43 | if index==0: # 拿个股数据,才执行 44 | result=result[result['psTTM']>0] # 剔除动态市盈率小于0数据 45 | 46 | # 特征工程,通过老特征组合,获得新增特征 47 | # ??????? 48 | result['flowmkt']=0.637*np.arctan(np.log(0.00000001*result['close']*result['volume']/result['turn'])) 49 | result['everyprofit']=0.637*np.arctan(10/result.peTTM) 50 | result['turn']=0.637*np.arctan((result.turn/10)**0.2) 51 | result['everypb']=0.637*np.arctan(2/result.pbMRQ) 52 | result['eversale']=0.637*np.arctan(2/np.sqrt(result.psTTM)) 53 | result['evermoney']=0.637*np.arctan(20/result.pcfNcfTTM) 54 | result.fillna(0,inplace=True) 55 | # result.fillna(0,inplace=True) 56 | 57 | # result['p_change']=result['pctChg'] 58 | # print(symbol,result.shape) 59 | 60 | return result 61 | else: 62 | return [] 63 | 64 | def saveData(path,symbol,data,szzs,days,binscount,r=1,isfeature=1): 65 | """ 66 | path:文件存储路径 67 | symbol:股票代码 68 | data:个股数据 69 | szzs:上证指数数据 70 | days:默认为5,代表交易日 71 | 72 | 73 | """ 74 | #如果是用talib、newfeatures计算的数据,需要解除前120条数据 75 | data.dropna(axis=0,inplace=True) 76 | # data['amount']=0.25*(data.open+data.close+data.high+data.low)*data['volume'] 77 | data_price=data.values 78 | 79 | # 特征工程,组合生成新特征(有些特征往上证指数szzs数据加,又通过szzs新特征算出个股新特征,有些往个股数据加) 80 | data['szpctchg1']=18*szzs['close']/szzs['close'].shift(1)-18 # 今日收盘/昨日收盘 - 1 即18*(当日涨幅 - 1) 81 | data['szpctchg2']=19*szzs['close'].rolling(2).mean()/szzs['close'].shift(2)-19 # 19*(前3日收盘价均值(含当日)/前2日收盘价 - 1) 82 | data['szpctchg4']=10*szzs['close'].rolling(2).mean()/szzs['close'].shift(4)-10 # 10*(前3日收盘价均值(含当日)/前4日收盘价 - 1) 83 | data['szpctchg8']=10*szzs['close'].rolling(4).mean()/szzs['close'].shift(8)-10 # 10*(前4日收盘价均值(含当日)/前8日收盘价 - 1) 84 | data['szpctchg15']=8*szzs['close'].rolling(5).mean()/szzs['close'].shift(15)-8 85 | 
data['szpctchg30']=6*szzs['close'].rolling(10).mean()/szzs['close'].shift(30)-6 86 | 87 | 88 | szzs['sma5']=szzs['close'].rolling(5).mean() # 大盘5日均线 89 | szzs['sma10']=szzs['close'].rolling(10).mean() 90 | szzs['sma20']=szzs['close'].rolling(20).mean() 91 | szzs['sma40']=szzs['close'].rolling(40).mean() 92 | szzs['sma80']=szzs['close'].rolling(80).mean() 93 | 94 | szzs['max20']=szzs['high'].rolling(20).max().shift() # 大盘20日最高价 95 | szzs['max60']=szzs['high'].rolling(60).max().shift() 96 | szzs['max120']=szzs['high'].rolling(120).max().shift() 97 | 98 | szzs['min20']=szzs['low'].rolling(20).min().shift() # 大盘20日最低价 99 | szzs['min60']=szzs['low'].rolling(60).min().shift() 100 | szzs['min120']=szzs['low'].rolling(120).min().shift() 101 | 102 | data['zs20_min']=cut_to1(2*szzs['close']/szzs['min20']-2)### 大盘2倍的20日内较最低价的涨幅(涨幅超50截取,或跌幅超50%被截取) 103 | data['zs60_min']=cut_to1(2*szzs['close']/szzs['min60']-2)### 104 | data['zs120_min']=cut_to1(2*szzs['close']/szzs['min120']-2)### 105 | 106 | data['zs20_max']=cut_to1(4*szzs['close']/szzs['max20']-4)### 大盘4倍的20日内较最高价的跌幅(涨幅超过25%截取,跌幅超过25%截取) 107 | data['zs60_max']=cut_to1(4*szzs['close']/szzs['max60']-4)### 108 | data['zs120_max']=cut_to1(4*szzs['close']/szzs['max120']-4)### 109 | 110 | data['zs5_d']=cut_to1(20*szzs['sma5'].diff()/szzs['sma5'].shift()) # 大盘5日均线差分/昨日5日均线值,涨跌幅限制-5% ~ 5%截取 111 | data['zs10_d']=cut_to1(40*szzs['sma10'].diff()/szzs['sma10'].shift()) 112 | data['zs20_d']=cut_to1(60*szzs['sma20'].diff()/szzs['sma20'].shift()) 113 | data['zs40_d']=cut_to1(80*szzs['sma40'].diff()/szzs['sma40'].shift()) 114 | data['zs80_d']=cut_to1(100*szzs['sma80'].diff()/szzs['sma80'].shift()) 115 | 116 | data['zsr5']=cut_to1(10*szzs['close']/szzs['sma5']-10) # 大盘收盘价偏离5日均线的幅度,涨跌幅限制-10% ~ 10%截取 117 | data['zsr10']=cut_to1(8*szzs['close']/szzs['sma10']-8) 118 | data['zsr20']=cut_to1(6*szzs['close']/szzs['sma20']-6) 119 | data['zsr40']=cut_to1(6*szzs['close']/szzs['sma40']-6) 120 | data['zsr80']=cut_to1(4*szzs['close']/szzs['sma80']-4) 
121 | 122 | szzs['r5_20']=(4*szzs['sma5']/szzs['sma20']-4) # 大盘5日均线偏离20日均线幅度,涨跌幅限制-25% ~ 25%截取 123 | szzs['r10_40']=(4*szzs['sma10']/szzs['sma40']-4) 124 | szzs['r20_80']=(4*szzs['sma20']/szzs['sma80']-4) 125 | szzs['r5_40']=(4*szzs['sma5']/szzs['sma40']-4) 126 | 127 | data['zsr5_20_d']=cut_to1(10*szzs['r5_20'].diff()) # 大盘5日均线较20日均线的偏移幅度的变化值,反应5日和20日均偏离的剧烈程度 128 | data['zsr10_40_d']=cut_to1(20*szzs['r10_40'].diff()) 129 | data['zsr20_80_d']=cut_to1(20*szzs['r20_80'].diff()) 130 | data['zsr5_20']=cut_to1(szzs['r5_20']) 131 | data['zsr10_40']=cut_to1(1*szzs['r10_40']) 132 | data['zsr20_80']=cut_to1(1*szzs['r20_80']) 133 | data['zsr5_40']=cut_to1(1*szzs['r5_40']) 134 | 135 | 136 | szzs['v5']=(10*szzs['volume']/(0.1+szzs['volume'].rolling(5).mean())) # 大盘交易量较5日均交易量均值偏移幅度,0.1是考虑到rolling前几个值为null 137 | szzs['v10']=(10*szzs['volume']/(0.1+szzs['volume'].rolling(10).mean())) # 衡量交易量较平均值异常增减的情况 138 | szzs['v20']=(10*szzs['volume']/(0.1+szzs['volume'].rolling(20).mean())) 139 | szzs['v40']=(10*szzs['volume']/(0.1+szzs['volume'].rolling(40).mean())) 140 | szzs['v80']=(10*szzs['volume']/(0.1+szzs['volume'].rolling(80).mean())) 141 | 142 | data['zsv3_9']=(10*szzs['volume'].rolling(3).mean()/(0.1+szzs['volume'].rolling(9).mean())) # 大盘3日交易量较9日均交易量偏移幅度 143 | data['zsv5_20']=(10*szzs['volume'].rolling(5).mean()/(0.1+szzs['volume'].rolling(20).mean())) 144 | data['zsv9_50']=(10*szzs['volume'].rolling(9).mean()/(0.1+szzs['volume'].rolling(50).mean())) 145 | 146 | data['zsv5_d']=0.6366*np.arctan(0.2*szzs['v5'].diff()) # 交易量较交易量均线的偏置幅度变化,衡量交易量的变动情况 147 | data['zsv10_d']=0.6366*np.arctan(0.1*szzs['v10'].diff()) # arctan()用于归一化,把任何取值范围数据压缩到0-1 148 | data['zsv20_d']=0.6366*np.arctan(0.2*szzs['v20'].diff()) # 为何用0.6366?????? 
149 | data['zsv40_d']=0.6366*np.arctan(0.2*szzs['v40'].diff()) 150 | data['zsv80_d']=0.6366*np.arctan(0.2*szzs['v80'].diff()) 151 | 152 | data['zsv3_9_d']=0.6366*np.arctan(2*data['zsv3_9'].diff()) 153 | data['zsv5_20_d']=0.6366*np.arctan(2*data['zsv5_20'].diff()) 154 | data['zsv9_50_d']=0.6366*np.arctan(2*data['zsv9_50'].diff()) 155 | 156 | data['zsv3_9']=0.6366*np.arctan(0.2*data['zsv3_9']) 157 | data['zsv5_20']=0.6366*np.arctan(0.2*data['zsv5_20']) 158 | data['zsv9_50']=0.6366*np.arctan(0.2*data['zsv9_50']) 159 | data['zsv5']=0.6366*np.arctan(0.2*szzs['v5']) 160 | data['zsv10']=0.6366*np.arctan(0.2*szzs['v10']) 161 | data['zsv20']=0.6366*np.arctan(0.2*szzs['v20']) 162 | data['zsv40']=0.6366*np.arctan(0.2*szzs['v40']) 163 | data['zsv80']=0.6366*np.arctan(0.2*szzs['v80']) 164 | 165 | # 大盘收盘价macd 166 | szzs['dif'], szzs['dea'],szzs['hist'] = talib.MACD(szzs['close'].astype(float).values, fastperiod=12, slowperiod=26, signalperiod=9) 167 | data['zsdif']=cut_to1(0.01*szzs['dif']) # macd的dif线,上下限100截取 168 | data['zsdea']=cut_to1(0.01*szzs['dea']) 169 | 170 | data['zshist_d']=cut_to1(0.1*szzs['hist'].diff()) # macd红绿柱的差分 171 | data['zshist']=cut_to1(0.02*szzs['hist']) 172 | 173 | 174 | # 提取标签值 175 | for i in [2,4,7,10]:, 176 | data['result']=data['close'].shift(-i*r)/data['close']-1 # r=1,个股未来2日涨幅(收盘价算),4日后涨幅,7日后涨幅,10日后涨幅 177 | # data['result2zs']=data['result']-szzs['close'].shift(-i*r)/szzs['close'] 178 | data['resultmax']=data['close'].rolling(i*r).max().shift(-i*r)/data['close']-1 # 个股未来(2,4,7,10日)最高涨幅(收盘价算) 179 | data['resultmin']=data['close'].rolling(i*r).min().shift(-i*r)/data['close']-1 # 个股未来(2,4,7,10日)最大跌幅 180 | data['resultrel']=data['resultmax']+data['resultmin'] # 个股未来(2,4,7,10日)振幅 181 | remax=data['close'].rolling(i*r).max().shift(-i*r) # 个股未来(2,4,7,10日)最高价 182 | remin=data['close'].rolling(i*r).min().shift(-i*r) # 个股未来(2,4,7,10日)最低价 183 | reavg=remax-remin # 个股未来(2,4,7,10日)最高最低价差 184 | 185 | if i>2: 186 | # 每种标签值,对应不同的买入信号 187 | # 
最高最低价均分5份or3份,收盘价低于最低价到1/5的值的权重为4,而1/5到1/3的值是2,中间是0,2/3到5/4的值是-2,4/5到1的值是-4 188 | buysignal=2*(data['close']<(remin+reavg/5))+2*(data['close']<(remin+reavg/3))-2*(data['close']>(remax-reavg/3))-2*(data['close']>(remax-reavg/5)) 189 | # 进一步约束买入信号,振幅要大于3%。然后如果振幅大于5%,且收盘价还在高低价差下方五分之一内,则buysignal所有权重再加1 190 | # 最前面加4,因为权重最小值就是4,相当于把权重全部转为正数 191 | data['resultbuy'+str(i)]=4+(reavg/data['close']>0.03)*(buysignal)+1*(reavg/data['close']>0.05)*(data['close']<(remin+reavg/5)) 192 | # data[(data['resultmax']-data['resultrel']/4)<0].loc[:,'buyorsale'+str(i)]=-1 193 | # 获取result的1,1/2,1/3...1/9分位值 194 | bins=get_quantiles(data['result'].dropna(),10) # data["result"]之前是依据shift获取,因此有null值 195 | data['resultclass'+str(i)]=pd.cut(data['result'], bins, right=False,labels=False) # 新建结果类别列,以历史涨幅走势来分类别 196 | # bins=get_quantiles(data['resultmin'].dropna(),10) 197 | # data['resultmin'+i]=pd.cut(data['resultmin'], bins, right=False,labels=False) 198 | # bins=get_quantiles(data['resultmax'].dropna(),10) 199 | # data['resultmax'+i]=pd.cut(data['resultmax'], bins, right=False,labels=False) 200 | bins=get_quantiles(data['resultrel'].dropna(),10) 201 | data['resultrelclass'+str(i)]=pd.cut(data['resultrel'], bins, right=False,labels=False) # 新建结果类别列,以历振幅走势来分类别 202 | 203 | if isfeature==0: 204 | ###这是不用feature的操作 205 | start=0 206 | data=data[start:].drop(['ma5','ma10','ma20','result','resultmax','resultmin','resultrel','peTTM','pbMRQ','psTTM', 207 | 'pcfNcfTTM','p_change','price_change'],axis=1) 208 | 209 | 210 | ###下面是用features的操作 211 | elif isfeature==1: 212 | start=120 213 | data=newfeature.getStockCharacter(data).astype('float16') 214 | # data=data[start:].drop(['ma5','ma10','ma20','v_ma5','v_ma10','v_ma20','result','p_change','price_change'],axis=1) 215 | data=data[start:].drop(['peTTM','pbMRQ','psTTM','pcfNcfTTM','result','resultmax','resultmin','resultrel'],axis=1) 216 | # data.drop(['pctChg'],axis=1,inplace=True) 217 | return data 218 | 219 | import os, tarfile 220 | 
# Pack a whole directory tree in one call; empty sub-directories are included.
# To archive without compressing, change the "w:gz" mode to "w:" or "w".
def make_targz(output_filename, source_dir):
    """Write ``source_dir`` recursively into a gzip-compressed tar archive.

    Args:
        output_filename: Path of the archive file to create.
        source_dir: Directory to add; it is stored in the archive under
            ``os.path.basename(source_dir)``.
    """
    with tarfile.open(output_filename, "w:gz") as tar:
        tar.add(source_dir, arcname=os.path.basename(source_dir))


# Add files one at a time; empty sub-directories are NOT archived.
# This form allows individual files to be filtered.
# To archive without compressing, change the "w:gz" mode to "w:" or "w".
def make_targz_one_by_one(output_filename, source_dir):
    """Add every regular file found under ``source_dir`` to a gzip tar archive.

    Args:
        output_filename: Path of the archive file to create.
        source_dir: Root directory walked with ``os.walk``.

    The archive is opened in a ``with`` block so it is finalised and closed
    even when ``os.walk`` or ``tar.add`` raises; previously the handle leaked
    on error because ``close()`` was only reached on the success path.
    """
    with tarfile.open(output_filename, "w:gz") as tar:
        # ``_dirs`` is unused; the name also avoids shadowing the builtin ``dir``.
        for root, _dirs, files in os.walk(source_dir):
            for file in files:
                tar.add(os.path.join(root, file))


# ---------------------------------------------------------------------------
# Driver script: download daily K-line data for every listed stock, build the
# feature table with saveData(), and write one CSV per stock.
# ---------------------------------------------------------------------------
bs.login()


# The first assignment is immediately overridden by the second one.
path='/www/stocks/'
path='/Users/hongyuouyang/python/finance/stockdata/'
days=5
r=1
sequence_length=10
stocks=ts.get_stock_basics().sort_index()
stocks.to_csv(path+'stocks.csv')
stocks=stocks.index.values

# szzs=ts.get_hist_data('sh').sort_values('date')
# Index data (code 'sh.000001') passed to saveData() as the market reference.
szzs=getbaostock('sh.000001','2016-01-01', today, 'D',1)
j=0
for symbol in stocks[:]:  # symbol is the 6-digit stock code
    # NOTE(review): this checks ``path+symbol`` while the output below is
    # written to ``path+symbol+'.csv'`` -- confirm whether the check is meant
    # to skip stocks that were already exported.
    if not(os.path.exists(path+symbol)):

        # data=ts.get_hist_data(symbol)

        # Assemble the data (original note: "generate the bcolz file").
        # Daily K-line data for this stock from 2016-01-01 until today.
        data=getbaostock(precode(symbol),'2016-01-01', today, 'D',0)
        # NOTE(review): the visible tail of getbaostock() has a ``return []``
        # path, so an empty result is filtered by the length check below
        # rather than by this ``is None`` test -- confirm.
        if data is None:
            print(symbol,'no data')
        else:
            if len(data) > 300:
                print(j,symbol,data.shape)
                j += 1
                data=data.sort_values('date')  # ascending by date
                datanew=saveData(path,symbol,data,szzs,days,10,r).astype('float16')
                if j==1:
                    # Keep a one-row sample of the first processed stock.
                    datanew.head(1).to_csv(path+'data.csv')

                # One CSV per stock, holding the indicators and features.
                datanew.to_csv(path+symbol+'.csv')


# make_targz('/www/wwwroot/fina.ouyanghome.com/public/stockdata.tar.gz', path)
# print('data done')
# bs.logout()

# Save to disk
# -------------------------------------------------------------------------------- /newfeature.py:
-------------------------------------------------------------------------------- 1 | # -*- coding:utf8 -*- 2 | import pandas as pd 3 | import talib 4 | # import ta 5 | import numpy as np 6 | 7 | from scipy.stats import rankdata 8 | import scipy as sp 9 | 10 | from sklearn import preprocessing 11 | abs_scaler = preprocessing.MaxAbsScaler() 12 | import copy 13 | 14 | 15 | def sigmoid(X,useStatus): 16 | if useStatus: 17 | return 1.0 / (1 + np.exp((0.001*X))); 18 | else: 19 | return (X) 20 | 21 | 22 | # TSRANK 函数 23 | def func_rank(na): 24 | return rankdata(na)[-1]/rankdata(na).max() 25 | 26 | 27 | # DECAYLINEAR函数 28 | def func_decaylinear(na): 29 | n = len(na) 30 | decay_weights = np.arange(1,n+1,1) 31 | decay_weights = decay_weights / decay_weights.sum() 32 | 33 | return (na * decay_weights).sum() 34 | 35 | 36 | # HIGHDAY 函数 37 | def func_highday(na): 38 | return len(na) - na.argmax() 39 | 40 | 41 | # LOWDAY 函数 42 | def func_lowday(na): 43 | return len(na) - na.argmin() 44 | 45 | 46 | def cut_to1(alpha,minx=-1,maxx=1): 47 | alpha=alpha.copy() 48 | alpha[alpha>maxx]=maxx 49 | alpha[alphadf['open']) 105 | return df['sunup'] 106 | 107 | 108 | def upratio(df,days): 109 | df['sunup']=sunup(df) 110 | upratio=df['sunup'].rolling(days).sum()/days 111 | return upratio 112 | 113 | 114 | def trans_features(stockk): 115 | """该函数主要用于继续新增各种个股的特征,增加了各种常用技术指标入EMA,布林线等""" 116 | # stockk就是融合了各种特征列,标签列的df格式的data数据 117 | # stockk[['open','close','low','high']]=stockk[['open','close','low','high']]/stockk['close'].iloc[-1] 118 | 119 | # 特征工程,继续增加新特征 120 | stockk['preclose']=stockk['close'].shift() 121 | stockk['volume']=stockk['volume']/1000000 # 个股交易量数据缩放 122 | stockk['amount']=stockk['amount']/1000000 123 | stockk['pctChg']=cut_to1(8*stockk['close'].diff()/stockk['close'].shift()) # 当日涨幅 124 | # stockk['pctChg2']=(1+stockk['pctChg']).rolling(2).cumprod()-1 125 | stockk['pctChg2']=cut_to1(5*(stockk['close']/stockk['close'].shift(2))-5) # 过去2日涨幅 126 | 
stockk['pctChg4']=cut_to1(4*(stockk['close']/stockk['close'].shift(5))-4) 127 | stockk['pctChg8']=cut_to1(2*(stockk['close']/stockk['close'].shift(8))-2) 128 | stockk['pctChg15']=cut_to1(2*(stockk['close']/stockk['close'].shift(15))-2) 129 | stockk['pctChg30']=cut_to1(2*(stockk['close']/stockk['close'].shift(15))-2) 130 | stockk['avg_price']=stockk['amount']/(0.01+stockk['volume']) # 平均成交价,成交额/交易量 131 | 132 | stockk['sma5']=stockk['close'].rolling(5).mean() # 个股前5日均价 133 | stockk['sma10']=stockk['close'].rolling(10).mean() 134 | stockk['sma20']=stockk['close'].rolling(20).mean() 135 | stockk['sma40']=stockk['close'].rolling(40).mean() 136 | stockk['sma80']=stockk['close'].rolling(80).mean() 137 | 138 | stockk['max20']=stockk['high'].rolling(20).max().shift() # 个股前20日最高价(不含当日) 139 | stockk['max60']=stockk['high'].rolling(60).max().shift() 140 | stockk['max120']=stockk['high'].rolling(120).max().shift() 141 | 142 | stockk['min20']=stockk['low'].rolling(20).min().shift() # 个股前20日最低价(不含当日) 143 | stockk['min60']=stockk['low'].rolling(60).min().shift() 144 | stockk['min120']=stockk['low'].rolling(120).min().shift() 145 | 146 | stockk['20_min']=cut_to1(1*stockk['close']/stockk['min20']-0.9)### (个股20日内最大涨幅 + 10%),涨幅超110%,则截断。----10%有何用?????? 
147 | stockk['60_min']=cut_to1(1*stockk['close']/stockk['min60']-0.9)### 148 | stockk['120_min']=cut_to1(1*stockk['close']/stockk['min120']-0.9)### 149 | 150 | stockk['20_max']=cut_to1(2*stockk['close']/stockk['max20']-2)### 2*(个股20日内最大跌幅), 跌幅超50%,则截断。 151 | stockk['60_max']=cut_to1(2*stockk['close']/stockk['max60']-2)### (即便昨天是max20,今天最多涨10%,不会截断) 152 | stockk['120_max']=cut_to1(2*stockk['close']/stockk['max120']-2)### 153 | 154 | stockk['5_d']=cut_to1(20*stockk['sma5'].diff()/stockk['sma5'].shift()) # 个股5日均线涨幅,±5%截断 155 | stockk['10_d']=cut_to1(40*stockk['sma10'].diff()/stockk['sma10'].shift()) 156 | stockk['20_d']=cut_to1(60*stockk['sma20'].diff()/stockk['sma20'].shift()) 157 | stockk['40_d']=cut_to1(80*stockk['sma40'].diff()/stockk['sma40'].shift()) 158 | stockk['80_d']=cut_to1(100*stockk['sma80'].diff()/stockk['sma80'].shift()) 159 | 160 | stockk['r5']=cut_to1(6*stockk['close']/stockk['sma5']-6) # 个股收盘价较5日均线偏移幅度(±16.6%截断) 161 | stockk['r10']=cut_to1(4*stockk['close']/stockk['sma10']-4) 162 | stockk['r20']=cut_to1(4*stockk['close']/stockk['sma20']-4) 163 | stockk['r40']=cut_to1(3*stockk['close']/stockk['sma40']-3) 164 | stockk['r80']=cut_to1(2*stockk['close']/stockk['sma80']-2) 165 | 166 | stockk['r5_20']=(3*stockk['sma5']/stockk['sma20']-3) # 个股5日均线较20日均线偏移幅度(下面语句会进行±33.3%截断) 167 | stockk['r10_40']=(2*stockk['sma10']/stockk['sma40']-2) 168 | stockk['r20_80']=(2*stockk['sma20']/stockk['sma80']-2) 169 | stockk['r5_40']=(2*stockk['sma5']/stockk['sma40']-2) 170 | 171 | stockk['r5_20_d']=cut_to1(10*stockk['r5_20'].diff()) # 5日20日均线偏移幅度变动量(±10%截断)----不太可能一天变动10%吧! 
172 | stockk['r10_40_d']=cut_to1(20*stockk['r10_40'].diff()) 173 | stockk['r20_80_d']=cut_to1(20*stockk['r20_80'].diff()) 174 | 175 | stockk['r5_20']=cut_to1(stockk['r5_20']) # ±33.3%截断 176 | stockk['r10_40']=cut_to1(stockk['r10_40']) 177 | stockk['r20_80']=cut_to1(stockk['r20_80']) 178 | stockk['r5_40']=cut_to1(stockk['r5_40']) 179 | 180 | 181 | stockk['day_price'] = 0.5*(stockk['open']+stockk['close'])/stockk['volume'] # (开盘价-收盘价)的平均值除以交易量 182 | 183 | stockk['v5']=(10*stockk['volume']/(0.1+stockk['volume'].rolling(5).mean())) # 交易量/5日交易量均值,0.1是避免出现0交易量,导致报错 184 | stockk['v10']=(10*stockk['volume']/(0.1+stockk['volume'].rolling(10).mean())) 185 | stockk['v20']=(10*stockk['volume']/(0.1+stockk['volume'].rolling(20).mean())) 186 | stockk['v40']=(10*stockk['volume']/(0.1+stockk['volume'].rolling(40).mean())) 187 | stockk['v80']=(10*stockk['volume']/(0.1+stockk['volume'].rolling(80).mean())) 188 | 189 | stockk['v3_9']=(10*stockk['volume'].rolling(3).mean()/(0.1+stockk['volume'].rolling(9).mean())) # 交易量3日均/交易量9日均 190 | stockk['v5_20']=(10*stockk['volume'].rolling(5).mean()/(0.1+stockk['volume'].rolling(20).mean())) 191 | stockk['v9_50']=(10*stockk['volume'].rolling(9).mean()/(0.1+stockk['volume'].rolling(50).mean())) 192 | 193 | stockk['v5_d']=0.6366*np.arctan(0.1*stockk['v5'].diff()) # 用arctan()进行数据归一化,交易量过大,用0.1压一压,避难arctan()饱和 194 | stockk['v10_d']=0.6366*np.arctan(0.1*stockk['v10'].diff()) 195 | stockk['v20_d']=0.6366*np.arctan(0.1*stockk['v20'].diff()) 196 | stockk['v40_d']=0.6366*np.arctan(0.1*stockk['v40'].diff()) 197 | stockk['v80_d']=0.6366*np.arctan(0.1*stockk['v80'].diff()) 198 | 199 | stockk['v3_9_d']=0.6366*np.arctan(1*stockk['v3_9'].diff()) 200 | stockk['v5_20_d']=0.6366*np.arctan(1*stockk['v5_20'].diff()) 201 | stockk['v9_50_d']=0.6366*np.arctan(1*stockk['v9_50'].diff()) 202 | 203 | stockk['v3_9']=0.6366*np.arctan(0.1*stockk['v3_9']) 204 | stockk['v5_20']=0.6366*np.arctan(0.1*stockk['v5_20']) 205 | stockk['v9_50']=0.6366*np.arctan(0.1*stockk['v9_50']) 
206 | 207 | stockk['v5']=0.6366*np.arctan(0.1*stockk['v5']) 208 | stockk['v10']=0.6366*np.arctan(0.1*stockk['v10']) 209 | stockk['v20']=0.6366*np.arctan(0.1*stockk['v20']) 210 | stockk['v40']=0.6366*np.arctan(0.1*stockk['v40']) 211 | stockk['v80']=0.6366*np.arctan(0.1*stockk['v80']) 212 | 213 | # 个股macd(12,26,9) 214 | stockk['dif'], stockk['dea'],stockk['hist'] = talib.MACD(stockk['close'].astype(float).values, fastperiod=12, slowperiod=26, signalperiod=9) 215 | stockk['dif']=cut_to1(0.7*stockk['dif']) # 个股dif截断怎么用1.42吗,超过1.42的dif就没了??? 216 | stockk['dea']=cut_to1(0.7*stockk['dea']) 217 | stockk['hist']=(1*stockk['hist']) 218 | 219 | stockk['hist_d']=cut_to1(4*stockk['hist'].diff()) 220 | stockk['hist']=cut_to1(stockk['hist']) 221 | 222 | 223 | #计算几个指标相对前收盘的涨幅 224 | stockk['highratio']=cut_to1(10*(stockk['high'])/stockk['close'].shift()-10.2) # 当日最高涨幅(为何设10.2???) 225 | stockk['lowratio']=cut_to1(10*(stockk['low'])/stockk['close'].shift()-9.8) 226 | stockk['openratio']=cut_to1(20*(stockk['open'])/stockk['close'].shift()-20) # 当日开盘涨幅(±5%截断,不合适吧,开盘涨停的没了???) 
227 | 228 | #talib均线指标 229 | stockk['tema']=0.6366*np.arctan(0.1*talib.TEMA(stockk['close'],timeperiod=30)**0.5) # 三重移动平均线 230 | 231 | stockk['trima']=0.6366*np.arctan(0.1*talib.TRIMA(stockk['close'],timeperiod=30)**0.5) # 三角移动平均线 232 | 233 | stockk['wma']=0.6366*np.arctan(0.1*talib.WMA(stockk['close'],timeperiod=30)**0.5) # 加权移动平均线 234 | 235 | stockk['t3']=0.6366*np.arctan(0.1*talib.T3(stockk['close'],timeperiod=5, vfactor=0)**0.5) # 超短线使用的三重移动平均线 236 | 237 | #talib量价指标 238 | stockk['natr']=cut_to1(-0.2+0.1*talib.NATR(stockk['high'],stockk['low'],stockk['close'],timeperiod=14)) 239 | stockk['adline'] = 0.6366*np.arctan(0.005*talib.AD(stockk['high'],stockk['low'],stockk['close'],stockk['volume'])-0.5) 240 | stockk['adosc'] = 0.6366*np.arctan(0.1*talib.ADOSC(stockk['high'],stockk['low'],stockk['close'],stockk['volume'], fastperiod=3, slowperiod=10)) 241 | stockk['obv'] =0.6366*np.arctan(0.001*talib.OBV(stockk['close'],stockk['volume'])) 242 | 243 | ##静态指标 244 | stockk['beta'] = 0.6366*np.arctan(talib.BETA(stockk['high'],stockk['low'], timeperiod=5)) 245 | stockk['correl'] = cut_to1(-3*talib.CORREL(stockk['high'],stockk['low'], timeperiod=30)+3) 246 | stockk['linearreg'] = 0.6366*np.arctan(0.1*talib.LINEARREG(stockk['close'], timeperiod=14)-1) 247 | stockk['lineangle'] = 0.6366*np.arctan(0.1*talib.LINEARREG_ANGLE(stockk['close'], timeperiod=14)) 248 | # stockk['lineslope'] =0.1*talib.LINEARREG_SLOPE(stockk['close'], timeperiod=14) 249 | stockk['var'] = 0.6366*np.arctan(30*talib.VAR(stockk['close'], timeperiod=5,nbdev=1)) 250 | stockk['TSF'] = 0.6366*np.arctan(2*talib.TSF(stockk['close'], timeperiod=14)) 251 | stockk['lineintercept'] = 0.6366*np.arctan(0.05*talib.LINEARREG_INTERCEPT(stockk['close'], timeperiod=14)) 252 | stockk['stddev'] =cut_to1((talib.STDDEV(stockk['close'], timeperiod=5,nbdev=1))**0.25-1) 253 | 254 | ###动量指标 255 | stockk['sar'] = 0.6366*np.arctan(0.1*talib.SAR(stockk['high'], stockk['low'], acceleration=0, maximum=0)) 256 | stockk['adx'] = 
cut_to1(0.015*talib.ADX(stockk['high'],stockk['low'],stockk['close'], timeperiod=14)) 257 | stockk['adxr'] = cut_to1(0.015*talib.ADXR(stockk['high'],stockk['low'],stockk['close'],timeperiod=14)) 258 | stockk['apo'] = cut_to1(0.5*talib.APO(stockk['close'], fastperiod=12, slowperiod=26, matype=0)) 259 | stockk['ardown'], stockk['arup']= talib.AROON(stockk['high'],stockk['low'],timeperiod=14) 260 | stockk['ardown']=0.02*stockk['ardown']-1 261 | stockk['arup']=0.02*stockk['arup']-1 262 | stockk['arosc'] = 0.01*talib.AROONOSC(stockk['high'],stockk['low'],timeperiod=14) 263 | stockk['bop'] = talib.BOP(stockk['open'],stockk['high'],stockk['low'],stockk['close']) 264 | stockk['cci'] = 0.004*talib.CCI(stockk['high'],stockk['low'],stockk['close'],timeperiod=14) 265 | stockk['cci_d']=0.637*np.arctan(1*stockk['cci'].diff()/(0.1+stockk['cci'].shift())) 266 | 267 | return stockk 268 | 269 | 270 | def getStockCharacter(stock_data): 271 | # stockk=getK(stock_data) 272 | stockk=trans_features(stock_data) 273 | 274 | stockk=stockk.drop(['max20','max60','max120','min20','min60', 275 | 'min120','sma5','sma10','sma20','sma40','sma80', 276 | "volume",'preclose','amount','avg_price','day_price'],axis=1) 277 | #,'HO','OL','uppwer','downpwer' 278 | 279 | return stockk -------------------------------------------------------------------------------- /保存K线数据到数据库.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## 保存数据到mysql数据库,包含全部个股数据" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": { 13 | "ExecuteTime": { 14 | "end_time": "2019-08-08T11:36:30.955410Z", 15 | "start_time": "2019-08-08T11:36:30.952804Z" 16 | } 17 | }, 18 | "source": [ 19 | "### 基础引用初始化" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "metadata": { 26 | "ExecuteTime": { 27 | "end_time": "2019-10-20T07:15:14.676285Z", 28 | "start_time": 
"2019-10-20T07:15:14.195976Z" 29 | } 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "#常规引用\n", 34 | "import time\n", 35 | "import tushare as ts\n", 36 | "import pandas as pd\n", 37 | "import numpy as np\n", 38 | "import baostock as bs\n", 39 | "\n", 40 | "today=time.strftime('%Y-%m-%d',time.localtime(time.time()))\n" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 10, 46 | "metadata": { 47 | "ExecuteTime": { 48 | "end_time": "2019-10-20T07:15:15.947809Z", 49 | "start_time": "2019-10-20T07:15:15.855557Z" 50 | } 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "# from sqlalchemy import create_engine\n", 55 | "\n", 56 | "# #获取股票代码清\n", 57 | "# engine = create_engine(\"mysql+pymysql://{}:{}@{}/{}?charset={}\".format('root', 'qqq375701956', '127.0.0.1:3306', 'finance','utf8'))\n", 58 | "# conn = engine.connect()#创建连接\n" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "### baostock接口调用函数\n", 66 | "适用于多天数据为更新,所花时间较长\n", 67 | "\n" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "#### 获取股票列表" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 11, 80 | "metadata": { 81 | "ExecuteTime": { 82 | "end_time": "2019-10-20T07:15:19.786504Z", 83 | "start_time": "2019-10-20T07:15:19.272555Z" 84 | }, 85 | "scrolled": true 86 | }, 87 | "outputs": [ 88 | { 89 | "data": { 90 | "text/html": [ 91 | "
\n", 92 | "\n", 105 | "\n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | "
nameindustryareapeoutstandingtotalstotalAssetsliquidAssetsfixedAssetsreserved...bvpspbtimeToMarketundpperundprevprofitgprnprholders
code
000001平安银行银行深圳9.61194.06194.0637076.830.00105.87808.16...13.821.13199104031152.935.9418.8015.470.0022.94299958.0
000002万 科A全国地产深圳12.5597.15113.0216387.6313581.85124.12119.31...14.801.8219910129981.548.6827.2130.4335.998.15290546.0
\n", 207 | "

2 rows × 22 columns

\n", 208 | "
" 209 | ], 210 | "text/plain": [ 211 | " name industry area pe outstanding totals totalAssets \\\n", 212 | "code \n", 213 | "000001 平安银行 银行 深圳 9.61 194.06 194.06 37076.83 \n", 214 | "000002 万 科A 全国地产 深圳 12.55 97.15 113.02 16387.63 \n", 215 | "\n", 216 | " liquidAssets fixedAssets reserved ... bvps pb timeToMarket \\\n", 217 | "code ... \n", 218 | "000001 0.00 105.87 808.16 ... 13.82 1.13 19910403 \n", 219 | "000002 13581.85 124.12 119.31 ... 14.80 1.82 19910129 \n", 220 | "\n", 221 | " undp perundp rev profit gpr npr holders \n", 222 | "code \n", 223 | "000001 1152.93 5.94 18.80 15.47 0.00 22.94 299958.0 \n", 224 | "000002 981.54 8.68 27.21 30.43 35.99 8.15 290546.0 \n", 225 | "\n", 226 | "[2 rows x 22 columns]" 227 | ] 228 | }, 229 | "execution_count": 11, 230 | "metadata": {}, 231 | "output_type": "execute_result" 232 | } 233 | ], 234 | "source": [ 235 | "\n", 236 | "stocks=ts.get_stock_basics().sort_index()\n", 237 | "stocks.head(2)" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 12, 243 | "metadata": { 244 | "ExecuteTime": { 245 | "end_time": "2019-10-20T07:15:23.392378Z", 246 | "start_time": "2019-10-20T07:15:23.383645Z" 247 | } 248 | }, 249 | "outputs": [ 250 | { 251 | "name": "stdout", 252 | "output_type": "stream", 253 | "text": [ 254 | "(3747, 22)\n", 255 | "(3683, 22)\n" 256 | ] 257 | } 258 | ], 259 | "source": [ 260 | "print(stocks.shape)\n", 261 | "\n", 262 | "stocks=stocks[stocks.index.str[:3]!='688'] # 剔除科创板股票(688开头)\n", 263 | "print(stocks.shape)\n", 264 | "# stocks.index.str[0:1]" 265 | ] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "metadata": {}, 270 | "source": [ 271 | "#### 调用接口函数" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": 22, 277 | "metadata": { 278 | "ExecuteTime": { 279 | "end_time": "2019-09-21T01:01:16.424265Z", 280 | "start_time": "2019-09-21T01:01:16.414424Z" 281 | }, 282 | "code_folding": [] 283 | }, 284 | "outputs": [], 285 | "source": [ 286 | "# API helper functions\n", 287 | 
"def addcode(c): # append the .sh/.sz exchange suffix to the code\n", 288 | " return c+'.sh' if c[0:1]=='6' else c+'.sz' \n", 289 | "def precode(c): # prepend the sh./sz. exchange prefix to the code\n", 290 | " return 'sh.'+c if c[0:1]=='6' else 'sz.'+c \n", 291 | "\n", 292 | "def getBaostock(pcode,start_date,end_date,datatype):\n", 293 | "# print(code)\n", 294 | " rs = bs.query_history_k_data_plus(pcode,\n", 295 | " \"date,code,close,open,low,high,preclose,volume,amount,turn,pctChg,peTTM,pbMRQ,psTTM,pcfNcfTTM,isST\", #preclose,turn,\n", 296 | " start_date=start_date, end_date=end_date,\n", 297 | " frequency=datatype, adjustflag=\"3\")# price-adjustment type: choose 1, 2 or 3\n", 298 | " #### collect the result set ####\n", 299 | " data_list = []\n", 300 | " while (rs.error_code == '0') & rs.next():\n", 301 | " # fetch one record and append it to the list\n", 302 | " data_list.append(rs.get_row_data())\n", 303 | " df=pd.DataFrame(data_list, columns=rs.fields)\n", 304 | " df.close=pd.to_numeric(pd.to_numeric(df.close, downcast='float')*100,downcast='integer') # close price * 100, downcast to an integer dtype where possible\n", 305 | " df.open=pd.to_numeric(pd.to_numeric(df.open, downcast='float')*100,downcast='integer')\n", 306 | " df.low=pd.to_numeric(pd.to_numeric(df.low, downcast='float')*100,downcast='integer')\n", 307 | " df.high=pd.to_numeric(pd.to_numeric(df.high, downcast='float')*100,downcast='integer')\n", 308 | " df.preclose=pd.to_numeric(pd.to_numeric(df.preclose, downcast='float')*100,downcast='integer')\n", 309 | " df.turn=pd.to_numeric(df.turn, downcast='float')\n", 310 | " df['flowmkt']=0.001*df.close*pd.to_numeric(df.volume)/df.turn # volume is still the raw str column here; convert it or the product raises TypeError. NOTE(review): confirm flowmkt should use volume before the /10000 scaling below\n", 311 | " df['pctChg']=100*df.close/df.preclose-100\n", 312 | " df['ep']=100/pd.to_numeric(df.peTTM, downcast='float')\n", 313 | " df['eb']=100/pd.to_numeric(df.pbMRQ, downcast='float')\n", 314 | " df.amount=pd.to_numeric(df.amount, downcast='float')/10000\n", 315 | " df.volume=pd.to_numeric(df.volume,downcast='float')/10000\n", 316 | " df.fillna(method = 'ffill',inplace=True)\n", 317 | " df.dropna(inplace=True)\n", 318 | " return df" 319 | ] 320 | }, 321 | { 322 | "cell_type": "markdown", 323 | "metadata": {}, 324 | "source": [ 
325 | "#### 依次添加baostock获取的数据到mysql\n" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": 23, 331 | "metadata": { 332 | "ExecuteTime": { 333 | "start_time": "2019-09-21T00:59:29.946Z" 334 | } 335 | }, 336 | "outputs": [], 337 | "source": [ 338 | "# def getStocksData(codes,start_date,issql=0):\n", 339 | "# dfdata=pd.DataFrame()\n", 340 | "# for code in codes:\n", 341 | "# # try:\n", 342 | "# df=getBaostock(precode(code),start_date,today,'d')\n", 343 | "# if issql==1:\n", 344 | "# if df.shape[0]>0 :\n", 345 | "# df['code']=code\n", 346 | "# df.to_sql(name='k'+code[0:4], con=conn, if_exists='append', index=False)\n", 347 | "\n", 348 | "# else:\n", 349 | "# print(code+' empty data')\n", 350 | "# else:\n", 351 | "# df=pd.DataFrame()\n", 352 | "# dfdata.append(df)\n", 353 | "# # except:\n", 354 | "# # print(code,'error')\n", 355 | " \n", 356 | "# import threading\n", 357 | "\n", 358 | "# start_time=time.time()\n", 359 | "# threads=[]\n", 360 | "# codes=stocks.index[:]\n", 361 | "# m=20\n", 362 | "# start_date='2010-01-01'\n", 363 | "# for i in range(1):\n", 364 | "# lg = bs.login()\n", 365 | "\n", 366 | "# threads.append(threading.Thread(target=getStocksData,args=(codes[i*m:i*m+m],start_date,1,)))\n", 367 | "# threads[i].start()\n", 368 | "# bs.logout()\n", 369 | "\n", 370 | "# thread2.join()\n", 371 | "\n", 372 | "# for i in range(4):\n", 373 | "# threads[i].join()\n", 374 | "# getStocksData(codes,issql=0)" 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": 24, 380 | "metadata": { 381 | "ExecuteTime": { 382 | "end_time": "2019-09-21T01:07:57.225329Z", 383 | "start_time": "2019-09-21T01:07:55.458802Z" 384 | }, 385 | "code_folding": [] 386 | }, 387 | "outputs": [ 388 | { 389 | "name": "stdout", 390 | "output_type": "stream", 391 | "text": [ 392 | "login success!\n", 393 | "login respond msg:success\n", 394 | "0 2371.000000\n", 395 | "1 2330.000000\n", 396 | "2 2290.000000\n", 397 | "3 2265.000000\n", 398 | "4 
2260.000000\n", 399 | " ... \n", 400 | "2399 1645.000122\n", 401 | "2400 1641.000000\n", 402 | "2401 1585.000000\n", 403 | "2402 1586.000000\n", 404 | "2403 1559.000000\n", 405 | "Name: close, Length: 2404, dtype: float32\n" 406 | ] 407 | }, 408 | { 409 | "ename": "TypeError", 410 | "evalue": "can't multiply sequence by non-int of type 'float'", 411 | "output_type": "error", 412 | "traceback": [ 413 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 414 | "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", 415 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\ops\\__init__.py\u001b[0m in \u001b[0;36mna_op\u001b[1;34m(x, y)\u001b[0m\n\u001b[0;32m 967\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 968\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mexpressions\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mevaluate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mop\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstr_rep\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0meval_kwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 969\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 416 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\computation\\expressions.py\u001b[0m in \u001b[0;36mevaluate\u001b[1;34m(op, op_str, a, b, use_numexpr, **eval_kwargs)\u001b[0m\n\u001b[0;32m 220\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0muse_numexpr\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 221\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0m_evaluate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mop\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[0mop_str\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0ma\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mb\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0meval_kwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 222\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0m_evaluate_standard\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mop\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mop_str\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0ma\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mb\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 417 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\computation\\expressions.py\u001b[0m in \u001b[0;36m_evaluate_numexpr\u001b[1;34m(op, op_str, a, b, truediv, reversed, **eval_kwargs)\u001b[0m\n\u001b[0;32m 126\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mresult\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 127\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_evaluate_standard\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mop\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mop_str\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0ma\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mb\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 128\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", 418 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\computation\\expressions.py\u001b[0m in \u001b[0;36m_evaluate_standard\u001b[1;34m(op, op_str, a, b, **eval_kwargs)\u001b[0m\n\u001b[0;32m 69\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0merrstate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mall\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m\"ignore\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 70\u001b[1;33m 
\u001b[1;32mreturn\u001b[0m \u001b[0mop\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mb\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 71\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", 419 | "\u001b[1;31mTypeError\u001b[0m: can't multiply sequence by non-int of type 'float'", 420 | "\nDuring handling of the above exception, another exception occurred:\n", 421 | "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", 422 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 12\u001b[0m \u001b[1;31m# print(i,'of',len(stocks),code)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 13\u001b[0m \u001b[1;31m# try:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 14\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mgetBaostock\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mprecode\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcode\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mstart_date\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mtoday\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'd'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 15\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 16\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcode\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mcode\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 423 | "\u001b[1;32m\u001b[0m in \u001b[0;36mgetBaostock\u001b[1;34m(pcode, start_date, end_date, datatype)\u001b[0m\n\u001b[0;32m 24\u001b[0m 
\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpreclose\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_numeric\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_numeric\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpreclose\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdowncast\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'float'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m*\u001b[0m\u001b[1;36m100\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mdowncast\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'integer'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 25\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mturn\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_numeric\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mturn\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdowncast\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'float'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 26\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'flowmkt'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0.001\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvolume\u001b[0m\u001b[1;33m/\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mturn\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 27\u001b[0m 
\u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'pctChg'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m100\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[1;33m/\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpreclose\u001b[0m\u001b[1;33m-\u001b[0m\u001b[1;36m100\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 28\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'ep'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m100\u001b[0m\u001b[1;33m/\u001b[0m\u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_numeric\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpeTTM\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdowncast\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'float'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 424 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\ops\\__init__.py\u001b[0m in \u001b[0;36mwrapper\u001b[1;34m(left, right)\u001b[0m\n\u001b[0;32m 1046\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1047\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0merrstate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mall\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m\"ignore\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1048\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mna_op\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlvalues\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrvalues\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1049\u001b[0m return construct_result(\n\u001b[0;32m 1050\u001b[0m \u001b[0mleft\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[0mindex\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mleft\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mres_name\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 425 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\ops\\__init__.py\u001b[0m in \u001b[0;36mna_op\u001b[1;34m(x, y)\u001b[0m\n\u001b[0;32m 968\u001b[0m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mexpressions\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mevaluate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mop\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstr_rep\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0meval_kwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 969\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 970\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmasked_arith_op\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mop\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 971\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 972\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mmissing\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdispatch_fill_zeros\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mop\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 426 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\ops\\__init__.py\u001b[0m in 
\u001b[0;36mmasked_arith_op\u001b[1;34m(x, y, op)\u001b[0m\n\u001b[0;32m 445\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mmask\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0many\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 446\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0merrstate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mall\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m\"ignore\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 447\u001b[1;33m \u001b[0mresult\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mmask\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mop\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mxrav\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mmask\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcom\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalues_from_object\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0myrav\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mmask\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 448\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 449\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 427 | "\u001b[1;31mTypeError\u001b[0m: can't multiply sequence by non-int of type 'float'" 428 | ] 429 | } 430 | ], 431 | "source": [ 432 | "### 登陆系统 ####\n", 433 | "lg = bs.login()\n", 434 | "# 显示登陆返回信息\n", 435 | "start_date='2010-01-01'\n", 436 | "print('login respond msg:'+lg.error_msg)\n", 437 | "i=0\n", 438 | "symbols=stocks.index[:2]\n", 439 | "# symbols=['']\n", 440 | "for code in symbols:\n", 441 | " i+=1\n", 442 | "# if i%100==0:\n", 443 | "# print(i,'of',len(stocks),code)\n", 444 | "# try:\n", 445 | " df=getBaostock(precode(code),start_date,today,'d')\n", 446 | " print(df)\n", 447 | " 
df.code=code\n", 448 | " print(df)\n", 449 | "\n", 450 | " if df.shape[0]>0:\n", 451 | " df.to_sql(name='k'+code[0:4], con=conn, if_exists='append', index=False)\n", 452 | " # df.to_csv(code+'.csv')\n", 453 | " # mcodes.append(code)\n", 454 | " else:\n", 455 | " print(code+' empty data')\n", 456 | "# except:\n", 457 | "# print(code,'error')\n", 458 | "bs.logout()\n", 459 | "df.head()" 460 | ] 461 | }, 462 | { 463 | "cell_type": "markdown", 464 | "metadata": {}, 465 | "source": [ 466 | "### 获取tushare当日全部数据到mysql\n", 467 | "适用于当日数据更新,所花时间较短" 468 | ] 469 | }, 470 | { 471 | "cell_type": "code", 472 | "execution_count": null, 473 | "metadata": { 474 | "ExecuteTime": { 475 | "end_time": "2019-08-11T13:52:47.508085Z", 476 | "start_time": "2019-08-11T13:52:11.078734Z" 477 | } 478 | }, 479 | "outputs": [], 480 | "source": [ 481 | "kdata=ts.get_today_all()\n", 482 | "jdata=kdata[['code','trade','open','settlement','low','high','turnoverratio','volume','amount']]\n", 483 | "jdata.columns=['code','close','open','preclose','low','high','turn','volume','amount'] # 列名不一样,重新设置一下\n", 484 | "jdata['date']=today\n", 485 | "# jdata.to_csv(today+'.csv')\n" 486 | ] 487 | }, 488 | { 489 | "cell_type": "code", 490 | "execution_count": null, 491 | "metadata": { 492 | "ExecuteTime": { 493 | "end_time": "2019-08-11T13:53:05.263038Z", 494 | "start_time": "2019-08-11T13:53:05.168443Z" 495 | }, 496 | "scrolled": true 497 | }, 498 | "outputs": [], 499 | "source": [ 500 | "# jdata=kdata[['code','trade','open','settlement','low','high','turnoverratio','volume','amount']]\n", 501 | "# jdata.columns=['code','close','open','preclose','low','high','turn','volume','amount']\n", 502 | "# jdata['date']=today\n", 503 | "# jdata['date']='2019-08-09'\n", 504 | "jdata.head()\n" 505 | ] 506 | }, 507 | { 508 | "cell_type": "code", 509 | "execution_count": null, 510 | "metadata": { 511 | "ExecuteTime": { 512 | "end_time": "2019-08-11T13:53:38.222560Z", 513 | "start_time": "2019-08-11T13:53:16.487770Z" 514 | } 
515 | }, 516 | "outputs": [], 517 | "source": [ 518 | "for i in range(jdata.shape[0]):\n", 519 | " if i%500==1:\n", 520 | " print(i , 'of',jdata.shape[0],jdata.code[i])\n", 521 | "# print(jdata.code[i])\n", 522 | " jdata.iloc[i:i+1].to_sql(name='k'+jdata.code[i][0:4], con=conn, if_exists='append', index=False)" 523 | ] 524 | }, 525 | { 526 | "cell_type": "markdown", 527 | "metadata": {}, 528 | "source": [ 529 | "## 保存各类指数数据到数据库" 530 | ] 531 | }, 532 | { 533 | "cell_type": "code", 534 | "execution_count": null, 535 | "metadata": { 536 | "ExecuteTime": { 537 | "end_time": "2019-08-18T07:24:20.376007Z", 538 | "start_time": "2019-08-18T07:24:20.371426Z" 539 | }, 540 | "code_folding": [] 541 | }, 542 | "outputs": [], 543 | "source": [ 544 | "## API helper: download index history and append it to MySQL\n", 545 | "### logs in to baostock, loops over the codes, then logs out ####\n", 546 | "def get_index(codes,start_date):\n", 547 | " lg = bs.login()\n", 548 | " for code in codes:\n", 549 | " # getBaostock requires the frequency as its 4th argument; 'd' matches the stock-download cell\n", 550 | " df=getBaostock(code,start_date,today,'d')\n", 551 | " df.code=code\n", 552 | "\n", 553 | " if df.shape[0]>0:\n", 554 | " df.to_sql(name='k'+code[:4].replace('.',''), con=conn, if_exists='append', index=False)\n", 555 | " else:\n", 556 | " print(code+' empty data')\n", 557 | " bs.logout()\n", 558 | " print('save to mysql ok')\n", 559 | "\n" 560 | ] 561 | }, 562 | { 563 | "cell_type": "code", 564 | "execution_count": null, 565 | "metadata": { 566 | "ExecuteTime": { 567 | "end_time": "2019-08-08T12:32:46.455232Z", 568 | "start_time": "2019-08-08T12:32:46.382107Z" 569 | }, 570 | "code_folding": [] 571 | }, 572 | "outputs": [], 573 | "source": [ 574 | "# aindxname=pd.read_csv(u'/Users/hongyuouyang/python/finance/indexname.csv')\n", 575 | "# aindxname.to_sql(name='indexname', con=conn, if_exists='append', index=False)\n" 576 | ] 577 | }, 578 | { 579 | "cell_type": "code", 580 | "execution_count": null, 581 | "metadata": { 582 | "ExecuteTime": { 583 | "end_time": "2019-08-18T07:24:43.031708Z", 584 | "start_time": "2019-08-18T07:24:22.837224Z" 585 | } 586 | }, 587 | 
"outputs": [], 588 | "source": [ 589 | "start_date='2019-01-01'\n", 590 | "\n", 591 | "aindxname=pd.read_sql('select * from indexname',con=conn)\n", 592 | "get_index(aindxname.code.values,start_date)" 593 | ] 594 | } 595 | ], 596 | "metadata": { 597 | "hide_input": false, 598 | "kernelspec": { 599 | "display_name": "Python 3", 600 | "language": "python", 601 | "name": "python3" 602 | }, 603 | "language_info": { 604 | "codemirror_mode": { 605 | "name": "ipython", 606 | "version": 3 607 | }, 608 | "file_extension": ".py", 609 | "mimetype": "text/x-python", 610 | "name": "python", 611 | "nbconvert_exporter": "python", 612 | "pygments_lexer": "ipython3", 613 | "version": "3.7.4" 614 | }, 615 | "latex_envs": { 616 | "LaTeX_envs_menu_present": true, 617 | "autoclose": false, 618 | "autocomplete": true, 619 | "bibliofile": "biblio.bib", 620 | "cite_by": "apalike", 621 | "current_citInitial": 1, 622 | "eqLabelWithNumbers": true, 623 | "eqNumInitial": 1, 624 | "hotkeys": { 625 | "equation": "Ctrl-E", 626 | "itemize": "Ctrl-I" 627 | }, 628 | "labels_anchors": false, 629 | "latex_user_defs": false, 630 | "report_style_numbering": false, 631 | "user_envs_cfg": false 632 | }, 633 | "toc": { 634 | "base_numbering": 1, 635 | "nav_menu": {}, 636 | "number_sections": true, 637 | "sideBar": true, 638 | "skip_h1_title": false, 639 | "title_cell": "Table of Contents", 640 | "title_sidebar": "Contents", 641 | "toc_cell": false, 642 | "toc_position": {}, 643 | "toc_section_display": true, 644 | "toc_window_display": false 645 | }, 646 | "varInspector": { 647 | "cols": { 648 | "lenName": 16, 649 | "lenType": 16, 650 | "lenVar": 40 651 | }, 652 | "kernels_config": { 653 | "python": { 654 | "delete_cmd_postfix": "", 655 | "delete_cmd_prefix": "del ", 656 | "library": "var_list.py", 657 | "varRefreshCmd": "print(var_dic_list())" 658 | }, 659 | "r": { 660 | "delete_cmd_postfix": ") ", 661 | "delete_cmd_prefix": "rm(", 662 | "library": "var_list.r", 663 | "varRefreshCmd": "cat(var_dic_list()) 
" 664 | } 665 | }, 666 | "types_to_exclude": [ 667 | "module", 668 | "function", 669 | "builtin_function_or_method", 670 | "instance", 671 | "_Feature" 672 | ], 673 | "window_display": false 674 | } 675 | }, 676 | "nbformat": 4, 677 | "nbformat_minor": 2 678 | } 679 | --------------------------------------------------------------------------------