├── 3.1.OSL.ipynb ├── 3.2.RSTR-DASTD.ipynb └── 1.get_alpha_n135.ipynb /3.1.OSL.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "65b19dba-13f2-4c0e-b876-96e7d77429ad", 6 | "metadata": {}, 7 | "source": [ 8 | "### **一、加载常用库**" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "cd076ced-3d33-4a2d-a841-4c23f3e50f5d", 15 | "metadata": { 16 | "tags": [] 17 | }, 18 | "outputs": [ 19 | { 20 | "name": "stderr", 21 | "output_type": "stream", 22 | "text": [ 23 | "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:7: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", 24 | " from pandas import (to_datetime, Int64Index, DatetimeIndex, Period,\n", 25 | "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:7: FutureWarning: pandas.Float64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", 26 | " from pandas import (to_datetime, Int64Index, DatetimeIndex, Period,\n" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "import scipy as sp\n", 32 | "import numpy as np\n", 33 | "import pandas as pd\n", 34 | "import gc\n", 35 | "import statsmodels.api as sm\n", 36 | "pd.set_option('display.max_columns', None)" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "id": "ed8a88ec-c205-4fd4-bf47-ce1e661cbd9a", 42 | "metadata": {}, 43 | "source": [ 44 | "### **二、计算风格因子Beta和残差标准差resid_std**" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 2, 50 | "id": "26024f3e-2c15-413f-a4a0-cda937b70862", 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "#半衰期加权函数\n", 55 | "def Sma_half(df,h):\n", 56 | " #sma均值\n", 57 | " #df1 = df.ewm(alpha=m/n).mean()\n", 58 | " alpha = 1-np.exp(np.log(0.5)/h)\n", 59 | " return df.ewm(alpha = alpha , adjust=False).mean()\n", 60 | "\n", 61 | "#滚动回归\n", 62 | "def OLS_params(df,window):\n", 63 | " result = pd.DataFrame()\n", 64 | " listcode = df.code.unique()\n", 65 | " for code in listcode:\n", 66 | " df_sub = df[df.code == code].reset_index(drop = True)\n", 67 | " for row in range(window, len(df_sub)+1):\n", 68 | " y = df_sub.loc[row-window:row,['return_half']]\n", 69 | " x = df_sub.loc[row-window:row,['Retindex_half']]\n", 70 | " #X = sm.add_constant(x)\n", 71 | " model = sm.OLS(y,x).fit()\n", 72 | " df_sub.loc[row-1,['Beta']] = model.params[0]\n", 73 | " df_sub.loc[row-1,['resid_std']] = model.resid.std()\n", 74 | " result = pd.concat([result,df_sub])\n", 75 | " \n", 76 | " return result" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 3, 82 | "id": "0a54b456-60fe-444c-ad56-f3a2f2e678df", 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "#导入股票和指数交易数据\n", 87 | "stktrd = pd.read_hdf('alldata.h5',key='stktrd')" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 4, 93 | "id": "e479d47b-0f03-4ccc-a425-467de6bd401e", 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "#提取出需要的列\n", 98 | "stktrd_getbeta = stktrd.loc[:,['code','date','return','Retindex']]" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 5, 104 | "id": "4b81e462-4be9-46d7-b7f7-126382ebe04d", 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "#计算半衰指数加权股票收益率\n", 109 | "stktrd_getbeta['return_half'] = stktrd_getbeta.groupby(['code'])['return'].apply(lambda x: Sma_half(x,60))" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 6, 115 | "id": "0a477850-fc03-42c1-8bb0-320e38351068", 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "#计算半衰指数加权沪深300收益率\n", 120 | "stktrd_getbeta['Retindex_half'] = stktrd_getbeta.groupby(['code'])['Retindex'].apply(lambda x: Sma_half(x,60))" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 7, 126 | "id": "b91042df-5ad3-499b-add0-9b2ed3bc9d18", 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "ols_result = OLS_params(stktrd_getbeta,250)" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 14, 136 | "id": "111b81ec-740e-496a-8320-edbce61f002b", 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "ols_result.reset_index(drop = True, inplace = True)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 16, 146 | "id": "ab7b70ac-6476-4dfd-9957-aa0d5afddc14", 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "ols_result.to_hdf('ols_result.h5', key = 'ols_result')" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 17, 156 | "id": "2d523bdd-0929-4ca8-9d12-255252b63764", 157 | "metadata": {}, 158 | "outputs": [ 159 | { 160 | "data": { 161 | "text/html": [ 162 | "
\n", 163 | "\n", 176 | "\n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | "
codedatereturnRetindexreturn_halfRetindex_halfBetaresid_std
012010-01-04-0.027082-0.011314-0.027082-0.011314NaNNaN
112010-01-05-0.0172920.008149-0.026970-0.011090NaNNaN
212010-01-06-0.017167-0.006260-0.026857-0.011035NaNNaN
312010-01-07-0.010917-0.019841-0.026674-0.011136NaNNaN
412010-01-08-0.0022080.002499-0.026393-0.010979NaNNaN
...........................
86726049009572022-01-24-0.0266460.001553-0.000022-0.0002901.6682510.000730
86726059009572022-01-25-0.016103-0.022622-0.000207-0.0005471.6970250.000720
86726069009572022-01-260.0049100.007237-0.000148-0.0004571.7133350.000714
86726079009572022-01-27-0.003257-0.019615-0.000184-0.0006771.7266800.000709
86726089009572022-01-280.008170-0.012144-0.000088-0.0008091.7451070.000707
\n", 314 | "

8672609 rows × 8 columns

\n", 315 | "
" 316 | ], 317 | "text/plain": [ 318 | " code date return Retindex return_half Retindex_half \\\n", 319 | "0 1 2010-01-04 -0.027082 -0.011314 -0.027082 -0.011314 \n", 320 | "1 1 2010-01-05 -0.017292 0.008149 -0.026970 -0.011090 \n", 321 | "2 1 2010-01-06 -0.017167 -0.006260 -0.026857 -0.011035 \n", 322 | "3 1 2010-01-07 -0.010917 -0.019841 -0.026674 -0.011136 \n", 323 | "4 1 2010-01-08 -0.002208 0.002499 -0.026393 -0.010979 \n", 324 | "... ... ... ... ... ... ... \n", 325 | "8672604 900957 2022-01-24 -0.026646 0.001553 -0.000022 -0.000290 \n", 326 | "8672605 900957 2022-01-25 -0.016103 -0.022622 -0.000207 -0.000547 \n", 327 | "8672606 900957 2022-01-26 0.004910 0.007237 -0.000148 -0.000457 \n", 328 | "8672607 900957 2022-01-27 -0.003257 -0.019615 -0.000184 -0.000677 \n", 329 | "8672608 900957 2022-01-28 0.008170 -0.012144 -0.000088 -0.000809 \n", 330 | "\n", 331 | " Beta resid_std \n", 332 | "0 NaN NaN \n", 333 | "1 NaN NaN \n", 334 | "2 NaN NaN \n", 335 | "3 NaN NaN \n", 336 | "4 NaN NaN \n", 337 | "... ... ... \n", 338 | "8672604 1.668251 0.000730 \n", 339 | "8672605 1.697025 0.000720 \n", 340 | "8672606 1.713335 0.000714 \n", 341 | "8672607 1.726680 0.000709 \n", 342 | "8672608 1.745107 0.000707 \n", 343 | "\n", 344 | "[8672609 rows x 8 columns]" 345 | ] 346 | }, 347 | "execution_count": 17, 348 | "metadata": {}, 349 | "output_type": "execute_result" 350 | } 351 | ], 352 | "source": [ 353 | "ols_result" 354 | ] 355 | } 356 | ], 357 | "metadata": { 358 | "kernelspec": { 359 | "display_name": "Python 3 (ipykernel)", 360 | "language": "python", 361 | "name": "python3" 362 | }, 363 | "language_info": { 364 | "codemirror_mode": { 365 | "name": "ipython", 366 | "version": 3 367 | }, 368 | "file_extension": ".py", 369 | "mimetype": "text/x-python", 370 | "name": "python", 371 | "nbconvert_exporter": "python", 372 | "pygments_lexer": "ipython3", 373 | "version": "3.9.7" 374 | } 375 | }, 376 | "nbformat": 4, 377 | "nbformat_minor": 5 378 | } 379 | -------------------------------------------------------------------------------- /3.2.RSTR-DASTD.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "fa57655f-a11c-46bf-94f9-dc89c29df3e9", 6 | "metadata": {}, 7 | "source": [ 8 | "### **1、数据clean**" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "d326b05d-457d-4791-b371-442c35387dae", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import scipy as sp\n", 19 | "import numpy as np\n", 20 | "import pandas as pd\n", 21 | "pd.set_option('display.max_columns', None)" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 2, 27 | "id": "18fbea51-4bf3-4152-a062-513a0dc3788c", 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "#因为RSTR数据计算的window为500,为了避免删除太多日期,故从08年交易数据开始计算\n", 32 | "TRD_Dalyr1 = pd.read_csv(\"D:/191alphas/return_day_08_22/TRD_Dalyr1.csv\")\n", 33 | "TRD_Dalyr2 = pd.read_csv(\"D:/191alphas/return_day_08_22/TRD_Dalyr2.csv\")\n", 34 | "TRD_Dalyr3 = pd.read_csv(\"D:/191alphas/return_day_08_22/TRD_Dalyr3.csv\")\n", 35 | "TRD_Dalyr4 = pd.read_csv(\"D:/191alphas/return_day_08_22/TRD_Dalyr4.csv\")\n", 36 | "TRD_Dalyr5 = pd.read_csv(\"D:/191alphas/return_day_08_22/TRD_Dalyr5.csv\")\n", 37 | "TRD_Dalyr6 = pd.read_csv(\"D:/191alphas/return_day_08_22/TRD_Dalyr6.csv\")\n", 38 | "TRD_Dalyr7 = pd.read_csv(\"D:/191alphas/return_day_08_22/TRD_Dalyr7.csv\")\n", 39 | "TRD_Dalyr8 = pd.read_csv(\"D:/191alphas/return_day_08_22/TRD_Dalyr8.csv\")\n", 40 | "TRD_Dalyr9 = pd.read_csv(\"D:/191alphas/return_day_08_22/TRD_Dalyr9.csv\")\n", 41 | "TRD_Dalyr10 = pd.read_csv(\"D:/191alphas/return_day_08_22/TRD_Dalyr10.csv\")\n", 42 | "TRD_Dalyr11 = pd.read_csv(\"D:/191alphas/return_day_08_22/TRD_Dalyr11.csv\")" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 3, 48 | "id": "d8c9979b-8f56-45e1-8d77-c1a05914d149", 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "TRD_Dalyr08_22 = pd.concat([TRD_Dalyr1,TRD_Dalyr2,TRD_Dalyr3,TRD_Dalyr4,TRD_Dalyr5,TRD_Dalyr6,TRD_Dalyr7,TRD_Dalyr8,TRD_Dalyr9,TRD_Dalyr10,TRD_Dalyr11])" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 4, 58 | "id": "e8ec753e-7206-4898-b6f7-9bdd7bf5cda4", 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "TRD_Dalyr08_22.drop(columns = ['Markettype'], inplace = True)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 5, 68 | "id": "d335ab10-68f8-4c82-b37a-fbf44178f605", 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "TRD_Dalyr08_22.columns = ['code','date','return_day']" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 6, 78 | "id": "0b9524ba-6403-44d4-bf9e-ab5d261049c8", 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "TRD_Dalyr08_22.sort_values(by = ['code','date'], inplace = True)" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 7, 88 | "id": "2af72883-6126-4eb9-a48f-1506bfb4b312", 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "TRD_Dalyr08_22.reset_index(drop = True, inplace = True)" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 8, 98 | "id": "c4c98b56-b8d7-43ce-89e1-80c85ec31b99", 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "TRD_Dalyr08_22['date'] = pd.to_datetime(TRD_Dalyr08_22['date'])" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 9, 108 | "id": "30437ef7-1955-4ea0-b17a-178c9ac4c427", 109 | "metadata": {}, 110 | "outputs": [ 111 | { 112 | "data": { 113 | "text/html": [ 114 | "
\n", 115 | "\n", 128 | "\n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | "
codedatereturn_day
012008-01-02-0.016062
112008-01-03-0.016588
212008-01-040.027309
312008-01-070.020850
412008-01-080.031146
............
91275326890092022-01-240.003281
91275336890092022-01-25-0.024295
91275346890092022-01-26-0.010535
91275356890092022-01-27-0.022423
91275366890092022-01-280.003465
\n", 206 | "

9127537 rows × 3 columns

\n", 207 | "
" 208 | ], 209 | "text/plain": [ 210 | " code date return_day\n", 211 | "0 1 2008-01-02 -0.016062\n", 212 | "1 1 2008-01-03 -0.016588\n", 213 | "2 1 2008-01-04 0.027309\n", 214 | "3 1 2008-01-07 0.020850\n", 215 | "4 1 2008-01-08 0.031146\n", 216 | "... ... ... ...\n", 217 | "9127532 689009 2022-01-24 0.003281\n", 218 | "9127533 689009 2022-01-25 -0.024295\n", 219 | "9127534 689009 2022-01-26 -0.010535\n", 220 | "9127535 689009 2022-01-27 -0.022423\n", 221 | "9127536 689009 2022-01-28 0.003465\n", 222 | "\n", 223 | "[9127537 rows x 3 columns]" 224 | ] 225 | }, 226 | "execution_count": 9, 227 | "metadata": {}, 228 | "output_type": "execute_result" 229 | } 230 | ], 231 | "source": [ 232 | "TRD_Dalyr08_22" 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "id": "85c7dcbc-58f6-43e7-bb86-fd78371d200b", 238 | "metadata": {}, 239 | "source": [ 240 | "### **2、半衰加权函数**" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 10, 246 | "id": "6a188c3e-16f4-43c6-984f-e8c29618974d", 247 | "metadata": {}, 248 | "outputs": [], 249 | "source": [ 250 | "#计算半衰期指数权重,时间序列长度为windows,半衰期为half\n", 251 | "def Weihgt_half(windows,half):\n", 252 | " list_W = []\n", 253 | " for i in range(windows):\n", 254 | " locals()['W'+str(i)] = 0.5**((windows+1-i)/half)\n", 255 | " list_W.append(locals()['W'+str(i)])\n", 256 | " \n", 257 | " return list_W/pd.Series(list_W).sum()#返回一个数组" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": 11, 263 | "id": "a60872dd-3616-4c6c-a18c-47879f52979c", 264 | "metadata": {}, 265 | "outputs": [], 266 | "source": [ 267 | "#计算半衰加权的函数,column为被加权的列,windows,half为权重计算参数,new_col为加权结果列\n", 268 | "def Mean_half(df,column,new_col,windows,half):\n", 269 | " result = pd.DataFrame()\n", 270 | " codelist = df.code.unique()\n", 271 | " Weihgt_h = Weihgt_half(windows, half)\n", 272 | " for code in codelist:\n", 273 | " TRD_sub = df[df.code == code].copy()\n", 274 | " TRD_sub.loc[:,[new_col]] = TRD_sub.rolling(windows)[column].apply(lambda x: (x*Weihgt_h).sum())\n", 275 | " result = pd.concat([result,TRD_sub])\n", 276 | " return result" 277 | ] 278 | }, 279 | { 280 | "cell_type": "markdown", 281 | "id": "07db17d8-f7ca-4fae-9877-c628097f2f1c", 282 | "metadata": { 283 | "tags": [] 284 | }, 285 | "source": [ 286 | "### **3、计算RSTR**" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": 12, 292 | "id": "850a670b-058e-4242-b559-483577845a71", 293 | "metadata": {}, 294 | "outputs": [], 295 | "source": [ 296 | "TRD_Dalyr08_22.loc[:,'ln_return_day'] = np.log(1+TRD_Dalyr08_22['return_day'])" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 13, 302 | "id": "7f2f4af6-8a74-4f14-a756-8f5bfe528f17", 303 | "metadata": {}, 304 | "outputs": [], 305 | "source": [ 306 | "Mean_half_500120 = Mean_half(TRD_Dalyr08_22 ,column = 'ln_return_day', new_col = 'RSTR_0', windows = 500, half = 120)" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": 14, 312 | "id": "e5e34a01-7f21-4500-b342-a161a5c9ff67", 313 | "metadata": {}, 314 | "outputs": [], 315 | "source": [ 316 | "Mean_half_500120.loc[:,'RSTR'] = Mean_half_500120.groupby('code')['RSTR_0'].shift(20)" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": 15, 322 | "id": "fa41ada5-490c-4259-8773-4f8fbad48d2d", 323 | "metadata": {}, 324 | "outputs": [], 325 | "source": [ 326 | "Mean_half_500120.to_hdf('C:/Users/Administrator/Downloads/alpha191/RSTR.h5', key = 'RSTR')" 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": 16, 332 | "id": "bea03dc1-c8ca-4e4e-968d-38c23279c8f9", 333 | "metadata": {}, 334 | "outputs": [ 335 | { 336 | "data": { 337 | "text/html": [ 338 | "
\n", 339 | "\n", 352 | "\n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | "
codedatereturn_dayln_return_dayRSTR_0RSTR
012008-01-02-0.016062-0.016192NaNNaN
112008-01-03-0.016588-0.016727NaNNaN
212008-01-040.0273090.026943NaNNaN
312008-01-070.0208500.020636NaNNaN
412008-01-080.0311460.030671NaNNaN
.....................
91275326890092022-01-240.0032810.003276NaNNaN
91275336890092022-01-25-0.024295-0.024595NaNNaN
91275346890092022-01-26-0.010535-0.010591NaNNaN
91275356890092022-01-27-0.022423-0.022678NaNNaN
91275366890092022-01-280.0034650.003459NaNNaN
\n", 466 | "

9127537 rows × 6 columns

\n", 467 | "
" 468 | ], 469 | "text/plain": [ 470 | " code date return_day ln_return_day RSTR_0 RSTR\n", 471 | "0 1 2008-01-02 -0.016062 -0.016192 NaN NaN\n", 472 | "1 1 2008-01-03 -0.016588 -0.016727 NaN NaN\n", 473 | "2 1 2008-01-04 0.027309 0.026943 NaN NaN\n", 474 | "3 1 2008-01-07 0.020850 0.020636 NaN NaN\n", 475 | "4 1 2008-01-08 0.031146 0.030671 NaN NaN\n", 476 | "... ... ... ... ... ... ...\n", 477 | "9127532 689009 2022-01-24 0.003281 0.003276 NaN NaN\n", 478 | "9127533 689009 2022-01-25 -0.024295 -0.024595 NaN NaN\n", 479 | "9127534 689009 2022-01-26 -0.010535 -0.010591 NaN NaN\n", 480 | "9127535 689009 2022-01-27 -0.022423 -0.022678 NaN NaN\n", 481 | "9127536 689009 2022-01-28 0.003465 0.003459 NaN NaN\n", 482 | "\n", 483 | "[9127537 rows x 6 columns]" 484 | ] 485 | }, 486 | "execution_count": 16, 487 | "metadata": {}, 488 | "output_type": "execute_result" 489 | } 490 | ], 491 | "source": [ 492 | "Mean_half_500120" 493 | ] 494 | }, 495 | { 496 | "cell_type": "markdown", 497 | "id": "8dc483aa-333e-4b84-8923-0bb2a2cd660e", 498 | "metadata": {}, 499 | "source": [ 500 | "### **4、计算DASTD**" 501 | ] 502 | }, 503 | { 504 | "cell_type": "markdown", 505 | "id": "1e3c40ab-c6ec-403a-9aa9-ae98992769c0", 506 | "metadata": {}, 507 | "source": [ 508 | "TRD_Dalyr08_22" 509 | ] 510 | }, 511 | { 512 | "cell_type": "code", 513 | "execution_count": null, 514 | "id": "4f0ae0b9-8d5e-40c6-ba4b-c7f282622e39", 515 | "metadata": {}, 516 | "outputs": [], 517 | "source": [ 518 | "#计算日度收益率250日均值\n", 519 | "TRD_Dalyr08_22.loc[:,'return_day_mean250'] = TRD_Dalyr08_22.groupby('code',as_index=False)['return_day'].rolling(250).mean()['return_day']" 520 | ] 521 | }, 522 | { 523 | "cell_type": "code", 524 | "execution_count": null, 525 | "id": "90050bf5-011c-4093-955c-65407ca9f7ea", 526 | "metadata": {}, 527 | "outputs": [], 528 | "source": [ 529 | "#计算日度收益率与250日均值的离差平方\n", 530 | "TRD_Dalyr08_22.loc[:,'return_d_Sq_dev'] = (TRD_Dalyr08_22['return_day'] - TRD_Dalyr08_22['return_day_mean250'])**2" 531 | ] 532 | }, 533 | { 534 | "cell_type": "code", 535 | "execution_count": null, 536 | "id": "e9f44874-f46b-4eac-bc01-5c6c9a86cea7", 537 | "metadata": { 538 | "tags": [] 539 | }, 540 | "outputs": [], 541 | "source": [ 542 | "#周期250,40天半衰期加权平均,因此需要至少500天交易数据\n", 543 | "Mean_half_25040 = Mean_half(TRD_Dalyr08_22 ,column = 'return_d_Sq_dev', new_col = 'DASTD', windows = 250, half = 40)" 544 | ] 545 | }, 546 | { 547 | "cell_type": "code", 548 | "execution_count": null, 549 | "id": "1f370aed-4aaf-41f1-a6bf-914cc4d8e126", 550 | "metadata": {}, 551 | "outputs": [], 552 | "source": [ 553 | "Mean_half_25040.isnull().sum()" 554 | ] 555 | }, 556 | { 557 | "cell_type": "code", 558 | "execution_count": null, 559 | "id": "17eb2e53-1a66-4545-944e-5fd002d43e39", 560 | "metadata": {}, 561 | "outputs": [], 562 | "source": [ 563 | "Mean_half_25040.info()" 564 | ] 565 | }, 566 | { 567 | "cell_type": "code", 568 | "execution_count": null, 569 | "id": "9eebb595-349a-46a1-a1c9-1207a0c24ef1", 570 | "metadata": {}, 571 | "outputs": [], 572 | "source": [ 573 | "Mean_half_25040_dropna = Mean_half_25040.dropna()" 574 | ] 575 | }, 576 | { 577 | "cell_type": "code", 578 | "execution_count": null, 579 | "id": "cf61e111-a71a-4d9e-b7f7-63025975c686", 580 | "metadata": {}, 581 | "outputs": [], 582 | "source": [ 583 | "len(Mean_half_25040_dropna.code.unique())" 584 | ] 585 | }, 586 | { 587 | "cell_type": "code", 588 | "execution_count": null, 589 | "id": "d6c19d49-fadc-44c1-bdb3-63e903f84170", 590 | "metadata": {}, 591 | "outputs": [], 592 | "source": [ 593 | "Mean_half_25040.to_hdf('C:/Users/Administrator/Downloads/alpha191/DASTD.h5', key = 'DASTD')" 594 | ] 595 | } 596 | ], 597 | "metadata": { 598 | "kernelspec": { 599 | "display_name": "Python 3 (ipykernel)", 600 | "language": "python", 601 | "name": "python3" 602 | }, 603 | "language_info": { 604 | "codemirror_mode": { 605 | "name": "ipython", 606 | "version": 3 607 | }, 608 | "file_extension": ".py", 609 | "mimetype": "text/x-python", 610 | "name": "python", 611 | "nbconvert_exporter": "python", 612 | "pygments_lexer": "ipython3", 613 | "version": "3.9.7" 614 | } 615 | }, 616 | "nbformat": 4, 617 | "nbformat_minor": 5 618 | } 619 | -------------------------------------------------------------------------------- /1.get_alpha_n135.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "1c9016a1-1170-4ae4-bfc6-c741940d41b7", 6 | "metadata": { 7 | "tags": [] 8 | }, 9 | "source": [ 10 | "### **一、加载常用包**" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "id": "ee5481f4-2fd3-4bc8-9ad6-50d070228571", 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "name": "stderr", 21 | "output_type": "stream", 22 | "text": [ 23 | "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:7: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", 24 | " from pandas import (to_datetime, Int64Index, DatetimeIndex, Period,\n", 25 | "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:7: FutureWarning: pandas.Float64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n", 26 | " from pandas import (to_datetime, Int64Index, DatetimeIndex, Period,\n" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "from scipy.stats import rankdata\n", 32 | "import scipy as sp\n", 33 | "import numpy as np\n", 34 | "import pandas as pd\n", 35 | "import statsmodels.api as sm\n", 36 | "pd.set_option('display.max_columns', None)" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "id": "0f1dc02e-3527-48be-8dab-fc33a224f668", 42 | "metadata": { 43 | "tags": [] 44 | }, 45 | "source": [ 46 | "### **二、数据初步清理:生成stktrd、stktrd2index、stktrd_code系列数据,并存入h5**" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "id": "e309d240-65c7-4918-86ef-307ea267ab57", 52 | "metadata": { 53 | "tags": [] 54 | }, 55 | "source": [ 56 | "#读取股票日交易数据和沪深300指数日交易数据\n", 57 | "stktrd = pd.read_csv(\"alldata.csv\", encoding=\"gbk\")\n", 58 | "index = pd.read_csv('TRD_Index300.csv')" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "id": "462bf10a-993b-4c14-ab4f-a28a2770f58c", 64 | "metadata": {}, 65 | "source": [ 66 | "#修改文件时间格式\n", 67 | "stktrd['Trddt'] = pd.to_datetime(stktrd['Trddt']) #时间格式\n", 68 | "index['Trddt'] = pd.to_datetime(index['Trddt']) #时间格式" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "id": "b2aadc41-72b1-40b3-8fc9-2bd019eae98e", 74 | "metadata": {}, 75 | "source": [ 76 | "#股票交易数据初步清理\n", 77 | "stktrd.drop(['Dsmvosd','Capchgdt', 'Markettype', 'IndustryName', 'Adjprcwd','Adjprcnd','Trdsta','Dretnd'],axis=1,inplace=True) #去除不需要的列\n", 78 | "stktrd.columns = ['code','date','open','high','low','close','volumn','amount','value','return','industry'] #修改列名" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "id": "70ee7172-fd9f-4bb3-aa75-d0d38643d800", 84 | "metadata": {}, 85 | "source": [ 86 | "#指数去掉不需要的行\n", 87 | "index.drop(columns = ['Indexcd'], inplace=True)\n", 88 | "index.rename(columns = {'Trddt':'date'},inplace=True)#给trddt重命名为date以方便合并\n", 89 | "#合并stktrd和index\n", 90 | "stktrd = stktrd.merge(index, on='date', how='left')" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "id": "6b5dcb6c-3b06-40f7-ac30-d47e9bc9dbd1", 97 | "metadata": { 98 | "tags": [] 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "#将stktrd数据转成透视表,有二级索引\n", 103 | "stktrd1 = pd.melt(stktrd, id_vars= ['code','date'], var_name = 'columns', value_name ='value')\n", 104 | "stktrd2index = stktrd1.pivot(index = ['columns','date'], columns='code', values = 'value')\n", 105 | "del stktrd1" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "id": "92fbf09f-7352-4971-8b99-3721515dc7d3", 111 | "metadata": {}, 112 | "source": [ 113 | "#将stktrd数据按code抽取出来,命名为stktrd_code\n", 114 | "#codelist = stktrd2index.columns.tolist() #生成code的list\n", 115 | "codelist = stktrd.code.unique().tolist()\n", 116 | "#遍历codelist,生成文件stktrd_1、stktrd_2、stktrd_4、stktrd_5...\n", 117 | "for cd in codelist:\n", 118 | " locals()['stktrd_' + str(cd)] = stktrd[stktrd['code'] == cd].reset_index(drop = True)" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "id": "515730c0-d513-42e1-a6ef-73a2b8bff401", 124 | "metadata": {}, 125 | "source": [ 126 | "stktrd_code = []\n", 127 | "for cd in codelist:\n", 128 | " stktrd_code.append('stktrd_'+ str(cd)) " 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "id": "5dd30f9a-3988-44e9-b6b7-8d2719965d55", 134 | "metadata": { 135 | "tags": [] 136 | }, 137 | "source": [ 138 | "#将数据存入alldata.h5,stktrd为初始股票交易和指数交易日度数据,stktrd2index为stktrd转成的二级索引数据,stktrd_code为stktrd抽取的一系列股票交易数据切片\n", 139 | "stktrd.to_hdf('alldata.h5',key='stktrd')\n", 140 | "stktrd2index.to_hdf('alldata.h5',key='stktrd2index')\n", 141 | "#将stktrd_code系列文件存入h5\n", 142 | "for cd in stktrd_code:\n", 143 | " locals()[cd].to_hdf('alldata.h5', key=cd)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "id": "edc08c1b-2b31-49ff-ad9a-792f0f9b26b6", 149 | "metadata": { 150 | "tags": [] 151 | }, 152 | "source": [ 153 | "### **三、读取处理好的hdf文件**" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 2, 159 | "id": "c14db747-41a8-48ae-9427-5f69eb4fe6ab", 160 | "metadata": { 161 | "tags": [] 162 | }, 163 | "outputs": [], 164 | "source": [ 165 | "#读取hdf文件,stktrd为初始股票交易和指数交易日度数据,stktrd2index为stktrd转成的二级索引数据,stktrd_code为stktrd抽取的一系列股票交易数据切片\n", 166 | "stktrd = pd.read_hdf('alldata.h5',key='stktrd')\n", 167 | "#stktrd2index = pd.read_hdf('alldata.h5',key='stktrd2index')" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 6, 173 | "id": "1fdee5e0-35df-4647-956f-26d89f4d2521", 174 | "metadata": {}, 175 | "outputs": [ 176 | { 177 | "data": { 178 | "text/plain": [ 179 | "4824" 180 | ] 181 | }, 182 | "execution_count": 6, 183 | "metadata": {}, 184 | "output_type": "execute_result" 185 | } 186 | ], 187 | "source": [ 188 | "len(stktrd.code.unique())" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 3, 194 | "id": "619632a3-9fa2-42fc-bb55-464d9960018c", 195 | "metadata": {}, 196 | "outputs": [ 197 | { 198 | "data": { 199 | "text/html": [ 200 | "
\n", 201 | "\n", 214 | "\n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | "
codedateopenhighlowclosevolumnamountvaluereturnindustryOpnindexHiindexLoindexClsindexRetindex
012010-01-0424.52024.58023.68023.710241922765.802495e+0873629834.50-0.027082480101.03592.4683597.7483535.2293535.229-0.011314
112010-01-0523.75023.90022.75023.300556499821.293477e+0972356606.65-0.017292480101.03545.1863577.5263497.6613564.0380.008149
212010-01-0623.25023.25022.72022.900412143139.444537e+0871114433.15-0.017167480101.03558.7003588.8323541.1733541.727-0.006260
312010-01-0722.90023.05022.40022.650355336858.041663e+0870338074.71-0.010917480101.03543.1603558.5563452.7693471.456-0.019841
412010-01-0822.50022.75022.35022.600288543066.506674e+0870182803.02-0.002208480101.03456.9083482.0833426.6983480.1300.002499
...................................................
86726049009572022-01-240.6330.6380.6150.6214791002.998840e+05216729.00-0.026646620201.04753.9414801.0984746.4484786.7380.001553
86726059009572022-01-250.6170.6200.6070.6113727792.281030e+05213239.00-0.016103620201.04761.9504781.3914678.2474678.451-0.022622
86726069009572022-01-260.6140.6170.6100.6142226001.362910e+05214286.000.004910620201.04697.0994718.9954648.1284712.3110.007237
86726079009572022-01-270.6100.6130.6050.6122451001.487580e+05213588.00-0.003257620201.04708.0994708.4364616.0274619.877-0.019615
86726089009572022-01-280.6150.6180.6080.6172149341.319240e+05215333.000.008170620201.04641.8144660.5194559.8334563.772-0.012144
\n", 448 | "

8672609 rows × 16 columns

\n", 449 | "
" 450 | ], 451 | "text/plain": [ 452 | " code date open high low close volumn \\\n", 453 | "0 1 2010-01-04 24.520 24.580 23.680 23.710 24192276 \n", 454 | "1 1 2010-01-05 23.750 23.900 22.750 23.300 55649982 \n", 455 | "2 1 2010-01-06 23.250 23.250 22.720 22.900 41214313 \n", 456 | "3 1 2010-01-07 22.900 23.050 22.400 22.650 35533685 \n", 457 | "4 1 2010-01-08 22.500 22.750 22.350 22.600 28854306 \n", 458 | "... ... ... ... ... ... ... ... \n", 459 | "8672604 900957 2022-01-24 0.633 0.638 0.615 0.621 479100 \n", 460 | "8672605 900957 2022-01-25 0.617 0.620 0.607 0.611 372779 \n", 461 | "8672606 900957 2022-01-26 0.614 0.617 0.610 0.614 222600 \n", 462 | "8672607 900957 2022-01-27 0.610 0.613 0.605 0.612 245100 \n", 463 | "8672608 900957 2022-01-28 0.615 0.618 0.608 0.617 214934 \n", 464 | "\n", 465 | " amount value return industry Opnindex Hiindex \\\n", 466 | "0 5.802495e+08 73629834.50 -0.027082 480101.0 3592.468 3597.748 \n", 467 | "1 1.293477e+09 72356606.65 -0.017292 480101.0 3545.186 3577.526 \n", 468 | "2 9.444537e+08 71114433.15 -0.017167 480101.0 3558.700 3588.832 \n", 469 | "3 8.041663e+08 70338074.71 -0.010917 480101.0 3543.160 3558.556 \n", 470 | "4 6.506674e+08 70182803.02 -0.002208 480101.0 3456.908 3482.083 \n", 471 | "... ... ... ... ... ... ... \n", 472 | "8672604 2.998840e+05 216729.00 -0.026646 620201.0 4753.941 4801.098 \n", 473 | "8672605 2.281030e+05 213239.00 -0.016103 620201.0 4761.950 4781.391 \n", 474 | "8672606 1.362910e+05 214286.00 0.004910 620201.0 4697.099 4718.995 \n", 475 | "8672607 1.487580e+05 213588.00 -0.003257 620201.0 4708.099 4708.436 \n", 476 | "8672608 1.319240e+05 215333.00 0.008170 620201.0 4641.814 4660.519 \n", 477 | "\n", 478 | " Loindex Clsindex Retindex \n", 479 | "0 3535.229 3535.229 -0.011314 \n", 480 | "1 3497.661 3564.038 0.008149 \n", 481 | "2 3541.173 3541.727 -0.006260 \n", 482 | "3 3452.769 3471.456 -0.019841 \n", 483 | "4 3426.698 3480.130 0.002499 \n", 484 | "... ... ... ... \n", 485 | "8672604 4746.448 4786.738 0.001553 \n", 486 | "8672605 4678.247 4678.451 -0.022622 \n", 487 | "8672606 4648.128 4712.311 0.007237 \n", 488 | "8672607 4616.027 4619.877 -0.019615 \n", 489 | "8672608 4559.833 4563.772 -0.012144 \n", 490 | "\n", 491 | "[8672609 rows x 16 columns]" 492 | ] 493 | }, 494 | "execution_count": 3, 495 | "metadata": {}, 496 | "output_type": "execute_result" 497 | } 498 | ], 499 | "source": [ 500 | "stktrd" 501 | ] 502 | }, 503 | { 504 | "cell_type": "code", 505 | "execution_count": 3, 506 | "id": "c1c9f19e-74b6-419c-a423-dc00b4fc24f7", 507 | "metadata": {}, 508 | "outputs": [], 509 | "source": [ 510 | "#生成代码列表以方便读取股票交易数据stktrd_1、stktrd_2、stktrd_4...\n", 511 | "codelist = stktrd.code.unique().tolist()\n", 512 | "stktrd_code = []\n", 513 | "for cd in codelist:\n", 514 | " stktrd_code.append('stktrd_'+ str(cd)) " 515 | ] 516 | }, 517 | { 518 | "cell_type": "code", 519 | "execution_count": 4, 520 | "id": "9e686eb9-4f81-4a49-9b7f-89c1fed30d80", 521 | "metadata": {}, 522 | "outputs": [], 523 | "source": [ 524 | "#读取stktrd_code系列文件(stktrd_1,stktrd_2...)\n", 525 | "for cd in stktrd_code:\n", 526 | " locals()[cd] = pd.read_hdf('alldata.h5', key=cd)" 527 | ] 528 | }, 529 | { 530 | "cell_type": "code", 531 | "execution_count": 5, 532 | "id": "0f939384-c993-47a2-afbd-b670a35f3cf5", 533 | "metadata": {}, 534 | "outputs": [], 535 | "source": [ 536 | "#将stktrd_code的名称等分成十份,分别命名为stktrd_code1,stktrd_code2...stktrd_code10\n", 537 | "def func(list, n=500):\n", 538 | " for i in range(0, len(list), n):\n", 539 | " yield list[i:i+n]\n", 540 | "\n", 541 | "split = func(stktrd_code, n=500)\n", 542 | "\n", 543 | "j = 0\n", 544 | "for i in split:\n", 545 | " j = j+1\n", 546 | " locals()['stktrd_code'+str(j)] = i" 547 | ] 548 | }, 549 | { 550 | "cell_type": "markdown", 551 | "id": "5c2d2192-2551-4f1a-829c-4ad18708d7cf", 552 | "metadata": { 553 | "jp-MarkdownHeadingCollapsed": true, 554 | "tags": [] 555 | }, 556 | "source": [ 557 | "### **四、重新定义一些函数使得代码与因子计算公式相匹配**" 558 | ] 559 | }, 560 | { 561 | "cell_type": "code", 562 | "execution_count": 6, 563 | "id": "df4c538c-89f7-4f76-88a5-cf2af4a13195", 564 | "metadata": { 565 | "tags": [] 566 | }, 567 | "outputs": [], 568 | "source": [ 569 | "def Log(sr):\n", 570 | " #自然对数函数\n", 571 | " return np.log(sr)\n", 572 | "\n", 573 | "def Rank(sr):\n", 574 | " #列-升序排序并转化成百分比\n", 575 | " return sr.rank(pct=True)\n", 576 | "\n", 577 | "def Delta(sr,period):\n", 578 | " #period日差分\n", 579 | " return sr.diff(period)\n", 580 | "\n", 581 | "def Delay(sr,period):\n", 582 | " #period阶滞后项\n", 583 | " return sr.shift(period)\n", 584 | "\n", 585 | "def Corr(x,y,window):\n", 586 | " #window日滚动相关系数\n", 587 | " return x.rolling(window).corr(y)\n", 588 | "\n", 589 | "def Cov(x,y,window):\n", 590 | " #window日滚动协方差\n", 591 | " return x.rolling(window).cov(y)\n", 592 | "\n", 593 | "def Sum(sr,window):\n", 594 | " #window日滚动求和\n", 595 | " return sr.rolling(window).sum()\n", 596 | "\n", 597 | "def Prod(sr,window):\n", 598 | " #window日滚动求乘积\n", 599 | " #return df.rolling(window).apply(lambda x: np.prod(x))\n", 600 | " sr1 = sr.copy()\n", 601 | " for i in range(window-1):\n", 602 | " sr1 = sr1*sr.shift(i+1)\n", 603 | " return sr1 \n", 604 | "\n", 605 | "def Mean(sr,window):\n", 606 | " #window日滚动求均值\n", 607 | " return sr.rolling(window).mean()\n", 608 | "\n", 609 | "def Std(sr,window):\n", 610 | " #window日滚动求标准差\n", 611 | " return sr.rolling(window).std()\n", 612 | "\n", 613 | "def Tsrank(sr, window):\n", 614 | " #window日序列末尾值的顺位\n", 615 | " #return df.rolling(window).apply(lambda x: rankdata(x)[-1])\n", 616 | " sr1 = sr.copy()\n", 617 | " sr1[0:window] = np.nan\n", 618 | " for i in range(window,len(sr)+1):\n", 619 | " sr1.iloc[i-1:i] = sr.iloc[i-window:i].rank().iloc[-1]\n", 620 | " return sr1\n", 621 | " \n", 622 | "def Tsmax(sr, window):\n", 623 | " #window日滚动求最大值 \n", 624 | " return sr.rolling(window).max()\n", 625 | "\n", 626 | "def Tsmin(sr, window):\n", 627 | " #window日滚动求最小值 \n", 628 | " return sr.rolling(window).min()\n", 629 | "\n", 630 | "def Sign(sr):\n", 631 | " #符号函数\n", 632 | " df = sr.to_frame()\n", 633 | " df1 = df.copy()\n", 634 | " df1[df1 > 0] = 1\n", 635 | " df1[df1 < 0] = -1\n", 636 | " df1[df1 == 0] = 0\n", 637 | " return df1.iloc[:,0]\n", 638 | "\n", 639 | "def Max(sr,n):\n", 640 | " #比较取大函数\n", 641 | " df = sr.to_frame()\n", 642 | " df1 = df.copy()\n", 643 | " df1[df1 < n] = n\n", 644 | " return df1.iloc[:,0]\n", 645 | "\n", 646 | "def Max2(sr1,sr2):\n", 647 | " #比较取小函数\n", 648 | " sr12 = sr1 - sr2\n", 649 | " df12 = sr12.to_frame()\n", 650 | " df12[df12 < 0] = 0\n", 651 | " sr12 = df12.iloc[:,0]\n", 652 | " sr12 = sr12 + sr2\n", 653 | " return sr12\n", 654 | "\n", 655 | "def Min(sr,n):\n", 656 | " #比较取小函数\n", 657 | " df = sr.to_frame()\n", 658 | " df1 = df.copy()\n", 659 | " df1[df1 > n] = n\n", 660 | " return df1.iloc[:,0]\n", 661 | "\n", 662 | "def Min2(sr1,sr2):\n", 663 | " #比较取小函数\n", 664 | " sr12 = sr1 - sr2\n", 665 | " df12 = sr12.to_frame()\n", 666 | " df12[df12 > 0] = 0\n", 667 | " sr12 = df12.iloc[:,0]\n", 668 | " sr12 = sr12 + sr2\n", 669 | " return sr12\n", 670 | "\n", 671 | "def Sma(sr,n,m):\n", 672 | " #sma均值\n", 673 | " #df1 = df.ewm(alpha=m/n).mean()\n", 674 | " return sr.ewm(alpha=m/n, adjust=False).mean()\n", 675 | "\n", 676 | "def Abs(sr):\n", 677 | " #求绝对值\n", 678 | " return sr.abs()\n", 679 | "\n", 680 | "def Sequence(n):\n", 681 | " #生成 1~n 的等差序列\n", 682 | " #return np.arange(1,n+1)\n", 683 | " return pd.Series(np.arange(1,n+1).tolist())\n", 684 | "\n", 685 | "'''\n", 686 | "def Regbeta(df,B,window):\n", 687 | " #回归求系数\n", 688 | " #temp=A.rolling(n).apply(lambda x:sp.stats.linregress(x,B)) \n", 689 | " #result = sm.OLS(A,B).fit()\n", 690 | " df1 = df.copy()\n", 691 | " df1.iloc[0:window] = None\n", 692 | " for i in range(window,len(df)+1):\n", 693 | " result = df.iloc[i-window:i,:].apply(lambda x: sp.stats.linregress(x,B) ,axis=0)\n", 694 | " df1.iloc[i-1,:] = result.iloc[0,:]\n", 695 | " return df1'''\n", 696 | "\n", 697 | "def Decaylinear(sr, window): #将dataframe运算转成np数组运算\n", 698 | "\n", 699 | " weights = np.arange(1,window+1,1)\n", 700 | " y = weights / weights.sum() #y是和为1的权重\n", 701 | " sr1 = sr.copy()\n", 702 | " \n", 703 | " for row in range(window - 1, sr.shape[0]):\n", 704 | " x = sr.iloc[row - window + 1: row + 1]\n", 705 | " sr1.iloc[row] = (x*y).sum()\n", 706 | " return sr1\n", 707 | "\n", 708 | "def Lowday(sr,window):\n", 709 | " #计算sr前window期时间序列中最小值距离当前时点的间隔\n", 710 | " sr1 = sr.copy()\n", 711 | " sr1[0:window] = np.nan\n", 712 | " for i in range(window, len(sr)+1):\n", 713 | " sr1.iloc[i-1:i] = window - 1 - sr.iloc[i-window:i].argmin()\n", 714 | " return sr1\n", 715 | "\n", 716 | "def Highday(sr,window):\n", 717 | " #计算sr前window期时间序列中最大值距离当前时点的间隔\n", 718 | " sr1 = sr.copy()\n", 719 | " sr1[0:window] = np.nan\n", 720 | " for i in range(window, len(sr)+1):\n", 721 | " sr1.iloc[i-1:i] = window - 1 - sr.iloc[i-window:i].argmax()\n", 722 | " return sr1\n", 723 | "\n", 724 | "def Wma(sr,window):\n", 725 | " weights = 0.9*np.arange(window-1,0-1,-1)\n", 726 | " sr1 = sr.copy()\n", 727 | " for row in range(window-1, len(sr)):\n", 728 | " sr1.iloc[0:window-1] = np.nan\n", 729 | " x = sr.iloc[row-window+1:row+1]\n", 730 | " sr1.iloc[row] = (x*weights).sum()\n", 731 | " return sr1\n", 732 | "\n", 733 | "def Count(part,window):\n", 734 | " #计算前n期满足条件condition的样本个数,此时输入的part为0、1变量\n", 735 | " part1 = pd.Series(np.zeros(part.shape))\n", 736 | " part1[0:window-1] = np.nan\n", 737 | " for i in range(window,len(part)+1): \n", 738 | " part1.iloc[i-1:i] = part.iloc[i-20:i].value_counts().get(1)\n", 739 | " return part1\n", 740 | "\n", 741 | "def Sumif(part,window):\n", 742 | " #对前n项条件求和,part为条件筛选后的数据\n", 743 | " part1 = pd.Series(np.zeros(part.shape))\n", 744 | " part1[0:window-1] = np.nan\n", 745 | " for i in range(window,len(part)+1): \n", 746 | " part1.iloc[i-1:i] = part.iloc[i-window:i].sum()\n", 747 | " return part1" 748 | ] 749 | }, 750 | { 751 | "cell_type": "markdown", 752 | "id": "d54d65dc-23a9-47f1-add3-f7cd576b81b7", 753 | "metadata": { 754 | "tags": [] 755 | }, 756 | "source": [ 757 | "### **五、定义alpha类**" 758 | ] 759 | }, 760 | { 761 | "cell_type": "code", 762 | "execution_count": 78, 763 | "id": "6d80bb05-b3f3-46a7-8086-15fcaa05632c", 764 | "metadata": { 765 | "tags": [] 766 | }, 767 | "outputs": [], 768 | "source": [ 769 | "class Alphas:\n", 770 | " def __init__(self, stktrd):\n", 771 | "\n", 772 | " self.open = stktrd['open'] #开盘价\n", 773 | " self.high = stktrd['high'] #最高价\n", 774 | " self.low = stktrd['low'] #最低价\n", 775 | " self.close = stktrd['close']#收盘价\n", 776 | " self.close_prev = stktrd['close'].shift(1)#前一天收盘价\n", 777 | " self.volume = stktrd['volumn']#交易量\n", 778 | " self.value = stktrd['value']#公司总市值\n", 779 | " self.amount = stktrd['amount']#交易额\n", 780 | " self.returns = stktrd['return'] #每日收益率\n", 781 | " self.vwap = stktrd['amount']/(stktrd['volumn']+1)#交易均价\n", 782 | " self.benchmark_open = stktrd['Opnindex']#指数开盘价series\n", 783 | " self.benchmark_close = stktrd['Clsindex']#指数收盘价series\n", 784 | " \n", 785 | " def alpha_1(self): #平均1751个数据\n", 786 | " ##### (-1 * CORR(RANK(DELTA(LOG(VOLUME), 1)), RANK(((CLOSE - OPEN) / OPEN)), 6))#### \n", 787 | " return (-1 * Corr(Rank(Delta(Log(self.volume), 1)), Rank(((self.close - self.open) / self.open)), 6))\n", 788 | " \n", 789 | " def alpha_2(self): #1783\n", 790 | " ##### -1 * delta((((close-low)-(high-close))/(high-low)),1))####\n", 791 | " return -1*Delta((((self.close-self.low)-(self.high-self.close))/(self.high-self.low)),1) \n", 792 | " \n", 793 | " def alpha_3(self): \n", 794 | " ##### SUM((CLOSE=DELAY(CLOSE,1)?0:CLOSE-(CLOSE>DELAY(CLOSE,1)?MIN(LOW,DELAY(CLOSE,1)):MAX(HIGH,DELAY(CLOSE,1)))),6) ####\n", 795 | " cond1 = (self.close == Delay(self.close,1))\n", 796 | " cond2 = (self.close > Delay(self.close,1))\n", 797 | " cond3 = (self.close < Delay(self.close,1))\n", 798 | " part = pd.Series(np.zeros(self.close.shape))\n", 799 | " part[cond1] = 0\n", 800 | " part[cond2] = self.close - Min2(self.low,Delay(self.close,1))\n", 801 | " part[cond3] = self.close - Max2(self.high,Delay(self.close,1))\n", 802 | " return Sum(part, 6)\n", 803 | " \n", 804 | " def alpha_4(self): \n", 805 | " #####((((SUM(CLOSE, 8) / 8) + STD(CLOSE, 8)) < (SUM(CLOSE, 2) / 2)) ? (-1 * 1) : (((SUM(CLOSE, 2) / 2) <((SUM(CLOSE, 8) / 8) - STD(CLOSE, 8))) ? 1 : (((1 < (VOLUME / MEAN(VOLUME,20))) || ((VOLUME /MEAN(VOLUME,20)) == 1)) ? 1 : (-1 * 1))))\n", 806 | " cond1 = ((Sum(self.close, 8)/8 + Std(self.close, 8)) < Sum(self.close, 2)/2)\n", 807 | " cond2 = ((Sum(self.close, 8)/8 + Std(self.close, 8)) > Sum(self.close, 2)/2)\n", 808 | " cond3 = ((Sum(self.close, 8)/8 + Std(self.close, 8)) == Sum(self.close, 2)/2)\n", 809 | " cond4 = (self.volume/Mean(self.volume, 20) >= 1)\n", 810 | " part = pd.Series(np.zeros(self.close.shape))\n", 811 | " part[cond1] = -1\n", 812 | " part[cond2] = 1\n", 813 | " part[cond3][cond4] = 1\n", 814 | " part[cond3][~cond4] = -1\n", 815 | " \n", 816 | " return part\n", 817 | " \n", 818 | " def alpha_5(self): #1447\n", 819 | " ####(-1 * TSMAX(CORR(TSRANK(VOLUME, 5), TSRANK(HIGH, 5), 5), 3))###\n", 820 | " return -1*Tsmax(Corr(Tsrank(self.volume, 5),Tsrank(self.high, 5),5), 3)\n", 821 | " \n", 822 | " def alpha_6(self): #1779\n", 823 | " ####(RANK(SIGN(DELTA((((OPEN * 0.85) + (HIGH * 0.15))), 4)))* -1)### \n", 824 | " return -1*Rank(Sign(Delta(((self.open * 0.85) + (self.high * 0.15)), 4)))\n", 825 | " \n", 826 | " def alpha_7(self): #1782\n", 827 | " ####((RANK(MAX((VWAP - CLOSE), 3)) + RANK(MIN((VWAP - CLOSE), 3))) * RANK(DELTA(VOLUME, 3)))###\n", 828 | " return ((Rank(Max((self.vwap - self.close), 3)) + Rank(Min((self.vwap - self.close), 3))) * Rank(Delta(self.volume, 3)))\n", 829 | " \n", 830 | " def alpha_8(self): #1779\n", 831 | " ####RANK(DELTA(((((HIGH + LOW) / 2) * 0.2) + (VWAP * 0.8)), 4) * -1)### \n", 832 | " return Rank(Delta(((((self.high + self.low) / 2) * 0.2) + (self.vwap * 0.8)), 4) * -1)\n", 833 | " \n", 834 | " def alpha_9(self): #1790\n", 835 | " ####SMA(((HIGH+LOW)/2-(DELAY(HIGH,1)+DELAY(LOW,1))/2)*(HIGH-LOW)/VOLUME,7,2)### \n", 836 | " return Sma(((self.high+self.low)/2-(Delay(self.high,1)+Delay(self.low,1))/2)*(self.high-self.low)/self.volume,7,2)\n", 837 | " \n", 838 | " def alpha_10(self): \n", 839 | " ####(RANK(MAX(((RET < 0) ? STD(RET, 20) : CLOSE)^2),5))###\n", 840 | " cond = (self.returns < 0)\n", 841 | " part = pd.Series(np.zeros(self.close.shape))\n", 842 | " part[cond] = Std(self.returns, 20)\n", 843 | " part[~cond] = self.close\n", 844 | " part = part**2\n", 845 | " \n", 846 | " return Rank(Max(part, 5))\n", 847 | " \n", 848 | " def alpha_11(self): #1782\n", 849 | " ####SUM(((CLOSE-LOW)-(HIGH-CLOSE))/(HIGH-LOW)*VOLUME,6)### \n", 850 | " return Sum(((self.close-self.low)-(self.high-self.close))/(self.high-self.low)*self.volume,6)\n", 851 | " \n", 852 | " def alpha_12(self): #1779\n", 853 | " ####(RANK((OPEN - (SUM(VWAP, 10) / 10)))) * (-1 * (RANK(ABS((CLOSE - VWAP)))))### \n", 854 | " return (Rank((self.open - (Sum(self.vwap, 10) / 10)))) * (-1 * (Rank(Abs((self.close - self.vwap)))))\n", 855 | " \n", 856 | " def alpha_13(self): #1790\n", 857 | " ####(((HIGH * LOW)^0.5) - VWAP)###\n", 858 | " return (((self.high * self.low)**0.5) - self.vwap)\n", 859 | " \n", 860 | " def alpha_14(self): #1776\n", 861 | " ####CLOSE-DELAY(CLOSE,5)###\n", 862 | " return self.close-Delay(self.close,5)\n", 863 | " \n", 864 | " def alpha_15(self): #1790\n", 865 | " ####OPEN/DELAY(CLOSE,1)-1###\n", 866 | " return self.open/Delay(self.close,1)-1\n", 867 | " \n", 868 | " def alpha_16(self): #1736 \n", 869 | " ####(-1 * TSMAX(RANK(CORR(RANK(VOLUME), RANK(VWAP), 5)), 5))###\n", 870 | " return (-1 * Tsmax(Rank(Corr(Rank(self.volume), Rank(self.vwap), 5)), 5))\n", 871 | " \n", 872 | " def alpha_17(self): #1776 \n", 873 | " ####RANK((VWAP - MAX(VWAP, 15)))^DELTA(CLOSE, 5)###\n", 874 | " return Rank((self.vwap - Max(self.vwap, 15)))**Delta(self.close, 5)\n", 875 | " \n", 876 | " def alpha_18(self): #1776 \n", 877 | " ####CLOSE/DELAY(CLOSE,5)###\n", 878 | " return self.close/Delay(self.close,5) \n", 879 | " \n", 880 | " def alpha_19(self): \n", 881 | " ####(CLOSE Delay(self.close,5))\n", 885 | " part = pd.Series(np.zeros(self.close.shape))\n", 886 | " part[cond1] = (self.close-Delay(self.close,5))/Delay(self.close,5)\n", 887 | " part[cond2] = 0\n", 888 | " part[cond3] = (self.close-Delay(self.close,5))/self.close\n", 889 | " \n", 890 | " return part\n", 891 | " \n", 892 | " def alpha_20(self): #1773 \n", 893 | " ####(CLOSE-DELAY(CLOSE,6))/DELAY(CLOSE,6)*100###\n", 894 | " return (self.close-Delay(self.close,6))/Delay(self.close,6)*100\n", 895 | " \n", 896 | " def alpha_21(self): #reg?\n", 897 | " ####REGBETA(MEAN(CLOSE,6),SEQUENCE(6))###\n", 898 | " return 0\n", 899 | " \n", 900 | " def alpha_22(self): #1736 \n", 901 | " ####SMA(((CLOSE-MEAN(CLOSE,6))/MEAN(CLOSE,6)-DELAY((CLOSE-MEAN(CLOSE,6))/MEAN(CLOSE,6),3)),12,1)###\n", 902 | " return Sma(((self.close-Mean(self.close,6))/Mean(self.close,6)-Delay((self.close-Mean(self.close,6))/Mean(self.close,6),3)),12,1)\n", 903 | " \n", 904 | " def alpha_23(self): \n", 905 | " ####SMA((CLOSE>DELAY(CLOSE,1)?STD(CLOSE,20):0),20,1) / (SMA((CLOSE>DELAY(CLOSE,1)?STD(CLOSE,20):0),20,1) + SMA((CLOSE<=DELAY(CLOSE,1)?STD(CLOSE,20):0),20,1))*100###\n", 906 | " cond = (self.close > Delay(self.close,1))\n", 907 | " part1 = pd.Series(np.zeros(self.close.shape))\n", 908 | " part1[cond] = Std(self.close,20)\n", 909 | " part1[~cond] = 0\n", 910 | " part2 = pd.Series(np.zeros(self.close.shape))\n", 911 | " part2[~cond] = Std(self.close,20)\n", 912 | " part2[cond] = 0\n", 913 | " \n", 914 | " return 100*Sma(part1,20,1)/(Sma(part1,20,1) + Sma(part2,20,1))\n", 915 | " \n", 916 | " def alpha_24(self): #1776 \n", 917 | " ####SMA(CLOSE-DELAY(CLOSE,5),5,1)###\n", 918 | " return Sma(self.close-Delay(self.close,5),5,1)\n", 919 | " \n", 920 | " def alpha_25(self): #886 数据量较少\n", 921 | " ####((-1 * RANK((DELTA(CLOSE, 7) * (1 - RANK(DECAYLINEAR((VOLUME / MEAN(VOLUME,20)), 9)))))) * (1 + RANK(SUM(RET, 250))))###\n", 922 | " return ((-1 * Rank((Delta(self.close, 7) * (1 - Rank(Decaylinear((self.volume / Mean(self.volume,20)), 9)))))) * (1 + Rank(Sum(self.returns, 250))))\n", 923 | " \n", 924 | " def alpha_26(self): #平均数据量914,获得的数据量较少 \n", 925 | " ####((((SUM(CLOSE, 7) / 7) - CLOSE)) + ((CORR(VWAP, DELAY(CLOSE, 5), 230))))###\n", 926 | " return ((((Sum(self.close, 7) / 7) - self.close)) + ((Corr(self.vwap, Delay(self.close, 5), 230))))\n", 927 | " \n", 928 | " def alpha_27(self): \n", 929 | " ####WMA((CLOSE-DELAY(CLOSE,3))/DELAY(CLOSE,3)*100+(CLOSE-DELAY(CLOSE,6))/DELAY(CLOSE,6)*100,12)###\n", 930 | " A = (self.close-Delay(self.close,3))/Delay(self.close,3)*100+(self.close-Delay(self.close,6))/Delay(self.close,6)*100\n", 931 | " return Wma(A, 12)\n", 932 | " \n", 933 | " def alpha_28(self): #1728 \n", 934 | " ####3*SMA((CLOSE-TSMIN(LOW,9))/(TSMAX(HIGH,9)-TSMIN(LOW,9))*100,3,1)-2*SMA(SMA((CLOSE-TSMIN(LOW,9))/(MAX(HIGH,9)-TSMAX(LOW,9))*100,3,1),3,1)###\n", 935 | " return 3*Sma((self.close-Tsmin(self.low,9))/(Tsmax(self.high,9)-Tsmin(self.low,9))*100,3,1)-2*Sma(Sma((self.close-Tsmin(self.low,9))/(Max(self.high,9)-Tsmax(self.low,9))*100,3,1),3,1)\n", 936 | " \n", 937 | " def alpha_29(self): #1773 \n", 938 | " ####(CLOSE-DELAY(CLOSE,6))/DELAY(CLOSE,6)*VOLUME###\n", 939 | " return (self.close-Delay(self.close,6))/Delay(self.close,6)*self.volume\n", 940 | " \n", 941 | " def alpha_30(self): #reg?\n", 942 | " ####WMA((REGRESI(CLOSE/DELAY(CLOSE)-1,MKT,SMB,HML, 60))^2,20)###\n", 943 | " return 0\n", 944 | " \n", 945 | " def alpha_31(self): #1714\n", 946 | " ####(CLOSE-MEAN(CLOSE,12))/MEAN(CLOSE,12)*100###\n", 947 | " return (self.close-Mean(self.close,12))/Mean(self.close,12)*100\n", 948 | " \n", 949 | " def alpha_32(self): #1505\n", 950 | " ####(-1 * SUM(RANK(CORR(RANK(HIGH), RANK(VOLUME), 3)), 3))###\n", 951 | " return (-1 * Sum(Rank(Corr(Rank(self.high), Rank(self.volume), 3)), 3))\n", 952 | " \n", 953 | " def alpha_33(self): #904 数据量较少\n", 954 | " ####((((-1 * TSMIN(LOW, 5)) + DELAY(TSMIN(LOW, 5), 5)) * RANK(((SUM(RET, 240) - SUM(RET, 20)) / 220))) *TSRANK(VOLUME, 5))###\n", 955 | " return ((((-1 * Tsmin(self.low, 5)) + Delay(Tsmin(self.low, 5), 5)) * Rank(((Sum(self.returns, 240) - Sum(self.returns, 20)) / 220))) *Tsrank(self.volume, 5))\n", 956 | " \n", 957 | " def alpha_34(self): #1714\n", 958 | " ####MEAN(CLOSE,12)/CLOSE###\n", 959 | " return Mean(self.close,12)/self.close\n", 960 | " \n", 961 | " def alpha_35(self): #1790 (OPEN * 0.65) +(OPEN *0.35)有问题\n", 962 | " ####(MIN(RANK(DECAYLINEAR(DELTA(OPEN, 1), 15)), RANK(DECAYLINEAR(CORR((VOLUME), ((OPEN * 0.65) +(OPEN *0.35)), 17),7))) * -1)###\n", 963 | " return (Min2(Rank(Decaylinear(Delta(self.open, 1), 15)), Rank(Decaylinear(Corr((self.volume), ((self.open * 0.65) +(self.open *0.35)), 17),7))) * -1)\n", 964 | " \n", 965 | " def alpha_36(self): #1714\n", 966 | " ####RANK(SUM(CORR(RANK(VOLUME), RANK(VWAP),6), 2))###\n", 967 | " return Rank(Sum(Corr(Rank(self.volume), Rank(self.vwap),6 ), 2))\n", 968 | " \n", 969 | " def alpha_37(self): #1713\n", 970 | " ####(-1 * RANK(((SUM(OPEN, 5) * SUM(RET, 5)) - DELAY((SUM(OPEN, 5) * SUM(RET, 5)), 10))))###\n", 971 | " return (-1 * Rank(((Sum(self.open, 5) * Sum(self.returns, 5)) - Delay((Sum(self.open, 5) * Sum(self.returns, 5)), 10))))\n", 972 | " \n", 973 | " def alpha_38(self): \n", 974 | " ####(((SUM(HIGH, 20) / 20) < HIGH) ? (-1 * DELTA(HIGH, 2)) : 0)\n", 975 | " cond = ((Sum(self.high, 20) / 20) < self.high)\n", 976 | " part = pd.Series(np.zeros(self.close.shape))\n", 977 | " part[cond] = -1 * Delta(self.high, 2)\n", 978 | " part[~cond] = 0\n", 979 | " \n", 980 | " return part\n", 981 | " \n", 982 | " def alpha_39(self): #1666\n", 983 | " ####((RANK(DECAYLINEAR(DELTA((CLOSE), 2),8)) - RANK(DECAYLINEAR(CORR(((VWAP * 0.3) + (OPEN * 0.7)),SUM(MEAN(VOLUME,180), 37), 14), 12))) * -1)###\n", 984 | " return ((Rank(Decaylinear(Delta((self.close), 2),8)) - Rank(Decaylinear(Corr(((self.vwap * 0.3) + (self.open * 0.7)),Sum(Mean(self.volume,180), 37), 14), 12))) * -1)\n", 985 | " \n", 986 | " def alpha_40(self): \n", 987 | " ####SUM((CLOSE>DELAY(CLOSE,1)?VOLUME:0),26)/SUM((CLOSE<=DELAY(CLOSE,1)?VOLUME:0),26)*100###\n", 988 | " cond = (self.close > Delay(self.close,1))\n", 989 | " part1 = pd.Series(np.zeros(self.close.shape))\n", 990 | " part1[cond] = self.volume\n", 991 | " part1[~cond] = 0\n", 992 | " part2 = pd.Series(np.zeros(self.close.shape))\n", 993 | " part2[~cond] = self.volume\n", 994 | " part2[cond] = 0\n", 995 | " \n", 996 | " return Sum(part1,26)/Sum(part2,26)*100\n", 997 | " \n", 998 | " def alpha_41(self): #1782\n", 999 | " ####(RANK(MAX(DELTA((VWAP), 3), 5))* -1)###\n", 1000 | " return (Rank(Max(Delta((self.vwap), 3), 5))* -1)\n", 1001 | " \n", 1002 | " def alpha_42(self): #1399 数据量较少\n", 1003 | " ####((-1 * RANK(STD(HIGH, 10))) * CORR(HIGH, VOLUME, 10))###\n", 1004 | " return ((-1 * Rank(Std(self.high, 10))) * Corr(self.high, self.volume, 10))\n", 1005 | " \n", 1006 | " def alpha_43(self): \n", 1007 | " ####SUM((CLOSE>DELAY(CLOSE,1)?VOLUME:(CLOSE Delay(self.close,1))\n", 1009 | " cond2 = (self.close < Delay(self.close,1))\n", 1010 | " cond3 = (self.close == Delay(self.close,1))\n", 1011 | " part = pd.Series(np.zeros(self.close.shape))\n", 1012 | " part[cond1] = self.volume\n", 1013 | " part[cond2] = -self.volume\n", 1014 | " part[cond3] = 0\n", 1015 | " \n", 1016 | " return Sum(part,6)\n", 1017 | " \n", 1018 | " def alpha_44(self): #1748\n", 1019 | " ####(TSRANK(DECAYLINEAR(CORR(((LOW )), MEAN(VOLUME,10), 7), 6),4) + TSRANK(DECAYLINEAR(DELTA((VWAP),3), 10), 15))###\n", 1020 | " return (Tsrank(Decaylinear(Corr(((self.low)), Mean(self.volume,10), 7), 6),4) + Tsrank(Decaylinear(Delta((self.vwap),3), 10), 15))\n", 1021 | " \n", 1022 | " def alpha_45(self): #1070 数据量较少\n", 1023 | " ####(RANK(DELTA((((CLOSE * 0.6) + (OPEN *0.4))), 1)) * RANK(CORR(VWAP, MEAN(VOLUME,150), 15)))###\n", 1024 | " return (Rank(Delta((((self.close * 0.6) + (self.open *0.4))), 1)) * Rank(Corr(self.vwap, Mean(self.volume,150), 15)))\n", 1025 | " \n", 1026 | " def alpha_46(self): #1630\n", 1027 | " ####(MEAN(CLOSE,3)+MEAN(CLOSE,6)+MEAN(CLOSE,12)+MEAN(CLOSE,24))/(4*CLOSE)###\n", 1028 | " return (Mean(self.close,3)+Mean(self.close,6)+Mean(self.close,12)+Mean(self.close,24))/(4*self.close)\n", 1029 | " \n", 1030 | " def alpha_47(self): #1759\n", 1031 | " ####SMA((TSMAX(HIGH,6)-CLOSE)/(TSMAX(HIGH,6)-TSMIN(LOW,6))*100,9,1)###\n", 1032 | " return Sma((Tsmax(self.high,6)-self.close)/(Tsmax(self.high,6)-Tsmin(self.low,6))*100,9,1)\n", 1033 | " \n", 1034 | " def alpha_48(self): #1657\n", 1035 | " ####(-1*((RANK(((SIGN((CLOSE - DELAY(CLOSE, 1))) + SIGN((DELAY(CLOSE, 1) - DELAY(CLOSE, 2)))) + SIGN((DELAY(CLOSE, 2) - DELAY(CLOSE, 3)))))) * SUM(VOLUME, 5)) / SUM(VOLUME, 20))###\n", 1036 | " return (-1*((Rank(((Sign((self.close - Delay(self.close, 1))) + Sign((Delay(self.close, 1) - Delay(self.close, 2)))) + Sign((Delay(self.close, 2) - Delay(self.close, 3)))))) * Sum(self.volume, 5)) / Sum(self.volume, 20))\n", 1037 | " \n", 1038 | " def alpha_49(self): \n", 1039 | " ####SUM(((HIGH+LOW)>=(DELAY(HIGH,1)+DELAY(LOW,1))?0:MAX(ABS(HIGH-DELAY(HIGH,1)),ABS(LOW-DELAY(LOW,1)))),12) / (SUM(((HIGH+LOW)>=(DELAY(HIGH,1)+DELAY(LOW,1))?0:MAX(ABS(HIGH-DELAY(HIGH,1)),ABS(LOW-DELAY(LOW,1)))),12) + SUM(((HIGH+LOW)<=(DELAY(HIGH,1)+DELAY(LOW,1))?0:MAX(ABS(HIGH-DELAY(HIGH,1)),ABS(LOW-DELAY(LOW,1)))),12))\n", 1040 | " cond = ((self.high + self.low) > (Delay(self.high,1) + Delay(self.low,1)))\n", 1041 | " part1 = pd.Series(np.zeros(self.close.shape))\n", 1042 | " part1[cond] = 0\n", 1043 | " part1[~cond] = Max2(Abs(self.high - Delay(self.high,1)), Abs(self.low - Delay(self.low,1)))\n", 1044 | " part2 = pd.Series(np.zeros(self.close.shape))\n", 1045 | " part2[~cond] = 0\n", 1046 | " part2[cond] = Max2(Abs(self.high - Delay(self.high,1)), Abs(self.low - Delay(self.low,1)))\n", 1047 | " \n", 1048 | " return Sum(part1, 12) / (Sum(part1, 12) + Sum(part2, 12))\n", 1049 | " \n", 1050 | " def alpha_50(self): \n", 1051 | " ####SUM(((HIGH+LOW)<=(DELAY(HIGH,1)+DELAY(LOW,1))?0:MAX(ABS(HIGH-DELAY(HIGH,1)),ABS(LOW-DELAY(LOW,1)))),12)/(SUM(((HIGH+LOW)<=(DELAY(HIGH,1)+DELAY(LOW,1))?0:MAX(ABS(HIGH-DELAY(HIGH,1)),ABS(LOW-DELAY(LOW,1)))),12)+SUM(((HIGH+LOW)>=(DELAY(HIGH,1)+DELAY(LOW,1))?0:MAX(ABS(HIGH-DELAY(HIGH,1)),ABS(LOW-DELAY(LOW,1)))),12))-SUM(((HIGH+LOW)>=(DELAY(HIGH,1)+DELAY(LOW,1))?0:MAX(ABS(HIGH-DELAY(HIGH,1)),ABS(LOW-DELAY(LOW,1)))),12)/(SUM(((HIGH+LOW)>=(DELAY(HIGH,1)+DELAY(LOW,1))?0:MAX(ABS(HIGH-DELAY(HIGH,1)),ABS(LOW-DELAY(LOW,1)))),12)+SUM(((HIGH+LOW)<=(DELAY(HIGH,1)+DELAY(LOW,1))?0:MAX(ABS(HIGH-DELAY(HIGH,1)),ABS(LOW-DELAY(LOW,1)))),12))###\n", 1052 | " cond = ((self.high + self.low) <= (Delay(self.high,1) + Delay(self.low,1)))\n", 1053 | " part1 = pd.Series(np.zeros(self.close.shape))\n", 1054 | " part1[cond] = 0\n", 1055 | " part1[~cond] = Max2(Abs(self.high - Delay(self.high,1)), Abs(self.low - Delay(self.low,1)))\n", 1056 | " part2 = pd.Series(np.zeros(self.close.shape))\n", 1057 | " part2[~cond] = 0\n", 1058 | " part2[cond] = Max2(Abs(self.high - Delay(self.high,1)), Abs(self.low - Delay(self.low,1)))\n", 1059 | " \n", 1060 | " return (Sum(part1, 12) - Sum(part2, 12)) / (Sum(part1, 12) + Sum(part2, 12)) \n", 1061 | "\n", 1062 | " def alpha_51(self): \n", 1063 | " ####SUM(((HIGH+LOW)<=(DELAY(HIGH,1)+DELAY(LOW,1))?0:MAX(ABS(HIGH-DELAY(HIGH,1)),ABS(LOW-DELAY(LOW,1)))),12) / (SUM(((HIGH+LOW)<=(DELAY(HIGH,1)+DELAY(LOW,1))?0:MAX(ABS(HIGH-DELAY(HIGH,1)),ABS(LOW-DELAY(LOW,1)))),12)+SUM(((HIGH+LOW)>=(DELAY(HIGH,1)+DELAY(LOW,1))?0:MAX(ABS(HIGH-DELAY(HIGH,1)),ABS(LOW-DELAY(LOW,1)))),12))###\n", 1064 | " cond = ((self.high + self.low) <= (Delay(self.high,1) + Delay(self.low,1)))\n", 1065 | " part1 = pd.Series(np.zeros(self.close.shape))\n", 1066 | " part1[cond] = 0\n", 1067 | " part1[~cond] = Max2(Abs(self.high - Delay(self.high,1)), Abs(self.low - Delay(self.low,1)))\n", 1068 | " part2 = pd.Series(np.zeros(self.close.shape))\n", 1069 | " part2[~cond] = 0\n", 1070 | " part2[cond] = Max2(Abs(self.high - Delay(self.high,1)), Abs(self.low - Delay(self.low,1)))\n", 1071 | " \n", 1072 | " return Sum(part1, 12) / (Sum(part1, 12) + Sum(part2, 12))\n", 1073 | " \n", 1074 | " def alpha_52(self): #1611\n", 1075 | " ####SUM(MAX(0,HIGH-DELAY((HIGH+LOW+CLOSE)/3,1)),26)/SUM(MAX(0,DELAY((HIGH+LOW+CLOSE)/3,1)-L),26)*100###\n", 1076 | " return Sum(Max(self.high-Delay((self.high+self.low+self.close)/3,1),0),26)/Sum(Max(Delay((self.high+self.low+self.close)/3,1)-self.low, 0),26)*100\n", 1077 | " \n", 1078 | " def alpha_53(self): \n", 1079 | " ####COUNT(CLOSE>DELAY(CLOSE,1),12)/12*100###\n", 1080 | " cond = (self.close > Delay(self.close,1))\n", 1081 | " part = pd.Series(np.zeros(self.close.shape))\n", 1082 | " part[cond] = 1 #把满足条件的记为1,之后统计1的个数\n", 1083 | " part1 = pd.Series(np.zeros(self.close.shape))\n", 1084 | " part1[0:12] = np.nan\n", 1085 | " for i in range(12,len(part1)+1): \n", 1086 | " part1.iloc[i-1:i] = part.iloc[i-12:i].value_counts().get(1)\n", 1087 | " \n", 1088 | " return part1\n", 1089 | " \n", 1090 | " def alpha_54(self): #1729\n", 1091 | " ####(-1 * RANK((STD(ABS(CLOSE - OPEN)) + (CLOSE - OPEN)) + CORR(CLOSE, OPEN,10)))###\n", 1092 | " return (-1 * Rank(((Abs(self.close - self.open)).std() + (self.close - self.open)) + Corr(self.close, self.open,10)))\n", 1093 | " \n", 1094 | " def alpha_55(self): #公式有问题\n", 1095 | " ####SUM(16*(CLOSE-DELAY(CLOSE,1)+(CLOSE-OPEN)/2+DELAY(CLOSE,1)-DELAY(OPEN,1))/((ABS(HIGH-DELAY(CLOSE,1))>ABS(LOW-DELAY(CLOSE,1)) & ABS(HIGH-DELAY(CLOSE,1))>ABS(HIGH-DELAY(LOW,1))?ABS(HIGH-DELAY(CLOSE,1))+ABS(LOW-DELAY(CLOSE,1))/2 + ABS(DELAY(CLOSE,1)-DELAY(OPEN,1))/4:(ABS(LOW-DELAY(CLOSE,1))>ABS(HIGH-DELAY(LOW,1)) & ABS(LOW-DELAY(CLOSE,1))>ABS(HIGH-DELAY(CLOSE,1))?ABS(LOW-DELAY(CLOSE,1))+ABS(HIGH-DELAY(CLOSE,1))/2+ABS(DELAY(CLOSE,1)-DELAY(OPEN,1))/4:ABS(HIGH-DELAY(LOW,1))+ABS(DELAY(CLOSE,1)-DELAY(OPEN,1))/4)))*MAX(ABS(HIGH-DELAY(CLOSE,1)),ABS(LOW-DELAY(CLOSE,1))),20)\n", 1096 | " A = Abs(self.high - Delay(self.close, 1))\n", 1097 | " B = Abs(self.low - Delay(self.close, 1))\n", 1098 | " C = Abs(self.high - Delay(self.low, 1))\n", 1099 | " cond1 = ((A > B) & (A > C))\n", 1100 | " cond2 = ((B > C) & (B > A))\n", 1101 | " cond3 = ((C >= A) & (C >= B))\n", 1102 | " part0 = 16*(self.close + (self.close - self.open)/2 - Delay(self.open,1))\n", 1103 | " part1 = pd.Series(np.zeros(self.close.shape))\n", 1104 | " part1[cond1] = Abs(self.high - Delay(self.close, 1)) + Abs(self.low - Delay(self.close, 1))/2 + Abs(Delay(self.close, 1)-Delay(self.open, 1))/4\n", 1105 | " part1[cond2] = Abs(self.low - Delay(self.close, 1)) + Abs(self.high - Delay(self.close, 1))/2 + Abs(Delay(self.close, 1)-Delay(self.open, 1))/4\n", 1106 | " part1[cond3] = Abs(self.high - Delay(self.low, 1)) + Abs(Delay(self.close, 1)-Delay(self.open, 1))/4\n", 1107 | " \n", 1108 | " return Sum(part0/part1,20)\n", 1109 | " \n", 1110 | " def alpha_56(self): \n", 1111 | " ####(RANK((OPEN - TSMIN(OPEN, 12))) < RANK((RANK(CORR(SUM(((HIGH + LOW) / 2), 19),SUM(MEAN(VOLUME,40), 19), 13))^5)))###\n", 1112 | " A = Rank((self.open - Tsmin(self.open, 12)))\n", 1113 | " B = Rank((Rank(Corr(Sum(((self.high + self.low) / 2), 19),Sum(Mean(self.volume,40), 19), 13))**5))\n", 1114 | " cond = (A < B)\n", 1115 | " part = pd.Series(np.zeros(self.close.shape))\n", 1116 | " part[cond] = 1\n", 1117 | " #part[~cond] = 0\n", 1118 | " return part\n", 1119 | " \n", 1120 | " def alpha_57(self): #1736\n", 1121 | " ####SMA((CLOSE-TSMIN(LOW,9))/(TSMAX(HIGH,9)-TSMIN(LOW,9))*100,3,1)###\n", 1122 | " return Sma((self.close-Tsmin(self.low,9))/(Tsmax(self.high,9)-Tsmin(self.low,9))*100,3,1)\n", 1123 | " \n", 1124 | " def alpha_58(self): \n", 1125 | " ####COUNT(CLOSE>DELAY(CLOSE,1),20)/20*100###\n", 1126 | "\n", 1127 | " cond = (self.close > Delay(self.close,1))\n", 1128 | " part = pd.Series(np.zeros(self.close.shape))\n", 1129 | " part[cond] = 1 #把满足条件的记为1,之后统计1的个数\n", 1130 | " '''\n", 1131 | " part1 = pd.Series(np.zeros(self.close.shape))\n", 1132 | " part1[0:19] = np.nan\n", 1133 | " for i in range(20,len(part1)+1): \n", 1134 | " part1.iloc[i-1:i] = part.iloc[i-20:i].value_counts().get(1)\n", 1135 | " return part1'''\n", 1136 | " return Count(part,20)\n", 1137 | " \n", 1138 | " \n", 1139 | " def alpha_59(self): \n", 1140 | " ####SUM((CLOSE=DELAY(CLOSE,1)?0:CLOSE-(CLOSE>DELAY(CLOSE,1)?MIN(LOW,DELAY(CLOSE,1)):MAX(HIGH,DELAY(CLOSE,1)))),20)###\n", 1141 | " cond1 = (self.close == Delay(self.close,1))\n", 1142 | " cond2 = (self.close > Delay(self.close,1))\n", 1143 | " cond3 = (self.close < Delay(self.close,1))\n", 1144 | " part = pd.Series(np.zeros(self.close.shape))\n", 1145 | " part[cond1] = 0\n", 1146 | " part[cond2] = self.close - Min2(self.low,Delay(self.close,1))\n", 1147 | " part[cond3] = self.close - Max2(self.low,Delay(self.close,1))\n", 1148 | " \n", 1149 | " return Sum(part, 20)\n", 1150 | " \n", 1151 | " def alpha_60(self): #1635\n", 1152 | " ####SUM(((CLOSE-LOW)-(HIGH-CLOSE))/(HIGH-LOW)*VOLUME,20)###\n", 1153 | " return Sum(((self.close-self.low)-(self.high-self.close))/(self.high-self.low)*self.volume,20)\n", 1154 | "\n", 1155 | " def alpha_61(self): #1790\n", 1156 | " ####(MAX(RANK(DECAYLINEAR(DELTA(VWAP, 1), 12)),RANK(DECAYLINEAR(RANK(CORR((LOW),MEAN(VOLUME,80), 8)), 17))) * -1)###\n", 1157 | " return (Max2(Rank(Decaylinear(Delta(self.vwap, 1), 12)),Rank(Decaylinear(Rank(Corr((self.low),Mean(self.volume,80), 8)), 17))) * -1)\n", 1158 | " \n", 1159 | " def alpha_62(self): #1479\n", 1160 | " ####(-1 * CORR(HIGH, RANK(VOLUME), 5))###\n", 1161 | " return (-1 * Corr(self.high, Rank(self.volume), 5))\n", 1162 | " \n", 1163 | " def alpha_63(self): #1789\n", 1164 | " ####SMA(MAX(CLOSE-DELAY(CLOSE,1),0),6,1)/SMA(ABS(CLOSE-DELAY(CLOSE,1)),6,1)*100###\n", 1165 | " return Sma(Max(self.close-Delay(self.close,1),0),6,1)/Sma(Abs(self.close-Delay(self.close,1)),6,1)*100\n", 1166 | " \n", 1167 | " def alpha_64(self): #1774\n", 1168 | " ####(MAX(RANK(DECAYLINEAR(CORR(RANK(VWAP), RANK(VOLUME), 4), 4)),RANK(DECAYLINEAR(MAX(CORR(RANK(CLOSE), RANK(MEAN(VOLUME,60)), 4), 13), 14))) * -1)###\n", 1169 | " return (Max2(Rank(Decaylinear(Corr(Rank(self.vwap), Rank(self.volume), 4), 4)),Rank(Decaylinear(Max(Corr(Rank(self.close), Rank(Mean(self.volume,60)), 4), 13), 14))) * -1)\n", 1170 | " \n", 1171 | " def alpha_65(self): #1759\n", 1172 | " ####MEAN(CLOSE,6)/CLOSE###\n", 1173 | " return Mean(self.close,6)/self.close\n", 1174 | " \n", 1175 | " def alpha_66(self): #1759\n", 1176 | " ####(CLOSE-MEAN(CLOSE,6))/MEAN(CLOSE,6)*100###\n", 1177 | " return (self.close-Mean(self.close,6))/Mean(self.close,6)*100\n", 1178 | " \n", 1179 | " def alpha_67(self): #1759\n", 1180 | " ####SMA(MAX(CLOSE-DELAY(CLOSE,1),0),24,1)/SMA(ABS(CLOSE-DELAY(CLOSE,1)),24,1)*100###\n", 1181 | " return Sma(Max(self.close-Delay(self.close,1),0),24,1)/Sma(Abs(self.close-Delay(self.close,1)),24,1)*100\n", 1182 | " \n", 1183 | " def alpha_68(self): #1790\n", 1184 | " ####SMA(((HIGH+LOW)/2-(DELAY(HIGH,1)+DELAY(LOW,1))/2)*(HIGH-LOW)/VOLUME,15,2)###\n", 1185 | " return Sma(((self.high+self.low)/2-(Delay(self.high,1)+Delay(self.low,1))/2)*(self.high-self.low)/self.volume,15,2)\n", 1186 | " \n", 1187 | " def alpha_69(self): \n", 1188 | " ####(SUM(DTM,20)>SUM(DBM,20)? (SUM(DTM,20)-SUM(DBM,20))/SUM(DTM,20): (SUM(DTM,20)=SUM(DBM,20)?0: (SUM(DTM,20)-SUM(DBM,20))/SUM(DBM,20)))###\n", 1189 | " ####DTM (OPEN<=DELAY(OPEN,1)?0:MAX((HIGH-OPEN),(OPEN-DELAY(OPEN,1))))\n", 1190 | " ####DBM (OPEN>=DELAY(OPEN,1)?0:MAX((OPEN-LOW),(OPEN-DELAY(OPEN,1))))\n", 1191 | " cond1 = (self.open <= Delay(self.open,1))\n", 1192 | " cond2 = (self.open >= Delay(self.open,1))\n", 1193 | " \n", 1194 | " DTM = pd.Series(np.zeros(self.close.shape))\n", 1195 | " #DTM[cond1] = 0\n", 1196 | " DTM[~cond1] = Max2((self.high-self.open),(self.open-Delay(self.open,1)))\n", 1197 | " \n", 1198 | " DBM = pd.Series(np.zeros(self.close.shape))\n", 1199 | " #DBM[cond2] = 0\n", 1200 | " DBM[~cond2] = Max2((self.open-self.low),(self.open-Delay(self.open,1)))\n", 1201 | " \n", 1202 | " cond3 = (Sum(DTM,20) > Sum(DBM,20))\n", 1203 | " cond4 = (Sum(DTM,20)== Sum(DBM,20))\n", 1204 | " cond5 = (Sum(DTM,20) < Sum(DBM,20))\n", 1205 | " part = pd.Series(np.zeros(self.close.shape))\n", 1206 | " part[cond3] = (Sum(DTM,20)-Sum(DBM,20))/Sum(DTM,20)\n", 1207 | " #part[cond4] = 0\n", 1208 | " part[cond5] = (Sum(DTM,20)-Sum(DBM,20))/Sum(DBM,20)\n", 1209 | " return part\n", 1210 | " \n", 1211 | " def alpha_70(self): #1759\n", 1212 | " ####STD(AMOUNT,6)###\n", 1213 | " return Std(self.amount,6)\n", 1214 | " \n", 1215 | " def alpha_71(self): #1630\n", 1216 | " ####(CLOSE-MEAN(CLOSE,24))/MEAN(CLOSE,24)*100###\n", 1217 | " return (self.close-Mean(self.close,24))/Mean(self.close,24)*100\n", 1218 | " \n", 1219 | " def alpha_72(self): #1759\n", 1220 | " ####SMA((TSMAX(HIGH,6)-CLOSE)/(TSMAX(HIGH,6)-TSMIN(LOW,6))*100,15,1)###\n", 1221 | " return Sma((Tsmax(self.high,6)-self.close)/(Tsmax(self.high,6)-Tsmin(self.low,6))*100,15,1)\n", 1222 | " \n", 1223 | " def alpha_73(self): #1729\n", 1224 | " ####((TSRANK(DECAYLINEAR(DECAYLINEAR(CORR((CLOSE), VOLUME, 10), 16), 4), 5) - RANK(DECAYLINEAR(CORR(VWAP, MEAN(VOLUME,30), 4),3))) * -1)###\n", 1225 | " return ((Tsrank(Decaylinear(Decaylinear(Corr((self.close), self.volume, 10), 16), 4), 5) - Rank(Decaylinear(Corr(self.vwap, Mean(self.volume,30), 4),3))) * -1) \n", 1226 | " \n", 1227 | " def alpha_74(self): #1402\n", 1228 | " ####(RANK(CORR(SUM(((LOW * 0.35) + (VWAP * 0.65)), 20), SUM(MEAN(VOLUME,40), 20), 7)) + RANK(CORR(RANK(VWAP), RANK(VOLUME), 6)))###\n", 1229 | " return (Rank(Corr(Sum(((self.low * 0.35) + (self.vwap * 0.65)), 20), Sum(Mean(self.volume,40), 20), 7)) + Rank(Corr(Rank(self.vwap), Rank(self.volume), 6)))\n", 1230 | " \n", 1231 | " def alpha_75(self): \n", 1232 | " ####COUNT(CLOSE>OPEN & BANCHMARKINDEXCLOSEself.open)&(self.benchmark_closeDELAY(CLOSE,1)?VOLUME:(CLOSE Delay(self.close,1))\n", 1277 | " cond2 = (self.close < Delay(self.close,1))\n", 1278 | " cond3 = (self.close == Delay(self.close,1)) \n", 1279 | " part = pd.Series(np.zeros(self.close.shape))\n", 1280 | " part[cond1] = self.volume\n", 1281 | " part[cond2] = 0\n", 1282 | " part[cond3] = -self.volume \n", 1283 | " return Sum(part, 20)\n", 1284 | " \n", 1285 | " def alpha_85(self): #1657\n", 1286 | " ####(TSRANK((VOLUME / MEAN(VOLUME,20)), 20) * TSRANK((-1 * DELTA(CLOSE, 7)), 8))###\n", 1287 | " return (Tsrank((self.volume / Mean(self.volume,20)), 20) * Tsrank((-1 * Delta(self.close, 7)), 8))\n", 1288 | " \n", 1289 | " def alpha_86(self): \n", 1290 | " ####((0.25 < (((DELAY(CLOSE, 20) - DELAY(CLOSE, 10)) / 10) - ((DELAY(CLOSE, 10) - CLOSE) / 10))) ? (-1 * 1) :(((((DELAY(CLOSE, 20) - DELAY(CLOSE, 10)) / 10) - ((DELAY(CLOSE, 10) - CLOSE) / 10)) < 0) ?1 : ((-1 * 1) *(CLOSE - DELAY(CLOSE, 1)))))\n", 1291 | " A = (((Delay(self.close, 20) - Delay(self.close, 10)) / 10) - ((Delay(self.close, 10) - self.close) / 10))\n", 1292 | " cond1 = (A > 0.25)\n", 1293 | " cond2 = (A < 0.0)\n", 1294 | " cond3 = ((0 <= A) & (A <= 0.25))\n", 1295 | " part = pd.Series(np.zeros(self.close.shape))\n", 1296 | " part[cond1] = -1\n", 1297 | " part[cond2] = 1\n", 1298 | " part[cond3] = -1*(self.close - Delay(self.close, 1))\n", 1299 | " return part\n", 1300 | "\n", 1301 | " def alpha_87(self): #1741\n", 1302 | " ####((RANK(DECAYLINEAR(DELTA(VWAP, 4), 7)) + TSRANK(DECAYLINEAR(((((LOW * 0.9) + (LOW * 0.1)) - VWAP) /(OPEN - ((HIGH + LOW) / 2))), 11), 7)) * -1)###\n", 1303 | " return ((Rank(Decaylinear(Delta(self.vwap, 4), 7)) + Tsrank(Decaylinear(((((self.low * 0.9) + (self.low * 0.1)) - self.vwap) /(self.open - ((self.high + self.low) / 2))), 11), 7)) * -1)\n", 1304 | " \n", 1305 | " def alpha_88(self): #1745\n", 1306 | " ####(CLOSE-DELAY(CLOSE,20))/DELAY(CLOSE,20)*100###\n", 1307 | " return (self.close-Delay(self.close,20))/Delay(self.close,20)*100\n", 1308 | " \n", 1309 | " def alpha_89(self): #1797\n", 1310 | " ####2*(SMA(CLOSE,13,2)-SMA(CLOSE,27,2)-SMA(SMA(CLOSE,13,2)-SMA(CLOSE,27,2),10,2))###\n", 1311 | " return 2*(Sma(self.close,13,2)-Sma(self.close,27,2)-Sma(Sma(self.close,13,2)-Sma(self.close,27,2),10,2))\n", 1312 | " \n", 1313 | " def alpha_90(self): #1745\n", 1314 | " ####(RANK(CORR(RANK(VWAP), RANK(VOLUME), 5)) * -1)###\n", 1315 | " return (Rank(Corr(Rank(self.vwap), Rank(self.volume), 5)) * -1)\n", 1316 | " \n", 1317 | " def alpha_91(self): #1745\n", 1318 | " ####((RANK((CLOSE - MAX(CLOSE, 5)))*RANK(CORR((MEAN(VOLUME,40)), LOW, 5))) * -1)###\n", 1319 | " return ((Rank((self.close - Max(self.close, 5)))*Rank(Corr((Mean(self.volume,40)), self.low, 5))) * -1)\n", 1320 | " \n", 1321 | " def alpha_92(self): #1786\n", 1322 | " ####(MAX(RANK(DECAYLINEAR(DELTA(((CLOSE * 0.35) + (VWAP *0.65)), 2), 3)),TSRANK(DECAYLINEAR(ABS(CORR((MEAN(VOLUME,180)), CLOSE, 13)), 5), 15)) * -1)###\n", 1323 | " return (Max2(Rank(Decaylinear(Delta(((self.close * 0.35) + (self.vwap *0.65)), 2), 3)),Tsrank(Decaylinear(Abs(Corr((Mean(self.volume,180)), self.close, 13)), 5), 15)) * -1)\n", 1324 | " \n", 1325 | " def alpha_93(self): \n", 1326 | " ####SUM((OPEN>=DELAY(OPEN,1)?0:MAX((OPEN-LOW),(OPEN-DELAY(OPEN,1)))),20)###\n", 1327 | " cond = (self.open >= Delay(self.open,1))\n", 1328 | " part = pd.Series(np.zeros(self.close.shape))\n", 1329 | " #part[cond] = 0\n", 1330 | " part[~cond] = Max2((self.open-self.low),(self.open-Delay(self.open,1)))\n", 1331 | " return Sum(part, 20)\n", 1332 | " \n", 1333 | " def alpha_94(self): \n", 1334 | " ####SUM((CLOSE>DELAY(CLOSE,1)?VOLUME:(CLOSE Delay(self.close,1))\n", 1336 | " cond2 = (self.close < Delay(self.close,1))\n", 1337 | " cond3 = (self.close == Delay(self.close,1))\n", 1338 | " part = pd.Series(np.zeros(self.close.shape))\n", 1339 | " part[cond1] = self.volume\n", 1340 | " part[cond2] = -1*self.volume\n", 1341 | " #part[cond3] = 0\n", 1342 | " return Sum(part, 30)\n", 1343 | " \n", 1344 | " def alpha_95(self): #1657\n", 1345 | " ####STD(AMOUNT,20)###\n", 1346 | " return Std(self.amount,20)\n", 1347 | " \n", 1348 | " def alpha_96(self): #1736\n", 1349 | " ####SMA(SMA((CLOSE-TSMIN(LOW,9))/(TSMAX(HIGH,9)-TSMIN(LOW,9))*100,3,1),3,1)###\n", 1350 | " return Sma(Sma((self.close-Tsmin(self.low,9))/(Tsmax(self.high,9)-Tsmin(self.low,9))*100,3,1),3,1)\n", 1351 | " \n", 1352 | " def alpha_97(self): #1729\n", 1353 | " ####STD(VOLUME,10)###\n", 1354 | " return Std(self.volume,10)\n", 1355 | " \n", 1356 | " def alpha_98(self): \n", 1357 | " ####((((DELTA((SUM(CLOSE, 100) / 100), 100) / DELAY(CLOSE, 100)) < 0.05) || ((DELTA((SUM(CLOSE, 100) / 100), 100) /DELAY(CLOSE, 100)) == 0.05)) ? (-1 * (CLOSE - TSMIN(CLOSE, 100))) : (-1 * DELTA(CLOSE, 3)))###\n", 1358 | " cond = (Delta(Sum(self.close,100)/100, 100)/Delay(self.close, 100) <= 0.05)\n", 1359 | " part = pd.Series(np.zeros(self.close.shape))\n", 1360 | " part[cond] = -1 * (self.close - Tsmin(self.close, 100))\n", 1361 | " part[~cond] = -1 * Delta(self.close, 3)\n", 1362 | " return part\n", 1363 | " \n", 1364 | " def alpha_99(self): #1766\n", 1365 | " ####(-1 * Rank(Cov(Rank(self.close), Rank(self.volume), 5)))###\n", 1366 | " return (-1 * Rank(Cov(Rank(self.close), Rank(self.volume), 5)))\n", 1367 | " \n", 1368 | " def alpha_100(self): #1657\n", 1369 | " ####Std(self.volume,20)###\n", 1370 | " return Std(self.volume,20)\n", 1371 | " \n", 1372 | " def alpha_101(self): \n", 1373 | " ###((RANK(CORR(CLOSE, SUM(MEAN(VOLUME,30), 37), 15)) < RANK(CORR(RANK(((HIGH * 0.1) + (VWAP * 0.9))),RANK(VOLUME), 11))) * -1)\n", 1374 | " rank1 = Rank(Corr(self.close, Sum(Mean(self.volume,30), 37), 15))\n", 1375 | " rank2 = Rank(Corr(Rank(((self.high * 0.1) + (self.vwap * 0.9))),Rank(self.volume), 11))\n", 1376 | " cond = (rank10? CLOSE-DELAY(CLOSE,1):0),12) - SUM((CLOSE-DELAY(CLOSE,1)<0?ABS(CLOSE-DELAY(CLOSE,1)):0),12))/(SUM((CLOSE-DELAY(CLOSE,1)>0?CLOSE-DELAY(CLOSE,1):0),12) + SUM((CLOSE-DELAY(CLOSE,1)<0?ABS(CLOSE-DELAY(CLOSE,1)):0),12))*100 \n", 1424 | " cond = (self.close-Delay(self.close,1) > 0)\n", 1425 | " part1 = pd.Series(np.zeros(self.close.shape))\n", 1426 | " part1[cond] = self.close-Delay(self.close,1)\n", 1427 | " #part1[~cond] = 0\n", 1428 | " part2 = pd.Series(np.zeros(self.close.shape))\n", 1429 | " part2[~cond] = Abs(self.close-Delay(self.close,1))\n", 1430 | " #part2[cond] = 0\n", 1431 | " return (Sum(part1,12) - Sum(part2,12))/(Sum(part1,12) + Sum(part2,12))*100\n", 1432 | " \n", 1433 | " def alpha_113(self): #1587\n", 1434 | " ####(-1 * ((RANK((SUM(DELAY(CLOSE, 5), 20) / 20)) * CORR(CLOSE, VOLUME, 2)) * RANK(CORR(SUM(CLOSE, 5),SUM(CLOSE, 20), 2))))###\n", 1435 | " return (-1 * ((Rank((Sum(Delay(self.close, 5), 20) / 20)) * Corr(self.close, self.volume, 2)) * Rank(Corr(Sum(self.close, 5),Sum(self.close, 20), 2))))\n", 1436 | " \n", 1437 | " def alpha_114(self): #1751\n", 1438 | " ####((RANK(DELAY(((HIGH - LOW) / (SUM(CLOSE, 5) / 5)), 2)) * RANK(RANK(VOLUME))) / (((HIGH - LOW) /(SUM(CLOSE, 5) / 5)) / (VWAP - CLOSE)))###\n", 1439 | " return ((Rank(Delay(((self.high - self.low) / (Sum(self.close, 5) / 5)), 2)) * Rank(Rank(self.volume))) / (((self.high - self.low) /(Sum(self.close, 5) / 5)) / (self.vwap - self.close)))\n", 1440 | " \n", 1441 | " def alpha_115(self): #1527\n", 1442 | " ####(RANK(CORR(((HIGH * 0.9) + (CLOSE * 0.1)), MEAN(VOLUME,30), 10))^RANK(CORR(TSRANK(((HIGH + LOW) /2), 4), TSRANK(VOLUME, 10), 7)))###\n", 1443 | " return (Rank(Corr(((self.high * 0.9) + (self.close * 0.1)), Mean(self.volume,30), 10))**Rank(Corr(Tsrank(((self.high + self.low) /2), 4), Tsrank(self.volume, 10), 7)))\n", 1444 | " \n", 1445 | " def alpha_116(self): \n", 1446 | " ####REGBETA(CLOSE,SEQUENCE,20)###\n", 1447 | " return 0\n", 1448 | " \n", 1449 | " def alpha_117(self): #1786\n", 1450 | " ####((TSRANK(VOLUME, 32) * (1 - TSRANK(((CLOSE + HIGH) - LOW), 16))) * (1 - TSRANK(RET, 32)))###\n", 1451 | " return ((Tsrank(self.volume, 32) * (1 - Tsrank(((self.close + self.high) - self.low), 16))) * (1 - Tsrank(self.returns, 32)))\n", 1452 | " \n", 1453 | " def alpha_118(self): #1657\n", 1454 | " ####SUM(HIGH-OPEN,20)/SUM(OPEN-LOW,20)*100###\n", 1455 | " return Sum(self.high-self.open,20)/Sum(self.open-self.low,20)*100\n", 1456 | " \n", 1457 | " def alpha_119(self): #1626\n", 1458 | " ####(RANK(DECAYLINEAR(CORR(VWAP, SUM(MEAN(VOLUME,5), 26), 5), 7)) - RANK(DECAYLINEAR(TSRANK(MIN(CORR(RANK(OPEN), RANK(MEAN(VOLUME,15)), 21), 9), 7), 8)))###\n", 1459 | " return (Rank(Decaylinear(Corr(self.vwap, Sum(Mean(self.volume,5), 26), 5), 7)) - Rank(Decaylinear(Tsrank(Min(Corr(Rank(self.open), Rank(Mean(self.volume,15)), 21), 9), 7), 8)))\n", 1460 | " \n", 1461 | " def alpha_120(self): #1797\n", 1462 | " ####(RANK((VWAP - CLOSE)) / RANK((VWAP + CLOSE)))###\n", 1463 | " return (Rank((self.vwap - self.close)) / Rank((self.vwap + self.close)))\n", 1464 | " \n", 1465 | " def alpha_121(self): #972 数据量较少\n", 1466 | " ####((RANK((VWAP - MIN(VWAP, 12)))^TSRANK(CORR(TSRANK(VWAP, 20), TSRANK(MEAN(VOLUME,60), 2), 18), 3)) *-1)###\n", 1467 | " return ((Rank((self.vwap - Min(self.vwap, 12)))**Tsrank(Corr(Tsrank(self.vwap, 20), Tsrank(Mean(self.volume,60), 2), 18), 3)) *-1)\n", 1468 | " \n", 1469 | " def alpha_122(self): #1790\n", 1470 | " ####(SMA(SMA(SMA(LOG(CLOSE),13,2),13,2),13,2)-DELAY(SMA(SMA(SMA(LOG(CLOSE),13,2),13,2),13,2),1))/DELAY(SMA(SMA(SMA(LOG(CLOSE),13,2),13,2),13,2),1)###\n", 1471 | " return (Sma(Sma(Sma(Log(self.close),13,2),13,2),13,2)-Delay(Sma(Sma(Sma(Log(self.close),13,2),13,2),13,2),1))/Delay(Sma(Sma(Sma(Log(self.close),13,2),13,2),13,2),1)\n", 1472 | " \n", 1473 | " def alpha_123(self): \n", 1474 | " ####((RANK(CORR(SUM(((HIGH + LOW) / 2), 20), SUM(MEAN(VOLUME,60), 20), 9)) < RANK(CORR(LOW, VOLUME,6))) * -1)###\n", 1475 | " A = Rank(Corr(Sum(((self.high + self.low) / 2), 20), Sum(Mean(self.volume,60), 20), 9))\n", 1476 | " B = Rank(Corr(self.low, self.volume,6))\n", 1477 | " cond = (A < B)\n", 1478 | " part = pd.Series(np.zeros(self.close.shape))\n", 1479 | " part[cond] = -1\n", 1480 | " #part[~cond] = 0\n", 1481 | " return part\n", 1482 | " \n", 1483 | " def alpha_124(self): #1592\n", 1484 | " ####(CLOSE - VWAP) / DECAYLINEAR(RANK(TSMAX(CLOSE, 30)),2)###\n", 1485 | " return (self.close - self.vwap) / Decaylinear(Rank(Tsmax(self.close, 30)),2)\n", 1486 | " \n", 1487 | " def alpha_125(self): #1678\n", 1488 | " ####(RANK(DECAYLINEAR(CORR((VWAP), MEAN(VOLUME,80),17), 20)) / RANK(DECAYLINEAR(DELTA(((CLOSE * 0.5) + (VWAP * 0.5)), 3), 16)))###\n", 1489 | " return (Rank(Decaylinear(Corr((self.vwap), Mean(self.volume,80),17), 20)) / Rank(Decaylinear(Delta(((self.close * 0.5) + (self.vwap * 0.5)), 3), 16)))\n", 1490 | " \n", 1491 | " def alpha_126(self): #1797\n", 1492 | " ####(CLOSE+HIGH+LOW)/3###\n", 1493 | " return (self.close+self.high+self.low)/3\n", 1494 | " \n", 1495 | " def alpha_127(self): #公式有问题,我们假设mean周期为12\n", 1496 | " ####(MEAN((100*(CLOSE-MAX(CLOSE,12))/(MAX(CLOSE,12)))^2),12)^(1/2)###\n", 1497 | " return (Mean((100*(self.close-Max(self.close,12))/(Max(self.close,12)))**2,12))**(1/2)\n", 1498 | " \n", 1499 | " def alpha_128(self): \n", 1500 | " ####100-(100/(1+SUM(((HIGH+LOW+CLOSE)/3>DELAY((HIGH+LOW+CLOSE)/3,1)?(HIGH+LOW+CLOSE)/3*VOLUME:0),14)/SUM(((HIGH+LOW+CLOSE)/3 Delay(A,1)) \n", 1503 | " part1 = pd.Series(np.zeros(self.close.shape))\n", 1504 | " part1[cond] = A*self.volume\n", 1505 | " part2 = pd.Series(np.zeros(self.close.shape))\n", 1506 | " part2[~cond] = A*self.volume\n", 1507 | " return 100-(100/(1+Sum(part1,14)/Sum(part2,14)))\n", 1508 | "\n", 1509 | " def alpha_129(self): \n", 1510 | " ####SUM((CLOSE-DELAY(CLOSE,1)<0?ABS(CLOSE-DELAY(CLOSE,1)):0),12)###\n", 1511 | " cond = ((self.close-Delay(self.close,1)) < 0)\n", 1512 | " part = pd.Series(np.zeros(self.close.shape))\n", 1513 | " part[cond] = Abs(self.close-Delay(self.close,1))\n", 1514 | " #part[~cond] = 0\n", 1515 | " return Sum(part, 12)\n", 1516 | " \n", 1517 | " def alpha_130(self): #1657\n", 1518 | " ####(RANK(DECAYLINEAR(CORR(((HIGH + LOW) / 2), MEAN(VOLUME,40), 9), 10)) / RANK(DECAYLINEAR(CORR(RANK(VWAP), RANK(VOLUME), 7),3)))###\n", 1519 | " return (Rank(Decaylinear(Corr(((self.high + self.low) / 2), Mean(self.volume,40), 9), 10)) / Rank(Decaylinear(Corr(Rank(self.vwap), Rank(self.volume), 7),3)))\n", 1520 | " \n", 1521 | " def alpha_131(self): #1030 数据量较少\n", 1522 | " ####(RANK(DELAT(VWAP, 1))^TSRANK(CORR(CLOSE,MEAN(VOLUME,50), 18), 18))###\n", 1523 | " return (Rank(Delta(self.vwap, 1))**Tsrank(Corr(self.close,Mean(self.volume,50), 18), 18))\n", 1524 | " \n", 1525 | " def alpha_132(self): #1657\n", 1526 | " ####MEAN(AMOUNT,20)###\n", 1527 | " return Mean(self.amount,20)\n", 1528 | " \n", 1529 | " def alpha_133(self): \n", 1530 | " ####((20-HIGHDAY(HIGH,20))/20)*100-((20-LOWDAY(LOW,20))/20)*100###\n", 1531 | " return ((20-Highday(self.high,20))/20)*100-((20-Lowday(self.low,20))/20)*100\n", 1532 | " \n", 1533 | " def alpha_134(self): #1760\n", 1534 | " ####(CLOSE-DELAY(CLOSE,12))/DELAY(CLOSE,12)*VOLUME###\n", 1535 | " return (self.close-Delay(self.close,12))/Delay(self.close,12)*self.volume\n", 1536 | " \n", 1537 | " def alpha_135(self): #1744\n", 1538 | " ####SMA(DELAY(CLOSE/DELAY(CLOSE,20),1),20,1)###\n", 1539 | " return Sma(Delay(self.close/Delay(self.close,20),1),20,1)\n", 1540 | " \n", 1541 | " def alpha_136(self): #1729\n", 1542 | " ####((-1 * RANK(DELTA(RET, 3))) * CORR(OPEN, VOLUME, 10))###\n", 1543 | " return ((-1 * Rank(Delta(self.returns, 3))) * Corr(self.open, self.volume, 10))\n", 1544 | " \n", 1545 | " def alpha_137(self): \n", 1546 | " ####16*(CLOSE-DELAY(CLOSE,1)+(CLOSE-OPEN)/2+DELAY(CLOSE,1)-DELAY(OPEN,1))/((ABS(HIGH-DELAY(CLOSE,1))>ABS(LOW-DELAY(CLOSE,1)) & ABS(HIGH-DELAY(CLOSE,1))>ABS(HIGH-DELAY(LOW,1))?ABS(HIGH-DELAY(CLOSE,1))+ABS(LOW-DELAY(CLOSE,1))/2+ABS(DELAY(CLOSE,1)-DELAY(OPEN,1))/4:(ABS(LOW-DELAY(CLOSE,1))>ABS(HIGH-DELAY(LOW,1)) & ABS(LOW-DELAY(CLOSE,1))>ABS(HIGH-DELAY(CLOSE,1))?ABS(LOW-DELAY(CLOSE,1))+ABS(HIGH-DELAY(CLOSE,1))/2+ABS(DELAY(CLOSE,1)-DELAY(OPEN,1))/4:ABS(HIGH-DELAY(LOW,1))+ABS(DELAY(CLOSE,1)-DELAY(OPEN,1))/4)))*MAX(ABS(HIGH-DELAY(CLOSE,1)),ABS(LOW-DELAY(CLOSE,1)))\n", 1547 | " A = Abs(self.high- Delay(self.close,1))\n", 1548 | " B = Abs(self.low - Delay(self.close,1))\n", 1549 | " C = Abs(self.high- Delay(self.low,1))\n", 1550 | " D = Abs(Delay(self.close,1)-Delay(self.open,1)) \n", 1551 | " cond1 = ((A>B) & (A>C))\n", 1552 | " cond2 = ((B>C) & (B>A))\n", 1553 | " cond3 = ((C>=A) & (C>=B)) \n", 1554 | " part0 = 16*(self.close + (self.close - self.open)/2 - Delay(self.open,1))\n", 1555 | " part1 = pd.Series(np.zeros(self.close.shape))\n", 1556 | " part1[cond1] = A + B/2 + D/4\n", 1557 | " part1[cond2] = B + A/2 + D/4\n", 1558 | " part1[cond3] = C + D/4 \n", 1559 | " return part0/part1*Max2(A,B)\n", 1560 | "\n", 1561 | " def alpha_138(self): #1448\n", 1562 | " ####((RANK(DECAYLINEAR(DELTA((((LOW * 0.7) + (VWAP *0.3))), 3), 20)) - TSRANK(DECAYLINEAR(TSRANK(CORR(TSRANK(LOW, 8), TSRANK(MEAN(VOLUME,60), 17), 5), 19), 16), 7)) * -1)###\n", 1563 | " return ((Rank(Decaylinear(Delta((((self.low * 0.7) + (self.vwap *0.3))), 3), 20)) - Tsrank(Decaylinear(Tsrank(Corr(Tsrank(self.low, 8), Tsrank(Mean(self.volume,60), 17), 5), 19), 16), 7)) * -1)\n", 1564 | " \n", 1565 | " def alpha_139(self): #1729\n", 1566 | " ####(-1 * CORR(OPEN, VOLUME, 10))###\n", 1567 | " return (-1 * Corr(self.open, self.volume, 10))\n", 1568 | " \n", 1569 | " def alpha_140(self): #1797\n", 1570 | " ####MIN(RANK(DECAYLINEAR(((RANK(OPEN) + RANK(LOW)) - (RANK(HIGH) + RANK(CLOSE))), 8)), TSRANK(DECAYLINEAR(CORR(TSRANK(CLOSE, 8), TSRANK(MEAN(VOLUME,60), 20), 8), 7), 3))###\n", 1571 | " return Min2(Rank(Decaylinear(((Rank(self.open) + Rank(self.low)) - (Rank(self.high) + Rank(self.close))), 8)), Tsrank(Decaylinear(Corr(Tsrank(self.close, 8), Tsrank(Mean(self.volume,60), 20), 8), 7), 3))\n", 1572 | " \n", 1573 | " def alpha_141(self): #1637\n", 1574 | " ####(RANK(CORR(RANK(HIGH), RANK(MEAN(VOLUME,15)), 9))* -1)###\n", 1575 | " return (Rank(Corr(Rank(self.high), Rank(Mean(self.volume,15)), 9))* -1)\n", 1576 | " \n", 1577 | " def alpha_142(self): #1657\n", 1578 | " ####(((-1 * RANK(TSRANK(CLOSE, 10))) * RANK(DELTA(DELTA(CLOSE, 1), 1))) * RANK(TSRANK((VOLUME/MEAN(VOLUME,20)), 5)))###\n", 1579 | " return (((-1 * Rank(Tsrank(self.close, 10))) * Rank(Delta(Delta(self.close, 1), 1))) * Rank(Tsrank((self.volume/Mean(self.volume,20)), 5)))\n", 1580 | " \n", 1581 | " def alpha_143(self): \n", 1582 | " ####CLOSE>DELAY(CLOSE,1)?(CLOSE-DELAY(CLOSE,1))/DELAY(CLOSE,1)*SELF:SELF###\n", 1583 | "\n", 1584 | " return 0\n", 1585 | " \n", 1586 | " def alpha_144(self): \n", 1587 | " ####SUMIF(ABS(CLOSE/DELAY(CLOSE,1)-1)/AMOUNT,20,CLOSEDELAY(CLOSE,1))?1/(CLOSE-DELAY(CLOSE,1)):1) - MIN( ((CLOSE>DELAY(CLOSE,1))?1/(CLOSE-DELAY(CLOSE,1)):1) ,12) )/(HIGH-LOW)*100,13,2)###\n", 1684 | " cond = (self.close>Delay(self.close,1))\n", 1685 | " part = pd.Series(np.zeros(self.close.shape))\n", 1686 | " part[cond] = 1/(self.close-Delay(self.close,1))\n", 1687 | " part[~cond] = 1\n", 1688 | " return Sma((part - Min(part,12))/(self.high-self.low)*100, 13, 2)\n", 1689 | " \n", 1690 | " def alpha_165(self): \n", 1691 | " ####MAX(SUMAC(CLOSE-MEAN(CLOSE,48)))-MIN(SUMAC(CLOSE-MEAN(CLOSE,48)))/STD(CLOSE,48)###\n", 1692 | " \n", 1693 | " return 0\n", 1694 | " \n", 1695 | " def alpha_166(self): #公式有问题\n", 1696 | " \n", 1697 | " return 0\n", 1698 | "\n", 1699 | " def alpha_167(self): \n", 1700 | " ####SUM((CLOSE-DELAY(CLOSE,1)>0?CLOSE-DELAY(CLOSE,1):0),12)###\n", 1701 | " cond = (self.close > Delay(self.close,1))\n", 1702 | " part = pd.Series(np.zeros(self.close.shape))\n", 1703 | " part[cond] = self.close-Delay(self.close,1)\n", 1704 | " #part[~cond] = 0\n", 1705 | " return Sum(part,12)\n", 1706 | " \n", 1707 | " def alpha_168(self): #1657\n", 1708 | " ####(-1*VOLUME/MEAN(VOLUME,20))###\n", 1709 | " return (-1*self.volume/Mean(self.volume,20))\n", 1710 | " \n", 1711 | " def alpha_169(self): #1610\n", 1712 | " ####SMA(MEAN(DELAY(SMA(CLOSE-DELAY(CLOSE,1),9,1),1),12)-MEAN(DELAY(SMA(CLOSE-DELAY(CLOSE,1),9,1),1),26),10,1)###\n", 1713 | " return Sma(Mean(Delay(Sma(self.close-Delay(self.close,1),9,1),1),12)-Mean(Delay(Sma(self.close-Delay(self.close,1),9,1),1),26),10,1)\n", 1714 | " \n", 1715 | " def alpha_170(self): #1657\n", 1716 | " ####((((RANK((1 / CLOSE)) * VOLUME) / MEAN(VOLUME,20)) * ((HIGH * RANK((HIGH - CLOSE))) / (SUM(HIGH, 5) /5))) - RANK((VWAP - DELAY(VWAP, 5))))###\n", 1717 | " return ((((Rank((1 / self.close)) * self.volume) / Mean(self.volume,20)) * ((self.high * Rank((self.high - self.close))) / (Sum(self.high, 5) /5))) - Rank((self.vwap - Delay(self.vwap, 5))))\n", 1718 | " \n", 1719 | " def alpha_171(self): #1789\n", 1720 | " ####((-1 * ((LOW - CLOSE) * (OPEN^5))) / ((CLOSE - HIGH) * (CLOSE^5)))###\n", 1721 | " return ((-1 * ((self.low - self.close) * (self.open**5))) / ((self.close - self.high) * (self.close**5)))\n", 1722 | " \n", 1723 | " def alpha_172(self): \n", 1724 | " ####MEAN(ABS(SUM((LD>0 & LD>HD)?LD:0,14)*100/SUM(TR,14)-SUM((HD>0 &HD>LD)?HD:0,14)*100/SUM(TR,14))/(SUM((LD>0 & LD>HD)?LD:0,14)*100/SUM(TR,14)+SUM((HD>0 &HD>LD)?HD:0,14)*100/SUM(TR,14))*100,6)\n", 1725 | " TR = Max2(Max2(self.high-self.low,Abs(self.high-Delay(self.close,1))),Abs(self.low-Delay(self.close,1)))\n", 1726 | " HD = self.high-Delay(self.high,1)\n", 1727 | " LD = Delay(self.low,1)-self.low\n", 1728 | " cond1 = ((LD>0) & (LD>HD))\n", 1729 | " cond2 = ((HD>0) & (HD>LD)) \n", 1730 | " part1 = pd.Series(np.zeros(self.close.shape))\n", 1731 | " part1[cond1] = LD\n", 1732 | " #part1[~cond1] = 0\n", 1733 | " part2 = pd.Series(np.zeros(self.close.shape))\n", 1734 | " part2[cond2] = HD\n", 1735 | " #part2[~cond2] = 0\n", 1736 | " return Mean(Abs(Sum(part1,14)*100/Sum(TR,14)-Sum(part2,14)*100/Sum(TR,14))/(Sum(part1,14)*100/Sum(TR,14)+Sum(part2,14)*100/Sum(TR,14))*100,6)\n", 1737 | " \n", 1738 | " def alpha_173(self): #1797\n", 1739 | " ####3*SMA(CLOSE,13,2)-2*SMA(SMA(CLOSE,13,2),13,2)+SMA(SMA(SMA(LOG(CLOSE),13,2),13,2),13,2)###\n", 1740 | " return 3*Sma(self.close,13,2)-2*Sma(Sma(self.close,13,2),13,2)+Sma(Sma(Sma(Log(self.close),13,2),13,2),13,2)\n", 1741 | " \n", 1742 | " def alpha_174(self): \n", 1743 | " ####SMA((CLOSE>DELAY(CLOSE,1)?STD(CLOSE,20):0),20,1)###\n", 1744 | " cond = (self.close>Delay(self.close,1))\n", 1745 | " part = pd.Series(np.zeros(self.close.shape))\n", 1746 | " part[cond] = Std(self.close,20)\n", 1747 | " #part[~cond] = 0\n", 1748 | " return Sma(part,20,1)\n", 1749 | " \n", 1750 | " def alpha_175(self): #1759\n", 1751 | " ####MEAN(MAX(MAX((HIGH-LOW),ABS(DELAY(CLOSE,1)-HIGH)),ABS(DELAY(CLOSE,1)-LOW)),6)###\n", 1752 | " return Mean(Max2(Max2((self.high-self.low),Abs(Delay(self.close,1)-self.high)),Abs(Delay(self.close,1)-self.low)),6)\n", 1753 | " \n", 1754 | " def alpha_176(self): #1678\n", 1755 | " ####CORR(RANK(((CLOSE - TSMIN(LOW, 12)) / (TSMAX(HIGH, 12) - TSMIN(LOW,12)))), RANK(VOLUME), 6)###\n", 1756 | " return Corr(Rank(((self.close - Tsmin(self.low, 12)) / (Tsmax(self.high, 12) - Tsmin(self.low,12)))), Rank(self.volume), 6)\n", 1757 | " \n", 1758 | " def alpha_177(self): \n", 1759 | " ####((20-HIGHDAY(HIGH,20))/20)*100###\n", 1760 | " return ((20-Highday(self.high,20))/20)*100\n", 1761 | " \n", 1762 | " def alpha_178(self): #1790\n", 1763 | " ####(CLOSE-DELAY(CLOSE,1))/DELAY(CLOSE,1)*VOLUME###\n", 1764 | " return (self.close-Delay(self.close,1))/Delay(self.close,1)*self.volume\n", 1765 | " \n", 1766 | " def alpha_179(self): #1421 数据量较少\n", 1767 | " ####(RANK(CORR(VWAP, VOLUME, 4)) *RANK(CORR(RANK(LOW), RANK(MEAN(VOLUME,50)), 12)))###\n", 1768 | " return (Rank(Corr(self.vwap, self.volume, 4)) *Rank(Corr(Rank(self.low), Rank(Mean(self.volume,50)), 12)))\n", 1769 | " \n", 1770 | " def alpha_180(self): #指标有问题\n", 1771 | " ####((MEAN(VOLUME,20) < VOLUME) ? ((-1 * TSRANK(ABS(DELTA(CLOSE, 7)), 60)) * SIGN(DELTA(CLOSE, 7)) : (-1 *VOLUME)))\n", 1772 | " cond = (Mean(self.volume,20) < self.volume)\n", 1773 | " part = pd.Series(np.zeros(self.close.shape))\n", 1774 | " part[cond] = (-1 * Tsrank(Abs(Delta(self.close, 7)), 60)) * Sign(Delta(self.close, 7)) \n", 1775 | " part[~cond] = -1 * self.volume\n", 1776 | " return part\n", 1777 | " \n", 1778 | " def alpha_181(self): #1532 公式有问题,假设后面的sum周期为20\n", 1779 | " ####SUM(((CLOSE/DELAY(CLOSE,1)-1)-MEAN((CLOSE/DELAY(CLOSE,1)-1),20))-(BANCHMARKINDEXCLOSE-MEAN(BANCHMARKINDEXCLOSE,20))^2,20)/SUM((BANCHMARKINDEXCLOSE-MEAN(BANCHMARKINDEXCLOSE,20))^3)###\n", 1780 | " return Sum(((self.close/Delay(self.close,1)-1)-Mean((self.close/Delay(self.close,1)-1),20))-(self.benchmark_close-Mean(self.benchmark_close,20))**2,20)/Sum(((self.benchmark_close-Mean(self.benchmark_close,20))**3),20)\n", 1781 | " \n", 1782 | " def alpha_182(self): \n", 1783 | " ####COUNT((CLOSE>OPEN & BANCHMARKINDEXCLOSE>BANCHMARKINDEXOPEN)OR(CLOSEself.open) & (self.benchmark_close>self.benchmark_open))\n", 1785 | " cond2 = ((self.close0 & LD>HD)?LD:0,14)*100/SUM(TR,14)-SUM((HD>0 & HD>LD)?HD:0,14)*100/SUM(TR,14))/(SUM((LD>0 & LD>HD)?LD:0,14)*100/SUM(TR,14)+SUM((HD>0 & HD>LD)?HD:0,14)*100/SUM(TR,14))*100,6)+DELAY(MEAN(ABS(SUM((LD>0 & LD>HD)?LD:0,14)*100/SUM(TR,14)-SUM((HD>0 & HD>LD)?HD:0,14)*100/SUM(TR,14))/(SUM((LD>0 & LD>HD)?LD:0,14)*100/SUM(TR,14)+SUM((HD>0 & HD>LD)?HD:0,14)*100/SUM(TR,14))*100,6),6))/2\n", 1805 | " TR = Max2(Max2(self.high-self.low,Abs(self.high-Delay(self.close,1))),Abs(self.low-Delay(self.close,1)))\n", 1806 | " HD = self.high-Delay(self.high,1)\n", 1807 | " LD = Delay(self.low,1)-self.low\n", 1808 | " cond1 = ((LD>0) & (LD>HD))\n", 1809 | " cond2 = ((HD>0) & (HD>LD)) \n", 1810 | " part1 = pd.Series(np.zeros(self.close.shape))\n", 1811 | " part1[cond1] = LD\n", 1812 | " #part1[~cond1] = 0\n", 1813 | " part2 = pd.Series(np.zeros(self.close.shape))\n", 1814 | " part2[cond2] = HD\n", 1815 | " #part2[~cond2] = 0\n", 1816 | " return (Mean(Abs(Sum(part1,14)*100/Sum(TR,14)-Sum(part2,14)*100/Sum(TR,14))/(Sum(part1,14)*100/Sum(TR,14)+Sum(part2,14)*100/Sum(TR,14))*100,6)+Delay(Mean(Abs(Sum(part1,14)*100/Sum(TR,14)-Sum(part2,14)*100/Sum(TR,14))/(Sum(part1,14)*100/Sum(TR,14)+Sum(part2,14)*100/Sum(TR,14))*100,6),6))/2\n", 1817 | " \n", 1818 | " def alpha_187(self): \n", 1819 | " ####SUM((OPEN<=DELAY(OPEN,1)?0:MAX((HIGH-OPEN),(OPEN-DELAY(OPEN,1)))),20)###\n", 1820 | " cond = (self.open<=Delay(self.open,1))\n", 1821 | " part = pd.Series(np.zeros(self.close.shape))\n", 1822 | " part[cond] = 0\n", 1823 | " part[~cond] = Max2((self.high-self.open),(self.open-Delay(self.open,1)))\n", 1824 | " return Sum(part,20) \n", 1825 | " \n", 1826 | " def alpha_188(self): #1797\n", 1827 | " ####((HIGH-LOW–SMA(HIGH-LOW,11,2))/SMA(HIGH-LOW,11,2))*100###\n", 1828 | " return ((self.high-self.low-Sma(self.high-self.low,11,2))/Sma(self.high-self.low,11,2))*100\n", 1829 | " \n", 1830 | " def alpha_189(self): #1721\n", 1831 | " ####MEAN(ABS(CLOSE-MEAN(CLOSE,6)),6)###\n", 1832 | " return Mean(Abs(self.close-Mean(self.close,6)),6)\n", 1833 | " \n", 1834 | " def alpha_190(self): #公式有大问题,\n", 1835 | " ####LOG((COUNT( CLOSE/DELAY(CLOSE,1)>((CLOSE/DELAY(CLOSE,19))^(1/20)-1) ,20)-1)*(SUMIF((CLOSE/DELAY(CLOSE,1)-((CLOSE/DELAY(CLOSE,19))^(1/20)-1))^2,20,CLOSE/DELAY(CLOSE,1)<(CLOSE/DELAY(CLOSE,19))^(1/20)-1))/((COUNT((CLOSE/DELAY(CLOSE,1)<(CLOSE/DELAY(CLOSE,19))^(1/20)-1),20))*(SUMIF((CLOSE/DELAY(CLOSE,1)-((CLOSE/DELAY(CLOSE,19))^(1/20)-1))^2,20,CLOSE/DELAY(CLOSE,1)>(CLOSE/DELAY(CLOSE,19))^(1/20)-1))))\n", 1836 | " '''\n", 1837 | " cond = ((self.close/Delay(self.close,1)) > ((self.close/Delay(self.close,19))**(1/20)-1))\n", 1838 | " part1 = pd.Series(np.zeros(self.close.shape))\n", 1839 | " part1[cond] = 1 #COUNT\n", 1840 | " part2 = pd.Series(np.zeros(self.close.shape))\n", 1841 | " part2[~cond] = (self.close/Delay(self.close,1)-((self.close/Delay(self.close,19))**(1/20)-1))**2#SUMIF\n", 1842 | " part3 = pd.Series(np.zeros(self.close.shape))\n", 1843 | " part3[~cond] = 1 #COUNT\n", 1844 | " part4 = pd.Series(np.zeros(self.close.shape))\n", 1845 | " part4[cond] = (self.close/Delay(self.close,1)-((self.close/Delay(self.close,19))**(1/20)-1))**2#SUMIF\n", 1846 | " return Log((Count(part1,20))*Sumif(part2,20)/(Count(part3,20)*Sumif(part4,20)))'''\n", 1847 | " return 0\n", 1848 | " \n", 1849 | " def alpha_191(self): #1721\n", 1850 | " ####((CORR(MEAN(VOLUME,20), LOW, 5) + ((HIGH + LOW) / 2)) - CLOSE)###\n", 1851 | " return ((Corr(Mean(self.volume,20), self.low, 5) + ((self.high + self.low) / 2)) - self.close)\n", 1852 | " " 1853 | ] 1854 | }, 1855 | { 1856 | "cell_type": "markdown", 1857 | "id": "43acf14a-da2c-4260-bbe3-3ee46e62f532", 1858 | "metadata": { 1859 | "tags": [] 1860 | }, 1861 | "source": [ 1862 | "### **六、使用df[]方法计算第一批alpha(共计135个)**" 1863 | ] 1864 | }, 1865 | { 1866 | "cell_type": "markdown", 1867 | "id": "e0f02e92-4943-46a2-acd7-aae71e6f82c6", 1868 | "metadata": { 1869 | "tags": [] 1870 | }, 1871 | "source": [ 1872 | "#### **1、alpha的计算函数**" 1873 | ] 1874 | }, 1875 | { 1876 | "cell_type": "code", 1877 | "execution_count": 8, 1878 | "id": "b838fde4-7769-46ea-96ba-73c3f7dbb9ac", 1879 | "metadata": { 1880 | "tags": [] 1881 | }, 1882 | "outputs": [], 1883 | "source": [ 1884 | "def GET_alpha(df):\n", 1885 | " stock=Alphas(df)\n", 1886 | " df1 = df.copy()\n", 1887 | " df2 = df[['date']].copy()\n", 1888 | " df3 = df[['date']].copy()\n", 1889 | " df4 = df[['date']].copy()\n", 1890 | " df1['alpha_1']=stock.alpha_1() \n", 1891 | " df1['alpha_2']=stock.alpha_2()\n", 1892 | " #df1['alpha_3']=stock.alpha_3()\n", 1893 | " #df1['alpha_4']=stock.alpha_4()\n", 1894 | " df1['alpha_5']=stock.alpha_5()\n", 1895 | " df1['alpha_6']=stock.alpha_6()\n", 1896 | " df1['alpha_7']=stock.alpha_7()\n", 1897 | " df1['alpha_8']=stock.alpha_8()\n", 1898 | " df1['alpha_9']=stock.alpha_9()\n", 1899 | " #df1['alpha_10']=stock.alpha_10()\n", 1900 | " df1['alpha_11']=stock.alpha_11() \n", 1901 | " df1['alpha_12']=stock.alpha_12()\n", 1902 | " df1['alpha_13']=stock.alpha_13()\n", 1903 | " df1['alpha_14']=stock.alpha_14()\n", 1904 | " df1['alpha_15']=stock.alpha_15()\n", 1905 | " df1['alpha_16']=stock.alpha_16()\n", 1906 | " df1['alpha_17']=stock.alpha_17()\n", 1907 | " df1['alpha_18']=stock.alpha_18()\n", 1908 | " #df1['alpha_19']=stock.alpha_19()\n", 1909 | " df1['alpha_20']=stock.alpha_20()\n", 1910 | " #df1['alpha_21']=stock.alpha_21() \n", 1911 | " df1['alpha_22']=stock.alpha_22()\n", 1912 | " #df1['alpha_23']=stock.alpha_23()\n", 1913 | " df1['alpha_24']=stock.alpha_24()\n", 1914 | " df1['alpha_25']=stock.alpha_25()\n", 1915 | " df1['alpha_26']=stock.alpha_26()\n", 1916 | " #df1['alpha_27']=stock.alpha_27()\n", 1917 | " df1['alpha_28']=stock.alpha_28()\n", 1918 | " df1['alpha_29']=stock.alpha_29()\n", 1919 | " #df1['alpha_30']=stock.alpha_30()\n", 1920 | " df1['alpha_31']=stock.alpha_31() \n", 1921 | " df1['alpha_32']=stock.alpha_32()\n", 1922 | " df1['alpha_33']=stock.alpha_33()\n", 1923 | " df1['alpha_34']=stock.alpha_34()\n", 1924 | " df1['alpha_35']=stock.alpha_35()\n", 1925 | " df1['alpha_36']=stock.alpha_36()\n", 1926 | " df1['alpha_37']=stock.alpha_37()\n", 1927 | " #df1['alpha_38']=stock.alpha_38()\n", 1928 | " df1['alpha_39']=stock.alpha_39()\n", 1929 | " #df1['alpha_40']=stock.alpha_40()\n", 1930 | " df1['alpha_41']=stock.alpha_41() \n", 1931 | " df1['alpha_42']=stock.alpha_42()\n", 1932 | " #df1['alpha_43']=stock.alpha_43()\n", 1933 | " df1['alpha_44']=stock.alpha_44()\n", 1934 | " df1['alpha_45']=stock.alpha_45()\n", 1935 | " df1['alpha_46']=stock.alpha_46()\n", 1936 | " df1['alpha_47']=stock.alpha_47()\n", 1937 | " df1['alpha_48']=stock.alpha_48()\n", 1938 | " #df1['alpha_49']=stock.alpha_49()\n", 1939 | " #df1['alpha_50']=stock.alpha_50()\n", 1940 | " #df2['alpha_51']=stock.alpha_51() \n", 1941 | " df2['alpha_52']=stock.alpha_52()\n", 1942 | " #df2['alpha_53']=stock.alpha_53()\n", 1943 | " df2['alpha_54']=stock.alpha_54()\n", 1944 | " #df2['alpha_55']=stock.alpha_55()\n", 1945 | " #df2['alpha_56']=stock.alpha_56()\n", 1946 | " df2['alpha_57']=stock.alpha_57()\n", 1947 | " #df2['alpha_58']=stock.alpha_58()\n", 1948 | " #df2['alpha_59']=stock.alpha_59()\n", 1949 | " df2['alpha_60']=stock.alpha_60()\n", 1950 | " df2['alpha_61']=stock.alpha_61() \n", 1951 | " df2['alpha_62']=stock.alpha_62()\n", 1952 | " df2['alpha_63']=stock.alpha_63()\n", 1953 | " df2['alpha_64']=stock.alpha_64()\n", 1954 | " df2['alpha_65']=stock.alpha_65()\n", 1955 | " df2['alpha_66']=stock.alpha_66()\n", 1956 | " df2['alpha_67']=stock.alpha_67()\n", 1957 | " df2['alpha_68']=stock.alpha_68()\n", 1958 | " #df2['alpha_69']=stock.alpha_69()\n", 1959 | " df2['alpha_70']=stock.alpha_70()\n", 1960 | " df2['alpha_71']=stock.alpha_71() \n", 1961 | " df2['alpha_72']=stock.alpha_72()\n", 1962 | " df2['alpha_73']=stock.alpha_73()\n", 1963 | " df2['alpha_74']=stock.alpha_74()\n", 1964 | " #df2['alpha_75']=stock.alpha_75()\n", 1965 | " df2['alpha_76']=stock.alpha_76()\n", 1966 | " df2['alpha_77']=stock.alpha_77()\n", 1967 | " df2['alpha_78']=stock.alpha_78()\n", 1968 | " df2['alpha_79']=stock.alpha_79()\n", 1969 | " df2['alpha_80']=stock.alpha_80()\n", 1970 | " df2['alpha_81']=stock.alpha_81() \n", 1971 | " df2['alpha_82']=stock.alpha_82()\n", 1972 | " df2['alpha_83']=stock.alpha_83()\n", 1973 | " #df2['alpha_84']=stock.alpha_84()\n", 1974 | " df2['alpha_85']=stock.alpha_85()\n", 1975 | " #df2['alpha_86']=stock.alpha_86()\n", 1976 | " df2['alpha_87']=stock.alpha_87()\n", 1977 | " df2['alpha_88']=stock.alpha_88()\n", 1978 | " df2['alpha_89']=stock.alpha_89()\n", 1979 | " df2['alpha_90']=stock.alpha_90()\n", 1980 | " df2['alpha_91']=stock.alpha_91() \n", 1981 | " df2['alpha_92']=stock.alpha_92()\n", 1982 | " #df2['alpha_93']=stock.alpha_93()\n", 1983 | " #df2['alpha_94']=stock.alpha_94()\n", 1984 | " df2['alpha_95']=stock.alpha_95()\n", 1985 | " df2['alpha_96']=stock.alpha_96()\n", 1986 | " df2['alpha_97']=stock.alpha_97()\n", 1987 | " #df2['alpha_98']=stock.alpha_98()\n", 1988 | " df2['alpha_99']=stock.alpha_99()\n", 1989 | " df2['alpha_100']=stock.alpha_100()\n", 1990 | " #df3['alpha_101']=stock.alpha_101() \n", 1991 | " df3['alpha_102']=stock.alpha_102()\n", 1992 | " #df3['alpha_103']=stock.alpha_103()\n", 1993 | " df3['alpha_104']=stock.alpha_104()\n", 1994 | " df3['alpha_105']=stock.alpha_105()\n", 1995 | " df3['alpha_106']=stock.alpha_106()\n", 1996 | " df3['alpha_107']=stock.alpha_107()\n", 1997 | " df3['alpha_108']=stock.alpha_108()\n", 1998 | " df3['alpha_109']=stock.alpha_109()\n", 1999 | " df3['alpha_110']=stock.alpha_110()\n", 2000 | " df3['alpha_111']=stock.alpha_111() \n", 2001 | " #df3['alpha_112']=stock.alpha_112()\n", 2002 | " df3['alpha_113']=stock.alpha_113()\n", 2003 | " df3['alpha_114']=stock.alpha_114()\n", 2004 | " df3['alpha_115']=stock.alpha_115()\n", 2005 | " #df3['alpha_116']=stock.alpha_116()\n", 2006 | " df3['alpha_117']=stock.alpha_117()\n", 2007 | " df3['alpha_118']=stock.alpha_118()\n", 2008 | " df3['alpha_119']=stock.alpha_119()\n", 2009 | " df3['alpha_120']=stock.alpha_120()\n", 2010 | " df3['alpha_121']=stock.alpha_121() \n", 2011 | " df3['alpha_122']=stock.alpha_122()\n", 2012 | " #df3['alpha_123']=stock.alpha_123()\n", 2013 | " df3['alpha_124']=stock.alpha_124()\n", 2014 | " df3['alpha_125']=stock.alpha_125()\n", 2015 | " df3['alpha_126']=stock.alpha_126()\n", 2016 | " #df3['alpha_127']=stock.alpha_127()\n", 2017 | " #df3['alpha_128']=stock.alpha_128()\n", 2018 | " #df3['alpha_129']=stock.alpha_129()\n", 2019 | " df3['alpha_130']=stock.alpha_130()\n", 2020 | " df3['alpha_131']=stock.alpha_131() \n", 2021 | " df3['alpha_132']=stock.alpha_132()\n", 2022 | " #df3['alpha_133']=stock.alpha_133()\n", 2023 | " df3['alpha_134']=stock.alpha_134()\n", 2024 | " df3['alpha_135']=stock.alpha_135()\n", 2025 | " df3['alpha_136']=stock.alpha_136()\n", 2026 | " #df3['alpha_137']=stock.alpha_137()\n", 2027 | " df3['alpha_138']=stock.alpha_138()\n", 2028 | " df3['alpha_139']=stock.alpha_139()\n", 2029 | " df3['alpha_140']=stock.alpha_140()\n", 2030 | " df3['alpha_141']=stock.alpha_141() \n", 2031 | " df3['alpha_142']=stock.alpha_142()\n", 2032 | " #df3['alpha_143']=stock.alpha_143()\n", 2033 | " #df3['alpha_144']=stock.alpha_144()\n", 2034 | " df3['alpha_145']=stock.alpha_145()\n", 2035 | " df3['alpha_146']=stock.alpha_146()\n", 2036 | " #df3['alpha_147']=stock.alpha_147()\n", 2037 | " #df3['alpha_148']=stock.alpha_148()\n", 2038 | " #df3['alpha_149']=stock.alpha_149()\n", 2039 | " df3['alpha_150']=stock.alpha_150()\n", 2040 | " df4['alpha_151']=stock.alpha_151() \n", 2041 | " df4['alpha_152']=stock.alpha_152()\n", 2042 | " df4['alpha_153']=stock.alpha_153()\n", 2043 | " #df4['alpha_154']=stock.alpha_154()\n", 2044 | " df4['alpha_155']=stock.alpha_155()\n", 2045 | " df4['alpha_156']=stock.alpha_156()\n", 2046 | " df4['alpha_157']=stock.alpha_157()\n", 2047 | " df4['alpha_158']=stock.alpha_158()\n", 2048 | " df4['alpha_159']=stock.alpha_159()\n", 2049 | " #df4['alpha_160']=stock.alpha_160()\n", 2050 | " df4['alpha_161']=stock.alpha_161() \n", 2051 | " df4['alpha_162']=stock.alpha_162()\n", 2052 | " df4['alpha_163']=stock.alpha_163()\n", 2053 | " #df4['alpha_164']=stock.alpha_164()\n", 2054 | " #df4['alpha_165']=stock.alpha_165()\n", 2055 | " #df4['alpha_166']=stock.alpha_166()\n", 2056 | " #df4['alpha_167']=stock.alpha_167()\n", 2057 | " df4['alpha_168']=stock.alpha_168()\n", 2058 | " df4['alpha_169']=stock.alpha_169()\n", 2059 | " df4['alpha_170']=stock.alpha_170()\n", 2060 | " df4['alpha_171']=stock.alpha_171() \n", 2061 | " #df4['alpha_172']=stock.alpha_172()\n", 2062 | " df4['alpha_173']=stock.alpha_173()\n", 2063 | " #df4['alpha_174']=stock.alpha_174()\n", 2064 | " df4['alpha_175']=stock.alpha_175()\n", 2065 | " df4['alpha_176']=stock.alpha_176()\n", 2066 | " #df4['alpha_177']=stock.alpha_177()\n", 2067 | " df4['alpha_178']=stock.alpha_178()\n", 2068 | " df4['alpha_179']=stock.alpha_179()\n", 2069 | " #df4['alpha_180']=stock.alpha_180()\n", 2070 | " df4['alpha_181']=stock.alpha_181() \n", 2071 | " #df4['alpha_182']=stock.alpha_182()\n", 2072 | " #df4['alpha_183']=stock.alpha_183()\n", 2073 | " df4['alpha_184']=stock.alpha_184()\n", 2074 | " df4['alpha_185']=stock.alpha_185()\n", 2075 | " #df4['alpha_186']=stock.alpha_186()\n", 2076 | " #df4['alpha_187']=stock.alpha_187()\n", 2077 | " df4['alpha_188']=stock.alpha_188()\n", 2078 | " df4['alpha_189']=stock.alpha_189()\n", 2079 | " #df4['alpha_190']=stock.alpha_190()\n", 2080 | " df4['alpha_191']=stock.alpha_191() \n", 2081 | " df12 = pd.merge(df1, df2, on = ['date'])\n", 2082 | " df34 = pd.merge(df3, df4, on = ['date'])\n", 2083 | " df1234 = pd.merge(df12, df34, on = ['date'])\n", 2084 | " \n", 2085 | " return df1234" 2086 | ] 2087 | }, 2088 | { 2089 | "cell_type": "markdown", 2090 | "id": "57f26f80-f747-443a-80e7-4a254b4c6d61", 2091 | "metadata": { 2092 | "tags": [] 2093 | }, 2094 | "source": [ 2095 | "#### **2、分组运算alpha,并存入h5**" 2096 | ] 2097 | }, 2098 | { 2099 | "cell_type": "markdown", 2100 | "id": "cfa50d83-26f4-4723-8520-e2e3442a2636", 2101 | "metadata": { 2102 | "tags": [] 2103 | }, 2104 | "source": [ 2105 | "##### 2.1 按code分组运算" 2106 | ] 2107 | }, 2108 | { 2109 | "cell_type": "code", 2110 | "execution_count": null, 2111 | "id": "1fcb6703-4627-4e91-80d5-dd8ea2e8009c", 2112 | "metadata": { 2113 | "tags": [] 2114 | }, 2115 | "outputs": [], 2116 | "source": [ 2117 | "#计算第1组alpha\n", 2118 | "for i in stktrd_code1:\n", 2119 | " locals()['GET_alpha'+str(i)] = GET_alpha(locals()[i])" 2120 | ] 2121 | }, 2122 | { 2123 | "cell_type": "markdown", 2124 | "id": "a46f0b00-4d40-4662-9993-8721317b5f1c", 2125 | "metadata": { 2126 | "tags": [] 2127 | }, 2128 | "source": [ 2129 | "#计算第2组alpha\n", 2130 | "for i in stktrd_code2:\n", 2131 | " locals()['GET_alpha'+str(i)] = GET_alpha(locals()[i])" 2132 | ] 2133 | }, 2134 | { 2135 | "cell_type": "markdown", 2136 | "id": "afde1bbb-abc7-439c-8640-41d8201eb4ef", 2137 | "metadata": {}, 2138 | "source": [ 2139 | "#计算第3组alpha\n", 2140 | "for i in stktrd_code3:\n", 2141 | " locals()['GET_alpha'+str(i)] = GET_alpha(locals()[i])" 2142 | ] 2143 | }, 2144 | { 2145 | "cell_type": "markdown", 2146 | "id": "6b5c6b02-f4a7-4b4c-a6a0-edcaa7bd9f69", 2147 | "metadata": {}, 2148 | "source": [ 2149 | "#计算第4组alpha\n", 2150 | "for i in stktrd_code4:\n", 2151 | " locals()['GET_alpha'+str(i)] = GET_alpha(locals()[i])" 2152 | ] 2153 | }, 2154 | { 2155 | "cell_type": "markdown", 2156 | "id": "dc1de1a6-280b-4b69-9236-2d787c8ff463", 2157 | "metadata": {}, 2158 | "source": [ 2159 | "#计算第5组alpha\n", 2160 | "for i in stktrd_code5:\n", 2161 | " locals()['GET_alpha'+str(i)] = GET_alpha(locals()[i])" 2162 | ] 2163 | }, 2164 | { 2165 | "cell_type": "markdown", 2166 | "id": "cfd1ce3a-d0b3-4e5d-80b0-4c8cd8fd135a", 2167 | "metadata": {}, 2168 | "source": [ 2169 | "#计算第6组alpha\n", 2170 | "for i in stktrd_code6:\n", 2171 | " locals()['GET_alpha'+str(i)] = GET_alpha(locals()[i])" 2172 | ] 2173 | }, 2174 | { 2175 | "cell_type": "markdown", 2176 | "id": "35059429-0ea5-4fdf-b05d-55c260229841", 2177 | "metadata": {}, 2178 | "source": [ 2179 | "#计算第7组alpha\n", 2180 | "for i in stktrd_code7:\n", 2181 | " locals()['GET_alpha'+str(i)] = GET_alpha(locals()[i])" 2182 | ] 2183 | }, 2184 | { 2185 | "cell_type": "markdown", 2186 | "id": "fbed7fa9-1119-4acd-b9ce-47135c07e3e2", 2187 | "metadata": {}, 2188 | "source": [ 2189 | "#计算第8组alpha\n", 2190 | "for i in stktrd_code8:\n", 2191 | " locals()['GET_alpha'+str(i)] = GET_alpha(locals()[i])" 2192 | ] 2193 | }, 2194 | { 2195 | "cell_type": "markdown", 2196 | "id": "f3d50dcd-3eb9-4c09-b057-581ccdd0dce4", 2197 | "metadata": {}, 2198 | "source": [ 2199 | "#计算第9组alpha\n", 2200 | "for i in stktrd_code9:\n", 2201 | " locals()['GET_alpha'+str(i)] = GET_alpha(locals()[i])" 2202 | ] 2203 | }, 2204 | { 2205 | "cell_type": "raw", 2206 | "id": "ecd35e15-62e9-44b6-ae23-e979e47ee3c1", 2207 | "metadata": {}, 2208 | "source": [ 2209 | "#计算第10组alpha\n", 2210 | "for i in stktrd_code10:\n", 2211 | " locals()['GET_alpha'+str(i)] = GET_alpha(locals()[i])" 2212 | ] 2213 | }, 2214 | { 2215 | "cell_type": "markdown", 2216 | "id": "d4e789b4-57b4-42bb-8e86-e3bea337edef", 2217 | "metadata": { 2218 | "tags": [] 2219 | }, 2220 | "source": [ 2221 | "##### 2.2按code分组合并" 2222 | ] 2223 | }, 2224 | { 2225 | "cell_type": "markdown", 2226 | "id": "54240b5c-78e2-48d7-a286-9892ef86e506", 2227 | "metadata": { 2228 | "tags": [] 2229 | }, 2230 | "source": [ 2231 | "#合并第一组alpha,命名为alphas_group1\n", 2232 | "alphas_group1 = pd.DataFrame()\n", 2233 | "for i in stktrd_code1:\n", 2234 | " alphas_group1 = pd.concat([alphas_group1, locals()['GET_alpha'+str(i)]])\n", 2235 | "\n", 2236 | "#重置索引\n", 2237 | "alphas_group1.reset_index(inplace = True, drop = True)" 2238 | ] 2239 | }, 2240 | { 2241 | "cell_type": "markdown", 2242 | "id": "835fee24-e2f9-44ab-9f5a-83f8a502a79b", 2243 | "metadata": {}, 2244 | "source": [ 2245 | "#合并第二组alpha,命名为alphas_group2\n", 2246 | "alphas_group2 = pd.DataFrame()\n", 2247 | "for i in stktrd_code2:\n", 2248 | " alphas_group2 = pd.concat([alphas_group2, locals()['GET_alpha'+str(i)]])\n", 2249 | "\n", 2250 | "#重置索引\n", 2251 | "alphas_group2.reset_index(inplace = True, drop = True)" 2252 | ] 2253 | }, 2254 | { 2255 | "cell_type": "markdown", 2256 | "id": "40646d20-48e4-4cbc-9cdd-7254185bac46", 2257 | "metadata": {}, 2258 | "source": [ 2259 | "#合并第3组alpha,命名为alphas_group3\n", 2260 | "alphas_group3 = pd.DataFrame()\n", 2261 | "for i in stktrd_code3:\n", 2262 | " alphas_group3 = pd.concat([alphas_group3, locals()['GET_alpha'+str(i)]])\n", 2263 | "\n", 2264 | "#重置索引\n", 2265 | "alphas_group3.reset_index(inplace = True, drop = True)" 2266 | ] 2267 | }, 2268 | { 2269 | "cell_type": "markdown", 2270 | "id": "3bacad55-bc77-4a89-a42b-18cde53b5906", 2271 | "metadata": {}, 2272 | "source": [ 2273 | "#合并第4组alpha,命名为alphas_group4\n", 2274 | "alphas_group4 = pd.DataFrame()\n", 2275 | "for i in stktrd_code4:\n", 2276 | " alphas_group4 = pd.concat([alphas_group4, locals()['GET_alpha'+str(i)]])\n", 2277 | "\n", 2278 | "#重置索引\n", 2279 | "alphas_group4.reset_index(inplace = True, drop = True)" 2280 | ] 2281 | }, 2282 | { 2283 | "cell_type": "markdown", 2284 | "id": "309cd263-247b-4f83-b79e-fa08af0fe150", 2285 | "metadata": {}, 2286 | "source": [ 2287 | "#合并第5组alpha,命名为alphas_group5\n", 2288 | "alphas_group5 = pd.DataFrame()\n", 2289 | "for i in stktrd_code5:\n", 2290 | " alphas_group5 = pd.concat([alphas_group5, locals()['GET_alpha'+str(i)]])\n", 2291 | "\n", 2292 | "#重置索引\n", 2293 | "alphas_group5.reset_index(inplace = True, drop = True)" 2294 | ] 2295 | }, 2296 | { 2297 | "cell_type": "markdown", 2298 | "id": "0ae30319-98d8-4ec9-bdb9-7a5628c85c92", 2299 | "metadata": {}, 2300 | "source": [ 2301 | "#合并第6组alpha,命名为alphas_group6\n", 2302 | "alphas_group6 = pd.DataFrame()\n", 2303 | "for i in stktrd_code6:\n", 2304 | " alphas_group6 = pd.concat([alphas_group6, locals()['GET_alpha'+str(i)]])\n", 2305 | "\n", 2306 | "#重置索引\n", 2307 | "alphas_group6.reset_index(inplace = True, drop = True)" 2308 | ] 2309 | }, 2310 | { 2311 | "cell_type": "markdown", 2312 | "id": "a6a646e5-ae14-42de-a1af-7efb2e364a78", 2313 | "metadata": {}, 2314 | "source": [ 2315 | "#合并第7组alpha,命名为alphas_group7\n", 2316 | "alphas_group7 = pd.DataFrame()\n", 2317 | "for i in stktrd_code7:\n", 2318 | " alphas_group7 = pd.concat([alphas_group7, locals()['GET_alpha'+str(i)]])\n", 2319 | "\n", 2320 | "#重置索引\n", 2321 | "alphas_group7.reset_index(inplace = True, drop = True)" 2322 | ] 2323 | }, 2324 | { 2325 | "cell_type": "markdown", 2326 | "id": "6beee709-15d4-447e-862a-1a9e5dbe0766", 2327 | "metadata": {}, 2328 | "source": [ 2329 | "#合并第8组alpha,命名为alphas_group8\n", 2330 | "alphas_group8 = pd.DataFrame()\n", 2331 | "for i in stktrd_code8:\n", 2332 | " alphas_group8 = pd.concat([alphas_group8, locals()['GET_alpha'+str(i)]])\n", 2333 | "\n", 2334 | "#重置索引\n", 2335 | "alphas_group8.reset_index(inplace = True, drop = True)" 2336 | ] 2337 | }, 2338 | { 2339 | "cell_type": "markdown", 2340 | "id": "33f7d945-f1ff-4c20-8362-07c028ee8644", 2341 | "metadata": {}, 2342 | "source": [ 2343 | "#合并第9组alpha,命名为alphas_group9\n", 2344 | "alphas_group9 = pd.DataFrame()\n", 2345 | "for i in stktrd_code9:\n", 2346 | " alphas_group9 = pd.concat([alphas_group9, locals()['GET_alpha'+str(i)]])\n", 2347 | "\n", 2348 | "#重置索引\n", 2349 | "alphas_group9.reset_index(inplace = True, drop = True)" 2350 | ] 2351 | }, 2352 | { 2353 | "cell_type": "markdown", 2354 | "id": "90d0be95-d912-4841-bb03-ede9406959ae", 2355 | "metadata": {}, 2356 | "source": [ 2357 | "#合并第10组alpha,命名为alphas_group10\n", 2358 | "alphas_group10 = pd.DataFrame()\n", 2359 | "for i in stktrd_code10:\n", 2360 | " alphas_group10 = pd.concat([alphas_group10, locals()['GET_alpha'+str(i)]])\n", 2361 | "\n", 2362 | "#重置索引\n", 2363 | "alphas_group10.reset_index(inplace = True, drop = True)" 2364 | ] 2365 | }, 2366 | { 2367 | "cell_type": "markdown", 2368 | "id": "81cfcb2a-7395-47f5-a6ae-3af959d60a09", 2369 | "metadata": { 2370 | "tags": [] 2371 | }, 2372 | "source": [ 2373 | "##### 2.3写入h5" 2374 | ] 2375 | }, 2376 | { 2377 | "cell_type": "markdown", 2378 | "id": "4c89fd83-82ae-46e2-b7ee-d58967104ae5", 2379 | "metadata": {}, 2380 | "source": [ 2381 | "alphas_group1.to_hdf('alphas.h5',key='alphas_group1')" 2382 | ] 2383 | }, 2384 | { 2385 | "cell_type": "markdown", 2386 | "id": "b54a1e5b-7ab1-4a9d-b05b-ea7f90ebef51", 2387 | "metadata": {}, 2388 | "source": [ 2389 | "alphas_group2.to_hdf('alphas.h5',key='alphas_group2')" 2390 | ] 2391 | }, 2392 | { 2393 | "cell_type": "markdown", 2394 | "id": "474d80af-7ec7-4ba7-8686-24e8d3795066", 2395 | "metadata": {}, 2396 | "source": [ 2397 | "alphas_group3.to_hdf('alphas.h5',key='alphas_group3')" 2398 | ] 2399 | }, 2400 | { 2401 | "cell_type": "markdown", 2402 | "id": "3069c721-efd1-4ff4-b1f2-a2b00624affb", 2403 | "metadata": {}, 2404 | "source": [ 2405 | "alphas_group4.to_hdf('alphas.h5',key='alphas_group4')" 2406 | ] 2407 | }, 2408 | { 2409 | "cell_type": "markdown", 2410 | "id": "2265a1d2-7957-4c27-8cf1-eb7eac76feb4", 2411 | "metadata": {}, 2412 | "source": [ 2413 | "alphas_group5.to_hdf('alphas.h5',key='alphas_group5')" 2414 | ] 2415 | }, 2416 | { 2417 | "cell_type": "markdown", 2418 | "id": "25026398-491e-4271-b239-08191fabab9e", 2419 | "metadata": {}, 2420 | "source": [ 2421 | "alphas_group6.to_hdf('alphas.h5',key='alphas_group6')" 2422 | ] 2423 | }, 2424 | { 2425 | "cell_type": "markdown", 2426 | "id": "7f701c01-6740-4101-be9a-68627ccff818", 2427 | "metadata": { 2428 | "tags": [] 2429 | }, 2430 | "source": [ 2431 | "alphas_group7.to_hdf('alphas.h5',key='alphas_group7')" 2432 | ] 2433 | }, 2434 | { 2435 | "cell_type": "markdown", 2436 | "id": "083e14f7-a9a9-4992-9f87-76acb246f436", 2437 | "metadata": {}, 2438 | "source": [ 2439 | "alphas_group8.to_hdf('alphas.h5',key='alphas_group8')" 2440 | ] 2441 | }, 2442 | { 2443 | "cell_type": "markdown", 2444 | "id": "3d444748-f319-487a-af00-061c3f721e1b", 2445 | "metadata": {}, 2446 | "source": [ 2447 | "alphas_group9.to_hdf('alphas.h5',key='alphas_group9')" 2448 | ] 2449 | }, 2450 | { 2451 | "cell_type": "markdown", 2452 | "id": "c36ab14d-8bda-4893-8c66-c543cc048b98", 2453 | "metadata": {}, 2454 | "source": [ 2455 | "alphas_group10.to_hdf('alphas.h5',key='alphas_group10')" 2456 | ] 2457 | } 2458 | ], 2459 | "metadata": { 2460 | "kernelspec": { 2461 | "display_name": "Python 3 (ipykernel)", 2462 | "language": "python", 2463 | "name": "python3" 2464 | }, 2465 | "language_info": { 2466 | "codemirror_mode": { 2467 | "name": "ipython", 2468 | "version": 3 2469 | }, 2470 | "file_extension": ".py", 2471 | "mimetype": "text/x-python", 2472 | "name": "python", 2473 | "nbconvert_exporter": "python", 2474 | "pygments_lexer": "ipython3", 2475 | "version": "3.9.7" 2476 | } 2477 | }, 2478 | "nbformat": 4, 2479 | "nbformat_minor": 5 2480 | } 2481 | --------------------------------------------------------------------------------