# Repository layout (from the dump header):
# ├── 01-alpha因子编写
# ├── 02-基于万矿计算因子数据
# ├── 03-单因子IC测试
# ├── 04-单因子分层测试
# ├── 05-回测相关处理函数
# ├── 06-有效因子回测
# └── README.md
#
# /01-alpha因子编写:
# --------------------------------------------------------------------------------
from scipy.stats import rankdata
from dateutil import parser
import numpy as np
import numpy.linalg as la
import pandas as pd
from datetime import datetime
import scipy.stats as stats

# Plotting and the Wind terminal are optional at import time so that the pure
# pandas/numpy operators below can be imported (and unit-tested) on a machine
# without them.  When both are installed the behaviour is unchanged.
try:
    import matplotlib.pyplot as plt
except ImportError:
    plt = None
try:
    from WindPy import *  # WindQuant data API; provides the global `w`
except ImportError:
    w = None
else:
    w.start()


## Built-in operators
# All operators take DataFrames; `rank` works across columns (axis=1) and the
# rolling operators work down the rows.  The download script builds these
# frames with one column per stock and one row per trading day.

# Industry neutralisation
def neutral(data, ind):
    """Return the residuals of one cross-section regressed on industry dummies.

    data: Series of factor values, one entry per stock, in the same order as
          ``ind.Codes``.
    ind:  Wind ``wss`` result exposing ``Data`` (one list of industry labels)
          and ``Codes``.
    """
    data_ = pd.DataFrame(ind.Data, index=['INDUSTRY'], columns=ind.Codes).T
    data_med = pd.get_dummies(data_, columns=['INDUSTRY'])
    # Force float: recent pandas returns boolean dummies, and a boolean X'X
    # would lose the per-industry counts.
    X = np.array(data_med, dtype=float)
    y = data.values
    beta_ols = la.inv(X.T.dot(X)).dot(X.T).dot(y)
    residual = y - X.dot(beta_ols)
    return residual


def IndNeutralize(vwap, ind):
    """Industry-neutralise every row of ``vwap`` (NaNs are treated as 0).

    The industry classification is re-queried from Wind for the frame's own
    columns at the date of ``ind``, so the dummy matrix is always aligned with
    the data (the original relied on a global ``stocks`` list).
    """
    vwap_ = vwap.fillna(value=0)
    codes = list(vwap_.columns)
    ind = w.wss(codes, "industry_citic", "unit=1;tradeDate=" + ind.Times[0].strftime("%Y%m%d") + ";industryType=1")
    for i in range(len(vwap_)):
        vwap_.iloc[i] = neutral(vwap_.iloc[i], ind)
    return vwap_


# Rolling sum
def ts_sum(df, window):
    """Rolling sum over ``window`` rows."""
    return df.rolling(window).sum()


# Rolling mean
def sma(df, window):
    """Rolling mean over ``window`` rows."""
    return df.rolling(window).mean()


# Rolling standard deviation
def stddev(df, window):
    """Rolling standard deviation over ``window`` rows."""
    return df.rolling(window).std()


# Rolling correlation
def correlation(x, y, window):
    """Column-wise rolling correlation of ``x`` and ``y``."""
    return x.rolling(window).corr(y)


# Rolling covariance
def covariance(x, y, window):
    """Column-wise rolling covariance of ``x`` and ``y``."""
    return x.rolling(window).cov(y)


# Time-series rank within the past d days
def rolling_rank(na):
    """Rank of the last element of ``na`` among all elements of ``na``."""
    return rankdata(na)[-1]


def ts_rank(df, window):
    """Rolling rank of the latest value within the past ``window`` rows."""
    # raw=True hands the callback a plain ndarray on every pandas version.
    return df.rolling(window).apply(rolling_rank, raw=True)


# Time-series product over the past d days
def rolling_prod(na):
    """Product of all elements of ``na``."""
    return np.prod(na)


def product(df, window):
    """Rolling product over ``window`` rows."""
    return df.rolling(window).apply(rolling_prod, raw=True)


# Minimum over the past d days
def ts_min(df, window):
    """Rolling minimum over ``window`` rows."""
    return df.rolling(window).min()


# Maximum over the past d days
def ts_max(df, window):
    """Rolling maximum over ``window`` rows."""
    return df.rolling(window).max()


# Today's value minus the value d days ago
def delta(df, period):
    """Difference between each row and the row ``period`` rows earlier.

    Non-integer periods (the paper's fitted constants, e.g. 3.69741) are
    floored, because ``DataFrame.diff`` only accepts integers.
    """
    return df.diff(int(period))


# Value d days ago (lag)
def delay(df, period):
    """Lag ``df`` by ``period`` rows; non-integer periods are floored."""
    return df.shift(int(period))


# Cross-sectional ranking, returned as a percentile in (0, 1]
def rank(df):
    """Percentile rank of every value within its row."""
    return df.rank(pct=True, axis=1)


# Rescale each column so that its absolute values sum to k
def scale(df, k=1):
    """Multiply by ``k`` and divide each column by the sum of its absolute values.

    NOTE(review): the sum runs down each column (over time), not across
    stocks on one date -- confirm this is the intended normalisation.
    """
    return df.mul(k).div(np.abs(df).sum())


# Position of the maximum within the past d days
def ts_argmax(df, window):
    """1-based position of the maximum inside each rolling window."""
    return df.rolling(window).apply(np.argmax, raw=True) + 1


# Position of the minimum within the past d days
def ts_argmin(df, window):
    """1-based position of the minimum inside each rolling window."""
    return df.rolling(window).apply(np.argmin, raw=True) + 1


# Linearly decaying weighted moving average
def decay_linear(df, period):
    """Weighted moving average with weights 1..period (newest row heaviest).

    Missing values are forward-filled, then back-filled, then set to 0 on a
    copy; the caller's frame is never modified.  The first ``period - 1`` rows
    carry the (filled) input values unchanged.  The result is always float, so
    integer and boolean inputs are not truncated.
    """
    period = int(period)
    filled = df
    if df.isnull().values.any():
        filled = df.ffill().bfill().fillna(value=0)
    values = np.asarray(filled.values, dtype=float)
    na_lwma = np.zeros(values.shape, dtype=float)
    na_lwma[:period, :] = values[:period, :]  # seed the warm-up rows
    # Weights 1/divisor .. period/divisor, summing to 1
    divisor = period * (period + 1) / 2
    y = (np.arange(period) + 1) * 1.0 / divisor
    # Full windows start at row period - 1
    for row in range(period - 1, values.shape[0]):
        x = values[row - period + 1: row + 1, :]
        na_lwma[row, :] = np.dot(x.T, y)
    return pd.DataFrame(na_lwma, index=df.index, columns=df.columns)


## Factor functions
def alpha1(close, returns):
    """Alpha#1: centred rank of the 5-day argmax of squared (close, or 20-day return vol where returns < 0)."""
    x = close.copy()  # work on a copy: the caller's close panel must stay intact
    x[returns < 0] = stddev(returns, 20)
    alpha = rank(ts_argmax(x ** 2, 5)) - 0.5
    return alpha.fillna(value=0)


def alpha2(Open, close, volume):
    """Alpha#2: negative 6-day correlation of ranked 2-day log-volume change and ranked intraday return."""
    r1 = rank(delta(np.log(volume), 2))
    r2 = rank((close - Open) / Open)
    alpha = -1 * correlation(r1, r2, 6)
    return alpha.fillna(value=0)


def alpha3(Open, volume):
    """Alpha#3: negative 10-day correlation of ranked open and ranked volume."""
    r1 = rank(Open)
    r2 = rank(volume)
    alpha = -1 * correlation(r1, r2, 10)
    return alpha.replace([-np.inf, np.inf], 0).fillna(value=0)


def alpha4(low):
    """Alpha#4: negative 9-day time-series rank of the ranked low."""
    r = rank(low)
    alpha = -1 * ts_rank(r, 9)
    return alpha.fillna(value=0)


def alpha5(Open, vwap, close):
    """Alpha#5: rank(open - 10-day mean vwap) times -|rank(close - vwap)|."""
    alpha = (rank((Open - (ts_sum(vwap, 10) / 10))) * (-1 * abs(rank((close - vwap)))))
    return alpha.fillna(value=0)


def alpha6(Open, volume):
    """Alpha#6: negative 10-day correlation of open and volume."""
    alpha = -1 * correlation(Open, volume, 10)
    return alpha.replace([-np.inf, np.inf], 0).fillna(value=0)


def alpha7(volume, close):
    """Alpha#7: signed 60-day rank of |7-day close change|, or -1 where volume <= adv20."""
    adv20 = sma(volume, 20)
    alpha = -1 * ts_rank(abs(delta(close, 7)), 60) * np.sign(delta(close, 7))
    alpha[adv20 >= volume] = -1
    return alpha.fillna(value=0)


def alpha8(Open, returns):
    """Alpha#8: negative rank of the 10-day change in (5-day sum open * 5-day sum returns)."""
    x1 = (ts_sum(Open, 5) * ts_sum(returns, 5))
    x2 = delay((ts_sum(Open, 5) * ts_sum(returns, 5)), 10)
    alpha = -1 * rank(x1 - x2)
    return alpha.fillna(value=0)


def alpha9(close):
    """Alpha#9: 1-day close change, sign-flipped unless the last 5 changes share one sign."""
    delta_close = delta(close, 1)
    x1 = ts_min(delta_close, 5) > 0
    x2 = ts_max(delta_close, 5) < 0
    alpha = -1 * delta_close
    alpha[x1 | x2] = delta_close
    return alpha.fillna(value=0)


def alpha10(close):
    """Alpha#10: rank of the Alpha#9 construction with a 4-day window."""
    delta_close = delta(close, 1)
    x1 = ts_min(delta_close, 4) > 0
    x2 = ts_max(delta_close, 4) < 0
    x = -1 * delta_close
    x[x1 | x2] = delta_close
    alpha = rank(x)
    return alpha.fillna(value=0)


def alpha11(vwap, close, volume):
    """Alpha#11: (rank of 3-day max + rank of 3-day min of vwap - close) * rank of 3-day volume change."""
    x1 = rank(ts_max((vwap - close), 3))
    x2 = rank(ts_min((vwap - close), 3))
    x3 = rank(delta(volume, 3))
    alpha = (x1 + x2) * x3
    return alpha.fillna(value=0)


def alpha12(volume, close):
    """Alpha#12: sign of 1-day volume change times negative 1-day close change."""
    alpha = np.sign(delta(volume, 1)) * (-1 * delta(close, 1))
    return alpha.fillna(value=0)


def alpha13(volume, close):
    """Alpha#13: negative rank of the 5-day covariance of ranked close and ranked volume."""
    alpha = -1 * rank(covariance(rank(close), rank(volume), 5))
    return alpha.fillna(value=0)


def alpha14(Open, volume, returns):
    """Alpha#14: 10-day open/volume correlation times negative rank of 3-day returns change."""
    x1 = correlation(Open, volume, 10).replace([-np.inf, np.inf], 0).fillna(value=0)
    x2 = -1 * rank(delta(returns, 3))
    alpha = x1 * x2
    return alpha.fillna(value=0)


def alpha15(high, volume):
    """Alpha#15: negative 3-day sum of ranked 3-day correlation of ranked high and ranked volume."""
    x1 = correlation(rank(high), rank(volume), 3).replace([-np.inf, np.inf], 0).fillna(value=0)
    alpha = -1 * ts_sum(rank(x1), 3)
    return alpha.fillna(value=0)


def alpha16(high, volume):
    """Alpha#16: negative rank of the 5-day covariance of ranked high and ranked volume."""
    alpha = -1 * rank(covariance(rank(high), rank(volume), 5))
    return alpha.fillna(value=0)


def alpha17(volume, close):
    """Alpha#17: negative product of ranked close ts-rank, ranked close acceleration and ranked relative-volume ts-rank."""
    adv20 = sma(volume, 20)
    x1 = rank(ts_rank(close, 10))
    x2 = rank(delta(delta(close, 1), 1))
    x3 = rank(ts_rank((volume / adv20), 5))
    alpha = -1 * (x1 * x2 * x3)
    return alpha.fillna(value=0)


def alpha18(close, Open):
    """Alpha#18: negative rank of (5-day std of |close-open| + (close-open) + 10-day close/open correlation)."""
    x = correlation(close, Open, 10).replace([-np.inf, np.inf], 0).fillna(value=0)
    alpha = -1 * (rank((stddev(abs((close - Open)), 5) + (close - Open)) + x))
    return alpha.fillna(value=0)


def alpha19(close, returns):
    """Alpha#19: negative sign of the 7-day close move times (1 + rank of 1 + 250-day return sum)."""
    x1 = (-1 * np.sign((close - delay(close, 7)) + delta(close, 7)))
    x2 = (1 + rank(1 + ts_sum(returns, 250)))
    alpha = x1 * x2
    return alpha.fillna(value=0)


def alpha20(Open, high, close, low):
    """Alpha#20: negative product of ranks of open minus yesterday's high, close and low."""
    alpha = -1 * (rank(Open - delay(high, 1)) * rank(Open - delay(close, 1)) * rank(Open - delay(low, 1)))
    return alpha.fillna(value=0)


def alpha21(volume, close):
    """Alpha#21: +1 everywhere, -1 where (8-day mean + std of close < 2-day mean) or adv20/volume < 1."""
    x1 = sma(close, 8) + stddev(close, 8) < sma(close, 2)
    # NOTE(review): x2 is computed but never used below -- confirm whether the
    # lower-band branch was meant to take part in the condition.
    x2 = sma(close, 8) - stddev(close, 8) > sma(close, 2)
    x3 = sma(volume, 20) / volume < 1
    alpha = pd.DataFrame(np.ones_like(close), index=close.index, columns=close.columns)
    alpha[x1 | x3] = -1 * alpha
    return alpha


def alpha22(high, volume, close):
    """Alpha#22: negative 5-day change of the 5-day high/volume correlation times rank of 20-day close std."""
    x = correlation(high, volume, 5).replace([-np.inf, np.inf], 0).fillna(value=0)
    alpha = -1 * delta(x, 5) * rank(stddev(close, 20))
    return alpha.fillna(value=0)


def alpha23(high, close):
    """Alpha#23: negative 2-day change of high where high exceeds its 20-day mean, else 0."""
    x = sma(high, 20) < high
    alpha = pd.DataFrame(np.zeros_like(close), index=close.index, columns=close.columns)
    alpha[x] = -1 * delta(high, 2).fillna(value=0)
    return alpha


def alpha24(close):
    """Alpha#24: -(close - 100-day min) where the 100-day mean's relative change <= 5%, else -3-day close change."""
    x = delta(sma(close, 100), 100) / delay(close, 100) <= 0.05
    alpha = -1 * delta(close, 3)
    alpha[x] = -1 * (close - ts_min(close, 100))
    return alpha.fillna(value=0)


def alpha25(volume, returns, vwap, high, close):
    """Alpha#25: rank of (-returns * adv20 * vwap * (high - close))."""
    adv20 = sma(volume, 20)
    alpha = rank((((-1 * returns) * adv20) * vwap) * (high - close))
    return alpha.fillna(value=0)


def alpha26(volume, high):
    """Alpha#26: negative 3-day max of the 5-day correlation of volume and high ts-ranks."""
    x = correlation(ts_rank(volume, 5), ts_rank(high, 5), 5).replace([-np.inf, np.inf], 0).fillna(value=0)
    alpha = -1 * ts_max(x, 3)
    return alpha.fillna(value=0)


def alpha27(volume, vwap):
    """Alpha#27: -1 where the ranked smoothed volume/vwap rank-correlation exceeds 0.5, else +1."""
    r = rank((sma(correlation(rank(volume), rank(vwap), 6), 2) / 2.0))
    # Evaluate both masks on the untouched ranks: assigning -1 first and then
    # testing `<= 0.5` on the same frame would turn every -1 back into +1.
    above = r > 0.5
    not_above = r <= 0.5
    alpha = r.copy()
    alpha[above] = -1
    alpha[not_above] = 1
    return alpha.fillna(value=0)


def alpha28(volume, high, low, close):
    """Alpha#28: scaled (5-day adv20/low correlation + mid price - close)."""
    adv20 = sma(volume, 20)
    x = correlation(adv20, low, 5).replace([-np.inf, np.inf], 0).fillna(value=0)
    alpha = scale(((x + ((high + low) / 2)) - close))
    return alpha.fillna(value=0)


def alpha29(close, returns):
    """Alpha#29: 5-day min of a nested rank/scale/log transform of 5-day close change plus ts-rank of lagged -returns."""
    x1 = ts_min(rank(rank(scale(np.log(ts_sum(rank(rank(-1 * rank(delta((close - 1), 5)))), 2))))), 5)
    x2 = ts_rank(delay((-1 * returns), 6), 5)
    alpha = x1 + x2
    return alpha.fillna(value=0)


def alpha30(close, volume):
    """Alpha#30: (1 - rank of summed signs of the last 3 close changes) * 5-day / 20-day volume sum."""
    delta_close = delta(close, 1)
    x = np.sign(delta_close) + np.sign(delay(delta_close, 1)) + np.sign(delay(delta_close, 2))
    alpha = ((1.0 - rank(x)) * ts_sum(volume, 5)) / ts_sum(volume, 20)
    return alpha.fillna(value=0)


def alpha31(close, low, volume):
    """Alpha#31: triple-ranked decayed -rank of 10-day close change + rank of -3-day change + sign of scaled adv20/low correlation."""
    adv20 = sma(volume, 20)
    x1 = rank(rank(rank(decay_linear((-1 * rank(rank(delta(close, 10)))), 10))))
    x2 = rank((-1 * delta(close, 3)))
    x3 = np.sign(scale(correlation(adv20, low, 12).replace([-np.inf, np.inf], 0).fillna(value=0)))
    alpha = x1 + x2 + x3
    return alpha.fillna(value=0)


def alpha32(close, vwap):
    """Alpha#32: scaled (7-day mean close - close) + 20 * scaled 230-day correlation of vwap and 5-day-lagged close."""
    x = correlation(vwap, delay(close, 5), 230).replace([-np.inf, np.inf], 0).fillna(value=0)
    alpha = scale(((sma(close, 7)) - close)) + 20 * scale(x)
    return alpha.fillna(value=0)


def alpha33(Open, close):
    """Alpha#33: rank of (open / close - 1)."""
    alpha = rank(-1 + (Open / close))
    return alpha


def alpha34(close, returns):
    """Alpha#34: rank of (2 - rank of 2-day/5-day return-vol ratio - rank of 1-day close change)."""
    x = (stddev(returns, 2) / stddev(returns, 5)).fillna(value=0)
    alpha = rank(2 - rank(x) - rank(delta(close, 1)))
    return alpha.fillna(value=0)


def alpha35(volume, close, high, low, returns):
    """Alpha#35: ts-rank of volume * (1 - ts-rank of close+high-low) * (1 - ts-rank of returns)."""
    x1 = ts_rank(volume, 32)
    x2 = 1 - ts_rank(close + high - low, 16)
    x3 = 1 - ts_rank(returns, 32)
    alpha = (x1 * x2 * x3).fillna(value=0)
    return alpha


def alpha36(Open, close, volume, returns, vwap):
    """Alpha#36: weighted sum of five ranked price/volume terms (weights 2.21, 0.7, 0.73, 1, 0.6)."""
    adv20 = sma(volume, 20)
    x1 = 2.21 * rank(correlation((close - Open), delay(volume, 1), 15))
    x2 = 0.7 * rank((Open - close))
    x3 = 0.73 * rank(ts_rank(delay((-1 * returns), 6), 5))
    x4 = rank(abs(correlation(vwap, adv20, 6)))
    x5 = 0.6 * rank((sma(close, 200) - Open) * (close - Open))
    alpha = x1 + x2 + x3 + x4 + x5
    return alpha.fillna(value=0)


def alpha37(Open, close):
    """Alpha#37: rank of 200-day correlation of lagged (open - close) with close + rank of (open - close)."""
    alpha = rank(correlation(delay(Open - close, 1), close, 200)) + rank(Open - close)
    return alpha.fillna(value=0)


def alpha38(close, Open):
    """Alpha#38: negative rank of 10-day ts-rank of open times rank of close/open."""
    x = (close / Open).replace([-np.inf, np.inf], 0).fillna(value=0)
    alpha = -1 * rank(ts_rank(Open, 10)) * rank(x)
    return alpha.fillna(value=0)


def alpha39(volume, close, returns):
    """Alpha#39: -rank of 7-day close change * (1 - rank of decayed relative volume) * (1 + rank of 250-day return sum)."""
    adv20 = sma(volume, 20)
    x = -1 * rank(delta(close, 7)) * (1 - rank(decay_linear((volume / adv20), 9)))
    alpha = x * (1 + rank(ts_sum(returns, 250)))
    return alpha.fillna(value=0)


def alpha40(high, volume):
    """Alpha#40: negative rank of 10-day std of high times 10-day high/volume correlation."""
    alpha = -1 * rank(stddev(high, 10)) * correlation(high, volume, 10)
    return alpha.fillna(value=0)


def alpha41(high, low, vwap):
    """Alpha#41: geometric mean of high and low minus vwap."""
    alpha = pow((high * low), 0.5) - vwap
    return alpha


def alpha42(vwap, close):
    """Alpha#42: rank(vwap - close) / rank(vwap + close)."""
    alpha = rank((vwap - close)) / rank((vwap + close))
    return alpha


def alpha43(volume, close):
    """Alpha#43: 20-day ts-rank of relative volume times 8-day ts-rank of -7-day close change."""
    adv20 = sma(volume, 20)
    alpha = ts_rank(volume / adv20, 20) * ts_rank((-1 * delta(close, 7)), 8)
    return alpha.fillna(value=0)


def alpha44(high, volume):
    """Alpha#44: negative 5-day correlation of high and ranked volume."""
    alpha = -1 * correlation(high, rank(volume), 5).replace([-np.inf, np.inf], 0).fillna(value=0)
    return alpha


def alpha45(close, volume):
    """Alpha#45: negative product of ranked lagged mean close, 2-day close/volume correlation and ranked 5/20-day sum correlation."""
    x = correlation(close, volume, 2).replace([-np.inf, np.inf], 0).fillna(value=0)
    alpha = -1 * (rank(sma(delay(close, 5), 20)) * x * rank(correlation(ts_sum(close, 5), ts_sum(close, 20), 2)))
    return alpha.fillna(value=0)


def alpha46(close):
    """Alpha#46: -1-day close change, overridden by +1 (slope diff < 0) and -1 (slope diff > 0.25)."""
    x = ((delay(close, 20) - delay(close, 10)) / 10) - ((delay(close, 10) - close) / 10)
    alpha = (-1 * (close - delay(close, 1)))
    alpha[x < 0] = 1
    alpha[x > 0.25] = -1
    return alpha.fillna(value=0)


def alpha47(volume, close, high, vwap):
    """Alpha#47: rank(1/close)*volume/adv20 * high*rank(high-close)/5-day mean high - rank of 5-day vwap change."""
    adv20 = sma(volume, 20)
    alpha = ((rank((1 / close)) * volume) / adv20) * ((high * rank((high - close))) / sma(high, 5)) - rank((vwap - delay(vwap, 5)))
    return alpha.fillna(value=0)


def alpha49(close):
    """Alpha#49: -1-day close change, or +1 where the 10-day slope difference is below -0.1."""
    x = (((delay(close, 20) - delay(close, 10)) / 10) - ((delay(close, 10) - close) / 10))
    alpha = (-1 * delta(close, 1))
    alpha[x < -0.1] = 1
    return alpha.fillna(value=0)


def alpha50(volume, vwap):
    """Alpha#50: negative 5-day max of ranked 5-day correlation of ranked volume and ranked vwap."""
    alpha = -1 * ts_max(rank(correlation(rank(volume), rank(vwap), 5)), 5)
    return alpha.fillna(value=0)


def alpha51(close):
    """Alpha#51: -1-day close change, or +1 where the 10-day slope difference is below -0.05."""
    inner = (((delay(close, 20) - delay(close, 10)) / 10) - ((delay(close, 10) - close) / 10))
    alpha = (-1 * delta(close, 1))
    alpha[inner < -0.05] = 1
    return alpha.fillna(value=0)


def alpha52(returns, volume, low):
    """Alpha#52: -5-day change of 5-day min low * rank of (240-day - 20-day return sum)/220 * 5-day ts-rank of volume."""
    x = rank(((ts_sum(returns, 240) - ts_sum(returns, 20)) / 220))
    alpha = -1 * delta(ts_min(low, 5), 5) * x * ts_rank(volume, 5)
    return alpha.fillna(value=0)


def alpha53(close, high, low):
    """Alpha#53: negative 9-day change of ((close-low) - (high-close)) / (close-low)."""
    alpha = -1 * delta((((close - low) - (high - close)) / (close - low).replace(0, 0.0001)), 9)
    return alpha.fillna(value=0)


def alpha54(Open, close, high, low):
    """Alpha#54: -(low - close) * open^5 / ((low - high) * close^5)."""
    x = (low - high).replace(0, -0.0001)
    alpha = -1 * (low - close) * (Open ** 5) / (x * (close ** 5))
    return alpha


def alpha55(high, low, close, volume):
    """Alpha#55: negative 6-day correlation of ranked 12-day stochastic position and ranked volume."""
    x = (close - ts_min(low, 12)) / (ts_max(high, 12) - ts_min(low, 12)).replace(0, 0.0001)
    alpha = -1 * correlation(rank(x), rank(volume), 6).replace([-np.inf, np.inf], 0).fillna(value=0)
    return alpha


def alpha56(returns, cap):
    """Alpha#56: negative rank of (10-day mean return / smoothed 2-day mean return) times rank of returns*cap."""
    alpha = 0 - (1 * (rank((sma(returns, 10) / sma(sma(returns, 2), 3))) * rank((returns * cap))))
    return alpha.fillna(value=0)


def alpha57(close, vwap):
    """Alpha#57: -(close - vwap) / 2-day decayed rank of the 30-day argmax of close."""
    alpha = 0 - 1 * ((close - vwap) / decay_linear(rank(ts_argmax(close, 30)), 2))
    return alpha.fillna(value=0)


def alpha60(close, high, low, volume):
    """Alpha#60: -(2 * scaled rank of volume-weighted close location - scaled rank of 10-day argmax of close)."""
    x = ((close - low) - (high - close)) * volume / (high - low).replace(0, 0.0001)
    alpha = - ((2 * scale(rank(x))) - scale(rank(ts_argmax(close, 10))))
    return alpha.fillna(value=0)


def alpha61(volume, vwap):
    """Alpha#61: boolean frame, rank(vwap - 16-day min vwap) < rank of 18-day vwap/adv180 correlation."""
    adv180 = sma(volume, 180)
    alpha = rank((vwap - ts_min(vwap, 16))) < rank(correlation(vwap, adv180, 18))
    return alpha


def alpha62(volume, high, low, Open, vwap):
    """Alpha#62: -1 where ranked vwap/adv20-sum correlation < rank of an open-vs-mid/high rank comparison, else 0."""
    adv20 = sma(volume, 20)
    x1 = rank(correlation(vwap, ts_sum(adv20, 22), 10))
    x2 = rank(((rank(Open) + rank(Open)) < (rank(((high + low) / 2)) + rank(high))))
    alpha = x1 < x2
    return alpha * -1


def alpha64(high, low, Open, volume, vwap):
    """Alpha#64: -1 where ranked correlation of blended open/low sum with adv120 sum < ranked change of blended mid/vwap, else 0."""
    adv120 = sma(volume, 120)
    x1 = rank(correlation(ts_sum(((Open * 0.178404) + (low * (1 - 0.178404))), 13), ts_sum(adv120, 13), 17))
    x2 = rank(delta(((((high + low) / 2) * 0.178404) + (vwap * (1 - 0.178404))), 3.69741))
    alpha = x1 < x2
    return alpha * -1


def alpha65(volume, vwap, Open):
    """Alpha#65: -1 where ranked correlation of blended open/vwap with adv60 sum < rank(open - 14-day min open), else 0."""
    adv60 = sma(volume, 60)
    x1 = rank(correlation(((Open * 0.00817205) + (vwap * (1 - 0.00817205))), ts_sum(adv60, 9), 6))
    x2 = rank((Open - ts_min(Open, 14)))
    alpha = x1 < x2
    return alpha * -1


def alpha66(vwap, low, Open, high):
    """Alpha#66: -(rank of decayed 4-day vwap change + ts-rank of decayed (low - vwap)/(open - mid))."""
    x1 = rank(decay_linear(delta(vwap, 4), 7))
    x2 = (((low * 0.96633) + (low * (1 - 0.96633))) - vwap) / (Open - ((high + low) / 2))
    alpha = (x1 + ts_rank(decay_linear(x2, 11), 7)) * -1
    return alpha.fillna(value=0)


def alpha68(volume, high, close, low):
    """Alpha#68: -1 where ts-rank of ranked high/adv15 correlation < ranked change of blended close/low, else 0."""
    adv15 = sma(volume, 15)
    x1 = ts_rank(correlation(rank(high), rank(adv15), 9), 14)
    x2 = rank(delta(((close * 0.518371) + (low * (1 - 0.518371))), 1.06157))
    alpha = x1 < x2
    return alpha * -1


def alpha71(volume, close, low, Open, vwap):
    """Alpha#71: element-wise max of two decayed ts-rank terms (close/adv180 correlation; squared rank of low+open-2*vwap)."""
    adv180 = sma(volume, 180)
    x1 = ts_rank(decay_linear(correlation(ts_rank(close, 3), ts_rank(adv180, 12), 18), 4), 16)
    x2 = ts_rank(decay_linear((rank(((low + Open) - (vwap + vwap))).pow(2)), 16), 4)
    alpha = x1
    alpha[x1 < x2] = x2
    return alpha.fillna(value=0)


def alpha72(volume, high, low, vwap):
    """Alpha#72: ratio of ranked decayed mid/adv40 correlation to ranked decayed vwap/volume ts-rank correlation."""
    adv40 = sma(volume, 40)
    x1 = rank(decay_linear(correlation(((high + low) / 2), adv40, 9), 10))
    x2 = rank(decay_linear(correlation(ts_rank(vwap, 4), ts_rank(volume, 19), 7), 3))
    alpha = (x1 / x2.replace(0, 0.0001)).fillna(value=0)
    return alpha


def alpha73(vwap, Open, low):
    """Alpha#73: negative element-wise max of ranked decayed vwap change and ts-rank of decayed -relative blended open/low change."""
    x1 = rank(decay_linear(delta(vwap, 5), 3))
    x2 = delta(((Open * 0.147155) + (low * (1 - 0.147155))), 2) / ((Open * 0.147155) + (low * (1 - 0.147155)))
    x3 = ts_rank(decay_linear((x2 * -1), 3), 17)
    alpha = x1
    alpha[x1 < x3] = x3
    return -1 * alpha.fillna(value=0)


def alpha74(volume, close, high, vwap):
    """Alpha#74: -1 where ranked close/adv30-sum correlation < ranked correlation of ranked blended high/vwap and ranked volume, else 0."""
    adv30 = sma(volume, 30)
    x1 = rank(correlation(close, ts_sum(adv30, 37), 15))
    x2 = rank(correlation(rank(((high * 0.0261661) + (vwap * (1 - 0.0261661)))), rank(volume), 11))
    alpha = x1 < x2
    return alpha * -1


def alpha75(volume, vwap, low):
    """Alpha#75: boolean frame, ranked vwap/volume correlation < ranked correlation of ranked low and ranked adv50."""
    adv50 = sma(volume, 50)
    alpha = rank(correlation(vwap, volume, 4)) < rank(correlation(rank(low), rank(adv50), 12))
    return alpha


def alpha77(volume, high, low, vwap):
    """Alpha#77: element-wise min of ranked decayed (mid - vwap) and ranked decayed mid/adv40 correlation."""
    adv40 = sma(volume, 40)
    x1 = rank(decay_linear(((((high + low) / 2) + high) - (vwap + high)), 20))
    x2 = rank(decay_linear(correlation(((high + low) / 2), adv40, 3), 6))
    alpha = x1
    alpha[x1 > x2] = x2
    return alpha.fillna(value=0)


def alpha78(volume, low, vwap):
    """Alpha#78: ranked correlation of blended low/vwap sum with adv40 sum, raised to ranked vwap/volume rank correlation."""
    adv40 = sma(volume, 40)
    x1 = rank(correlation(ts_sum(((low * 0.352233) + (vwap * (1 - 0.352233))), 20), ts_sum(adv40, 20), 7))
    x2 = rank(correlation(rank(vwap), rank(volume), 6))
    alpha = x1.pow(x2)
    return alpha.fillna(value=0)


def alpha81(volume, vwap):
    """Alpha#81: -1 where ranked log-product term < ranked vwap/volume rank correlation, else 0."""
    adv10 = sma(volume, 10)
    x1 = rank(np.log(product(rank((rank(correlation(vwap, ts_sum(adv10, 50), 8)).pow(4))), 15)))
    x2 = rank(correlation(rank(vwap), rank(volume), 5))
    alpha = x1 < x2
    return alpha * -1


def alpha83(high, low, close, volume, vwap=None):
    """Alpha#83: ranked lagged range ratio * double-ranked volume / (range ratio / (vwap - close)).

    ``vwap`` was previously read from an undeclared module-level variable.
    It is now an optional argument; when omitted the module-level ``vwap`` is
    still used so existing four-argument calls keep working.
    """
    if vwap is None:
        if 'vwap' not in globals():
            raise NameError("alpha83 needs `vwap`; pass it as the fifth argument")
        vwap = globals()['vwap']
    x = rank(delay(((high - low) / (ts_sum(close, 5) / 5)), 2)) * rank(rank(volume))
    alpha = x / (((high - low) / (ts_sum(close, 5) / 5)) / (vwap - close))
    return alpha.fillna(value=0)


def alpha84(vwap, close):
    """Alpha#84: 21-day ts-rank of (vwap - 15-day max vwap) raised to the 5-day close change."""
    alpha = pow(ts_rank((vwap - ts_max(vwap, 15)), 21), delta(close, 5))
    return alpha.fillna(value=0)


def alpha85(volume, high, close, low):
    """Alpha#85: ranked blended high/close vs adv30 correlation raised to ranked mid/volume ts-rank correlation."""
    adv30 = sma(volume, 30)
    x1 = rank(correlation(((high * 0.876703) + (close * (1 - 0.876703))), adv30, 10))
    alpha = x1.pow(rank(correlation(ts_rank(((high + low) / 2), 4), ts_rank(volume, 10), 7)))
    return alpha.fillna(value=0)


def alpha86(volume, close, Open, vwap):
    """Alpha#86: -1 where ts-rank of close/smoothed-adv20 correlation < rank(close - vwap), else 0."""
    adv20 = sma(volume, 20)
    x1 = ts_rank(correlation(close, sma(adv20, 15), 6), 20)
    x2 = rank(((Open + close) - (vwap + Open)))
    alpha = x1 < x2
    return alpha * -1


def alpha88(volume, Open, low, high, close):
    """Alpha#88: element-wise min of ranked decayed rank spread and ts-rank of decayed close/adv60 ts-rank correlation."""
    adv60 = sma(volume, 60)
    x1 = rank(decay_linear(((rank(Open) + rank(low)) - (rank(high) + rank(close))), 8))
    x2 = ts_rank(decay_linear(correlation(ts_rank(close, 8), ts_rank(adv60, 21), 8), 7), 3)
    alpha = x1
    alpha[x1 > x2] = x2
    return alpha.fillna(value=0)


def alpha92(volume, high, low, close, Open):
    """Alpha#92: element-wise min of ts-rank of a decayed price-comparison flag and ts-rank of decayed low/adv30 rank correlation."""
    adv30 = sma(volume, 30)
    x1 = ts_rank(decay_linear(((((high + low) / 2) + close) < (low + Open)), 15), 19)
    x2 = ts_rank(decay_linear(correlation(rank(low), rank(adv30), 8), 7), 7)
    alpha = x1
    alpha[x1 > x2] = x2
    return alpha.fillna(value=0)


def alpha94(volume, vwap):
    """Alpha#94: -(rank(vwap - 12-day min vwap) raised to the ts-rank of vwap/adv60 ts-rank correlation)."""
    adv60 = sma(volume, 60)
    x = rank((vwap - ts_min(vwap, 12)))
    alpha = x.pow(ts_rank(correlation(ts_rank(vwap, 20), ts_rank(adv60, 4), 18), 3)) * -1
    return alpha.fillna(value=0)


def alpha95(volume, high, low, Open):
    """Alpha#95: boolean frame, rank(open - 12-day min open) < ts-rank of ranked^5 smoothed mid/adv40 correlation."""
    adv40 = sma(volume, 40)
    x = ts_rank((rank(correlation(sma(((high + low) / 2), 19), sma(adv40, 19), 13)).pow(5)), 12)
    alpha = rank((Open - ts_min(Open, 12))) < x
    return alpha.fillna(value=0)


def alpha96(volume, vwap, close):
    """Alpha#96: element-wise max of two decayed ts-rank terms (vwap/volume rank correlation; argmax of close/adv60 correlation)."""
    adv60 = sma(volume, 60)
    x1 = ts_rank(decay_linear(correlation(rank(vwap), rank(volume), 4), 4), 8)
    x2 = ts_rank(decay_linear(ts_argmax(correlation(ts_rank(close, 7), ts_rank(adv60, 4), 4), 13), 14), 13)
    alpha = x1
    alpha[x1 < x2] = x2
    return alpha.fillna(value=0)


def alpha98(volume, Open, vwap):
    """Alpha#98: ranked decayed vwap/smoothed-adv5 correlation minus ranked decayed ts-rank of argmin of open/adv15 rank correlation."""
    adv5 = sma(volume, 5)
    adv15 = sma(volume, 15)
    x1 = rank(decay_linear(correlation(vwap, sma(adv5, 26), 5), 7))
    alpha = x1 - rank(decay_linear(ts_rank(ts_argmin(correlation(rank(Open), rank(adv15), 21), 9), 7), 8))
    return alpha.fillna(value=0)


def alpha99(volume, high, low):
    """Alpha#99: -1 where ranked mid-sum/adv60-sum correlation < ranked low/volume correlation, else 0."""
    adv60 = sma(volume, 60)
    x1 = rank(correlation(ts_sum(((high + low) / 2), 20), ts_sum(adv60, 20), 9))
    x2 = rank(correlation(low, volume, 6))
    alpha = x1 < x2
    return alpha * -1


def alpha101(close, Open, high, low):
    """Alpha#101: (close - open) / (high - low + 0.001)."""
    alpha = (close - Open) / ((high - low) + 0.001)
    return alpha

def _global_fallback(name):
    """Return the module-level variable ``name``, as the original code did implicitly.

    Several factors below used ``volume`` or ``ind`` without declaring them as
    parameters.  They now accept the value as an optional keyword and fall
    back to this lookup, so existing calls keep working.
    """
    if name not in globals():
        raise NameError("name %r is not defined; pass it explicitly" % name)
    return globals()[name]


def alpha48(close, ind):
    """Alpha#48: industry-neutralised (250-day correlation of consecutive close changes * change / close) over the 250-day sum of squared returns."""
    r1 = (correlation(delta(close, 1), delta(delay(close, 1), 1), 250) * delta(close, 1)) / close
    r2 = ts_sum((pow((delta(close, 1) / delay(close, 1)), 2)), 250)
    alpha = IndNeutralize(r1, ind) / r2
    return alpha.fillna(value=0)


def alpha58(vwap, volume, ind):
    """Alpha#58: negative ts-rank of the decayed 4-day correlation of industry-neutralised vwap and volume."""
    x = IndNeutralize(vwap, ind)
    alpha = -1 * ts_rank(decay_linear(correlation(x, volume, 4), 8), 6)
    return alpha.fillna(value=0)


def alpha59(vwap, volume, ind):
    """Alpha#59: as Alpha#58 with a 16-day decay and an 8-day ts-rank."""
    x = IndNeutralize(((vwap * 0.728317) + (vwap * (1 - 0.728317))), ind)
    alpha = -1 * ts_rank(decay_linear(correlation(x, volume, 4), 16), 8)
    return alpha.fillna(value=0)


def alpha63(volume, close, vwap, Open, ind):
    """Alpha#63: -(ranked decayed change of neutralised close - ranked decayed correlation of blended vwap/open with adv180 sum)."""
    adv180 = sma(volume, 180).fillna(value=0)
    r1 = rank(decay_linear(delta(IndNeutralize(close, ind), 2), 8))
    r2 = rank(decay_linear(correlation(((vwap * 0.318108) + (Open * (1 - 0.318108))), ts_sum(adv180, 37), 14), 12))
    alpha = -1 * (r1 - r2)
    return alpha.fillna(value=0)


def alpha67(volume, vwap, high, ind):
    """Alpha#67: -(rank(high - 2-day min high) raised to ranked correlation of neutralised vwap and neutralised adv20)."""
    adv20 = sma(volume, 20)
    r = rank(correlation(IndNeutralize(vwap, ind), IndNeutralize(adv20, ind), 6))
    alpha = pow(rank(high - ts_min(high, 2)), r) * -1
    return alpha.fillna(value=0)


def alpha69(volume, vwap, ind, close):
    """Alpha#69: -(ranked 5-day max of neutralised vwap change raised to ts-rank of blended close/vwap vs adv20 correlation)."""
    adv20 = sma(volume, 20)
    r1 = rank(ts_max(delta(IndNeutralize(vwap, ind), 3), 5))
    r2 = ts_rank(correlation(((close * 0.490655) + (vwap * (1 - 0.490655))), adv20, 5), 9)
    alpha = pow(r1, r2) * -1
    return alpha.fillna(value=0)


def alpha70(close, ind, vwap, volume=None):
    """Alpha#70: -(rank of 1-day vwap change raised to ts-rank of neutralised close vs adv50 correlation).

    ``volume`` is optional for backward compatibility; when omitted the
    module-level ``volume`` is used.
    """
    if volume is None:
        volume = _global_fallback('volume')
    adv50 = sma(volume, 50).fillna(value=0)
    r = ts_rank(correlation(IndNeutralize(close, ind), adv50, 18), 18)
    alpha = pow(rank(delta(vwap, 1)), r) * -1
    return alpha.fillna(value=0)


def alpha76(volume, vwap, low, ind):
    """Alpha#76: element-wise max of ranked decayed vwap change and a nested ts-rank of neutralised low vs adv81 correlation."""
    adv81 = sma(volume, 81).fillna(value=0)
    r1 = rank(decay_linear(delta(vwap, 1), 12))
    r2 = ts_rank(decay_linear(ts_rank(correlation(IndNeutralize(low, ind), adv81, 8), 20), 17), 19)
    alpha = r1
    alpha[r1 < r2] = r2
    return alpha.fillna(value=0)


def alpha79(volume, close, Open, ind, vwap):
    """Alpha#79: -1 where ranked change of neutralised blended close/open < ranked vwap/adv150 ts-rank correlation, else 0."""
    adv150 = sma(volume, 150).fillna(value=0)
    r1 = rank(delta(IndNeutralize(((close * 0.60733) + (Open * (1 - 0.60733))), ind), 1))
    r2 = rank(correlation(ts_rank(vwap, 4), ts_rank(adv150, 9), 15))
    alpha = (r1 < r2) * -1
    return alpha.fillna(value=0)


def alpha80(Open, high, ind, volume=None):
    """Alpha#80: -(ranked sign of neutralised blended open/high change raised to ts-rank of high/adv10 correlation).

    ``volume`` is optional for backward compatibility; when omitted the
    module-level ``volume`` is used.
    """
    if volume is None:
        volume = _global_fallback('volume')
    adv10 = sma(volume, 10)
    r1 = rank(np.sign(delta(IndNeutralize(((Open * 0.868128) + (high * (1 - 0.868128))), ind), 4)))
    r2 = ts_rank(correlation(high, adv10, 5), 6)
    alpha = pow(r1, r2) * -1
    return alpha.fillna(value=0)


def alpha82(Open, volume, ind):
    """Alpha#82: negative element-wise min of ranked decayed open change and ts-rank of decayed neutralised-volume/open correlation."""
    r1 = rank(decay_linear(delta(Open, 1), 15))
    r2 = ts_rank(decay_linear(correlation(IndNeutralize(volume, ind), ((Open * 0.634196) + (Open * (1 - 0.634196))), 17), 7), 13)
    alpha = r1
    alpha[r1 > r2] = r2
    return -1 * alpha.fillna(value=0)


def alpha87(volume, close, vwap, ind=None):
    """Alpha#87: negative element-wise max of ranked decayed blended close/vwap change and ts-rank of decayed |neutralised adv81 / close correlation|.

    ``ind`` is optional for backward compatibility; when omitted the
    module-level ``ind`` is used.
    """
    if ind is None:
        ind = _global_fallback('ind')
    adv81 = sma(volume, 81).fillna(value=0)
    r1 = rank(decay_linear(delta(((close * 0.369701) + (vwap * (1 - 0.369701))), 2), 3))
    r2 = ts_rank(decay_linear(abs(correlation(IndNeutralize(adv81, ind), close, 13)), 5), 14)
    alpha = r1
    alpha[r1 < r2] = r2
    return -1 * alpha.fillna(value=0)


def alpha89(low, vwap, ind, volume=None):
    """Alpha#89: ts-rank of decayed low/adv10 correlation minus ts-rank of decayed change of neutralised vwap.

    ``volume`` is optional for backward compatibility; when omitted the
    module-level ``volume`` is used.
    """
    if volume is None:
        volume = _global_fallback('volume')
    adv10 = sma(volume, 10)
    r1 = ts_rank(decay_linear(correlation(((low * 0.967285) + (low * (1 - 0.967285))), adv10, 7), 6), 4)
    r2 = ts_rank(decay_linear(delta(IndNeutralize(vwap, ind), 3), 10), 15)
    alpha = r1 - r2
    return alpha.fillna(value=0)


def alpha90(volume, close, ind, low):
    """Alpha#90: -(rank(close - 5-day max close) raised to ts-rank of neutralised adv40 vs low correlation)."""
    adv40 = sma(volume, 40).fillna(value=0)
    r1 = rank((close - ts_max(close, 5)))
    r2 = ts_rank(correlation(IndNeutralize(adv40, ind), low, 5), 3)
    alpha = pow(r1, r2) * -1
    return alpha.fillna(value=0)


def alpha91(close, ind, volume, vwap):
    """Alpha#91: -(ts-rank of doubly decayed neutralised-close/volume correlation - ranked decayed vwap/adv30 correlation)."""
    adv30 = sma(volume, 30)
    r1 = ts_rank(decay_linear(decay_linear(correlation(IndNeutralize(close, ind), volume, 10), 16), 4), 5)
    r2 = rank(decay_linear(correlation(vwap, adv30, 4), 3))
    alpha = (r1 - r2) * -1
    return alpha.fillna(value=0)


def alpha93(vwap, ind, volume, close):
    """Alpha#93: ts-rank of decayed neutralised-vwap/adv81 correlation over ranked decayed blended close/vwap change."""
    adv81 = sma(volume, 81).fillna(value=0)
    r1 = ts_rank(decay_linear(correlation(IndNeutralize(vwap, ind), adv81, 17), 20), 8)
    r2 = rank(decay_linear(delta(((close * 0.524434) + (vwap * (1 - 0.524434))), 3), 16))
    alpha = r1 / r2
    return alpha.fillna(value=0)


def alpha97(volume, low, vwap, ind):
    """Alpha#97: -(ranked decayed change of neutralised blended low/vwap - nested ts-rank of low/adv60 ts-rank correlation)."""
    adv60 = sma(volume, 60).fillna(value=0)
    r1 = rank(decay_linear(delta(IndNeutralize(((low * 0.721001) + (vwap * (1 - 0.721001))), ind), 3), 20))
    r2 = ts_rank(decay_linear(ts_rank(correlation(ts_rank(low, 8), ts_rank(adv60, 17), 5), 19), 16), 7)
    alpha = (r1 - r2) * -1
    return alpha.fillna(value=0)


def alpha100(volume, close, low, high, ind):
    """Alpha#100: -(1.5 * scaled neutralised ranked volume-weighted close location - scaled neutralised (close/adv20-rank correlation - rank of 30-day argmin)) * volume / adv20."""
    adv20 = sma(volume, 20)
    r1 = IndNeutralize(rank(((((close - low) - (high - close)) / (high - low)) * volume)), ind)
    # NOTE(review): r1 is already neutralised and is neutralised again here.
    r2 = 1.5 * scale(IndNeutralize(r1, ind))
    r3 = scale(IndNeutralize((correlation(close, rank(adv20), 5) - rank(ts_argmin(close, 30))), ind))
    alpha = -1 * (r2 - r3) * (volume / adv20)
    return alpha.fillna(value=0)

# --------------------------------------------------------------------------------
# /02-基于万矿计算因子数据:
# --------------------------------------------------------------------------------
from WindPy import *
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
w.start()

## Build the stock universe
s_date = '2014-01-01'
e_date = '2016-01-01'
# Trading days between s_date and e_date (daily frequency); the option key was
# previously misspelled as "preiod".
date = w.tdays(s_date, e_date, "Period=D").Data[0]
# CSI 800 constituents as of e_date
stocks = w.wset("sectorconstituent", "date="+e_date+";windcode=000906.SH").Data[1]


## Download the raw daily price/volume panels
def _daily_panel(field):
    """Fetch one daily field for every stock and replace the date index with 0..n-1.

    The integer index keeps the rolling-window helpers simple to apply.
    """
    panel = w.wsd(stocks, field, s_date, e_date, usedf=True)[1]
    return panel.reset_index().drop(columns=['index'])


close = _daily_panel('close')
returns = _daily_panel('pct_chg')
Open = _daily_panel('open')
low = _daily_panel('low')
vwap = _daily_panel('vwap')
high = _daily_panel('high')
cap = _daily_panel('mkt_cap_ashare')
volume = _daily_panel('volume')
# CITIC level-1 industry of every stock on the first trading day
ind = w.wss(stocks, "industry_citic", "unit=1;tradeDate="+date[0].strftime("%Y%m%d")+";industryType=1")

## Compute the factor values
start_1 = datetime.now()  # timestamp for measuring the computation time

alpha_1 = alpha1(close, returns)
alpha_2 = alpha2(Open, close, volume)
alpha_3 = alpha3(Open, volume)
alpha_4 = alpha4(low)
alpha_5 = alpha5(Open, vwap, close)
alpha_6 = alpha6(Open, volume)
alpha_7 = alpha7(volume, close)
alpha_8 = alpha8(Open, returns)
alpha_9 = alpha9(close)
alpha_10 = alpha10(close)
alpha_11 = alpha11(vwap, close, volume)
alpha_12 = alpha12(volume, close)
alpha_13 = alpha13(volume, close)
alpha_14 = alpha14(Open, volume, returns)
alpha_15 = alpha15(high, volume)
alpha_16 = alpha16(high, volume)
alpha_17 = alpha17(volume, close)
alpha_18 = alpha18(close, Open)
alpha_19 = alpha19(close, returns)
alpha_20 = alpha20(Open, high, close, low)
alpha_21 = alpha21(volume, close)
alpha_22 = alpha22(high, volume, close)
alpha_23 = alpha23(high, close)
alpha_24 = alpha24(close)
alpha_25 = alpha25(volume, returns, vwap, high, close)
alpha_26 = alpha26(volume, high)
alpha_27 = alpha27(volume, vwap)
alpha_28 = alpha28(volume, high, low, close)
alpha_29 = alpha29(close, returns)
alpha_30 = alpha30(close, volume)
alpha_31 = alpha31(close, low, volume)
alpha_32 = alpha32(close, vwap)
alpha_33 = alpha33(Open, close)
alpha_34 = alpha34(close, returns)
alpha_35 = alpha35(volume, close, high, low, returns)
alpha_36 = alpha36(Open, close, volume, returns, vwap)
alpha_37 = alpha37(Open, close)
alpha_38 = alpha38(close, Open)
alpha_39 = alpha39(volume, close, returns)
alpha_40 = alpha40(high, volume)
alpha_41 = alpha41(high, low, vwap)
alpha_42 = alpha42(vwap, close)
alpha_43 = alpha43(volume, close)
alpha_44 = alpha44(high, volume)
alpha_45 = alpha45(close, volume)
alpha_46 = alpha46(close)
alpha_47 = alpha47(volume, close, high, vwap)
alpha_49 = alpha49(close)
alpha_50 = alpha50(volume, vwap)
alpha_51 = alpha51(close)
alpha_52 = alpha52(returns, volume, low)
alpha_53 = alpha53(close, high, low)
alpha_54 = alpha54(Open, close, high, low)
alpha_55 = alpha55(high, low, close, volume)
alpha_56 = alpha56(returns, cap)
alpha_57 = alpha57(close, vwap)
alpha_60 = alpha60(close, high, low, volume)
alpha_61 = alpha61(volume, vwap)
alpha_62 = alpha62(volume, high, low, Open, vwap)
alpha_64 = alpha64(high, low, Open, volume, vwap)
alpha_65 = alpha65(volume, vwap, Open)
alpha_66 = alpha66(vwap, low, Open, high)
# NOTE(review): alpha_68 is computed with alpha41, not a dedicated alpha68 --
# presumably a placeholder that keeps the list at 101 entries; confirm.
alpha_68 = alpha41(high, low, vwap)
alpha_71 = alpha71(volume, close, low, Open, vwap)
alpha_72 = alpha72(volume, high, low, vwap)
alpha_73 = alpha73(vwap, Open, low)
alpha_74 = alpha74(volume, close, high, vwap)
alpha_75 = alpha75(volume, vwap, low)
alpha_77 = alpha77(volume, high, low, vwap)
alpha_78 = alpha78(volume, low, vwap)
alpha_81 = alpha81(volume, vwap)
alpha_83 = alpha83(high, low, close, volume)
alpha_84 = alpha84(vwap, close)
alpha_85 = alpha85(volume, high, close, low)
# NOTE(review): same alpha41 placeholder as alpha_68 -- confirm.
alpha_86 = alpha41(high, low, vwap)
alpha_88 = alpha88(volume, Open, low, high, close)
alpha_92 = alpha92(volume, high, low, close, Open)
alpha_94 = alpha94(volume, vwap)
alpha_95 = alpha95(volume, high, low, Open)
alpha_96 = alpha96(volume, vwap, close)
alpha_98 = alpha98(volume, Open, vwap)
alpha_99 = alpha99(volume, high, low)
# Placeholder value; overwritten by alpha100(...) in the section below.
alpha_100 = alpha41(high, low, vwap)
alpha_101 = alpha101(close, Open, high, low)

# These factors involve industry neutralization and take longer to compute.
alpha_48 = alpha48(close, ind)
alpha_58 = alpha58(vwap, volume, ind)
alpha_59 = alpha59(vwap, volume, ind)
alpha_63 = alpha63(volume, close, vwap, Open, ind)
alpha_67 = alpha67(volume, vwap, high, ind)
alpha_69 = alpha69(volume, vwap, ind, close)
alpha_70 = alpha70(close, ind, vwap)
alpha_76 = alpha76(volume, vwap, low, ind)
alpha_79 = alpha79(volume, close, Open, ind, vwap)
alpha_80 = alpha80(Open, high, ind)
alpha_82 = alpha82(Open, volume, ind)
# NOTE(review): unlike its neighbours this call does not pass `ind` -- verify
# against the alpha87 signature.
alpha_87 = alpha87(volume, close, vwap)
alpha_89 = alpha89(low, vwap, ind)
alpha_90 = alpha90(volume, close, ind, low)
alpha_91 = alpha91(close, ind, volume, vwap)
alpha_93 = alpha93(vwap, ind, volume, close)
alpha_97 = alpha97(volume, low, vwap, ind)
alpha_100 = alpha100(volume, close, low, high, ind)

# Collect the factor frames in order alpha_1 .. alpha_101.
data_alpha = [alpha_1, alpha_2, alpha_3, alpha_4, alpha_5, alpha_6, alpha_7, alpha_8, alpha_9, alpha_10,
              alpha_11, alpha_12, alpha_13, alpha_14, alpha_15, alpha_16, alpha_17, alpha_18, alpha_19, alpha_20,
              alpha_21, alpha_22, alpha_23, alpha_24, alpha_25, alpha_26, alpha_27, alpha_28, alpha_29, alpha_30,
              alpha_31, alpha_32, alpha_33, alpha_34, alpha_35, alpha_36, alpha_37, alpha_38, alpha_39, alpha_40,
              alpha_41, alpha_42, alpha_43, alpha_44, alpha_45, alpha_46, alpha_47, alpha_48, alpha_49, alpha_50,
              alpha_51, alpha_52, alpha_53, alpha_54, alpha_55, alpha_56, alpha_57, alpha_58, alpha_59, alpha_60,
              alpha_61, alpha_62, alpha_63, alpha_64, alpha_65, alpha_66, alpha_67, alpha_68, alpha_69, alpha_70,
              alpha_71, alpha_72, alpha_73, alpha_74, alpha_75, alpha_76, alpha_77, alpha_78, alpha_79, alpha_80,
              alpha_81, alpha_82, alpha_83, alpha_84, alpha_85, alpha_86, alpha_87, alpha_88, alpha_89, alpha_90,
              alpha_91, alpha_92, alpha_93, alpha_94, alpha_95, alpha_96, alpha_97, alpha_98, alpha_99, alpha_100,
              alpha_101]

# Several factors need a few months of history, so their earliest values are
# invalid: two years are computed and only the second one is kept.
# 245 is the number of trading days in the discarded year
# (243, 245, 244, 244, 244 for the successive years).
data_alpha = [factor.iloc[245:] for factor in data_alpha]
df_101 = pd.concat(data_alpha)   # stack all factor frames vertically
df_101.to_csv('data/alpha.csv')  # persist to csv


## Load the data back and reshape it
def zhuanhuan(alpha_1, date, stocks, columns=None):
    """Reshape a wide (date x stock) frame into a (date, codes) MultiIndex frame.

    alpha_1 : DataFrame with one row per entry of `date` and one column per
              entry of `stocks`; NaN values are replaced by 0.
    date    : index-like of dates, used as the first index level.
    stocks  : list of stock codes, used as the second index level.
    columns : one-element list with the name of the output column
              (defaults to ['alpha']).

    Returns a single-column DataFrame indexed by the product of date x stocks.
    """
    columns = ['alpha'] if columns is None else list(columns)
    alpha_year = alpha_1.fillna(value=0).set_index(date)
    index = pd.MultiIndex.from_product([date, stocks], names=['date', 'codes'])
    # Row-major stacking matches the ordering of the date x stocks product.
    stacked = alpha_year.stack()
    return pd.DataFrame(stacked.values, columns=columns, index=index)


# Test interval; the stock pool is refreshed once a year.
date_list = ['2014-01-01', '2015-01-01', '2016-01-01', '2017-01-01', '2018-01-01', '2019-05-30']
div = 101                        # number of factors stacked in every csv file
n_periods = len(date_list) - 1

# Read the per-period factor files; the first csv column (saved index) is dropped.
data_alpha = []
for i in range(n_periods):
    data_ = pd.read_csv('data/data_alpha_re' + str(i + 1) + '.csv')
    data_.drop(data_.columns[0], axis=1, inplace=True)
    data_alpha.append(data_)

# Split each stacked file back into `div` factors and convert every piece to
# the (date, codes) MultiIndex layout: data_alpha[period][factor].
for i in range(n_periods):
    df = data_alpha[i]
    n = len(df) // div           # rows (trading days) per factor
    date = w.wsd('000001.SZ', 'close', date_list[i], date_list[i + 1], period='D', usedf=True)[1].index
    stocks = w.wset("sectorconstituent", "date=" + date_list[i + 1] + ";windcode=000906.SH").Data[1]
    data_alpha[i] = [
        zhuanhuan(df.iloc[n * j:n * (j + 1)], date, stocks, columns=['alpha' + str(j + 1)])
        for j in range(div)
    ]

# Concatenate the periods of each factor: data[j] is the full MultiIndex
# history of factor j + 1.
data = [pd.concat([period[j] for period in data_alpha]) for j in range(div)]
# --------------------------------------------------------------------------------
# /03-单因子IC测试:
# --------------------------------------------------------------------------------
## Stock-pool filter: tradable, not newly listed, no risk warning (ST/PT),
## not at the daily price limit
def get_stocks(trDate):
    """Return the index-000906.SH constituents that pass the filters on trDate.

    trDate is a datetime-like object (it must support strftime).
    """
    trDate = trDate.strftime('%Y-%m-%d')
    stocks_800 = w.wset("sectorconstituent", "date=" + trDate + ";windcode=000906.SH").Data[1]
    status = w.wss(stocks_800, "trade_status,maxupordown,riskwarning,ipo_date", tradeDate=trDate, usedf=True)[1]
    # Stocks must have been listed on or before the date 6 months back.
    date_least = w.tdaysoffset(-6, trDate, 'Period=M').Data[0][0]
    keep = (
        (status['TRADE_STATUS'] == '交易')
        & (status['IPO_DATE'] <= date_least)
        & (status['MAXUPORDOWN'] == 0)
        & (status['RISKWARNING'] == '否')
    )
    return list(status[keep].index)


trade_d = w.tdays("2014-01-02", "2019-05-30", Period='M', usedf=True).Data[0]
stock_valid = [get_stocks(day) for day in trade_d]
# One row per date; rows are padded with NaN up to the largest pool size.
stock_valid_df = pd.DataFrame(stock_valid, index=trade_d)

## Next-period returns
Period = 'M'
# Date lists chosen so that consecutive windows do not fetch duplicated rows.
date_list = ['2014-01-02', '2015-01-09', '2016-01-08', '2017-01-06', '2018-01-05', '2019-05-30']
date_listd = ['2015-01-08', '2016-01-07', '2017-01-05', '2018-01-04', '2019-04-30']
date_listw = ['2015-01-02', '2016-01-01', '2016-12-30', '2017-12-29', '2019-05-30']
date_listm = ['2015-01-01', '2016-01-01', '2017-01-01', '2018-01-01', '2019-04-30']
next_ret = []
bench_date = []  # needed later for the layered-test plots
for i in range(len(date_listm)):
    date = w.wsd('000001.SZ', 'close', date_list[i], date_listm[i], period=Period, usedf=True)[1].index
    stocks = w.wset("sectorconstituent", "date=" + date_listm[i] + ";windcode=000906.SH").Data[1]
    # Shift both window ends forward by one period and format them as strings.
    d1 = w.tdaysoffset(1, date[0], Period=Period, usedf=True).Data[0][0].strftime('%Y-%m-%d')
    d2 = w.tdaysoffset(1, date[-1], Period=Period, usedf=True).Data[0][0].strftime('%Y-%m-%d')
    bench_date.append(d1)
    bench_date.append(d2)
    next_ret_ = w.wsd(stocks, "pct_chg", d1, d2, usedf=True, Period=Period)[1].fillna(value=0)
    next_ret_ = next_ret_ / 100  # pct_chg is in percent; convert to a fraction
    # The shifted returns are labelled with the un-shifted dates.
    next_ret.append(zhuanhuan(next_ret_, date, stocks, columns=['NEXT_RET']))
next_ret = pd.concat(next_ret)  # merge all windows


## IC test
def ic_fenxi(df, next_ret, alpha=('alpha',)):
    """Compute rank-IC statistics of one factor against next-period returns.

    df       : (date, codes) MultiIndex frame holding the factor column and a
               'NEXT_RET' column.
    next_ret : MultiIndex frame whose first index level supplies the dates.
    alpha    : factor column name, as a string or a one-element list/tuple.

    Returns [IC mean, IC std, IC_IR (mean / std), share of periods with IC >= 0].
    """
    if isinstance(alpha, str):
        factor_col = alpha
    else:
        if len(alpha) != 1:
            raise ValueError("alpha must name exactly one factor column")
        # Series.corr needs a Series, so unwrap the single column name.
        factor_col = alpha[0]
    d = list(next_ret.index.levels[0])  # dates of the first index level
    df = df.fillna(value=0)             # missing values are treated as 0
    ic_values = []
    for day in d:
        pool = stock_valid_df.loc[day].dropna().values  # valid pool of the period
        pool = list(set(pool) & set(df.loc[day].index))
        dff = df.loc[day].loc[pool]
        # dff = dff.sort_values(alpha,ascending=False).iloc[:round(len(dff)/10)]  # top-group-only variant
        ic_values.append(dff['NEXT_RET'].corr(dff[factor_col], method='spearman'))
    ic_s = pd.Series(ic_values)
    rate = len(ic_s[ic_s >= 0]) / len(ic_s)
    IC_mean = ic_s.mean()
    IC_std = ic_s.std()
    IC_IR = IC_mean / IC_std            # used to judge factor effectiveness
    return [IC_mean, IC_std, IC_IR, rate]


d = list(next_ret.index.levels[0])
for i, factor in enumerate(data):
    # Keep only the rebalancing dates; copy so that adding a column does not
    # write into a slice of the stored factor data.
    lc = factor.loc[d].copy()
    lc['NEXT_RET'] = next_ret.NEXT_RET
    list_ic = ic_fenxi(lc, next_ret, alpha=['alpha' + str(i + 1)])
    print(list_ic)
# --------------------------------------------------------------------------------
# /04-单因子分层测试:
# --------------------------------------------------------------------------------
## Layered (quantile) test
def return_fenxi(df, d, num, alpha='alpha'):
    """Cumulative net value of `num` factor-sorted groups.

    df    : (date, codes) MultiIndex frame with the factor column `alpha` and
            a 'NEXT_RET' column; NaN is treated as 0 (the input is not modified).
    d     : list of dates to iterate over.
    num   : number of groups; stocks are sorted by ascending factor value.
    alpha : name of the factor column.

    Returns (culmu, return_s): the final cumulative value of each group, and
    one cumulative-value series per group. Periods whose factor values sum to
    0 are skipped; if every period is skipped, culmu is empty and each series
    is an empty list.
    """
    df = df.fillna(value=0)
    period_returns = []
    for day in d:
        pool = stock_valid_df.loc[day].dropna().values
        pool = list(set(pool) & set(df.loc[day].index))
        dff = df.loc[day].loc[pool]
        if dff[alpha].sum() == 0:  # skip periods with degenerate factor values
            continue
        ranked = dff.sort_values(alpha)
        size = len(ranked)
        bounds = [round(size * j / num) for j in range(num + 1)]
        period_returns.append([
            ranked.iloc[lo:hi]['NEXT_RET'].mean() + 1
            for lo, hi in zip(bounds[:-1], bounds[1:])
        ])
    if not period_returns:
        return [], [[] for _ in range(num)]
    # Rows are periods, columns are groups: compound over time, then transpose.
    nav = np.cumprod(np.array(period_returns), axis=0).T
    return_s = [list(series) for series in nav]
    culmu = [series[-1] for series in return_s]
    return culmu, return_s


def fencengceshi(data, next_ret, num):
    """Run return_fenxi for every factor frame in `data`.

    Prints the final cumulative value of each group for every factor and
    returns a list (one entry per factor) of per-group cumulative series.
    """
    d = list(next_ret.index.levels[0])
    data_re = []
    data_re1 = []
    for factor in data:
        lc = factor.loc[d].copy()  # copy: a column is added below
        lc['NEXT_RET'] = next_ret.NEXT_RET
        list_re1, list_re = return_fenxi(lc, d, num, alpha=factor.columns[0])
        data_re.append(list_re)
        data_re1.append(list_re1)
        print(list_re1)            # final cumulative value of each group
    return data_re


num = 20
df = fencengceshi(data, next_ret, num=num)  # cumulative series per factor and group
dff = fencengceshi(data, next_ret, num=1)   # single group = equal-weight baseline

## Time series of the first, middle and last group
n = 1                # 1-based number of the factor to plot
x = df[n - 1]
y1 = x[0]
y2 = x[num // 2]     # integer division: list indices must be ints
y3 = x[-1]
d = list(next_ret.index.levels[0])[-len(y1):]
# Baseline of the same factor, so that skipped periods line up with y1.
bench = dff[n - 1][0][-len(y1):]

plt.subplots(figsize=(15, 5))  # figure width and height
plt.plot(d, y1, label='Group1')
plt.plot(d, y2, label='Group2')
plt.plot(d, y3, label='Group3')
plt.plot(d, bench, label='ZZ800')
plt.legend()
plt.title('alpha'+str(n)+' 分层测试(月)')
plt.xlabel('回测区间')
plt.ylabel("净值")
# --------------------------------------------------------------------------------
# /05-回测相关处理函数:
# --------------------------------------------------------------------------------
from WindPy import *
import numpy as np
import pandas as pd
import numpy.linalg as la
import seaborn as sns
import matplotlib.pyplot as plt  # plt is used by the heatmap below
w.start()

## Combine several factors into one frame
lc = data.copy()
num_list = [42, 44, 36, 15, 4]       # 1-based numbers of the selected factors
num_list = [i - 1 for i in num_list]  # convert to 0-based list positions
alpha_m = lc[num_list[0]].copy()
for i in num_list[1:]:
    alpha_m['alpha' + str(i + 1)] = lc[i]['alpha' + str(i + 1)]

## Factor preprocessing
# Heatmap of the factor correlation matrix
fig = plt.figure(figsize=(10, 6))
relations = alpha_m.corr()
sns.heatmap(relations, annot=True, linewidths=0.05, linecolor='white', annot_kws={'size': 8, 'weight': 'bold'})


# Median-based outlier clipping
def extreme_process_MAD(sample, n_mad=3):
    """Clip every column to median +/- n_mad * 1.4826 * MAD.

    sample is a cross-section frame (stocks x factors). Its columns are
    replaced in place and the same frame is returned.
    """
    for name in list(sample.columns):
        x = sample[name]
        median = x.median()
        MAD = (x - median).abs().median()
        spread = n_mad * 1.4826 * MAD
        # clip() replaces the boolean-mask assignments on a column view.
        sample[name] = x.clip(lower=median - spread, upper=median + spread)
    return sample


# Industry and market-cap neutralization
def data_scale_neutral(sample, date):
    """Replace each factor by its OLS residual on industry dummies and val_lnmv.

    sample : cross-section frame indexed by stock code (stocks x factors).
    date   : trade date string passed to the Wind queries.

    Returns a new frame restricted to the stocks with complete regressors.
    If the design matrix is rank deficient the factor values are returned
    unchanged.
    """
    stocks = list(sample.index)
    options = "unit=1;tradeDate=" + date + ";industryType=1"
    ind = w.wss(stocks, "industry_citic", options, usedf=True)[1]
    Incap = w.wss(stocks, "val_lnmv", options, usedf=True)[1]
    data_med = pd.get_dummies(ind, columns=['INDUSTRY_CITIC'])  # 0/1 industry matrix
    x = pd.concat([data_med, Incap], axis=1).dropna()
    # Force float: get_dummies may return boolean columns.
    X = np.array(x, dtype=float)
    sample = sample.loc[list(x.index)].copy()
    gram = X.T.dot(X)
    # The design matrix is the same for every factor, so test the rank and
    # invert once.
    if la.matrix_rank(gram) != X.shape[1]:
        return sample
    projector = la.inv(gram).dot(X.T)        # least-squares coefficient operator
    for name in list(sample.columns):
        y = np.array(sample[name])
        sample[name] = y - X.dot(projector.dot(y))  # residual = neutralized factor
    return sample


# Standardization
def standardize(sample):
    """Z-score every column in place and return the frame.

    A column whose standard deviation is 0 is only centred (it becomes all
    zeros) rather than being divided by zero.
    """
    for name in list(sample.columns):
        x = sample[name]
        centred = x - np.mean(x)
        std = np.std(x)
        sample[name] = centred / std if std > 0 else centred
    return sample


# Full preprocessing chain
def data_process(sample, date):
    """Outlier clipping, then neutralization, then standardization."""
    sample = extreme_process_MAD(sample)
    sample = data_scale_neutral(sample, date)
    sample = standardize(sample)
    return sample


# Stock-pool filter used in the backtest
def get_stocks(trDate, A_stocks):
    """Return the codes in A_stocks that pass the trading filters on trDate.

    Kept: trade status '交易', IPO date on or before the date 6 months back,
    MAXUPORDOWN == 0 and risk warning '否'.
    """
    status = w.wss(A_stocks, "trade_status,maxupordown,riskwarning,ipo_date", tradeDate=trDate, usedf=True)[1]
    date_least = w.tdaysoffset(-6, trDate, 'Period=M').Data[0][0]
    keep = (
        (status['TRADE_STATUS'] == '交易')
        & (status['IPO_DATE'] <= date_least)
        & (status['MAXUPORDOWN'] == 0)
        & (status['RISKWARNING'] == '否')
    )
    return list(status[keep].index)


def _weekly_history(bar_datetime_str, stocks, periods=12):
    """Return (weekly date strings, weekly pct_chg frame) for the look-back window."""
    Period = "W"
    begin_time = w.tdaysoffset(-periods, bar_datetime_str, Period=Period, usedf=True).Data[0][0].strftime('%Y-%m-%d')
    time_list = w.tdays(begin_time, bar_datetime_str, Period=Period, usedf=True).Data[0]
    time_list = [day.strftime('%Y-%m-%d') for day in time_list]
    # The first row is dropped, so row i holds the return that follows time_list[i].
    next_ret = w.wsd(stocks, "pct_chg", begin_time, bar_datetime_str, usedf=True, Period=Period)[1].fillna(value=0).iloc[1:]
    return time_list, next_ret


def _factor_cross_section(alpha_data, day, stocks):
    """Return the factor values of `day` as a frame indexed by stock code only."""
    return alpha_data.loc[day].reset_index().set_index(['codes']).drop(columns=['date']).loc[stocks]


def _period_return(next_ret, i):
    """Return row i of next_ret, falling back to row i - 1 when i is out of range."""
    try:
        return next_ret.iloc[i]
    except IndexError:
        return next_ret.iloc[i - 1]


# Historical IC_IR weighting / historical mean-IC weighting
def IR_weight(bar_datetime_str, stocks, alpha_data):
    """Factor weights from the last 12 weekly rank ICs.

    Returns cov(IC) . mean(IC), one value per factor column of alpha_data.
    """
    time_list, next_ret = _weekly_history(bar_datetime_str, stocks)
    IC_s = []
    for i in range(12):
        factor = _factor_cross_section(alpha_data, time_list[i], stocks)
        period_ret = _period_return(next_ret, i)
        IC_s.append([factor[name].corr(period_ret, method='spearman') for name in factor.columns])
    IC_s = np.array(IC_s).T                      # rows = factors, columns = periods
    W = np.cov(IC_s)
    IC = np.array([row.mean() for row in IC_s])  # mean IC per factor
    # NOTE(review): IC_IR-maximizing weights are usually inv(cov) . mean(IC);
    # this multiplies by the covariance itself -- confirm the intent.
    IC_IR = np.dot(W, IC)
    return IC_IR  # use `IC` instead for plain mean-IC weighting


# Historical top-group-return weighting
def rate_weight(bar_datetime_str, stocks, alpha_data, num=10):
    """Factor weights from the mean return of each factor's top group.

    For each of the last 12 weeks and each factor, the top 1/num of stocks by
    factor value is selected and its mean next-period return recorded.
    Returns the per-factor average over the 12 weeks.
    """
    time_list, next_ret = _weekly_history(bar_datetime_str, stocks)
    rates = []
    for i in range(12):
        factor = _factor_cross_section(alpha_data, time_list[i], stocks)
        period_ret = _period_return(next_ret, i)
        top_n = round(len(factor) / num)
        row = []
        for name in factor.columns:
            g10 = list(factor.sort_values([name], ascending=False).iloc[:top_n].index)
            # Append, so every factor keeps its own value for this period.
            row.append(period_ret.loc[g10].mean())
        rates.append(row)
    rates = np.array(rates).T                    # rows = factors, columns = periods
    return np.array([row.mean() for row in rates])


# Factor scoring
def factor_sum(sample, weight_list):
    """Add an 'alpha_sum' column: the weighted sum of all factor columns.

    weight_list holds one weight per column of sample, in column order. An
    empty weight_list means equal weights. The frame is modified in place
    and returned.
    """
    factor_name = list(sample.columns)
    if len(weight_list) == 0:
        weight_list = [1.0 / len(factor_name)] * len(factor_name)
    elif len(weight_list) < len(factor_name):
        raise ValueError("weight_list must provide one weight per factor column")
    sample['alpha_sum'] = sample[factor_name[0]] * 0
    for name, weight in zip(factor_name, weight_list):
        sample['alpha_sum'] = sample['alpha_sum'] + sample[name] * weight
    return sample
# --------------------------------------------------------------------------------
# /06-有效因子回测:
# --------------------------------------------------------------------------------
from WindPy import *
from datetime import *
from WindAlgo import *
from scipy.stats import rankdata
import numpy as np
import pandas as pd
from datetime import datetime
import scipy.stats as stats
import numpy.linalg as la
w.start()

alpha_data = alpha_m


def initialize(context):
    """Set up the backtest configuration."""
    context.capital = 10000000      # initial capital
    context.securities = w.wset("sectorconstituent", "date=20140101;windcode=000906.SH").Data[1]
    context.start_date = "20140104"  # backtest start
    context.end_date = "20190530"    # backtest end
    context.commission = 0.0003      # commission rate
    context.alpha_data = alpha_data
    context.benchmark = '000906.SH'  # benchmark index


def handle_data(bar_datetime, context, bar_data):
    """No per-bar logic; all trading happens in the scheduled functions."""
    pass


def my_schedule1(bar_datetime, context, bar_data):
    """Score the stock pool, select the top group and sell everything else."""
    bar_datetime_str = bar_datetime.strftime('%Y-%m-%d')
    stock800 = w.wset("sectorconstituent", "date=" + bar_datetime_str + ";windcode=000906.SH").Data[1]
    stocks = get_stocks(bar_datetime_str, stock800)  # filtered stock pool
    ## Approach 1: use the precomputed factor data
    data = context.alpha_data.loc[bar_datetime_str].reset_index().set_index(['codes']).drop(columns=['date'])
    stock_v = list(set(stocks) & set(data.index))
    data = data.loc[stock_v].dropna(axis=0)          # valid pool with complete factor rows
    # Query only the codes that survived dropna, so the result has exactly one
    # row per row of `data`.
    codes = list(data.index)
    profit = w.wss(codes, "fa_oigr_ttm,pe_ttm", tradeDate=bar_datetime_str, usedf=True)[1]  # financial factor
    profit.index = data.index
    data['1/PEG'] = profit['FA_OIGR_TTM'] / profit['PE_TTM']
    data = data_process(data, bar_datetime_str)      # preprocessing
    weight = []                                      # factor weights; empty means equal weights
    data = factor_sum(data, weight)                  # factor score
    ## Approach 2: compute the factor inside the backtest
    # start_time = w.tdaysoffset(-5, bar_datetime_str,usedf=True).Data[0][0].strftime('%Y-%m-%d')
    # close = w.wsd(stocks,'close',start_time,bar_datetime_str,usedf = True)[1].reset_index().drop(columns = ['index'])
    # vwap = w.wsd(stocks,'vwap',start_time,bar_datetime_str,usedf = True)[1].reset_index().drop(columns = ['index'])
    # alpha = {'alpha':alpha42(vwap,close).iloc[-1]}
    # data = pd.DataFrame(alpha,index=stocks)

    data = data.sort_values([data.columns.values[-1]], ascending=False)  # sort by score
    code_list = list(data[:round(len(stocks) / 5)].index)                # top group
    wa.change_securities(code_list)                  # update the security pool
    context.securities = code_list
    list_sell = list(wa.query_position().get_field('code'))  # currently held codes
    for code in list_sell:
        if code not in code_list:
            volumn = wa.query_position()[code]['volume']  # held volume of this stock
            wa.order(code, volumn, 'sell', price='close', volume_check=False)


def my_schedule2(bar_datetime, context, bar_data):
    """Buy, with equal weights, the selected stocks that are not yet held."""
    available = set(bar_data.get_field('code'))
    # Selected stocks present in bar_data, in the order of context.securities.
    buy_code_list = [code for code in context.securities if code in available]
    list_now = list(wa.query_position().get_field('code'))  # currently held codes
    for code in buy_code_list:
        if code not in list_now:
            wa.order_percent(code, 1 / len(buy_code_list), 'buy', price='close', volume_check=False)


wa = BackTest(init_func=initialize, handle_data_func=handle_data)  # create the backtest object
wa.schedule(my_schedule1, "w", 0)   # "w": run once a week; 0: offset in days within the period
wa.schedule(my_schedule2, "w", 0)
res = wa.run(show_progress=True)    # run the backtest; show_progress toggles the net-value chart
nav_df = wa.summary('nav')          # daily portfolio net value over the backtest
# --------------------------------------------------------------------------------
# /README.md:
# --------------------------------------------------------------------------------
# # alpha101
# 基于万矿平台对alpha101因子进行测试并构造多因子策略
# --------------------------------------------------------------------------------