├── .gitignore ├── .vscode └── settings.json ├── A.docx ├── GBDT.py ├── MLP.py ├── README.md ├── RF_line3.py ├── adaboost_model.py ├── data_exploration.ipynb ├── factor_analysis.py ├── find_factor.py ├── get_factor_report.py ├── lstm.py ├── multi_factor_lr.py ├── random_forest_reg.py ├── references ├── 2011年金融工程研讨会专题报告系列之二:大浪淘金,Alpha因子何处寻?.pdf ├── A.pdf ├── A股Alpha策略及产品回顾与展望——2018年金融工程年度报告.pdf ├── A题—通过机器学习优化股票多因子模型解题指引.pdf ├── SA20190100000_36930159.pdf ├── 人工智能选股框架及经典算法简介.pdf ├── 华泰证券-多因子系列之一:华泰多因子模型体系初探-160921.pdf ├── 单因子测试.PDF ├── 收益预测模型.PDF └── 风险模型与组合优化.PDF ├── run_test.bat ├── same_weight_model.py ├── single_factor_test.py ├── svm.py ├── time_roll_model.py ├── xgb_model.py ├── 价值类.png ├── 基础类.png ├── 情绪类.png ├── 每股指标类.png ├── 特色技术指标类.png ├── 行业分析师类.png └── 质量类.png /.gitignore: -------------------------------------------------------------------------------- 1 | venv 2 | .idea 3 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.pythonPath": "C:\\Users\\Qiuyh\\AppData\\Local\\Programs\\Python\\Python37\\python.exe" 3 | } -------------------------------------------------------------------------------- /A.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JoshuaQYH/TIDIBEI/bae9a29662ecec55a9a9f1a9aa75c1c4de053a8c/A.docx -------------------------------------------------------------------------------- /GBDT.py: -------------------------------------------------------------------------------- 1 | """ 2 | ---------------------------------------------------------- 3 | 策略思路: 4 | 1. 回测标的:沪深300成分股 5 | 2. 回测时间段:2016-01-01 至 2018-09-30 6 | 3. 特征选择:每个大类夏普率最高的因子+夏普率高于1.5的因子 7 | - 质量类:ROIC, CashToCurrentLiability 8 | - 特色技术指标:STDDEV 9 | - 收益风险:DDNCR 10 | - 情绪类:TVMA20 11 | - 每股指标类:EnterpriseFCFPS 12 | - 价值类:PS 13 | - 基础类:AdminExpenseTTM, FinanExpenseTTM, NetIntExpense, GrossProfit 14 | - 行业分析师:FY12P 15 | - 动量类:TotalAssetGrowRate 16 | - 成长类:TotalAssetGrowRate 17 | - 常用技术类:MA120 18 | ... 其余逻辑参照single_factor_test.py 19 | ---------------------------------------------------------- 20 | """ 21 | from atrader import * 22 | import pandas as pd 23 | import numpy as np 24 | from sklearn import svm 25 | import math 26 | from sklearn import preprocessing 27 | import datetime 28 | from sklearn.ensemble import AdaBoostRegressor 29 | from sklearn.ensemble import GradientBoostingRegressor 30 | # 作为全局变量进行测试 31 | 32 | 33 | FactorCode = ['ROIC', 'CashToCurrentLiability', 'STDDEV', 'DDNCR', 'PVI', 'EnterpriseFCFPS', 34 | 'PS', 'AdminExpenseTTM', 'FinanExpenseTTM', 'NetIntExpense', 'NIAP', 'FY12P', 35 | 'AD', 'TotalAssetGrowRate', 'MA120'] 36 | 37 | 38 | # 中位数去极值法 39 | def filter_MAD(df, factor, n=3): 40 | """ 41 | :param df: 去极值的因子序列 42 | :param factor: 待去极值的因子 43 | :param n: 中位数偏差值的上下界倍数 44 | :return: 经过处理的因子dataframe 45 | """ 46 | median = df[factor].quantile(0.5) 47 | new_median = ((df[factor] - median).abs()).quantile(0.5) 48 | max_range = median + n * new_median 49 | min_range = median - n * new_median 50 | 51 | for i in range(df.shape[0]): 52 | if df.loc[i, factor] > max_range: 53 | df.loc[i, factor] = max_range 54 | elif df.loc[i, factor] < min_range: 55 | df.loc[i, factor] = min_range 56 | return df 57 | 58 | 59 | def init(context): 60 | # 账号设置:设置初始资金为 10000000 元 61 | set_backtest(initial_cash=10000000, future_cost_fee=1.0, stock_cost_fee=30, margin_rate=1.0, slide_price=0.0, 62 | price_loc=1, deal_type=0, limit_type=0) 63 | # 注册数据:日频数据 64 | reg_kdata('day', 1) 65 | global FactorCode # 全局单因子代号 66 | reg_factor(factor=FactorCode) 67 | print("init 函数, 注册因子为{}".format(FactorCode[0])) 68 | context.FactorCode = FactorCode # 69 | 70 | # 超参数设置: 71 | context.Len = 21 # 时间长度: 当交易日个数小于该事件长度时,跳过该交易日,假设平均每个月 21 个交易日左右 250/12 72 | context.Num = 0 # 记录当前交易日个数 73 | 74 | # 较敏感的超参数,需要调节 75 | context.upper_pos = 75 # 股票预测收益率的上分位数,高于则买入 76 | context.down_pos = 25 # 股票预测收益率的下分位数,低于则卖出 77 | context.cash_rate = 0.5 # 计算可用资金比例的分子,利益大于0的股票越多,比例越小 78 | 79 | # 确保月初调仓 80 | days = get_trading_days('SSE', '2016-01-01', '2018-09-30') 81 | months = np.vectorize(lambda x: x.month)(days) 82 | month_begin = days[pd.Series(months) != pd.Series(months).shift(1)] 83 | context.month_begin = pd.Series(month_begin).dt.strftime('%Y-%m-%d').tolist() 84 | 85 | 86 | def on_data(context): 87 | context.Num = context.Num + 1 88 | if context.Num < context.Len: # 如果交易日个数小于Len+1,则进入下一个交易日进行回测 89 | return 90 | if datetime.datetime.strftime(context.now, '%Y-%m-%d') not in context.month_begin: # 调仓频率为月,月初开始调仓 91 | return 92 | 93 | # 获取数据: 94 | KData = get_reg_kdata(reg_idx=context.reg_kdata[0], length=context.Len, fill_up=True, df=True) 95 | FData = get_reg_factor(reg_idx=context.reg_factor[0], target_indices=[x for x in range(300)], length=context.Len, 96 | df=True) # 获取因子数据 97 | 98 | # 特征构建: 99 | Fcode = context.FactorCode # 标签不需要代号了 100 | 101 | # 数据存储变量: 102 | # Close 字段为标签,Fcode 为标签 103 | FactorData = pd.DataFrame(columns=(['idx', 'benefit'] + Fcode)) # 存储训练特征及标签样本 104 | FactorDataTest = pd.DataFrame(columns=(['idx'] + Fcode)) # 存储预测特征样本 105 | 106 | # K线数据序号对齐 107 | tempIdx = KData[KData['time'] == KData['time'][0]]['target_idx'].reset_index(drop=True) 108 | 109 | # 按标的处理数据: 110 | for i in range(300): 111 | # 训练特征集及训练标签构建: 112 | # 临时数据存储变量: 113 | FactorData0 = pd.DataFrame(np.full([1, len(Fcode) + 2], np.nan), 114 | columns=(['idx', 'benefit'] + Fcode)) 115 | # 存储预测特征样本 116 | FactorDataTest0 = pd.DataFrame(np.full([1, len(Fcode) + 1], np.nan), columns=(['idx'] + Fcode)) 117 | 118 | # 因子数据 序号对齐, 提取当前标的的因子数据 119 | FData0 = FData[FData['target_idx'] == tempIdx[i]].reset_index(drop=True) 120 | 121 | # 按特征处理数据: 122 | for FC in context.FactorCode: 123 | # 提取当前标的中与当前因子FC相同的部分 124 | FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True) 125 | FactorData0[FC] = FCData[0] # 存储上一个月初的股票因子数据 126 | 127 | # 按标签处理数据: 128 | # 提取当前标的的前一个月的K线面板数据 129 | close = np.array(KData[KData['target_idx'] == tempIdx[i]]['close']) 130 | # 计算当前标的在上一个月的收益率 131 | benefit = (close[context.Len - 1] - close[0]) / close[0] 132 | 133 | FactorData0['benefit'] = benefit 134 | # idx: 建立当前标的在训练样本集中的索引 135 | FactorData0['idx'] = tempIdx[i] 136 | # 合并数据:组成训练样本 137 | FactorData = FactorData.append(FactorData0, ignore_index=True) 138 | 139 | # 预测特征集构建:建立标的索引 140 | FactorDataTest0['idx'] = tempIdx[i] 141 | # 按特征处理数据,过程同建立训练特征 142 | for FC in context.FactorCode: 143 | FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True) 144 | FactorDataTest0[FC] = FCData[context.Len - 1] 145 | 146 | # 合并测试数据 147 | FactorDataTest = FactorDataTest.append(FactorDataTest0, ignore_index=True) 148 | 149 | """ 150 | 训练集和测试集的表头字段如下 151 | FactorData DataFrame: 152 | idx | benefit | Factor 1 | Factor 2| .... 153 | benefit 作为标签,上月初Factor作为特征,此处是单因子测试,只有一个特征 154 | FactorDataTest DataFrame: 155 | idx | Factor 1 | Factor 2 | ... 156 | 本月初的因子作为预测特征 157 | """ 158 | 159 | # 数据清洗: 160 | FactorData = FactorData.dropna(axis=0, how='any').reset_index(drop=True) # 清洗数据 161 | FactorDataTest = FactorDataTest.dropna(axis=0, how='any').reset_index(drop=True) # 清洗数据 162 | Idx = FactorDataTest['idx'] # 剩余标的序号 163 | 164 | # 按特征进行预处理 165 | for Factor in context.FactorCode: 166 | FactorData = filter_MAD(FactorData, Factor, 5) # 中位数去极值法 167 | FactorData[Factor] = preprocessing.scale(FactorData[Factor]) # 标准化 168 | 169 | FactorDataTest = filter_MAD(FactorDataTest, Factor, 5) # 中位数去极值法 170 | FactorDataTest[Factor] = preprocessing.scale(FactorDataTest[Factor]) # 标准化 171 | 172 | 173 | # 训练和预测特征构建:# 行(样本数)* 列(特征数) 174 | X = np.ones([FactorData.shape[0], len(Fcode)]) 175 | Xtest = np.ones([FactorDataTest.shape[0], len(Fcode)]) 176 | 177 | # 循环填充特征到numpy数组中 178 | for i in range(X.shape[1]): 179 | X[:, i] = FactorData[Fcode[i]] 180 | Xtest[:, i] = FactorDataTest[Fcode[i]] 181 | 182 | # 训练样本的标签,为浮点数的收益率 183 | Y = (np.array(FactorData['benefit']).astype(float) > 0) 184 | 185 | gbdt_reg = GradientBoostingRegressor(n_estimators=50, learning_rate=0.1, 186 | max_depth=8, random_state=1, loss='ls') 187 | 188 | # 模型训练: 189 | gbdt_reg.fit(X, Y) 190 | 191 | # LR分类预测: 192 | y = gbdt_reg.predict(Xtest) 193 | 194 | # 交易设置: 195 | positions = context.account().positions['volume_long'] # 多头持仓数量 196 | valid_cash = context.account(account_idx=0).cash['valid_cash'][0] # 可用资金 197 | 198 | P = context.cash_rate / (sum(y > 0) + 1) # 设置每只标的可用资金比例 + 1 防止分母为0 199 | 200 | # 获取收益率的高分位数和低分位数 201 | low_return, high_return = np.percentile(y, [context.down_pos, context.upper_pos]) 202 | 203 | for i in range(len(Idx)): 204 | position = positions.iloc[Idx[i]] 205 | # if position == 0 and y[i] == True and valid_cash > 0: # 若预测结果为true(收益率>0),买入 206 | # print('开仓') 207 | if position == 0 and y[i] > high_return and valid_cash > 0 : 208 | # 开仓数量 + 1防止分母为0 209 | # print(valid_cash, P, KData['close'][Idx[i]]) # 这里的数目可考虑减少一点,,有时太多有时太少 210 | Num = int(math.floor(valid_cash * P / 100 / (KData['close'][Idx[i]] + 1)) * 100) 211 | 212 | # 控制委托量,不要过大或过小,需要保证是100的倍数 213 | if Num < 1000: 214 | Num *= 10 215 | if Num > 100000: 216 | Num = int(Num / 10) 217 | Num -= Num % 100 218 | if Num <= 0: # 不开仓 219 | continue 220 | 221 | print("开仓数量为:{}".format(Num)) 222 | order_id = order_volume(account_idx=0, target_idx=int(Idx[i]), volume=Num, side=1, position_effect=1, order_type=2, 223 | price=0) # 指定委托量开仓 224 | # 对订单号为order_id的委托单设置止损,止损距离10个整数点,触发时,委托的方式用市价委托 225 | # stop_loss_by_order(target_order_id=order_id, stop_type=1, stop_gap=10, order_type=2) 226 | # elif position > 0 and y[i] == False: #预测结果为false(收益率<0),卖出 227 | elif position > 0 and y[i] < low_return: # 当前持仓,且该股票收益小于低30%分位数,则平仓,卖出 228 | print("平仓,数量为: {}".format(position / 10)) 229 | order_volume(account_idx=0, target_idx=int(Idx[i]), volume=int(position / 10), 230 | side=2, position_effect=2, order_type=2, price=0) # 指定委托量平仓 231 | 232 | 233 | if __name__ == '__main__': 234 | file_path = 'GBDT.py' 235 | block = 'hs300' 236 | 237 | begin_date = '2016-01-01' 238 | end_date = '2018-09-30' 239 | 240 | strategy_name = 'GBDT' 241 | 242 | run_backtest(strategy_name=strategy_name, file_path=file_path, 243 | target_list=list(get_code_list('hs300', date=begin_date)['code']), 244 | frequency='day', fre_num=1, begin_date=begin_date, end_date=end_date, fq=1) 245 | -------------------------------------------------------------------------------- /MLP.py: -------------------------------------------------------------------------------- 1 | """ 2 | ---------------------------------------------------------- 3 | 策略思路: 4 | 1. 回测标的:沪深300成分股 5 | 2. 回测时间段:2016-01-01 至 2018-09-30 6 | 3. 特征选择:每个大类夏普率最高的因子+夏普率高于1.5的因子 7 | - 质量类:ROIC, CashToCurrentLiability 8 | - 特色技术指标:STDDEV 9 | - 收益风险:DDNCR 10 | - 情绪类:TVMA20 11 | - 每股指标类:EnterpriseFCFPS 12 | - 价值类:PS 13 | - 基础类:AdminExpenseTTM, FinanExpenseTTM, NetIntExpense, GrossProfit 14 | - 行业分析师:FY12P 15 | - 动量类:TotalAssetGrowRate 16 | - 成长类:TotalAssetGrowRate 17 | - 常用技术类:MA120 18 | ... 其余逻辑参照single_factor_test.py 19 | ---------------------------------------------------------- 20 | """ 21 | from atrader import * 22 | import pandas as pd 23 | import numpy as np 24 | from sklearn import svm 25 | import math 26 | from sklearn import preprocessing 27 | import datetime 28 | from sklearn.neural_network import MLPRegressor 29 | 30 | # 作为全局变量进行测试 31 | 32 | FactorCode = ['ROIC', 'CashToCurrentLiability', 'STDDEV', 'DDNCR', 'TVMA20', 'EnterpriseFCFPS', 33 | 'PS', 'AdminExpenseTTM', 'FinanExpenseTTM', 'NetIntExpense', 'GrossProfit', 'FY12P', 34 | 'AD', 'TotalAssetGrowRate', 'MA120'] 35 | 36 | 37 | # 中位数去极值法 38 | def filter_MAD(df, factor, n=3): 39 | """ 40 | :param df: 去极值的因子序列 41 | :param factor: 待去极值的因子 42 | :param n: 中位数偏差值的上下界倍数 43 | :return: 经过处理的因子dataframe 44 | """ 45 | median = df[factor].quantile(0.5) 46 | new_median = ((df[factor] - median).abs()).quantile(0.5) 47 | max_range = median + n * new_median 48 | min_range = median - n * new_median 49 | 50 | for i in range(df.shape[0]): 51 | if df.loc[i, factor] > max_range: 52 | df.loc[i, factor] = max_range 53 | elif df.loc[i, factor] < min_range: 54 | df.loc[i, factor] = min_range 55 | return df 56 | 57 | 58 | def init(context): 59 | # 账号设置:设置初始资金为 10000000 元 60 | set_backtest(initial_cash=10000000, future_cost_fee=1.0, stock_cost_fee=30, margin_rate=1.0, slide_price=0.0, 61 | price_loc=1, deal_type=0, limit_type=0) 62 | # 注册数据:日频数据 63 | reg_kdata('day', 1) 64 | global FactorCode # 全局单因子代号 65 | reg_factor(factor=FactorCode) 66 | context.FactorCode = FactorCode # 67 | 68 | # 超参数设置: 69 | context.Len = 21 # 时间长度: 当交易日个数小于该事件长度时,跳过该交易日,假设平均每个月 21 个交易日左右 250/12 70 | context.Num = 0 # 记录当前交易日个数 71 | 72 | # 较敏感的超参数,需要调节 73 | context.upper_pos = 85 # 股票预测收益率的上分位数,高于则买入 74 | context.down_pos = 20 # 股票预测收益率的下分位数,低于则卖出 75 | context.cash_rate = 0.6 # 计算可用资金比例的分子,利益大于0的股票越多,比例越小 76 | 77 | # 确保月初调仓 78 | days = get_trading_days('SSE', '2016-01-01', '2018-09-30') 79 | months = np.vectorize(lambda x: x.month)(days) 80 | month_begin = days[pd.Series(months) != pd.Series(months).shift(1)] 81 | context.month_begin = pd.Series(month_begin).dt.strftime('%Y-%m-%d').tolist() 82 | 83 | 84 | def on_data(context): 85 | context.Num = context.Num + 1 86 | if context.Num < context.Len: # 如果交易日个数小于Len+1,则进入下一个交易日进行回测 87 | return 88 | if datetime.datetime.strftime(context.now, '%Y-%m-%d') not in context.month_begin: # 调仓频率为月,月初开始调仓 89 | return 90 | 91 | # 获取数据: 92 | KData = get_reg_kdata(reg_idx=context.reg_kdata[0], length=context.Len, fill_up=True, df=True) 93 | FData = get_reg_factor(reg_idx=context.reg_factor[0], target_indices=[x for x in range(300)], length=context.Len, 94 | df=True) # 获取因子数据 95 | 96 | # 特征构建: 97 | Fcode = context.FactorCode # 标签不需要代号了 98 | 99 | # 数据存储变量: 100 | # Close 字段为标签,Fcode 为标签 101 | FactorData = pd.DataFrame(columns=(['idx', 'benefit'] + Fcode)) # 存储训练特征及标签样本 102 | FactorDataTest = pd.DataFrame(columns=(['idx'] + Fcode)) # 存储预测特征样本 103 | 104 | # K线数据序号对齐 105 | tempIdx = KData[KData['time'] == KData['time'][0]]['target_idx'].reset_index(drop=True) 106 | 107 | # 按标的处理数据: 108 | for i in range(300): 109 | # 训练特征集及训练标签构建: 110 | # 临时数据存储变量: 111 | FactorData0 = pd.DataFrame(np.full([1, len(Fcode) + 2], np.nan), 112 | columns=(['idx', 'benefit'] + Fcode)) 113 | # 存储预测特征样本 114 | FactorDataTest0 = pd.DataFrame(np.full([1, len(Fcode) + 1], np.nan), columns=(['idx'] + Fcode)) 115 | 116 | # 因子数据 序号对齐, 提取当前标的的因子数据 117 | FData0 = FData[FData['target_idx'] == tempIdx[i]].reset_index(drop=True) 118 | 119 | # 按特征处理数据: 120 | for FC in context.FactorCode: 121 | # 提取当前标的中与当前因子FC相同的部分 122 | FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True) 123 | FactorData0[FC] = FCData[0] # 存储上一个月初的股票因子数据 124 | 125 | # 按标签处理数据: 126 | # 提取当前标的的前一个月的K线面板数据 127 | close = np.array(KData[KData['target_idx'] == tempIdx[i]]['close']) 128 | # 计算当前标的在上一个月的收益率 129 | benefit = (close[context.Len - 1] - close[0]) / close[0] 130 | 131 | FactorData0['benefit'] = benefit 132 | # idx: 建立当前标的在训练样本集中的索引 133 | FactorData0['idx'] = tempIdx[i] 134 | # 合并数据:组成训练样本 135 | FactorData = FactorData.append(FactorData0, ignore_index=True) 136 | 137 | # 预测特征集构建:建立标的索引 138 | FactorDataTest0['idx'] = tempIdx[i] 139 | # 按特征处理数据,过程同建立训练特征 140 | for FC in context.FactorCode: 141 | FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True) 142 | FactorDataTest0[FC] = FCData[context.Len - 1] 143 | 144 | # 合并测试数据 145 | FactorDataTest = FactorDataTest.append(FactorDataTest0, ignore_index=True) 146 | 147 | """ 148 | 训练集和测试集的表头字段如下 149 | FactorData DataFrame: 150 | idx | benefit | Factor 1 | Factor 2| .... 151 | benefit 作为标签,上月初Factor作为特征,此处是单因子测试,只有一个特征 152 | FactorDataTest DataFrame: 153 | idx | Factor 1 | Factor 2 | ... 154 | 本月初的因子作为预测特征 155 | """ 156 | 157 | # 数据清洗: 158 | FactorData = FactorData.dropna(axis=0, how='any').reset_index(drop=True) # 清洗数据 159 | FactorDataTest = FactorDataTest.dropna(axis=0, how='any').reset_index(drop=True) # 清洗数据 160 | Idx = FactorDataTest['idx'] # 剩余标的序号 161 | 162 | # 按特征进行预处理 163 | for Factor in context.FactorCode: 164 | FactorData = filter_MAD(FactorData, Factor, 5) # 中位数去极值法 165 | FactorData[Factor] = preprocessing.scale(FactorData[Factor]) # 标准化 166 | 167 | FactorDataTest = filter_MAD(FactorDataTest, Factor, 5) # 中位数去极值法 168 | FactorDataTest[Factor] = preprocessing.scale(FactorDataTest[Factor]) # 标准化 169 | 170 | # 训练和预测特征构建:# 行(样本数)* 列(特征数) 171 | X = np.ones([FactorData.shape[0], len(Fcode)]) 172 | Xtest = np.ones([FactorDataTest.shape[0], len(Fcode)]) 173 | 174 | # 循环填充特征到numpy数组中 175 | for i in range(X.shape[1]): 176 | X[:, i] = FactorData[Fcode[i]] 177 | Xtest[:, i] = FactorDataTest[Fcode[i]] 178 | 179 | # 训练样本的标签,为浮点数的收益率 180 | Y = (np.array(FactorData['benefit']).astype(float) > 0) 181 | 182 | mlp = MLPRegressor(hidden_layer_sizes=4, activation='logistic', solver='adam', 183 | max_iter=50) 184 | 185 | # 模型训练: 186 | mlp.fit(X, Y) 187 | 188 | # LR分类预测: 189 | y = mlp.predict(Xtest) 190 | 191 | # 交易设置: 192 | positions = context.account().positions['volume_long'] # 多头持仓数量 193 | valid_cash = context.account(account_idx=0).cash['valid_cash'][0] # 可用资金 194 | 195 | P = context.cash_rate / (sum(y > 0) + 1) # 设置每只标的可用资金比例 + 1 防止分母为0 196 | 197 | # 获取收益率的高分位数和低分位数 198 | low_return,high_return = np.percentile(y, [context.down_pos, context.upper_pos]) 199 | 200 | for i in range(len(Idx)): 201 | position = positions.iloc[Idx[i]] 202 | # if position == 0 and y[i] == True and valid_cash > 0: # 若预测结果为true(收益率>0),买入 203 | # print('开仓') 204 | if position == 0 and y[i] > high_return and valid_cash > 0: # 当前无仓,且该股票收益大于高70%分位数,则开仓,买入 205 | # 开仓数量 + 1防止分母为0 206 | # print(valid_cash, P, KData['close'][Idx[i]]) # 这里的数目可考虑减少一点,,有时太多有时太少 207 | Num = int(math.floor(valid_cash * P / 100 / (KData['close'][Idx[i]] + 1)) * 100) 208 | 209 | # 控制委托量,不要过大或过小,需要保证是100的倍数 210 | if Num < 1000: 211 | Num *= 10 212 | if Num > 100000: 213 | Num = int(Num / 10) 214 | Num -= Num % 100 215 | if Num <= 0: # 不开仓 216 | continue 217 | 218 | print("开仓数量为:{}".format(Num)) 219 | order_id = order_volume(account_idx=0, target_idx=int(Idx[i]), volume=Num, side=1, position_effect=1, order_type=2, 220 | price=0) # 指定委托量开仓 221 | # 对订单号为order_id的委托单设置止损,止损距离10个整数点,触发时,委托的方式用市价委托 222 | # stop_loss_by_order(target_order_id=order_id, stop_type=1, stop_gap=10, order_type=2) 223 | # elif position > 0 and y[i] == False: #预测结果为false(收益率<0),卖出 224 | elif position > 0 and y[i] < low_return: # 当前持仓,且该股票收益小于低30%分位数,则平仓,卖出 225 | print("平仓,数量为: {}".format(position / 10)) 226 | order_volume(account_idx=0, target_idx=int(Idx[i]), volume=int(position / 10), 227 | side=2, position_effect=2, order_type=2, price=0) # 指定委托量平仓 228 | 229 | 230 | if __name__ == '__main__': 231 | 232 | file_path = 'MLP.py' 233 | block = 'hs300' 234 | 235 | begin_date = '2016-01-01' 236 | end_date = '2018-09-30' 237 | 238 | strategy_name = 'MLP' 239 | 240 | run_backtest(strategy_name=strategy_name, file_path=file_path, 241 | target_list=list(get_code_list('hs300', date=begin_date)['code']), 242 | frequency='day', fre_num=1, begin_date=begin_date, end_date=end_date, fq=1) 243 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 9 | # Tidy-QuantTrading 10 | 泰迪杯数据挖掘比赛协作仓库。——基于机器学习方法构建多因子选股模型。 11 | > Group Members:XiaoRu Chen,Xiaoling Ling,Yihao Qiu 12 | 13 | ## 成果 14 | 最优的随机森林模型:累计收益60%左右,经择时策略风险控制后,最大回撤率控制在9%左右,夏普率为0.9左右。 15 | 16 | ### 单因子测试 17 | 18 | 确定一个单因子测试文件,定义待测因子列表,执行多次单因子runtest。 19 | - 保留回测报告,获取字段,保存在CSV文件。 20 | - 结果可视化。 21 | - 筛选得到最优因子。 22 | - 因子做共线性分析,获取最终因子。 23 | 24 | > _**官方提示:**_ 单因子问题,可以手动实现一下,或者可以弱化一下,通过间接的方法去实现这个问题,并不一定要实现一个完整的回测框架才能解决单因子分析的问题 25 | 26 | ### 选用机器学习模型回测 27 | 28 | - 特征和标签构建。 29 | - 等权重线性模型。 30 | - 建立baseline models,尝试使用多种模型。SVR,RNN(LSTM),xgboost, random_forest,adaboost... 31 | - 交易逻辑确定。 32 | - 回测结果记录,分析。 33 | 34 | 35 | #### 关于模型的一些设想 36 | - 可参考论文[GBDT提取特征 + SVM二分类的方法](https://github.com/JoshuaQYH/TIDIBEI/blob/master/references/SA20190100000_36930159.pdf) 37 | - [LSTM进行选股](https://qiniu-images.datayes.com/huatai9.pdf)(在月频数据较少,可能效果不好) 38 | - [Adaboost](http://pg.jrj.com.cn/acc/Res/CN_RES/INVEST/2016/5/31/ed36ae43-0f6e-4051-bb9c-2e9a67632d74.pdf), randomforest, svm([启发式](http://or.nsfc.gov.cn/bitstream/00001903-5/353458/1/1000008947591.pdf)),[xgboost](https://cloud.tencent.com/developer/article/1137060)等等进行集成。如[Stacking](https://cloud.tencent.com/developer/article/1137060),bagging. 39 | 40 | ### 风险控制 41 | - 风险模型:barra模型 42 | - 择时模型:三均线择时策略。 43 | 44 | ## 文件说明 45 | - `data_exploration.ipynb`: atrader API调用测试文件。 46 | - `get_factor_report.py`: 当单因子回测结束之后,执行文件,得到策略字段。 47 | - `single_factor_test.py`: 单因子测试文件。 48 | - `find_factor.py`: 自实现的因子绩效分析文件(**已弃用**) 49 | - `run_test.bat`: 脚本自动化运行python程序,实现多次执行策略。 50 | - 'factor_analysis': 类内因子共线性分析文件,绘制相关系数矩阵。 51 | - 以模型名标识模型回测文件。 52 | 53 | ## LINK 54 | - [AutoTrader 官方API文档](https://www.digquant.com.cn/documents/17#h1-u5FEBu901Fu5F00u59CB-0) 55 | - [股票交易名词解释: 多头,空头,平仓,持仓,调仓....](http://stock.hexun.com/menu/stepbystep/step3.html) 56 | - [头寸解释](https://wiki.mbalib.com/wiki/%E5%A4%B4%E5%AF%B8) 57 | - [阮一峰常用git命令清单](http://www.ruanyifeng.com/blog/2015/12/git-cheat-sheet.html) 58 | - [点宽因子数据字典](https://www.digquant.com.cn/documents/23) 59 | - [名词解释:IC/IR](https://xueqiu.com/1652627245/108835836) 60 | - [名词解释:alpha值/beta值](https://blog.csdn.net/yezi113yezi/article/details/81078128) 61 | - [A题华师现场解读](https://edu.tipdm.org/) 62 | - [人工智能阿尔法策略框架-对282个因子的分析](https://www.jiqizhixin.com/articles/2019-01-26-5) 63 | -------------------------------------------------------------------------------- /RF_line3.py: -------------------------------------------------------------------------------- 1 | """ 2 | ---------------------------------------------------------- 3 | 策略思路: 4 | 1. 回测标的:沪深300成分股 5 | 2. 回测时间段:2016-01-01 至 2018-09-30 6 | 3. 特征选择:每个大类夏普率最高的因子+夏普率高于1.5的因子 7 | - 质量类:ROIC, CashToCurrentLiability 8 | - 特色技术指标:STDDEV 9 | - 收益风险:DDNCR 10 | - 情绪类:TVMA20 11 | - 每股指标类:EnterpriseFCFPS 12 | - 价值类:PS 13 | - 基础类:AdminExpenseTTM, FinanExpenseTTM, NetIntExpense, GrossProfit 14 | - 行业分析师:FY12P 15 | - 动量类:TotalAssetGrowRate 16 | - 成长类:TotalAssetGrowRate 17 | - 常用技术类:MA120 18 | ... 其余逻辑参照single_factor_test.py 19 | 20 | ---------------------------------------------------------- 21 | """ 22 | from atrader import * 23 | import pandas as pd 24 | import numpy as np 25 | from sklearn.ensemble import RandomForestRegressor 26 | import math 27 | from sklearn import preprocessing 28 | import datetime 29 | 30 | # 作为全局变量进行测试 31 | FactorCode = ['ROIC', 'CashToCurrentLiability', 'STDDEV', 'DDNCR', 'PVI', 'EnterpriseFCFPS', 32 | 'PS', 'AdminExpenseTTM', 'FinanExpenseTTM', 'NetIntExpense', 'NIAP', 'FY12P', 33 | 'AD', 'TotalAssetGrowRate', 'MA120'] 34 | 35 | # 中位数去极值法 36 | def filter_MAD(df, factor, n=3): 37 | """ 38 | :param df: 去极值的因子序列 39 | :param factor: 待去极值的因子 40 | :param n: 中位数偏差值的上下界倍数 41 | :return: 经过处理的因子dataframe 42 | """ 43 | median = df[factor].quantile(0.5) 44 | new_median = ((df[factor] - median).abs()).quantile(0.5) 45 | max_range = median + n * new_median 46 | min_range = median - n * new_median 47 | 48 | for i in range(df.shape[0]): 49 | if df.loc[i, factor] > max_range: 50 | df.loc[i, factor] = max_range 51 | elif df.loc[i, factor] < min_range: 52 | df.loc[i, factor] = min_range 53 | return df 54 | 55 | 56 | def init(context): 57 | 58 | # context.SVM = svm.SVC(gamma='scale') 59 | # 账号设置:设置初始资金为 10000000 元 60 | set_backtest(initial_cash=10000000, future_cost_fee=1.0, stock_cost_fee=30, margin_rate=1.0, slide_price=0.0, 61 | price_loc=1, deal_type=0, limit_type=0) 62 | # 注册数据:日频数据 63 | reg_kdata('day', 1) 64 | global FactorCode # 全局单因子代号 65 | reg_factor(factor=FactorCode) 66 | 67 | context.FactorCode = FactorCode # 68 | 69 | # 超参数设置: 70 | context.Len = 21 # 时间长度: 当交易日个数小于该事件长度时,跳过该交易日,假设平均每个月 21 个交易日左右 250/12 71 | context.Num = 0 # 记录当前交易日个数 72 | 73 | # 较敏感的超参数,需要调节 74 | context.upper_pos = 80 # 股票预测收益率的上分位数,高于则买入 75 | context.down_pos = 40 # 股票预测收益率的下分位数,低于则卖出 76 | context.cash_rate = 0.6 # 计算可用资金比例的分子,利益大于0的股票越多,比例越小 77 | 78 | # 确保月初调仓 79 | days = get_trading_days('SSE', '2016-01-01', '2018-09-30') 80 | months = np.vectorize(lambda x: x.month)(days) 81 | month_begin = days[pd.Series(months) != pd.Series(months).shift(1)] 82 | context.month_begin = pd.Series(month_begin).dt.strftime('%Y-%m-%d').tolist() 83 | 84 | # 三均线择时策略 85 | # 无持仓的情况下,5日和20日均线都大于60日均线,买入,等价于5日和20日均线上穿60日均线,买入; 86 | # 有持仓的情况下,5日和20日均线都小于60日均线,卖出,等价于5日和20日均线上穿60日均线,买入; 87 | context.win = 61 # 计算所需总数据长度 88 | context.win5 = 5 # 5日均线参数 89 | context.win20 = 20 # 20日均线参数 90 | context.win60 = 60 # 60日均线参数 91 | 92 | def on_data(context): 93 | context.Num = context.Num + 1 94 | if context.Num < context.win: # 如果交易日个数小于win,则进入下一个交易日进行回测 95 | return 96 | if datetime.datetime.strftime(context.now, '%Y-%m-%d') not in context.month_begin: # 调仓频率为月,月初开始调仓 97 | return 98 | 99 | # 获取数据: 100 | KData = get_reg_kdata(reg_idx=context.reg_kdata[0], length=context.Len, fill_up=True, df=True) 101 | FData = get_reg_factor(reg_idx=context.reg_factor[0], target_indices=[x for x in range(300)], length=context.Len, 102 | df=True) # 获取因子数据 103 | 104 | # 特征构建: 105 | Fcode = context.FactorCode # 标签不需要代号了 106 | 107 | # 数据存储变量: 108 | # Close 字段为标签,Fcode 为标签 109 | FactorData = pd.DataFrame(columns=(['idx', 'benefit'] + Fcode)) # 存储训练特征及标签样本 110 | FactorDataTest = pd.DataFrame(columns=(['idx'] + Fcode)) # 存储预测特征样本 111 | 112 | # K线数据序号对齐 113 | tempIdx = KData[KData['time'] == KData['time'][0]]['target_idx'].reset_index(drop=True) 114 | 115 | # 按标的处理数据: 116 | for i in range(300): 117 | # 训练特征集及训练标签构建: 118 | # 临时数据存储变量: 119 | FactorData0 = pd.DataFrame(np.full([1, len(Fcode) + 2], np.nan), 120 | columns=(['idx', 'benefit'] + Fcode)) 121 | # 存储预测特征样本 122 | FactorDataTest0 = pd.DataFrame(np.full([1, len(Fcode) + 1], np.nan), columns=(['idx'] + Fcode)) 123 | 124 | # 因子数据 序号对齐, 提取当前标的的因子数据 125 | FData0 = FData[FData['target_idx'] == tempIdx[i]].reset_index(drop=True) 126 | 127 | # 按特征处理数据: 128 | for FC in context.FactorCode: 129 | # 提取当前标的中与当前因子FC相同的部分 130 | FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True) 131 | FactorData0[FC] = FCData[0] # 存储上一个月初的股票因子数据 132 | 133 | # 按标签处理数据: 134 | # 提取当前标的的前一个月的K线面板数据 135 | close = np.array(KData[KData['target_idx'] == tempIdx[i]]['close']) 136 | # 计算当前标的在上一个月的收益率 137 | benefit = (close[context.Len - 1] - close[0]) / close[0] 138 | 139 | FactorData0['benefit'] = benefit 140 | # idx: 建立当前标的在训练样本集中的索引 141 | FactorData0['idx'] = tempIdx[i] 142 | # 合并数据:组成训练样本 143 | FactorData = FactorData.append(FactorData0, ignore_index=True) 144 | 145 | # 预测特征集构建:建立标的索引 146 | FactorDataTest0['idx'] = tempIdx[i] 147 | # 按特征处理数据,过程同建立训练特征 148 | for FC in context.FactorCode: 149 | FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True) 150 | FactorDataTest0[FC] = FCData[context.Len - 1] 151 | 152 | # 合并测试数据 153 | FactorDataTest = FactorDataTest.append(FactorDataTest0, ignore_index=True) 154 | 155 | """ 156 | 训练集和测试集的表头字段如下 157 | FactorData DataFrame: 158 | idx | benefit | Factor 1 | Factor 2| .... 159 | benefit 作为标签,上月初Factor作为特征,此处是单因子测试,只有一个特征 160 | FactorDataTest DataFrame: 161 | idx | Factor 1 | Factor 2 | ... 162 | 本月初的因子作为预测特征 163 | """ 164 | 165 | # 数据清洗: 166 | FactorData = FactorData.dropna(axis=0, how='any').reset_index(drop=True) # 清洗数据 167 | FactorDataTest = FactorDataTest.dropna(axis=0, how='any').reset_index(drop=True) # 清洗数据 168 | Idx = FactorDataTest['idx'] # 剩余标的序号 169 | 170 | # 按特征进行预处理 171 | for Factor in context.FactorCode: 172 | FactorData = filter_MAD(FactorData, Factor, 5) # 中位数去极值法 173 | FactorData[Factor] = preprocessing.scale(FactorData[Factor]) # 标准化 174 | 175 | FactorDataTest = filter_MAD(FactorDataTest, Factor, 5) # 中位数去极值法 176 | FactorDataTest[Factor] = preprocessing.scale(FactorDataTest[Factor]) # 标准化 177 | 178 | # 训练和预测特征构建:# 行(样本数)* 列(特征数) 179 | X = np.ones([FactorData.shape[0], len(Fcode)]) 180 | Xtest = np.ones([FactorDataTest.shape[0], len(Fcode)]) 181 | 182 | # 循环填充特征到numpy数组中 183 | for i in range(X.shape[1]): 184 | X[:, i] = FactorData[Fcode[i]] 185 | Xtest[:, i] = FactorDataTest[Fcode[i]] 186 | 187 | # 训练样本的标签,为浮点数的收益率 188 | Y = np.array(FactorData['benefit']).astype(float) 189 | 190 | random_forest = RandomForestRegressor(max_depth=5, n_estimators=50) 191 | 192 | # 模型训练: 193 | random_forest.fit(X, Y) 194 | 195 | # LR分类预测: 196 | y = random_forest.predict(Xtest) 197 | # 交易设置: 198 | positions = context.account().positions['volume_long'] # 多头持仓数量 199 | valid_cash = context.account(account_idx=0).cash['valid_cash'][0] # 可用资金 200 | 201 | P = context.cash_rate / (sum(y > 0) + 1) # 设置每只标的可用资金比例 + 1 防止分母为0 202 | 203 | # 获取收益率的高分位数和低分位数 204 | low_return, high_return = np.percentile(y, [context.down_pos, context.upper_pos]) 205 | 206 | # 进行择时准备 207 | # 获取前61天的数据 208 | data = get_reg_kdata(reg_idx=context.reg_kdata[0], length=context.win, fill_up=True, 209 | df=True) # data值为数据帧DataFrame类型,存储所有标的的K线行情数据。 210 | # 获取收盘价数据 211 | close = data.close.values.reshape(-1, context.win).astype(float) # 从data行情数据中获取收盘价,并转为ndarray数据类型 212 | # 计算均线值: 213 | ma5 = close[:, -context.win5:].mean(axis=1) # 5日均线 214 | ma20 = close[:, -context.win20:].mean(axis=1) # 20日均线 215 | ma60 = close[:, -context.win60:].mean(axis=1) # 60日均线 216 | 217 | # 获取标的序号:从0~299 218 | target = np.array(range(300)) 219 | positions_val = context.account().positions['volume_long'].values # 多头持仓数量 220 | # 计算买入信号: 221 | buy_signal = np.logical_and(positions_val == 0, ma5 > ma60, 222 | ma20 > ma60) # 无持仓的情况下,5日和20日均线都大于60日均线,买入,等价于5日和20日均线上穿60日均线,买入; 223 | # 计算卖出信号: 224 | sell_signal = np.logical_and(positions_val > 0, ma5 < ma60, 225 | ma20 < ma60) # 有持仓的情况下,5日和20日均线都小于60日均线,卖出,等价于5日和20日均线上穿60日均线,买入; 226 | # 获取买入信号标的的序号 227 | target_buy = target[buy_signal].tolist() # 一个记录了标的是否要买 228 | # 获取卖出信号标的的序号 229 | target_sell = target[sell_signal].tolist() # 同上 230 | for i in range(len(Idx)): 231 | position = positions.iloc[Idx[i]] 232 | 233 | # 当前无仓,且该股票收益大于高80%分位数,且5日和20日均线都大于或等于60日均线 则开仓,买入 234 | if position == 0 and y[i] > high_return and valid_cash > 0 and Idx[i] in target_buy: 235 | Num = int(math.floor(valid_cash * P / 100 / (KData['close'][Idx[i]] + 1)) * 100) 236 | # 控制委托量,不要过大或过小,需要保证是100的倍数 237 | if Num < 1000: 238 | Num *= 10 239 | if Num > 100000: 240 | Num = int(Num / 10) 241 | Num -= Num % 100 242 | if Num <= 0: # 不开仓 243 | continue 244 | print("开仓数量为:{}".format(Num)) 245 | order_id = order_volume(account_idx=0, target_idx=int(Idx[i]), volume=Num, side=1, position_effect=1, order_type=2, 246 | price=0) # 指定委托量开仓 247 | # 对订单号为order_id的委托单设置止损,止损距离10个整数点,触发时,委托的方式用市价委托 248 | stop_loss_by_order(target_order_id=order_id, stop_type=1, stop_gap=15, order_type=2) 249 | 250 | # 当前持仓,且该股票收益小于低20%分位数,5日和20日均线都小于60日均线 则平仓,卖出 251 | elif position > 0 and y[i] < low_return and Idx[i] in target_sell: 252 | print("平仓,数量为: {}".format(position)) 253 | order_volume(account_idx=0, target_idx=int(Idx[i]), volume=int(position), 254 | side=2, position_effect=2, order_type=2, price=0) # 指定委托量平仓 255 | 256 | 257 | if __name__ == '__main__': 258 | file_path = 'RF_line3.py' 259 | block = 'hs300' 260 | 261 | begin_date = '2016-01-01' 262 | end_date = '2018-09-30' 263 | 264 | strategy_name = 'RF_line3' 265 | 266 | run_backtest(strategy_name=strategy_name, file_path=file_path, 267 | target_list=list(get_code_list('hs300', date=begin_date)['code']), 268 | frequency='day', fre_num=1, begin_date=begin_date, end_date=end_date, fq=1) 269 | -------------------------------------------------------------------------------- /adaboost_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | ---------------------------------------------------------- 3 | 策略思路: 4 | 1. 回测标的:沪深300成分股 5 | 2. 回测时间段:2016-01-01 至 2018-09-30 6 | 3. 特征选择:每个大类夏普率最高的因子+夏普率高于1.5的因子 7 | - 质量类:ROIC, CashToCurrentLiability 8 | - 特色技术指标:STDDEV 9 | - 收益风险:DDNCR 10 | - 情绪类:TVMA20/PVI 11 | - 每股指标类:EnterpriseFCFPS 12 | - 价值类:PS 13 | - 基础类:AdminExpenseTTM, FinanExpenseTTM, NetIntExpense, GrossProfit/NIAP 14 | - 行业分析师:FY12P 15 | - 动量类:AD 16 | - 成长类:TotalAssetGrowRate 17 | - 常用技术类:MA120 18 | ... 其余逻辑参照single_factor_test.py 19 | ---------------------------------------------------------- 20 | """ 21 | from atrader import * 22 | import pandas as pd 23 | import numpy as np 24 | from sklearn import svm 25 | import math 26 | from sklearn import preprocessing 27 | import datetime 28 | from sklearn.tree import DecisionTreeRegressor 29 | from sklearn.ensemble import AdaBoostRegressor 30 | from sklearn.decomposition import PCA 31 | 32 | # 作为全局变量进行测试 33 | FactorCode = ['ROIC', 'CashToCurrentLiability', 'STDDEV', 'DDNCR', 'PVI', 'EnterpriseFCFPS', 34 | 'PS', 'AdminExpenseTTM', 'FinanExpenseTTM', 'NetIntExpense', 'NIAP', 'FY12P', 35 | 'AD', 'TotalAssetGrowRate', 'MA120'] 36 | 37 | # 中位数去极值法 38 | def filter_MAD(df, factor, n=3): 39 | """ 40 | :param df: 去极值的因子序列 41 | :param factor: 待去极值的因子 42 | :param n: 中位数偏差值的上下界倍数 43 | :return: 经过处理的因子dataframe 44 | """ 45 | median = df[factor].quantile(0.5) 46 | new_median = ((df[factor] - median).abs()).quantile(0.5) 47 | max_range = median + n * new_median 48 | min_range = median - n * new_median 49 | 50 | for i in range(df.shape[0]): 51 | if df.loc[i, factor] > max_range: 52 | df.loc[i, factor] = max_range 53 | elif df.loc[i, factor] < min_range: 54 | df.loc[i, factor] = min_range 55 | return df 56 | 57 | 58 | def init(context): 59 | # 账号设置:设置初始资金为 10000000 元 60 | set_backtest(initial_cash=10000000, future_cost_fee=1.0, stock_cost_fee=30, margin_rate=1.0, slide_price=0.0, 61 | price_loc=1, deal_type=0, limit_type=0) 62 | # 注册数据:日频数据 63 | reg_kdata('day', 1) 64 | global FactorCode # 全局单因子代号 65 | reg_factor(factor=FactorCode) 66 | print("init 函数, 注册因子为{}".format(FactorCode[0])) 67 | context.FactorCode = FactorCode # 68 | 69 | # 超参数设置: 70 | context.Len = 21 # 时间长度: 当交易日个数小于该事件长度时,跳过该交易日,假设平均每个月 21 个交易日左右 250/12 71 | context.Num = 0 # 记录当前交易日个数 72 | 73 | # 较敏感的超参数,需要调节 74 | context.upper_pos = 80 # 股票预测收益率的上分位数,高于则买入 75 | context.down_pos = 20 # 股票预测收益率的下分位数,低于则卖出 76 | context.cash_rate = 0.6 # 计算可用资金比例的分子,利益大于0的股票越多,比例越小 77 | 78 | # 确保月初调仓 79 | days = get_trading_days('SSE', '2016-01-01', '2018-09-30') 80 | months = np.vectorize(lambda x: x.month)(days) 81 | month_begin = days[pd.Series(months) != pd.Series(months).shift(1)] 82 | context.month_begin = pd.Series(month_begin).dt.strftime('%Y-%m-%d').tolist() 83 | 84 | 85 | def on_data(context): 86 | context.Num = context.Num + 1 87 | if context.Num < context.Len: # 如果交易日个数小于Len+1,则进入下一个交易日进行回测 88 | return 89 | if datetime.datetime.strftime(context.now, '%Y-%m-%d') not in context.month_begin: # 调仓频率为月,月初开始调仓 90 | return 91 | 92 | # 获取数据: 93 | KData = get_reg_kdata(reg_idx=context.reg_kdata[0], length=context.Len, fill_up=True, df=True) 94 | FData = get_reg_factor(reg_idx=context.reg_factor[0], target_indices=[x for x in range(300)], length=context.Len, 95 | df=True) # 获取因子数据 96 | 97 | # 特征构建: 98 | Fcode = context.FactorCode # 标签不需要代号了 99 | 100 | # 数据存储变量: 101 | # Close 字段为标签,Fcode 为标签 102 | FactorData = pd.DataFrame(columns=(['idx', 'benefit'] + Fcode)) # 存储训练特征及标签样本 103 | FactorDataTest = pd.DataFrame(columns=(['idx'] + Fcode)) # 存储预测特征样本 104 | 105 | # K线数据序号对齐 106 | tempIdx = KData[KData['time'] == KData['time'][0]]['target_idx'].reset_index(drop=True) 107 | 108 | # 按标的处理数据: 109 | for i in range(300): 110 | # 训练特征集及训练标签构建: 111 | # 临时数据存储变量: 112 | FactorData0 = pd.DataFrame(np.full([1, len(Fcode) + 2], np.nan), 113 | columns=(['idx', 'benefit'] + Fcode)) 114 | # 存储预测特征样本 115 | FactorDataTest0 = pd.DataFrame(np.full([1, len(Fcode) + 1], np.nan), columns=(['idx'] + Fcode)) 116 | 117 | # 因子数据 序号对齐, 提取当前标的的因子数据 118 | FData0 = FData[FData['target_idx'] == tempIdx[i]].reset_index(drop=True) 119 | 120 | # 按特征处理数据: 121 | for FC in context.FactorCode: 122 | # 提取当前标的中与当前因子FC相同的部分 123 | FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True) 124 | FactorData0[FC] = FCData[0] # 存储上一个月初的股票因子数据 125 | 126 | # 按标签处理数据: 127 | # 提取当前标的的前一个月的K线面板数据 128 | close = np.array(KData[KData['target_idx'] == tempIdx[i]]['close']) 129 | # 计算当前标的在上一个月的收益率 130 | benefit = (close[context.Len - 1] - close[0]) / close[0] 131 | 132 | FactorData0['benefit'] = benefit 133 | # idx: 建立当前标的在训练样本集中的索引 134 | FactorData0['idx'] = tempIdx[i] 135 | # 合并数据:组成训练样本 136 | FactorData = FactorData.append(FactorData0, ignore_index=True) 137 | 138 | # 预测特征集构建:建立标的索引 139 | FactorDataTest0['idx'] = tempIdx[i] 140 | # 按特征处理数据,过程同建立训练特征 141 | for FC in context.FactorCode: 142 | FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True) 143 | FactorDataTest0[FC] = FCData[context.Len - 1] 144 | 145 | # 合并测试数据 146 | FactorDataTest = FactorDataTest.append(FactorDataTest0, ignore_index=True) 147 | 148 | """ 149 | 训练集和测试集的表头字段如下 150 | FactorData DataFrame: 151 | idx | benefit | Factor 1 | Factor 2| .... 152 | benefit 作为标签,上月初Factor作为特征,此处是单因子测试,只有一个特征 153 | FactorDataTest DataFrame: 154 | idx | Factor 1 | Factor 2 | ... 155 | 本月初的因子作为预测特征 156 | """ 157 | 158 | # 数据清洗: 159 | FactorData = FactorData.dropna(axis=0, how='any').reset_index(drop=True) # 清洗数据 160 | FactorDataTest = FactorDataTest.dropna(axis=0, how='any').reset_index(drop=True) # 清洗数据 161 | Idx = FactorDataTest['idx'] # 剩余标的序号 162 | 163 | # 按特征进行预处理 164 | for Factor in context.FactorCode: 165 | FactorData = filter_MAD(FactorData, Factor, 5) # 中位数去极值法 166 | FactorData[Factor] = preprocessing.scale(FactorData[Factor]) # 标准化 167 | 168 | FactorDataTest = filter_MAD(FactorDataTest, Factor, 5) # 中位数去极值法 169 | FactorDataTest[Factor] = preprocessing.scale(FactorDataTest[Factor]) # 标准化 170 | 171 | # 训练和预测特征构建:# 行(样本数)* 列(特征数) 172 | X = np.ones([FactorData.shape[0], len(Fcode)]) 173 | Xtest = np.ones([FactorDataTest.shape[0], len(Fcode)]) 174 | 175 | # 循环填充特征到numpy数组中 176 | for i in range(X.shape[1]): 177 | X[:, i] = FactorData[Fcode[i]] 178 | Xtest[:, i] = FactorDataTest[Fcode[i]] 179 | 180 | # 训练样本的标签,为浮点数的收益率 181 | Y = (np.array(FactorData['benefit']).astype(float) > 0) 182 | 183 | rng = np.random.RandomState(1) 184 | adaboost = AdaBoostRegressor(DecisionTreeRegressor(max_depth=9), 185 | n_estimators=60, random_state=rng) 186 | 187 | pca = PCA(n_components=5) 188 | 189 | X_pca = pca.fit_transform(X) 190 | Xtest_pca = pca.fit_transform(Xtest) 191 | 192 | # 模型训练: 193 | adaboost.fit(X_pca, Y) 194 | 195 | # 分类预测: 196 | y = adaboost.predict(Xtest_pca) 197 | 198 | # 交易设置: 199 | positions = context.account().positions['volume_long'] # 多头持仓数量 200 | valid_cash = context.account(account_idx=0).cash['valid_cash'][0] # 可用资金 201 | 202 | P = context.cash_rate / (sum(y > 0) + 1) # 设置每只标的可用资金比例 + 1 防止分母为0 203 | 204 | # 获取收益率的高分位数和低分位数 205 | low_return, high_return = np.percentile(y, [context.down_pos, context.upper_pos]) 206 | 207 | for i in range(len(Idx)): 208 | position = positions.iloc[Idx[i]] 209 | # if position == 0 and y[i] == True and valid_cash > 0: # 若预测结果为true(收益率>0),买入 210 | # print('开仓') 211 | if position == 0 and y[i] > high_return and valid_cash > 0: # 当前无仓,且该股票收益大于高70%分位数,则开仓,买入 212 | # 开仓数量 + 1防止分母为0 213 | # print(valid_cash, P, KData['close'][Idx[i]]) # 这里的数目可考虑减少一点,,有时太多有时太少 214 | Num = int(math.floor(valid_cash * P / 100 / (KData['close'][Idx[i]] + 1)) * 100) 215 | 216 | # 控制委托量,不要过大或过小,需要保证是100的倍数 217 | if Num < 1000: 218 | Num *= 10 219 | if Num > 100000: 220 | Num = int(Num / 10) 221 | Num -= Num % 100 222 | if Num <= 0: # 不开仓 223 | continue 224 | 225 | print("开仓数量为:{}".format(Num)) 226 | order_id = order_volume(account_idx=0, target_idx=int(Idx[i]), volume=Num, side=1, position_effect=1, order_type=2, 227 | price=0) # 指定委托量开仓 228 | # 对订单号为order_id的委托单设置止损,止损距离10个整数点,触发时,委托的方式用市价委托 229 | # stop_loss_by_order(target_order_id=order_id, stop_type=1, stop_gap=10, order_type=2) 230 | # elif position > 0 and y[i] == False: #预测结果为false(收益率<0),卖出 231 | elif position > 0 and y[i] < low_return: # 当前持仓,且该股票收益小于低30%分位数,则平仓,卖出 232 | print("平仓,数量为: {}".format(position / 10 )) 233 | order_volume(account_idx=0, target_idx=int(Idx[i]), volume=int(position / 10), 234 | side=2, position_effect=2, order_type=2, price=0) # 指定委托量平仓 235 | 236 | 237 | if __name__ == '__main__': 238 | 239 | file_path = 'adaboost_model.py' 240 | block = 'hs300' 241 | 242 | begin_date = '2016-01-01' 243 | end_date = '2018-09-30' 244 | 245 | strategy_name = 'adaboost' 246 | 247 | run_backtest(strategy_name=strategy_name, file_path=file_path, 248 | target_list=list(get_code_list('hs300', date=begin_date)['code']), 249 | frequency='day', fre_num=1, begin_date=begin_date, end_date=end_date, fq=1) 250 | -------------------------------------------------------------------------------- /factor_analysis.py: -------------------------------------------------------------------------------- 1 | """ 2 | author: qiuyihao 3 | date: 2019-04-22 4 | description: 同类因子进行共线性分析,绘制相关系数矩阵 5 | 获取每一类因子中的历史序列,该序列每一个因子由同时期股票的非空因子平均求得。 6 | 计算相关序列的相关系数,绘制相关系数矩阵 7 | """ 8 | import numpy as np 9 | import pandas as pd 10 | import atrader as at 11 | import seaborn as sns 12 | import matplotlib.pyplot as plt 13 | 14 | 15 | def draw_heatmap(df, filename): 16 | dfData = df.corr() 17 | plt.subplots(figsize=(13, 13)) 18 | sns.heatmap(dfData, annot=True, vmax=1, vmin=0, square=True, cmap='Blues') 19 | plt.savefig(filename) 20 | plt.show() 21 | 22 | 23 | def analysis_factor(factor_list, code_list, filename): 24 | print(factor_list, code_list[0]) 25 | factor_data = at.get_factor_by_code(factor_list=factor_list, target=code_list[0], 26 | begin_date='2016-01-01', end_date='2018-09-30') 27 | 28 | factor_data = factor_data.drop(['date'], axis=1) 29 | 30 | not_full_num = len(code_list) 31 | 32 | for tf in factor_data.isnull().any(): 33 | if tf == True: 34 | factor_data = pd.DataFrame(np.full([factor_data.shape[0], factor_data.shape[1]], 0.0), 35 | columns=[factor_list]) 36 | not_full_num -= 1 37 | break 38 | 39 | factor_data.columns = factor_list 40 | 41 | for i in range(len(code_list) - 1): 42 | tmp_data = at.get_factor_by_code(factor_list, target=code_list[i+1], 43 | begin_date='2016-01-01', end_date='2018-09-30') 44 | tmp_data = tmp_data.drop(['date'], axis=1) 45 | null_flag = False 46 | for tf in tmp_data.isnull().any(): 47 | if tf == True: 48 | null_flag = True 49 | not_full_num -= 1 50 | print("NAN... pass ") 51 | break 52 | if not null_flag: 53 | if tmp_data.iloc[:, 0].mean() >= 10000000: 54 | tmp_data /= 100000 # 某些因子数据过于庞大,需要缩小 55 | factor_data = factor_data + tmp_data 56 | print("add ... ") 57 | factor_data /= not_full_num 58 | draw_heatmap(factor_data, filename) 59 | 60 | 61 | if __name__ == '__main__': 62 | A = at.get_code_list('hs300', date='2016-01-01') 63 | code_list = A['code'].tolist() 64 | 65 | file_name_list = ["Q1_基础类", "Q1_质量类"] 66 | #, "情绪类", "价值类", "每股指标类", 67 | # "行业分析师类", "特色技术指标类"] 68 | 69 | factor_list = [['AdminExpenseTTM', 'NIAP', 'FinanExpenseTTM', 'NetIntExpense'], # 基础类 70 | ['DebtEquityRatio', 'SuperQuickRatio'] # 质量类 71 | ] 72 | # ['TVMA20', 'VOL20', 'OBV20', 'JDQS20'], # 情绪类 73 | # ['PE', 'PB', 'PS', 'NLSIZE', 'TA2EV', 'CTOP'], # 成长因子类 74 | # ['BasicEPS', 'EPS', 'EnterpriseFCFPS'], # 每股指标类 75 | # ['RSTR24', 'FY12P', 'SFY12P', 'PEIndu', 'EPIBS'], # 行业分析师类 76 | # ['AVGPRICE', 'BOP', 'KAMA', 'LINEARREG', 'STDDEV'] # 特色技术指标类 77 | 78 | for i, factor in enumerate(factor_list): 79 | #if i != 1: 80 | # continue 81 | print(file_name_list[i]) 82 | analysis_factor(factor, code_list, file_name_list[i]) # 最终得到因子相关系数矩阵 83 | 84 | -------------------------------------------------------------------------------- /find_factor.py: -------------------------------------------------------------------------------- 1 | """ 2 | author: qiuyihao 3 | date: 2019/04/13 - 04-15 4 | description: 单因子测试 5 | """ 6 | import pandas as pd 7 | import numpy as np 8 | import atrader as at 9 | from sklearn import preprocessing 10 | from sklearn import linear_model 11 | import time 12 | from scipy.stats import pearsonr 13 | import datetime 14 | 15 | 16 | # 中位数去极值法 17 | def filter_MAD(df, factor, n=5): 18 | """ 19 | :param df: 去极值的因子序列 20 | :param factor: 待去极值的因子 21 | :param n: 中位数偏差值的上下界倍数 22 | :return: 经过处理的因子dataframe 23 | """ 24 | # print(df) 25 | 26 | median = df[factor].quantile(0.5) 27 | new_median = ((df[factor] - median).abs()).quantile(0.5) 28 | max_range = median + n * new_median 29 | min_range = median - n * new_median 30 | 31 | for i in range(df.shape[0]): 32 | if df.loc[i, factor] > max_range: 33 | df.loc[i, factor] = max_range 34 | elif df.loc[i, factor] < min_range: 35 | df.loc[i, factor] = min_range 36 | return df 37 | 38 | 39 | # 判断某一个日期是否为周末,如果为周末,需要返回一个非周末的字符串。 40 | # 当时间是月末时,时间需要向前,当时间时月初是,时间需要向后 41 | # 采用递归实现,最终返回一个非周末的时间串 42 | # (其实这个函数的作用就是帮助减少几次获取因子而已,,,事后发现还不如直接靠get_factor_by_day判断 43 | def find_day_str(day_str): 44 | """ 45 | :param day_str: 要求标准的时间串 如 2016-01-01 46 | :return: 返回一个合适的时间串 47 | """ 48 | year = int(day_str[0:4]) 49 | month = int(day_str[5:7]) 50 | day = int(day_str[8:10]) 51 | any_day = datetime.datetime(year, month, day).strftime("%w") 52 | result_str = day_str 53 | if any_day == '6' or any_day == '0': 54 | if day < 15: 55 | day += 1 56 | if day < 10: 57 | day = '0' + str(day) 58 | else: 59 | day = str(day) 60 | elif day > 15: 61 | day -= 1 62 | day = str(day) 63 | result_str = find_day_str(day_str[0:8] + day) 64 | return result_str 65 | 66 | 67 | # 生成起始日期对 68 | def create_date(begin_date, end_date): 69 | """ 70 | :param begin_date: 开始日期 指明起始年月 如 '2018-01' 71 | :param end_date: 结束日期 指明结束年月 如 '2018-10' 72 | :return: 一个起始年月日列表,一个结束年月日列表 73 | 以一个月的第一天和最后一天作为一对日期 如 ['2018-01-01',..] ['2018-01-31',..] 74 | 注:需要排斥这两天为周末或者法定假期的时候 75 | """ 76 | # 解析字符串 77 | begin_year = int(begin_date[0:4]) 78 | begin_month = int(begin_date[5:7]) 79 | end_year = int(end_date[0:4]) 80 | end_month = int(end_date[5:7]) 81 | 82 | # 待拼接的年日月 83 | year = begin_year 84 | month = begin_month 85 | 86 | begin_date_list = [] 87 | end_date_list = [] 88 | 89 | big_month = [1, 3, 5, 7, 8, 10, 12] 90 | small_month = [4, 6, 9, 11] # 二月另外判断 91 | while year <= end_year and month <= end_month: 92 | start = '' 93 | end = '' 94 | 95 | if month >= 10: 96 | start = str(year) + '-' + str(month) + '-' + '01' 97 | end = str(year) + '-' + str(month) + '-' 98 | else: 99 | start = str(year) + '-0' + str(month) + '-' + '01' 100 | end = str(year) + '-0' + str(month) + '-' 101 | 102 | # 避免出现节假日或者周末,若出现则往后推一天 103 | while at.get_factor_by_day(factor_list=["PE"], target_list=["SZSE.000001"], date=start) is None: 104 | start_day = int(start[8:10]) + 1 105 | if start_day < 10: 106 | start = start[0:8] + '0' + str(start_day) 107 | else: 108 | start = start[0:8] + str(start_day) 109 | 110 | begin_date_list.append(start) # 插入一个非周末非法定假期的开始时间串 111 | 112 | # 判断月为大,为小 113 | if month in big_month: 114 | end = end + '31' 115 | elif month in small_month: 116 | end = end + '30' 117 | elif month == 2: 118 | if year % 4 == 0 and year % 100 != 0 or year % 400 == 0: 119 | end = end + '29' 120 | else: 121 | end = end + '28' 122 | 123 | while at.get_factor_by_day(factor_list=["PE"], target_list=["SZSE.000001"], date=end) is None: 124 | end_day = int(end[8:10]) - 1 125 | end = end[0:8] + str(end_day) 126 | 127 | end_date_list.append(end) # 插入一个非周末,非法定假期的结束时间串 128 | 129 | month += 1 130 | if month == 13: 131 | year += 1 132 | month = 1 133 | return begin_date_list, end_date_list 134 | 135 | 136 | # 计算每一个月的单个股票平均收益率 137 | def cal_yield_rate(code, begin_date, end_date): 138 | """ 139 | :param code: 股票代码 140 | :param begin_date: K线起始日期,月初 141 | :param end_date: K线结束日期,月末 142 | :return: 在该时间内股票的平均收益率 143 | """ 144 | day_data = at.get_kdata(target_list=[code], frequency='day', fre_num=1, begin_date=begin_date, 145 | end_date=end_date, fill_up=False, df=True, fq=1, sort_by_date=True) 146 | yield_rate = 0.0 147 | try: 148 | yield_rate = (day_data['close'][len(day_data) - 1] - day_data['close'][0])/day_data['close'][0] 149 | except Exception: 150 | yield_rate = -1 151 | return yield_rate 152 | 153 | 154 | # 股票分层函数: 按流通市值进行划分,分为大,中,小市值。 155 | def stock_layered(code_list, sign = 0): 156 | """ 157 | :param code_list: 未分层的标的代号 158 | :param sign: = 0,表示不分层;= 1,返回小市值,= 2,返回中市值; = 3, 返回大市值 159 | :return: 分层后的标的代码 160 | """ 161 | if sign == 0: 162 | return code_list 163 | pass 164 | 165 | # 单因子测试函数 166 | def test_factor(factor, block, begin_date_list, end_date_list, layer_sign = 0): 167 | """ 168 | :param factor: 待测的单因子 169 | :param block : 股市指数 170 | :param begin_date_list: 获取每一期因子的开始时间 (12个月,每月一次,从月初开始和月末结束) 171 | :param end_date_list: 获取每一期因子的结束时间 172 | :return: 年化夏普率,IC等等,见函数尾部 173 | 注:使用沪深300股作为测试 174 | """ 175 | # 记录每一个月的股票池总体收益率 176 | yield_rate_list = [] 177 | 178 | # 记录每一个月股票池各股收益率 179 | single_yield_rate_list = [] 180 | 181 | # 因子每期收益率 182 | factor_return_list = [] 183 | 184 | # 因子每期的IC值 185 | IC_list = [] 186 | 187 | 188 | 189 | # 遍历每一月,月初调仓 190 | for i in range(len(begin_date_list)): 191 | 192 | # --------------------------------------------- # 193 | # 1. 提取 K 线数据 和 股票信息 194 | # --------------------------------------------- # 195 | 196 | print("{} - {}: 获取K线数据!".format(begin_date_list[i], end_date_list[i])) 197 | code_list = at.get_code_list(block, date=begin_date_list[i]) 198 | 199 | code_list = stock_layered(code_list, layer_sign) # 分层 200 | 201 | # 若要分层回测,这里需要股票池划分 202 | target_list = code_list['code'].tolist() # 本月股票池代码 203 | weight_list = np.array(code_list['weight'].tolist()) # 本月各股票权重 204 | # 获取因子月初数据 205 | print("{} - {}: 获取因子数据!".format(begin_date_list[i], end_date_list[i])) 206 | factor_data = at.get_factor_by_day(factor_list=[factor], target_list=target_list, 207 | date=begin_date_list[i]) 208 | 209 | # ----------------------------------------------- # 210 | # 2. 数据预处理 211 | # ----------------------------------------------- # 212 | 213 | # 平均值填充缺失值 中位数去极值 & z-score 规范化 214 | factor_data = factor_data.fillna(factor_data[factor].mean()) 215 | factor_data = filter_MAD(factor_data, factor, n=5) 216 | factor_data[factor] = preprocessing.scale(factor_data[factor]) 217 | 218 | # 提取因子列,变为np array 219 | factor_data = np.array(factor_data[factor].tolist()) 220 | 221 | # ------------------------------------------------- # 222 | # 3.从 K 线和股票数据中计算本月的个股收益率和权重 223 | # 以及IC值 224 | # ------------------------------------------------- # 225 | 226 | yield_rate = [] # 股票池个股本月平均收益率 227 | tmp_target_list = target_list 228 | for j, target in enumerate(target_list): 229 | rate = cal_yield_rate(target, begin_date_list[i], end_date_list[i]) 230 | if rate != -1: # 计算标的股票的本月收益率 231 | yield_rate.append(cal_yield_rate(target, begin_date_list[i], end_date_list[i])) 232 | else: # 收益率计算出现错误,从股票池中删除,权重列表中删除,因子列表中删除 233 | tmp_target_list = np.delete(tmp_target_list, [j]) 234 | weight_list = np.delete(weight_list, [j]) 235 | factor_data = np.delete(factor_data, [j]) 236 | 237 | IC = pearsonr(yield_rate, factor_data)[0] # 获取IC值 238 | 239 | IC_list.append(IC) # 记录IC值 240 | 241 | weight_list = weight_list / weight_list.sum() # 权重归一化 242 | weight_list = weight_list.reshape(-1, 1) 243 | factor_data = factor_data.reshape(-1, 1) 244 | yield_rate = np.array(yield_rate).reshape(-1, 1) 245 | 246 | # ----------------------------------------------- # 247 | # 4. 月初因子和本月收益率进行拟合, 获取因子收益率 248 | # ----------------------------------------------- # 249 | 250 | print("{} - {}: 开始拟合!".format(begin_date_list[i], end_date_list[i])) 251 | LR = linear_model.LinearRegression() # 线性拟合器 252 | LR.fit(factor_data, yield_rate) # 拟合月初因子和本月平均收益率 253 | 254 | coef_list = list(LR.coef_)[0] 255 | coef = coef_list[0] 256 | factor_return_list.append(coef) # 记录当期的因子收益率 保留小数点两位 257 | 258 | # -------------------------------------------------- # 259 | # 5. 预测各股票本月收益率,计算股票池整体收益。 260 | # -------------------------------------------------- # 261 | print("{} - {}: 开始预测!".format(begin_date_list[i], end_date_list[i])) 262 | pred_yield_rate = LR.predict(factor_data) # 预测的各股票收益率 263 | 264 | rate_list = list(pred_yield_rate)[0] 265 | rate_list = [round(r, 2) for r in rate_list] 266 | single_yield_rate_list.append(rate_list) # 记录当月各股票收益率 小数点两位 267 | 268 | # 利用权重和个股收益计算股票池整体平均收益率 269 | mean_yield_rate = (pred_yield_rate * weight_list).sum() 270 | 271 | # 记录当月股票整体平均收益率 272 | yield_rate_list.append(round(float(mean_yield_rate), 2)) # 小数点两位 273 | 274 | print("{} - {}: 股票平均收益率拟合完毕!".format(begin_date_list[i], end_date_list[i])) 275 | 276 | # --------------------------------------------------- # 277 | # 汇总数据 278 | # --------------------------------------------------- # 279 | 280 | # 计算超额收益率 281 | yield_rate_array = np.array(yield_rate_list) 282 | over_rate = yield_rate_array - 0.004 # 0.004 代表无风险利率 283 | # 超额收益率均值和标准差 284 | mean_over_rate = over_rate.mean() 285 | std_over_rate = over_rate.std() 286 | 287 | # 单位时间夏普率 288 | sharp_ratio = mean_over_rate / std_over_rate 289 | # 年化夏普率 290 | sharp_ratio = np.sqrt(12) * sharp_ratio 291 | 292 | # 计算股票收益率均值方差 293 | yield_rate_array = np.array(yield_rate_list) 294 | average_yield_rate = np.mean(yield_rate_array) 295 | var_yield_rate = np.var(yield_rate_array) 296 | 297 | # 计算因子收益率的均值 标准差 298 | factor_return_array = np.array(factor_return_list) 299 | average_factor_return = np.mean(factor_return_array) 300 | std_factor_return = np.std(factor_return_array) 301 | 302 | # 计算因子收益率大于0的概率 303 | factor_greater_than_zero = sum([1 for i in factor_return_list if i > 0]) / len(factor_return_list) 304 | 305 | # 计算IC的平均值和标准差 306 | average_IC = np.mean(np.array(IC_list)) 307 | std_IC = np.std(np.array(IC_list)) 308 | # 计算 IC > 0的概率 309 | IC_greater_than_zero = sum([1 for i in IC_list if i > 0]) / len(IC_list) 310 | 311 | # 返回夏普率,波动率(收益率方差),因子收益均值,因子收益率, 312 | test_result_dict = dict() 313 | test_result_dict["年化夏普率"] = sharp_ratio 314 | test_result_dict["波动率"] = var_yield_rate 315 | test_result_dict["因子收益均值"] = average_factor_return 316 | test_result_dict["因子收益标准差"] = std_factor_return 317 | test_result_dict["因子收益>0概率"] = factor_greater_than_zero 318 | test_result_dict["IC均值"] = average_IC 319 | test_result_dict["IC标准差"] = std_IC 320 | test_result_dict["IC>0概率"] = IC_greater_than_zero 321 | 322 | return test_result_dict 323 | 324 | 325 | # 同时多次测试因子,返回一个DataFrame 326 | def test_all_factors(factor_list, block, begin_date, end_date, layer_sign=0): 327 | """ 328 | :param factor_list: 因子列表 329 | :param block: 股市指数 330 | :param begin_date: 开始年月 331 | :param end_date: 结束年月 332 | :return: 返回各因子的测试指标结果 333 | """ 334 | begin_date_list, end_date_list = create_date(begin_date, end_date) 335 | result_dict_list = list() 336 | for factor in factor_list: 337 | result_dict = test_factor(factor, block, begin_date_list, end_date_list, layer_sign) 338 | result_dict_list.append(result_dict) 339 | 340 | return pd.DataFrame(result_dict_list, index=factor_list) 341 | 342 | 343 | result = test_all_factors(["NLSIZE", "MktValue", "BIAS10", "NegMktValue", "CurrentAssetsRatio", 344 | "MLEV", "Variance20", "ROAEBIT"], 345 | 'hs300', '2016-01', '2018-09', 346 | layer_sign=0) # 0 不分层 1 低流通市值 2 中流通市值 3 高流通市值 347 | 348 | result.to_csv("single_factor_test.csv", sep=',') 349 | 350 | 351 | 352 | -------------------------------------------------------------------------------- /get_factor_report.py: -------------------------------------------------------------------------------- 1 | import atrader as at 2 | import pandas as pd 3 | import numpy as np 4 | import sys 5 | """ 6 | 运行之前!!!!!!!!!!! 7 | 修改输出的csv文件名!!!!!! 8 | 注意不要重复,可能会覆盖原来的文件!!! 9 | """ 10 | 11 | csv_file = "final_Q1_ChengZhang_ChangYongJiShuZhiBiao_DongLiang_factors.csv" 12 | strategy_dicts = at.get_strategy_id() 13 | save_dict = {"测试因子": [], 14 | '年化收益率': [], 15 | '年化夏普率': [], 16 | '最大回撤率': [], 17 | 'alpha': [], 18 | 'beta': [], 19 | '信息比率': [] 20 | } 21 | for strategy in strategy_dicts: 22 | strategy_id = strategy["strategy_id"] 23 | result = at.get_performance(strategy_id) 24 | save_dict['测试因子'].append(result['strategy_name']) 25 | save_dict['年化收益率'].append(result['annu_return']) 26 | save_dict['年化夏普率'].append(result['sharpe_ratio']*np.sqrt(12)) 27 | save_dict['最大回撤率'].append(result['max_drawback_rate']) 28 | save_dict['alpha'].append(result['alpha']) 29 | save_dict['beta'].append(result['beta']) 30 | save_dict['信息比率'].append(result['info_ratio']) 31 | 32 | df = pd.DataFrame(save_dict) 33 | df.to_csv(csv_file, sep=',') 34 | print(df) -------------------------------------------------------------------------------- /lstm.py: -------------------------------------------------------------------------------- 1 | """ 2 | ---------------------------------------------------------- 3 | 策略思路: 4 | 1. 回测标的:沪深300成分股 5 | 2. 回测时间段:2016-01-01 至 2018-09-30 6 | 3. 特征选择: 7 | - 基础类:AdminExpenseTTM, FinanExpenseTTM, NetIntExpense, GrossProfit 8 | - 质量类:ROIC, CashToCurrentLiability 9 | - 收益风险类:DDNCR 10 | - 情绪类:PVI 11 | - 成长类:TotalAssetGrowRate 12 | - 常用技术指标类:MA120 13 | - 动量类:AD 14 | - 价值类:PS 15 | - 每股指标类:EnterpriseFCFPS 16 | - 行业分析师:FY12P 17 | - 特色技术指标:STDDEV 18 | 4. 单因子回归测试模型思路: 19 | 1. 先获得 21 天以上的K线数据和因子数据,预处理 20 | 2. 使用上月初的多个因子和上月收益率进行线性回归 21 | 3. 使用【LSTM模型】进行训练 22 | 4. 回到当前时间点,使用本月初的因子作为预测样本特征,预测本月的各股票平均收益率的大小。 23 | 5. 选股逻辑: 24 | 将符合预测结果的股票按均等分配可用资金进行下单交易。持有一个月后 ,再次进行调仓,训练预测。 25 | 6. 交易逻辑: 26 | 每次调仓时,若当前有持仓,并且符合选股条件,则仓位不动; 27 | 若不符合选股条件,则对收益低的标的进行仓位平仓; 28 | 若当前无仓,并且符合选股条件,则多开仓,对收益高的标的进行开仓; 29 | 若不符合选股条件,则不开仓,无需操作。 30 | ---------------------------------------------------------- 31 | """ 32 | from atrader import * 33 | import pandas as pd 34 | import numpy as np 35 | from sklearn import svm 36 | import math 37 | from sklearn import preprocessing 38 | import datetime 39 | import torch 40 | from torch import nn 41 | from torch.autograd import Variable 42 | import torchvision.datasets as dsets 43 | import torch.utils.data as Data 44 | import matplotlib.pyplot as plt 45 | import torchvision 46 | 47 | # 作为全局变量进行测试 48 | 49 | FactorCode = ['ROIC', 'CashToCurrentLiability', 'STDDEV', 'DDNCR', 'PVI', 'EnterpriseFCFPS', 50 | 'PS', 'AdminExpenseTTM', 'FinanExpenseTTM', 'NetIntExpense', 'GrossProfit', 'FY12P', 51 | 'AD', 'TotalAssetGrowRate', 'MA120'] 52 | class lstm(nn.Module): 53 | def __init__(self): 54 | super(lstm, self).__init__() 55 | 56 | self.rnn = nn.LSTM( 57 | input_size=len(FactorCode), 58 | hidden_size=64, 59 | num_layers=2, 60 | batch_first=True, 61 | dropout=0.2 62 | ) 63 | 64 | self.out = nn.Linear(64, 1) 65 | 66 | def forward(self, x): 67 | r_out, (h_n, h_c) = self.rnn(x, None) 68 | 69 | # print(x.shape) 70 | # print(r_out.shape) 71 | 72 | out = self.out(r_out[:, -1, :]) 73 | # print(r_out[:, -1, :].shape) 74 | # input() 75 | return out 76 | LR = 0.01 77 | EPOCH = 3 78 | BATCH_SIZE = 5 79 | 80 | 81 | # 中位数去极值法 82 | def filter_MAD(df, factor, n=3): 83 | """ 84 | :param df: 去极值的因子序列 85 | :param factor: 待去极值的因子 86 | :param n: 中位数偏差值的上下界倍数 87 | :return: 经过处理的因子dataframe 88 | """ 89 | median = df[factor].quantile(0.5) 90 | new_median = ((df[factor] - median).abs()).quantile(0.5) 91 | max_range = median + n * new_median 92 | min_range = median - n * new_median 93 | 94 | for i in range(df.shape[0]): 95 | if df.loc[i, factor] > max_range: 96 | df.loc[i, factor] = max_range 97 | elif df.loc[i, factor] < min_range: 98 | df.loc[i, factor] = min_range 99 | return df 100 | 101 | 102 | def init(context): 103 | context.SVM = svm.SVC(gamma='scale') 104 | # 账号设置:设置初始资金为 10000000 元 105 | set_backtest(initial_cash=10000000, future_cost_fee=1.0, stock_cost_fee=30, margin_rate=1.0, slide_price=0.0, 106 | price_loc=1, deal_type=0, limit_type=0) 107 | # 注册数据:日频数据 108 | reg_kdata('day', 1) 109 | global FactorCode # 全局单因子代号 110 | reg_factor(factor=FactorCode) 111 | print("init 函数, 注册因子为{}".format(FactorCode[0])) 112 | context.FactorCode = FactorCode # 113 | 114 | # 超参数设置: 115 | context.Len = 21*2 # 时间长度: 当交易日个数小于该事件长度时,跳过该交易日,假设平均每个月 21 个交易日左右 250/12 116 | context.Num = 0 # 记录当前交易日个数 117 | 118 | # lstm 119 | context.lstm = lstm() 120 | context.optimizer = torch.optim.Adam(context.lstm.parameters(), lr=LR) 121 | context.loss_func = nn.MSELoss() 122 | context.EPOCH = EPOCH 123 | context.BATCH_SIZE = BATCH_SIZE 124 | 125 | # 较敏感的超参数,需要调节 126 | context.upper_pos = 80 # 股票预测收益率的上分位数,高于则买入 127 | context.down_pos = 20 # 股票预测收益率的下分位数,低于则卖出 128 | context.cash_rate = 0.6 # 计算可用资金比例的分子,利益大于0的股票越多,比例越小 129 | 130 | # 确保月初调仓 131 | days = get_trading_days('SSE', '2016-01-01', '2018-09-30') 132 | months = np.vectorize(lambda x: x.month)(days) 133 | month_begin = days[pd.Series(months) != pd.Series(months).shift(1)] 134 | context.month_begin = pd.Series(month_begin).dt.strftime('%Y-%m-%d').tolist() 135 | 136 | 137 | def on_data(context): 138 | context.Num = context.Num + 1 139 | if context.Num < context.Len: # 如果交易日个数小于Len+1,则进入下一个交易日进行回测 140 | return 141 | if datetime.datetime.strftime(context.now, '%Y-%m-%d') not in context.month_begin: # 调仓频率为月,月初开始调仓 142 | return 143 | 144 | # 获取数据: 145 | KData = get_reg_kdata(reg_idx=context.reg_kdata[0], length=context.Len, fill_up=True, df=True) 146 | FData = get_reg_factor(reg_idx=context.reg_factor[0], target_indices=[x for x in range(300)], length=context.Len, 147 | df=True) # 获取因子数据 148 | 149 | 150 | # 特征构建: 151 | Fcode = context.FactorCode # 标签不需要代号了 152 | 153 | # 数据存储变量: 154 | # Close 字段为标签,Fcode 为标签 155 | FactorData = pd.DataFrame(columns=(['idx', 'benefit'] + Fcode)) # 存储训练特征及标签样本 156 | FactorDataTest = pd.DataFrame(columns=(['idx'] + Fcode)) # 存储预测特征样本 157 | 158 | # K线数据序号对齐 159 | tempIdx = KData[KData['time'] == KData['time'][0]]['target_idx'].reset_index(drop=True) 160 | 161 | # 按标的处理数据: 162 | for i in range(300): 163 | # 训练特征集及训练标签构建: 164 | # 临时数据存储变量: 165 | FactorData0 = pd.DataFrame(np.full([int(context.Len/2), len(Fcode) + 2], np.nan), 166 | columns=(['idx', 'benefit'] + Fcode)) 167 | # 存储预测特征样本 168 | FactorDataTest0 = pd.DataFrame(np.full([int(context.Len/2), len(Fcode) + 1], np.nan), columns=(['idx'] + Fcode)) 169 | 170 | # 因子数据 序号对齐, 提取当前标的的因子数据 171 | FData0 = FData[FData['target_idx'] == tempIdx[i]].reset_index(drop=True) 172 | 173 | # 按特征处理数据: 174 | for FC in context.FactorCode: 175 | # 提取当前标的中与当前因子FC相同的部分 176 | FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True) 177 | #print(FCData.shape) 178 | #print(FCData[int(context.Len / 2):]) 179 | #print(FCData[:int(context.Len/2)]) 180 | #input() 181 | FactorData0[FC] = FCData[:int(context.Len/2)] # 存储上一个月初的股票因子数据 182 | 183 | # 按标签处理数据: 184 | # 提取当前标的的前一个月的K线面板数据 185 | close = np.array(KData[KData['target_idx'] == tempIdx[i]]['close']) 186 | # 计算当前标的在上一个月的收益率 187 | benefit = (close[-1] - close[int(context.Len/2) - 1]) / close[int(context.Len/2) - 1] 188 | 189 | FactorData0['benefit'] = benefit 190 | # idx: 建立当前标的在训练样本集中的索引 191 | FactorData0['idx'] = tempIdx[i] 192 | # 合并数据:组成训练样本 193 | FactorData = FactorData.append(FactorData0, ignore_index=True) 194 | 195 | # 预测特征集构建:建立标的索引 196 | FactorDataTest0['idx'] = tempIdx[i] 197 | # 按特征处理数据,过程同建立训练特征 198 | for FC in context.FactorCode: 199 | FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True) 200 | #print(FCData.shape) 201 | #print(FCData[int(context.Len / 2):]) 202 | #print(FCData[:int(context.Len / 2)]) 203 | #input() 204 | FactorDataTest0[FC] = FCData[int(context.Len/2):].reset_index(drop=True) 205 | 206 | # 合并测试数据 207 | FactorDataTest = FactorDataTest.append(FactorDataTest0, ignore_index=True) 208 | 209 | """ 210 | 训练集和测试集的表头字段如下 211 | FactorData DataFrame: 212 | idx | benefit | Factor 1 | Factor 2| .... 213 | benefit 作为标签,上月初Factor作为特征,此处是单因子测试,只有一个特征 214 | FactorDataTest DataFrame: 215 | idx | Factor 1 | Factor 2 | ... 216 | 本月初的因子作为预测特征 217 | """ 218 | 219 | # 数据清洗: 220 | FactorData = FactorData.dropna(axis=0, how='any').reset_index(drop=True) # 清洗数据 221 | FactorDataTest = FactorDataTest.dropna(axis=0, how='any').reset_index(drop=True) # 清洗数据 222 | 223 | count1 = FactorData.groupby('idx').count().reset_index() 224 | remain1 = count1[count1[count1.columns[1]] == int(context.Len / 2)]['idx'] 225 | count2 = FactorDataTest.groupby('idx').count().reset_index() 226 | remain2 = count2[count2[count2.columns[1]] == int(context.Len / 2)]['idx'] 227 | remain = pd.merge(remain1, remain2, on=['idx']).reset_index(drop=True) 228 | Idx = remain['idx'] # 剩余标的序号 229 | 230 | 231 | 232 | FactorData = FactorData[FactorData['idx'].isin(remain['idx'])].reset_index(drop=True) 233 | FactorDataTest = FactorDataTest[FactorDataTest['idx'].isin(remain['idx'])].reset_index(drop=True) 234 | 235 | #print(count[count.columns[0:2]]) 236 | 237 | # 按特征进行预处理 238 | for Factor in context.FactorCode: 239 | FactorData = filter_MAD(FactorData, Factor, 5) # 中位数去极值法 240 | FactorData[Factor] = preprocessing.scale(FactorData[Factor]) # 标准化 241 | 242 | FactorDataTest = filter_MAD(FactorDataTest, Factor, 5) # 中位数去极值法 243 | FactorDataTest[Factor] = preprocessing.scale(FactorDataTest[Factor]) # 标准化 244 | 245 | # print(FactorData.head(1)) 246 | # print(FactorDataTest.head(1)) 247 | 248 | # 训练和预测特征构建:# 行(样本数)* 列(特征数) 249 | X = np.ones([FactorData.shape[0], len(Fcode)]) 250 | Xtest = np.ones([FactorDataTest.shape[0], len(Fcode)]) 251 | 252 | 253 | 254 | # 循环填充特征到numpy数组中 255 | for i in range(X.shape[1]): 256 | X[:, i] = FactorData[Fcode[i]] 257 | Xtest[:, i] = FactorDataTest[Fcode[i]] 258 | 259 | Xtest = np.array([Xtest[i * int(context.Len / 2):(i + 1) * int(context.Len / 2)] for i in range(len(remain))]) 260 | Xtest = torch.from_numpy(Xtest).float() 261 | 262 | # 训练样本的标签,为浮点数的收益率 263 | Y = FactorData[['idx', 'benefit']] 264 | Y = Y.groupby('idx').mean().reset_index(drop=True) 265 | Y = np.array(Y['benefit']).astype(float) 266 | 267 | # print(X.shape) 268 | # print(X[:2]) 269 | # print(Y) 270 | # input() 271 | 272 | # 模型训练: 273 | class trainset(Data.Dataset): 274 | def __init__(self): 275 | self.X = X 276 | self.Y = Y 277 | def __getitem__(self, index): 278 | len = int(context.Len/2) 279 | head = len * index 280 | tail = len * (index + 1) 281 | data = self.X[head:tail] 282 | label = self.Y[index] 283 | return data, label 284 | def __len__(self): 285 | return int(self.X.shape[0]/(context.Len/2)) 286 | 287 | train_loader = Data.DataLoader(dataset=trainset(), batch_size=context.BATCH_SIZE, shuffle=True) 288 | 289 | for epoch in range(EPOCH): 290 | for step, (x, y) in enumerate(train_loader): 291 | # b_x = Variable(x.view(-1, 28, 28)) 292 | # b_y = Variable(y) 293 | b_x = x.float() 294 | b_y = y.float() 295 | 296 | output = context.lstm(b_x) 297 | loss = context.loss_func(output, b_y) 298 | context.optimizer.zero_grad() 299 | loss.backward() 300 | context.optimizer.step() 301 | 302 | 303 | # 预测: 304 | y = context.lstm(Xtest) 305 | y = y.detach().numpy().reshape((-1)) 306 | 307 | # 交易设置: 308 | positions = context.account().positions['volume_long'] # 多头持仓数量 309 | valid_cash = context.account(account_idx=0).cash['valid_cash'][0] # 可用资金 310 | 311 | 312 | # 获取收益率的高分位数和低分位数 313 | P = context.cash_rate / (sum(y > 0) + 1) # 设置每只标的可用资金比例 + 1 防止分母为0 314 | high_return, low_return = np.percentile(y, [context.upper_pos, context.down_pos]) 315 | 316 | 317 | for i in range(len(Idx)): 318 | position = positions.iloc[Idx[i]] 319 | if position == 0 and y[i] > high_return and y[i] > 0 and valid_cash > 0: # 若预测结果为true(收益率>0),买入 320 | # print('开仓') 321 | # if position == 0 and y[i] > high_return and valid_cash > 0: # 当前无仓,且该股票收益大于高70%分位数,则开仓,买入 322 | # 开仓数量 + 1防止分母为0 323 | # print(valid_cash, P, KData['close'][Idx[i]]) # 这里的数目可考虑减少一点,,有时太多有时太少 324 | Num = int(math.floor(valid_cash * P / 100 / (KData['close'][Idx[i] * 21 + 20] + 1)) * 100) 325 | 326 | # 控制委托量,不要过大或过小,需要保证是100的倍数 327 | if Num < 1000: 328 | Num *= 10 329 | if Num > 100000: 330 | Num = int(Num / 10) 331 | Num -= Num % 100 332 | if Num <= 0: # 不开仓 333 | continue 334 | 335 | print("开仓数量为:{}".format(Num)) 336 | order_id = order_volume(account_idx=0, target_idx=int(Idx[i]), volume=Num, side=1, position_effect=1, order_type=2, 337 | price=0) # 指定委托量开仓 338 | # 对订单号为order_id的委托单设置止损,止损距离10个整数点,触发时,委托的方式用市价委托 339 | # stop_loss_by_order(target_order_id=order_id, stop_type=1, stop_gap=10, order_type=2) 340 | 341 | elif position > 0 and y[i] < low_return: #预测结果为false(收益率<0),卖出 342 | # elif position > 0 and y[i] < low_return: # 当前持仓,且该股票收益小于低30%分位数,则平仓,卖出 343 | # print("平仓") 344 | order_volume(account_idx=0, target_idx=int(Idx[i]), volume=int(position), side=2, position_effect=2, 345 | order_type=2, price=0) # 指定委托量平仓 346 | 347 | 348 | if __name__ == '__main__': 349 | 350 | file_path = 'lstm.py' 351 | block = 'hs300' 352 | 353 | begin_date = '2016-01-01' 354 | end_date = '2018-09-30' 355 | 356 | strategy_name = 'lstm' 357 | 358 | run_backtest(strategy_name=strategy_name, file_path=file_path, 359 | target_list=list(get_code_list('hs300', date=begin_date)['code']), 360 | frequency='day', fre_num=1, begin_date=begin_date, end_date=end_date, fq=1) 361 | -------------------------------------------------------------------------------- /multi_factor_lr.py: -------------------------------------------------------------------------------- 1 | """ 2 | ------------------------------------------------------- 3 | 策略思路: 4 | 1. 回测标的:沪深300成分股 5 | 2. 回测时间段:2016-01-01 至 2018-09-30 6 | 3. 特征选择:每个大类夏普率最高的因子+夏普率高于1.5的因子 7 | - 质量类:ROIC, CashToCurrentLiability 8 | - 特色技术指标:STDDEV 9 | - 收益风险:DDNCR 10 | - 情绪类:TVMA20 11 | - 每股指标类:EnterpriseFCFPS 12 | - 价值类:PS 13 | - 基础类:AdminExpenseTTM, FinanExpenseTTM, NetIntExpense, GrossProfit 14 | - 行业分析师:FY12P 15 | - 动量类:AD 16 | - 成长类:TotalAssetGrowRate 17 | - 常用技术类:MA120 18 | 4. 回归测试模型思路: 19 | 1. 先获得 21 天以上的K线数据和因子数据,预处理 20 | 2. 使用上月初因子和上月收益率进行线性回归 21 | 3. 使用单变量线性模型进行训练 22 | 4. 回到当前时间点,使用本月初的因子作为预测样本特征,预测本月的各股票平均收益率的大小。 23 | 5. 选股逻辑: 24 | 将符合预测结果的股票按均等分配可用资金进行下单交易。持有一个月后 ,再次进行调仓,训练预测。 25 | 6. 交易逻辑: 26 | 每次调仓时,若当前有持仓,并且符合选股条件,则仓位不动; 27 | 若不符合选股条件,则对收益低的标的进行仓位平仓; 28 | 若当前无仓,并且符合选股条件,则多开仓,对收益高的标的进行开仓; 29 | 若不符合选股条件,则不开仓,无需操作。 30 | --------------------------------------------------------- 31 | 运行方法: 32 | 1. 在 main 中定义同一类的因子列表。 33 | 2. 逐个因子执行回测。 34 | 3. 获取回测报告ID,通过ID获取绩效报告字段。 35 | 4. 保留字段到CSV文件中。 36 | """ 37 | 38 | from atrader import * 39 | import pandas as pd 40 | import numpy as np 41 | from sklearn.linear_model import LinearRegression 42 | import math 43 | from sklearn import preprocessing 44 | import datetime 45 | import sys 46 | 47 | # 作为全局变量进行测试 48 | FactorCode = ['ROIC', 'CashToCurrentLiability', 'STDDEV', 'DDNCR', 'PVI', 'EnterpriseFCFPS', 49 | 'PS', 'AdminExpenseTTM', 'FinanExpenseTTM', 'NetIntExpense', 'NIAP', 'FY12P', 50 | 'AD', 'TotalAssetGrowRate', 'MA120'] 51 | 52 | # 中位数去极值法 53 | def filter_MAD(df, factor, n=3): 54 | """ 55 | :param df: 去极值的因子序列 56 | :param factor: 待去极值的因子 57 | :param n: 中位数偏差值的上下界倍数 58 | :return: 经过处理的因子dataframe 59 | """ 60 | median = df[factor].quantile(0.5) 61 | new_median = ((df[factor] - median).abs()).quantile(0.5) 62 | max_range = median + n * new_median 63 | min_range = median - n * new_median 64 | 65 | for i in range(df.shape[0]): 66 | if df.loc[i, factor] > max_range: 67 | df.loc[i, factor] = max_range 68 | elif df.loc[i, factor] < min_range: 69 | df.loc[i, factor] = min_range 70 | return df 71 | 72 | 73 | def init(context): 74 | # 账号设置:设置初始资金为 10000000 元 75 | set_backtest(initial_cash=10000000, future_cost_fee=1.0, stock_cost_fee=30, margin_rate=1.0, slide_price=0.0, 76 | price_loc=1, deal_type=0, limit_type=0) 77 | # 注册数据:日频数据 78 | reg_kdata('day', 1) 79 | global FactorCode # 全局单因子代号 80 | reg_factor(factor=FactorCode) 81 | print("init 函数, 注册因子为{}".format(FactorCode[0])) 82 | context.FactorCode = FactorCode # 83 | 84 | # 超参数设置: 85 | context.Len = 21 # 时间长度: 当交易日个数小于该事件长度时,跳过该交易日,假设平均每个月 21 个交易日左右 250/12 86 | context.Num = 0 # 记录当前交易日个数 87 | 88 | # 较敏感的超参数,需要调节 89 | context.upper_pos = 80 # 股票预测收益率的上分位数,高于则买入 90 | context.down_pos = 20 # 股票预测收益率的下分位数,低于则卖出 91 | context.cash_rate = 0.6 # 计算可用资金比例的分子,利益大于0的股票越多,比例越小 92 | 93 | # 确保月初调仓 94 | days = get_trading_days('SSE', '2016-01-01', '2018-09-30') 95 | months = np.vectorize(lambda x: x.month)(days) 96 | month_begin = days[pd.Series(months) != pd.Series(months).shift(1)] 97 | context.month_begin = pd.Series(month_begin).dt.strftime('%Y-%m-%d').tolist() 98 | 99 | 100 | def on_data(context): 101 | context.Num = context.Num + 1 102 | if context.Num < context.Len: # 如果交易日个数小于Len+1,则进入下一个交易日进行回测 103 | return 104 | if datetime.datetime.strftime(context.now, '%Y-%m-%d') not in context.month_begin: # 调仓频率为月,月初开始调仓 105 | return 106 | 107 | # 获取数据: 108 | KData = get_reg_kdata(reg_idx=context.reg_kdata[0], length=context.Len, fill_up=True, df=True) 109 | FData = get_reg_factor(reg_idx=context.reg_factor[0], target_indices=[x for x in range(300)], length=context.Len, 110 | df=True) # 获取因子数据 111 | 112 | # 特征构建: 113 | Fcode = context.FactorCode # 标签不需要代号了 114 | 115 | # 数据存储变量: 116 | # Close 字段为标签,Fcode 为标签 117 | FactorData = pd.DataFrame(columns=(['idx', 'benefit'] + Fcode)) # 存储训练特征及标签样本 118 | FactorDataTest = pd.DataFrame(columns=(['idx'] + Fcode)) # 存储预测特征样本 119 | 120 | # K线数据序号对齐 121 | tempIdx = KData[KData['time'] == KData['time'][0]]['target_idx'].reset_index(drop=True) 122 | 123 | # 按标的处理数据: 124 | for i in range(300): 125 | # 训练特征集及训练标签构建: 126 | # 临时数据存储变量: 127 | FactorData0 = pd.DataFrame(np.full([1, len(Fcode) + 2], np.nan), 128 | columns=(['idx', 'benefit'] + Fcode)) 129 | # 存储预测特征样本 130 | FactorDataTest0 = pd.DataFrame(np.full([1, len(Fcode) + 1], np.nan), columns=(['idx'] + Fcode)) 131 | 132 | # 因子数据 序号对齐, 提取当前标的的因子数据 133 | FData0 = FData[FData['target_idx'] == tempIdx[i]].reset_index(drop=True) 134 | 135 | # 按特征处理数据: 136 | for FC in context.FactorCode: 137 | # 提取当前标的中与当前因子FC相同的部分 138 | FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True) 139 | FactorData0[FC] = FCData[0] # 存储上一个月初的股票因子数据 140 | 141 | # 按标签处理数据: 142 | # 提取当前标的的前一个月的K线面板数据 143 | close = np.array(KData[KData['target_idx'] == tempIdx[i]]['close']) 144 | # 计算当前标的在上一个月的收益率 145 | benefit = (close[context.Len - 1] - close[0]) / close[0] 146 | 147 | FactorData0['benefit'] = benefit 148 | # idx: 建立当前标的在训练样本集中的索引 149 | FactorData0['idx'] = tempIdx[i] 150 | # 合并数据:组成训练样本 151 | FactorData = FactorData.append(FactorData0, ignore_index=True) 152 | 153 | # 预测特征集构建:建立标的索引 154 | FactorDataTest0['idx'] = tempIdx[i] 155 | # 按特征处理数据,过程同建立训练特征 156 | for FC in context.FactorCode: 157 | FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True) 158 | FactorDataTest0[FC] = FCData[context.Len - 1] 159 | 160 | # 合并测试数据 161 | FactorDataTest = FactorDataTest.append(FactorDataTest0, ignore_index=True) 162 | 163 | """ 164 | 训练集和测试集的表头字段如下 165 | FactorData DataFrame: 166 | idx | benefit | Factor 1 | Factor 2| .... 167 | benefit 作为标签,上月初Factor作为特征,此处是单因子测试,只有一个特征 168 | FactorDataTest DataFrame: 169 | idx | Factor 1 | Factor 2 | ... 170 | 本月初的因子作为预测特征 171 | """ 172 | 173 | # 数据清洗: 174 | FactorData = FactorData.dropna(axis=0, how='any').reset_index(drop=True) # 清洗数据 175 | FactorDataTest = FactorDataTest.dropna(axis=0, how='any').reset_index(drop=True) # 清洗数据 176 | Idx = FactorDataTest['idx'] # 剩余标的序号 177 | 178 | # 按特征进行预处理 179 | for Factor in context.FactorCode: 180 | FactorData = filter_MAD(FactorData, Factor, 5) # 中位数去极值法 181 | FactorData[Factor] = preprocessing.scale(FactorData[Factor]) # 标准化 182 | 183 | FactorDataTest = filter_MAD(FactorDataTest, Factor, 5) # 中位数去极值法 184 | FactorDataTest[Factor] = preprocessing.scale(FactorDataTest[Factor]) # 标准化 185 | 186 | # 训练和预测特征构建:# 行(样本数)* 列(特征数) 187 | X = np.ones([FactorData.shape[0], len(Fcode)]) 188 | Xtest = np.ones([FactorDataTest.shape[0], len(Fcode)]) 189 | 190 | # 循环填充特征到numpy数组中 191 | for i in range(X.shape[1]): 192 | X[:, i] = FactorData[Fcode[i]] 193 | Xtest[:, i] = FactorDataTest[Fcode[i]] 194 | 195 | # 训练样本的标签,为浮点数的收益率 196 | Y = np.array(FactorData['benefit']).astype(float) 197 | 198 | LRModel = LinearRegression(normalize=True) 199 | 200 | # 模型训练: 201 | LRModel.fit(X, Y) 202 | 203 | # LR分类预测: 204 | y = LRModel.predict(Xtest) 205 | 206 | # 交易设置: 207 | positions = context.account().positions['volume_long'] # 多头持仓数量 208 | valid_cash = context.account(account_idx=0).cash['valid_cash'][0] # 可用资金 209 | 210 | P = context.cash_rate / (sum(y > 0) + 1) # 设置每只标的可用资金比例 + 1 防止分母为0 211 | 212 | # 获取收益率的高分位数和低分位数 213 | low_return, high_return = np.percentile(y, [context.down_pos, context.upper_pos]) 214 | 215 | for i in range(len(Idx)): 216 | position = positions.iloc[Idx[i]] 217 | if position == 0 and y[i] > high_return and valid_cash > 0: 218 | # 开仓数量 + 1防止分母为0 219 | # print(valid_cash, P, KData['close'][Idx[i]]) # 这里的数目可考虑减少一点,,有时太多有时太少 220 | Num = int(math.floor(valid_cash * P / 100 / (KData['close'][Idx[i]] + 1)) * 100) 221 | 222 | # 控制委托量,不要过大或过小,需要保证是100的倍数 223 | if Num < 1000: 224 | Num *= 10 225 | if Num > 100000: 226 | Num = int(Num / 10) 227 | Num -= Num % 100 228 | if Num <= 0: # 不开仓 229 | continue 230 | 231 | print("开仓数量为:{}".format(Num)) 232 | order_id = order_volume(account_idx=0, target_idx=int(Idx[i]), volume=Num, side=1, position_effect=1, order_type=2, 233 | price=0) # 指定委托量开仓 234 | # 对订单号为order_id的委托单设置止损,止损距离10个整数点,触发时,委托的方式用市价委托 235 | # stop_loss_by_order(target_order_id=order_id, stop_type=1, stop_gap=10, order_type=2) 236 | elif position > 0 and y[i] < low_return: # 当前持仓,且该股票收益小于低30%分位数,则平仓,卖出 237 | print("平仓,数量为: {}".format(position / 10 + 100)) 238 | order_volume(account_idx=0, target_idx=int(Idx[i]), volume=int(position / 10), 239 | side=2, position_effect=2, order_type=2, price=0) # 指定委托量平仓 240 | 241 | 242 | if __name__ == '__main__': 243 | 244 | file_path = 'multi_factor_lr.py' 245 | block = 'hs300' 246 | 247 | begin_date = '2016-01-01' 248 | end_date = '2018-09-30' 249 | 250 | strategy_name = 'multi_factor_lr' 251 | 252 | run_backtest(strategy_name=strategy_name, file_path=file_path, 253 | target_list=list(get_code_list('hs300', date=begin_date)['code']), 254 | frequency='day', fre_num=1, begin_date=begin_date, end_date=end_date, fq=1) 255 | 256 | -------------------------------------------------------------------------------- /random_forest_reg.py: -------------------------------------------------------------------------------- 1 | """ 2 | ---------------------------------------------------------- 3 | 策略思路: 4 | 1. 回测标的:沪深300成分股 5 | 2. 回测时间段:2016-01-01 至 2018-09-30 6 | 3. 特征选择:每个大类夏普率最高的因子+夏普率高于1.5的因子 7 | - 质量类:ROIC, CashToCurrentLiability 8 | - 特色技术指标:STDDEV 9 | - 收益风险:DDNCR 10 | - 情绪类:TVMA20 11 | - 每股指标类:EnterpriseFCFPS 12 | - 价值类:PS 13 | - 基础类:AdminExpenseTTM, FinanExpenseTTM, NetIntExpense, GrossProfit 14 | - 行业分析师:FY12P 15 | - 动量类:TotalAssetGrowRate 16 | - 成长类:TotalAssetGrowRate 17 | - 常用技术类:MA120 18 | ... 其余逻辑参照single_factor_test.py 19 | 20 | ---------------------------------------------------------- 21 | """ 22 | from atrader import * 23 | import pandas as pd 24 | import numpy as np 25 | from sklearn.ensemble import RandomForestRegressor 26 | import math 27 | from sklearn import preprocessing 28 | import datetime 29 | 30 | # 作为全局变量进行测试 31 | FactorCode = ['ROIC', 'CashToCurrentLiability', 'STDDEV', 'DDNCR', 'PVI', 'EnterpriseFCFPS', 32 | 'PS', 'AdminExpenseTTM', 'FinanExpenseTTM', 'NetIntExpense', 'NIAP', 'FY12P', 33 | 'AD', 'TotalAssetGrowRate', 'MA120'] 34 | 35 | # 中位数去极值法 36 | def filter_MAD(df, factor, n=3): 37 | """ 38 | :param df: 去极值的因子序列 39 | :param factor: 待去极值的因子 40 | :param n: 中位数偏差值的上下界倍数 41 | :return: 经过处理的因子dataframe 42 | """ 43 | median = df[factor].quantile(0.5) 44 | new_median = ((df[factor] - median).abs()).quantile(0.5) 45 | max_range = median + n * new_median 46 | min_range = median - n * new_median 47 | 48 | for i in range(df.shape[0]): 49 | if df.loc[i, factor] > max_range: 50 | df.loc[i, factor] = max_range 51 | elif df.loc[i, factor] < min_range: 52 | df.loc[i, factor] = min_range 53 | return df 54 | 55 | 56 | def init(context): 57 | 58 | # context.SVM = svm.SVC(gamma='scale') 59 | # 账号设置:设置初始资金为 10000000 元 60 | set_backtest(initial_cash=10000000, future_cost_fee=1.0, stock_cost_fee=30, margin_rate=1.0, slide_price=0.0, 61 | price_loc=1, deal_type=0, limit_type=0) 62 | # 注册数据:日频数据 63 | reg_kdata('day', 1) 64 | global FactorCode # 全局单因子代号 65 | reg_factor(factor=FactorCode) 66 | print("init 函数, 注册因子为{}".format(FactorCode[0])) 67 | context.FactorCode = FactorCode # 68 | 69 | # 超参数设置: 70 | context.Len = 21 # 时间长度: 当交易日个数小于该事件长度时,跳过该交易日,假设平均每个月 21 个交易日左右 250/12 71 | context.Num = 0 # 记录当前交易日个数 72 | 73 | # 较敏感的超参数,需要调节 74 | context.upper_pos = 80 # 股票预测收益率的上分位数,高于则买入 75 | context.down_pos = 20 # 股票预测收益率的下分位数,低于则卖出 76 | context.cash_rate = 0.6 # 计算可用资金比例的分子,利益大于0的股票越多,比例越小 77 | 78 | # 确保月初调仓 79 | days = get_trading_days('SSE', '2016-01-01', '2018-09-30') 80 | months = np.vectorize(lambda x: x.month)(days) 81 | month_begin = days[pd.Series(months) != pd.Series(months).shift(1)] 82 | context.month_begin = pd.Series(month_begin).dt.strftime('%Y-%m-%d').tolist() 83 | 84 | 85 | def on_data(context): 86 | context.Num = context.Num + 1 87 | if context.Num < context.Len: # 如果交易日个数小于Len+1,则进入下一个交易日进行回测 88 | return 89 | if datetime.datetime.strftime(context.now, '%Y-%m-%d') not in context.month_begin: # 调仓频率为月,月初开始调仓 90 | return 91 | 92 | # 获取数据: 93 | KData = get_reg_kdata(reg_idx=context.reg_kdata[0], length=context.Len, fill_up=True, df=True) 94 | FData = get_reg_factor(reg_idx=context.reg_factor[0], target_indices=[x for x in range(300)], length=context.Len, 95 | df=True) # 获取因子数据 96 | 97 | # 特征构建: 98 | Fcode = context.FactorCode # 标签不需要代号了 99 | 100 | # 数据存储变量: 101 | # Close 字段为标签,Fcode 为标签 102 | FactorData = pd.DataFrame(columns=(['idx', 'benefit'] + Fcode)) # 存储训练特征及标签样本 103 | FactorDataTest = pd.DataFrame(columns=(['idx'] + Fcode)) # 存储预测特征样本 104 | 105 | # K线数据序号对齐 106 | tempIdx = KData[KData['time'] == KData['time'][0]]['target_idx'].reset_index(drop=True) 107 | 108 | # 按标的处理数据: 109 | for i in range(300): 110 | # 训练特征集及训练标签构建: 111 | # 临时数据存储变量: 112 | FactorData0 = pd.DataFrame(np.full([1, len(Fcode) + 2], np.nan), 113 | columns=(['idx', 'benefit'] + Fcode)) 114 | # 存储预测特征样本 115 | FactorDataTest0 = pd.DataFrame(np.full([1, len(Fcode) + 1], np.nan), columns=(['idx'] + Fcode)) 116 | 117 | # 因子数据 序号对齐, 提取当前标的的因子数据 118 | FData0 = FData[FData['target_idx'] == tempIdx[i]].reset_index(drop=True) 119 | 120 | # 按特征处理数据: 121 | for FC in context.FactorCode: 122 | # 提取当前标的中与当前因子FC相同的部分 123 | FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True) 124 | FactorData0[FC] = FCData[0] # 存储上一个月初的股票因子数据 125 | 126 | # 按标签处理数据: 127 | # 提取当前标的的前一个月的K线面板数据 128 | close = np.array(KData[KData['target_idx'] == tempIdx[i]]['close']) 129 | # 计算当前标的在上一个月的收益率 130 | benefit = (close[context.Len - 1] - close[0]) / close[0] 131 | 132 | FactorData0['benefit'] = benefit 133 | # idx: 建立当前标的在训练样本集中的索引 134 | FactorData0['idx'] = tempIdx[i] 135 | # 合并数据:组成训练样本 136 | FactorData = FactorData.append(FactorData0, ignore_index=True) 137 | 138 | # 预测特征集构建:建立标的索引 139 | FactorDataTest0['idx'] = tempIdx[i] 140 | # 按特征处理数据,过程同建立训练特征 141 | for FC in context.FactorCode: 142 | FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True) 143 | FactorDataTest0[FC] = FCData[context.Len - 1] 144 | 145 | # 合并测试数据 146 | FactorDataTest = FactorDataTest.append(FactorDataTest0, ignore_index=True) 147 | 148 | """ 149 | 训练集和测试集的表头字段如下 150 | FactorData DataFrame: 151 | idx | benefit | Factor 1 | Factor 2| .... 152 | benefit 作为标签,上月初Factor作为特征,此处是单因子测试,只有一个特征 153 | FactorDataTest DataFrame: 154 | idx | Factor 1 | Factor 2 | ... 155 | 本月初的因子作为预测特征 156 | """ 157 | 158 | # 数据清洗: 159 | FactorData = FactorData.dropna(axis=0, how='any').reset_index(drop=True) # 清洗数据 160 | FactorDataTest = FactorDataTest.dropna(axis=0, how='any').reset_index(drop=True) # 清洗数据 161 | Idx = FactorDataTest['idx'] # 剩余标的序号 162 | 163 | # 按特征进行预处理 164 | for Factor in context.FactorCode: 165 | FactorData = filter_MAD(FactorData, Factor, 5) # 中位数去极值法 166 | FactorData[Factor] = preprocessing.scale(FactorData[Factor]) # 标准化 167 | 168 | FactorDataTest = filter_MAD(FactorDataTest, Factor, 5) # 中位数去极值法 169 | FactorDataTest[Factor] = preprocessing.scale(FactorDataTest[Factor]) # 标准化 170 | 171 | # print(FactorData.head(1)) 172 | # print(FactorDataTest.head(1)) 173 | 174 | # 训练和预测特征构建:# 行(样本数)* 列(特征数) 175 | X = np.ones([FactorData.shape[0], len(Fcode)]) 176 | Xtest = np.ones([FactorDataTest.shape[0], len(Fcode)]) 177 | 178 | # 循环填充特征到numpy数组中 179 | for i in range(X.shape[1]): 180 | X[:, i] = FactorData[Fcode[i]] 181 | Xtest[:, i] = FactorDataTest[Fcode[i]] 182 | 183 | # 训练样本的标签,为浮点数的收益率 184 | Y = np.array(FactorData['benefit']).astype(float) 185 | 186 | random_forest = RandomForestRegressor(max_depth=5, n_estimators=50) 187 | 188 | # 模型训练: 189 | random_forest.fit(X, Y) 190 | 191 | # LR分类预测: 192 | y = random_forest.predict(Xtest) 193 | # 交易设置: 194 | positions = context.account().positions['volume_long'] # 多头持仓数量 195 | valid_cash = context.account(account_idx=0).cash['valid_cash'][0] # 可用资金 196 | 197 | P = context.cash_rate / (sum(y > 0) + 1) # 设置每只标的可用资金比例 + 1 防止分母为0 198 | 199 | # 获取收益率的高分位数和低分位数 200 | low_return, high_return = np.percentile(y, [context.down_pos, context.upper_pos]) 201 | 202 | for i in range(len(Idx)): 203 | position = positions.iloc[Idx[i]] 204 | if position == 0 and y[i] > high_return and valid_cash > 0: # 当前无仓,且该股票收益大于高70%分位数,则开仓,买入 205 | # 开仓数量 + 1防止分母为0 206 | # print(valid_cash, P, KData['close'][Idx[i]]) # 这里的数目可考虑减少一点,,有时太多有时太少 207 | Num = int(math.floor(valid_cash * P / 100 / (KData['close'][Idx[i]] + 1)) * 100) 208 | 209 | # 控制委托量,不要过大或过小,需要保证是100的倍数 210 | if Num < 1000: 211 | Num *= 10 212 | if Num > 100000: 213 | Num = int(Num / 10) 214 | Num -= Num % 100 215 | if Num <= 0: # 不开仓 216 | continue 217 | 218 | print("开仓数量为:{}".format(Num)) 219 | order_id = order_volume(account_idx=0, target_idx=int(Idx[i]), volume=Num, side=1, position_effect=1, order_type=2, 220 | price=0) # 指定委托量开仓 221 | # 对订单号为order_id的委托单设置止损,止损距离10个整数点,触发时,委托的方式用市价委托 222 | # stop_loss_by_order(target_order_id=order_id, stop_type=1, stop_gap=10, order_type=2) 223 | # elif position > 0 and y[i] == False: #预测结果为false(收益率<0),卖出 224 | elif position > 0 and y[i] < low_return: # 当前持仓,且该股票收益小于低30%分位数,则平仓,卖出 225 | print("平仓,数量为: {}".format(position / 10)) 226 | order_volume(account_idx=0, target_idx=int(Idx[i]), volume=int(position/10), 227 | side=2, position_effect=2, order_type=2, price=0) # 指定委托量平仓 228 | 229 | 230 | if __name__ == '__main__': 231 | file_path = 'random_forest_reg.py' 232 | block = 'hs300' 233 | 234 | begin_date = '2016-01-01' 235 | end_date = '2018-09-30' 236 | 237 | strategy_name = 'random_forest_reg' 238 | 239 | run_backtest(strategy_name=strategy_name, file_path=file_path, 240 | target_list=list(get_code_list('zz500', date=begin_date)['code']), 241 | frequency='day', fre_num=1, begin_date=begin_date, end_date=end_date, fq=1) 242 | -------------------------------------------------------------------------------- /references/2011年金融工程研讨会专题报告系列之二:大浪淘金,Alpha因子何处寻?.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JoshuaQYH/TIDIBEI/bae9a29662ecec55a9a9f1a9aa75c1c4de053a8c/references/2011年金融工程研讨会专题报告系列之二:大浪淘金,Alpha因子何处寻?.pdf -------------------------------------------------------------------------------- /references/A.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JoshuaQYH/TIDIBEI/bae9a29662ecec55a9a9f1a9aa75c1c4de053a8c/references/A.pdf -------------------------------------------------------------------------------- /references/A股Alpha策略及产品回顾与展望——2018年金融工程年度报告.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JoshuaQYH/TIDIBEI/bae9a29662ecec55a9a9f1a9aa75c1c4de053a8c/references/A股Alpha策略及产品回顾与展望——2018年金融工程年度报告.pdf -------------------------------------------------------------------------------- /references/A题—通过机器学习优化股票多因子模型解题指引.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JoshuaQYH/TIDIBEI/bae9a29662ecec55a9a9f1a9aa75c1c4de053a8c/references/A题—通过机器学习优化股票多因子模型解题指引.pdf -------------------------------------------------------------------------------- /references/SA20190100000_36930159.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JoshuaQYH/TIDIBEI/bae9a29662ecec55a9a9f1a9aa75c1c4de053a8c/references/SA20190100000_36930159.pdf -------------------------------------------------------------------------------- /references/人工智能选股框架及经典算法简介.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JoshuaQYH/TIDIBEI/bae9a29662ecec55a9a9f1a9aa75c1c4de053a8c/references/人工智能选股框架及经典算法简介.pdf -------------------------------------------------------------------------------- /references/华泰证券-多因子系列之一:华泰多因子模型体系初探-160921.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JoshuaQYH/TIDIBEI/bae9a29662ecec55a9a9f1a9aa75c1c4de053a8c/references/华泰证券-多因子系列之一:华泰多因子模型体系初探-160921.pdf -------------------------------------------------------------------------------- /references/单因子测试.PDF: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JoshuaQYH/TIDIBEI/bae9a29662ecec55a9a9f1a9aa75c1c4de053a8c/references/单因子测试.PDF -------------------------------------------------------------------------------- /references/收益预测模型.PDF: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JoshuaQYH/TIDIBEI/bae9a29662ecec55a9a9f1a9aa75c1c4de053a8c/references/收益预测模型.PDF -------------------------------------------------------------------------------- /references/风险模型与组合优化.PDF: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JoshuaQYH/TIDIBEI/bae9a29662ecec55a9a9f1a9aa75c1c4de053a8c/references/风险模型与组合优化.PDF -------------------------------------------------------------------------------- /run_test.bat: -------------------------------------------------------------------------------- 1 | :: 添加因子ID,进行单因子回测 2 | 3 | :: 基础科目衍生类(全部) -------------------------------------- 4 | ::python single_factor_test.py NetWorkingCapital 5 | ::python single_factor_test.py NetDebt 6 | ::python single_factor_test.py RetainedEarnings 7 | ::python single_factor_test.py GrossProfit 8 | ::python single_factor_test.py FCFF 9 | ::python single_factor_test.py TotalPaidinCapital 10 | ::python single_factor_test.py IntFreeNCL 11 | ::python single_factor_test.py IntFreeCL 12 | ::python single_factor_test.py EBIAT 13 | ::python single_factor_test.py EBIT 14 | ::python single_factor_test.py EBITDA 15 | ::python single_factor_test.py NIAPCut 16 | ::python single_factor_test.py WorkingCapital 17 | ::python single_factor_test.py IntDebt 18 | ::python single_factor_test.py IntCL 19 | ::python single_factor_test.py NRProfitLoss 20 | ::python single_factor_test.py FCFE 21 | ::python single_factor_test.py TotalFixedAssets 22 | ::python single_factor_test.py ValueChgProfit 23 | ::python single_factor_test.py OperateNetIncome 24 | ::python single_factor_test.py DA 25 | ::python single_factor_test.py NetIntExpense 26 | ::python single_factor_test.py NetTangibleAssets 27 | ::python single_factor_test.py TEAP 28 | ::python single_factor_test.py ASSI 29 | ::python single_factor_test.py TotalAssets 30 | ::python single_factor_test.py CostTTM 31 | ::python single_factor_test.py OperateProfitTTM 32 | ::python single_factor_test.py RevenueTTM 33 | ::python single_factor_test.py TCostTTM 34 | ::python single_factor_test.py TRevenueTTM 35 | ::python single_factor_test.py AssetImpairLossTTM 36 | ::python single_factor_test.py TProfitTTM 37 | ::python single_factor_test.py NonOperatingNPTTM 38 | ::python single_factor_test.py GrossProfitTTM 39 | ::python single_factor_test.py AdminExpenseTTM 40 | ::python single_factor_test.py NetProfitAPTTM 41 | ::python single_factor_test.py NetProfitTTM 42 | ::python single_factor_test.py FinanExpenseTTM 43 | ::python single_factor_test.py SalesExpenseTTM 44 | ::python single_factor_test.py NetInvestCFTTM 45 | ::python single_factor_test.py NetOperateCFTTM 46 | ::python single_factor_test.py NetFinanceCFTTM 47 | ::python single_factor_test.py SaleServiceRenderCashTTM 48 | ::python single_factor_test.py NIAP 49 | ::python single_factor_test.py COperAdelpct 50 | ::python single_factor_test.py COperAdelQpct 51 | ::python single_factor_test.py COperApct 52 | ::python single_factor_test.py COperATTMpct 53 | ::python single_factor_test.py COperDdelpct 54 | ::python single_factor_test.py COperDdelQpct 55 | ::python single_factor_test.py COperDpct 56 | ::python single_factor_test.py COperDTTMpct 57 | ::python single_factor_test.py COnonperDdelpct 58 | ::python single_factor_test.py COnonperDdelQpct 59 | ::python single_factor_test.py COnonperDpct 60 | ::python single_factor_test.py COnonperDTTMpct 61 | ::python single_factor_test.py COnonperAdelpct 62 | ::python single_factor_test.py COnonperAdelQpct 63 | ::python single_factor_test.py COnonperApct 64 | ::python single_factor_test.py COnonperATTMpct 65 | 66 | 67 | 68 | :: 质量类(全部) --------------------------------------------- 69 | ::python single_factor_test.py DebtEquityRatio 70 | ::python single_factor_test.py SuperQuickRatio 71 | ::python single_factor_test.py NonCurrentAssetsRatio 72 | ::python single_factor_test.py EquityToAsset 73 | ::python single_factor_test.py EquityFixedAssetRatio 74 | ::python single_factor_test.py FixAssetRatio 75 | ::python single_factor_test.py CurrentRatio 76 | ::python single_factor_test.py CurrentAssetsRatio 77 | ::python single_factor_test.py QuickRatio 78 | ::python single_factor_test.py IntangibleAssetRatio 79 | ::python single_factor_test.py BondsPayableToAsset 80 | ::python single_factor_test.py DebtsAssetRatio 81 | ::python single_factor_test.py LongDebtToWorkingCapital 82 | ::python single_factor_test.py LongTermDebtToAsset 83 | ::python single_factor_test.py LongDebtToAsset 84 | ::python single_factor_test.py BLEV 85 | ::python single_factor_test.py DebtTangibleEquityRatio 86 | ::python single_factor_test.py CashToCurrentLiability 87 | ::python single_factor_test.py OperCashInToCurrentLiability 88 | ::python single_factor_test.py CurrentAssetsTRate 89 | ::python single_factor_test.py AccountsPayablesTRate 90 | ::python single_factor_test.py ROA 91 | ::python single_factor_test.py NOCFToTLiability 92 | ::python single_factor_test.py OperCashInToAsset 93 | ::python single_factor_test.py MLEV 94 | ::python single_factor_test.py TSEPToTotalCapital 95 | ::python single_factor_test.py TotalAssetsTRate 96 | ::python single_factor_test.py EquityTRate 97 | ::python single_factor_test.py FinancialExpenseRate 98 | ::python single_factor_test.py TotalProfitCostRatio 99 | ::python single_factor_test.py AdminiExpenseRate 100 | ::python single_factor_test.py NPToTOR 101 | ::python single_factor_test.py SalesCostRatio 102 | ::python single_factor_test.py NetProfitRatio 103 | ::python single_factor_test.py GrossIncomeRatio 104 | ::python single_factor_test.py TaxRatio 105 | ::python single_factor_test.py OperatingExpenseRate 106 | ::python single_factor_test.py OperatingProfitRatio 107 | ::python single_factor_test.py OperatingProfitToTOR 108 | ::python single_factor_test.py EBITToTOR 109 | ::python single_factor_test.py NetNonOIToTP 110 | ::python single_factor_test.py ROAEBITTTMROE 111 | ::python single_factor_test.py ROE 112 | ::python single_factor_test.py InventoryTRate 113 | ::python single_factor_test.py FixedAssetsTRate 114 | ::python single_factor_test.py NOCFToOperatingNI 115 | ::python single_factor_test.py CashRateOfSales 116 | ::python single_factor_test.py SaleServiceCashToOR 117 | ::python single_factor_test.py SalesServiceCashToORLatest 118 | ::python single_factor_test.py CashRateOfSalesLatest 119 | ::python single_factor_test.py NetNonOIToTPLatest 120 | ::python single_factor_test.py PeriodCostsRate 121 | ::python single_factor_test.py InvestRAssociatesToTP 122 | ::python single_factor_test.py InvestRAssociatesToTPLatest 123 | ::python single_factor_test.py DividendCover 124 | ::python single_factor_test.py OperatingNIToTPLatest 125 | ::python single_factor_test.py NPCutToNP 126 | ::python single_factor_test.py OperatingNIToTP 127 | ::python single_factor_test.py DividendPaidRatio 128 | ::python single_factor_test.py RetainedEarningRatio 129 | ::python single_factor_test.py DEGM 130 | ::python single_factor_test.py ACCA 131 | ::python single_factor_test.py CFO2EV 132 | ::python single_factor_test.py NOCFToOperatingNILatest 133 | ::python single_factor_test.py NOCFToNetDebt 134 | ::python single_factor_test.py NetProfitCashCover 135 | ::python single_factor_test.py InventoryTDays 136 | ::python single_factor_test.py OperatingCycle 137 | ::python single_factor_test.py AccountsPayablesTDays 138 | ::python single_factor_test.py ARTRate 139 | ::python single_factor_test.py ARTDays 140 | ::python single_factor_test.py CashConversionCycle 141 | ::python single_factor_test.py InteBearDebtToTotalCapital 142 | ::python single_factor_test.py TangibleAToInteBearDebt 143 | ::python single_factor_test.py TangibleAToNetDebt 144 | ::python single_factor_test.py TSEPToInterestBearDebt 145 | ::python single_factor_test.py NOCFToInterestBearDebt 146 | ::python single_factor_test.py InterestCover 147 | ::python single_factor_test.py ROIC 148 | ::python single_factor_test.py ROEDiluted 149 | ::python single_factor_test.py ROEAvg 150 | ::python single_factor_test.py ROECut 151 | ::python single_factor_test.py ROECutWeighted 152 | ::python single_factor_test.py ROEWeighted 153 | ::python single_factor_test.py ROAEBIT 154 | ::python single_factor_test.py ROE5 155 | ::python single_factor_test.py ROA5 156 | 157 | 158 | 159 | :: 收益风险类(部分) ------------------------------------ 160 | :: 60日方差 161 | ::python single_factor_test.py Variance60 162 | :: 股价偏度 163 | ::python single_factor_test.py Skewness20 164 | :: 历史波动 165 | ::python single_factor_test.py HSIGMA 166 | :: 20日信息比率 167 | ::python single_factor_test.py InformationRatio20 168 | :: 20日夏普率 169 | ::python single_factor_test.py Sharperatio20 170 | :: 超额收益标准差 171 | ::python single_factor_test.py DASTD 172 | :: 股权向后复权因子 173 | ::python single_factor_test.py BackwardADJ 174 | :: 个股收益的120日峰度 175 | ::python single_factor_test.py Kurtosis120 176 | :: 个股收益的20日峰度 177 | ::python single_factor_test.py Kurtosis20 178 | :: 下跌贝塔 179 | ::python single_factor_test.py DDNBT 180 | :: 下跌相关系数 181 | ::python single_factor_test.py DDNCR 182 | :: 下跌波动 183 | ::python single_factor_test.py DDNSR 184 | :: 60日信息比率 185 | ::python single_factor_test.py InformationRatio60 186 | :: 60日夏普率 187 | ::python single_factor_test.py Sharperatio60 188 | 189 | 190 | 191 | :: 情绪类(部分) -------------------------------- 192 | :: 20日成交金额的移动平均值 193 | ::python single_factor_test.py TVMA20 194 | :: 20日平均换手率 195 | ::python single_factor_test.py VOL20 196 | :: 20日成交量标准差 197 | ::python single_factor_test.py VSTD20 198 | :: 正成交量指标 199 | ::python single_factor_test.py PVI 200 | :: 成交量比率 201 | ::python single_factor_test.py VR 202 | :: 20日资金流量 203 | ::python single_factor_test.py MONEYFLOW20 204 | :: 20日收集派发指标 205 | ::python single_factor_test.py ACD20 206 | :: 人气指标 207 | ::python single_factor_test.py AR 208 | :: 20日能量潮指标 209 | ::python single_factor_test.py OBV20 210 | :: 阶段强势指标 211 | ::python single_factor_test.py JDQS20 212 | :: 资本利得突出量 213 | ::python single_factor_test.py CGO_10 214 | :: 显著性因子 20 215 | ::python single_factor_test.py ST_20 216 | :: 综合效用因子 20 217 | ::python single_factor_test.py TK_20 218 | :: 抢跑因子 219 | ::python single_factor_test.py FR_pure 220 | 221 | 222 | 223 | ::成长类因子(部分) -------------------------------- 224 | ::python single_factor_test.py OperatingRevenueGrowRate 225 | ::python single_factor_test.py TotalAssetGrowRate 226 | ::python single_factor_test.py EGRO 227 | ::python single_factor_test.py FinancingCashGrowRate 228 | ::python single_factor_test.py NPParentCompanyGrowRate 229 | ::python single_factor_test.py NetProfitGrowRate 230 | ::python single_factor_test.py SGRO 231 | ::python single_factor_test.py TotalProfitGrowRate 232 | 233 | 234 | 235 | ::常用技术指标因子(部分) -------------------------------- 236 | ::python single_factor_test.py MA10 237 | ::python single_factor_test.py MA120 238 | ::python single_factor_test.py MTM 239 | ::python single_factor_test.py DBCD 240 | ::python single_factor_test.py EMA12 241 | ::python single_factor_test.py CR20 242 | ::python single_factor_test.py UOS 243 | ::python single_factor_test.py CHAIKINVOLATILITY 244 | 245 | 246 | 247 | ::动量类因子(部分) -------------------------------- 248 | ::python single_factor_test.py BIAS20 249 | ::python single_factor_test.py CMO 250 | ::python single_factor_test.py PVT 251 | ::python single_factor_test.py CCI5 252 | ::python single_factor_test.py SRMI 253 | ::python single_factor_test.py CMOSD 254 | ::python single_factor_test.py BEARPOWER 255 | ::python single_factor_test.py AD 256 | 257 | 258 | 259 | ::价值类因子(全部) ----------------------------------- 260 | ::python single_factor_test.py NegMktValue 261 | ::python single_factor_test.py PE 262 | ::python single_factor_test.py PB 263 | ::python single_factor_test.py PS 264 | ::python single_factor_test.py MktValue 265 | ::python single_factor_test.py PCF 266 | ::python single_factor_test.py LFLO 267 | ::python single_factor_test.py LCAP 268 | ::python single_factor_test.py NLSIZE 269 | ::python single_factor_test.py ForwardPE 270 | ::python single_factor_test.py StaticPE 271 | ::python single_factor_test.py ETOP 272 | ::python single_factor_test.py CETOP 273 | ::python single_factor_test.py PEG3Y 274 | ::python single_factor_test.py PEG5Y 275 | ::python single_factor_test.py CTOP 276 | ::python single_factor_test.py TA2EV 277 | ::python single_factor_test.py ETP5 278 | ::python single_factor_test.py CTP5 279 | 280 | 281 | :: 模式识别类(部分) ----------------------------------- 282 | :: 藏婴吞没(CDLCONCEALBABYSWALL) 283 | ::python single_factor_test.py CDLCONCEALBABYSWALL 284 | :: 射击之星(CDLSHOOTINGSTAR) 285 | ::python single_factor_test.py CDLSHOOTINGSTAR 286 | :: 十字暮星(CDLEVENINGDOJISTAR) 287 | ::python single_factor_test.py CDLEVENINGDOJISTAR 288 | :: 吞噬模式(CDLENGULFING) 289 | ::python single_factor_test.py CDLENGULFING 290 | :: 刺透形态(CDLPIERCING) 291 | ::python single_factor_test.py CDLPIERCING 292 | :: 倒锤头(CDLINVERTEDHAMMER) 293 | ::python single_factor_test.py CDLINVERTEDHAMMER 294 | 295 | 296 | :: 每股指标类(部分) ---------------------------------- 297 | :: 基本每股收益(BasicEPS) 298 | ::python single_factor_test.py BasicEPS 299 | :: 每股收益TTM值(EPS) 300 | ::python single_factor_test.py EPS 301 | :: 每股净资产(NetAssetPS) 302 | ::python single_factor_test.py NetAssetPS 303 | :: 每股营业总收入(TORPS) 304 | ::python single_factor_test.py TORPS 305 | :: 每股营业利润(OperatingProfitPS) 306 | ::python single_factor_test.py OperatingProfitPS 307 | :: 每股息税前利润(EBITPS) 308 | ::python single_factor_test.py EBIPTS 309 | :: 每股现金流量净额(CashFlowPS) 310 | ::python single_factor_test.py CashFlowPS 311 | :: 每股企业自由现金流量(EnterpriseFCFPS) 312 | ::python single_factor_test.py EnterpriseFCFPS 313 | 314 | 315 | :: 行业与分析师类(部分) ------------------------------ 316 | :: 12月相对强势(RSTR12) 317 | ::python single_factor_test.py RSTR12 318 | :: 24月相对强势(RSTR24) 319 | ::python single_factor_test.py RSTR24 320 | :: 分析师盈利预测(FY12P) 321 | ::python single_factor_test.py FY12P 322 | :: 分析师营收预测(SFY12P) 323 | ::python single_factor_test.py SFY12P 324 | :: (PB–PB的行业均值)/PB的行业标准差(PBIndu) 325 | ::python single_factor_test.py PBIndu 326 | :: PCF–PCF的行业均值)/PCF的行业标准差(PCFIndu) 327 | ::python single_factor_test.py PCFIndu 328 | ::(PE–PE的行业均值)/PE的行业标准差(PEIndu) 329 | ::python single_factor_test.py PEIndu 330 | :: (PS–PS的行业均值)/PS的行业标准差(PSIndu) 331 | ::python single_factor_test.py PSIndu 332 | :: 投资回报率预测(EPIBS) 333 | ::python single_factor_test.py EPIBS 334 | :: 未来预期盈利增长(FEARNG) 335 | ::python single_factor_test.py FEARNG 336 | :: 未来预期盈收增长(FSALESG) 337 | ::python single_factor_test.py FSALESG 338 | :: 长期盈利增长预测(EgibsLong) 339 | ::python single_factor_test.py EgibsLong 340 | 341 | ::python get_factor_report.py 行业与分析师类 342 | 343 | :: 特色技术指标(部分) -------------------------------- 344 | :: 绝对价格振荡器(APO) 345 | ::python single_factor_test.py APO 346 | :: 平均价格(AVGPRICE) 347 | ::python single_factor_test.py AVGPRICE 348 | :: 均势指标(BOP) 349 | ::python single_factor_test.py BOP 350 | :: 考夫曼自适应移动平均线(KAMA) 351 | ::python single_factor_test.py KAMA 352 | :: 线性回归(LINEARREG) 353 | ::python single_factor_test.py LINEARREG 354 | :: 标准差(STDDEV) 355 | ::python single_factor_test.py STDDEV 356 | :: 时间序列预测(TSF) 357 | ::python single_factor_test.py TSF -------------------------------------------------------------------------------- /same_weight_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | ---------------------------------------------------------- 3 | 策略思路: 4 | 1. 回测标的:沪深300成分股 5 | 2. 回测时间段:2016-01-01 至 2018-09-30 6 | 3. 特征选择:每个大类夏普率最高的因子+夏普率高于1.5的因子 7 | - 质量类:ROIC, CashToCurrentLiability 8 | - 特色技术指标:STDDEV 9 | - 收益风险:DDNCR 10 | - 情绪类:TVMA20/PVI 11 | - 每股指标类:EnterpriseFCFPS 12 | - 价值类:PS 13 | - 基础类:AdminExpenseTTM, FinanExpenseTTM, NetIntExpense, GrossProfit/NIAP 14 | - 行业分析师:FY12P 15 | - 动量类:AD 16 | - 成长类:TotalAssetGrowRate 17 | - 常用技术类:MA120 18 | ... 其余逻辑参照single_factor_test.py 19 | 20 | ---------------------------------------------------------- 21 | """ 22 | from atrader import * 23 | import pandas as pd 24 | import numpy as np 25 | from sklearn.ensemble import RandomForestRegressor 26 | import math 27 | from sklearn import preprocessing 28 | import datetime 29 | 30 | # 作为全局变量进行测试 31 | FactorCode = ['ROIC', 'CashToCurrentLiability', 'STDDEV', 'DDNCR', 'PVI', 'EnterpriseFCFPS', 32 | 'PS', 'AdminExpenseTTM', 'FinanExpenseTTM', 'NetIntExpense', 'NIAP', 'FY12P', 33 | 'AD', 'TotalAssetGrowRate', 'MA120'] 34 | 35 | # 中位数去极值法 36 | def filter_MAD(df, factor, n=3): 37 | """ 38 | :param df: 去极值的因子序列 39 | :param factor: 待去极值的因子 40 | :param n: 中位数偏差值的上下界倍数 41 | :return: 经过处理的因子dataframe 42 | """ 43 | median = df[factor].quantile(0.5) 44 | new_median = ((df[factor] - median).abs()).quantile(0.5) 45 | max_range = median + n * new_median 46 | min_range = median - n * new_median 47 | 48 | for i in range(df.shape[0]): 49 | if df.loc[i, factor] > max_range: 50 | df.loc[i, factor] = max_range 51 | elif df.loc[i, factor] < min_range: 52 | df.loc[i, factor] = min_range 53 | return df 54 | 55 | 56 | def init(context): 57 | # 账号设置:设置初始资金为 10000000 元 58 | set_backtest(initial_cash=10000000, future_cost_fee=1.0, stock_cost_fee=30, margin_rate=1.0, slide_price=0.0, 59 | price_loc=1, deal_type=0, limit_type=0) 60 | # 注册数据:日频数据 61 | reg_kdata('day', 1) 62 | global FactorCode # 全局单因子代号 63 | reg_factor(factor=FactorCode) 64 | 65 | context.FactorCode = FactorCode # 66 | 67 | # 超参数设置: 68 | context.Len = 21 # 时间长度: 当交易日个数小于该事件长度时,跳过该交易日,假设平均每个月 21 个交易日左右 250/12 69 | context.Num = 0 # 记录当前交易日个数 70 | 71 | # 较敏感的超参数,需要调节 72 | context.upper_pos = 85 # 股票预测收益率的上分位数,高于则买入 73 | context.down_pos = 10 # 股票预测收益率的下分位数,低于则卖出 74 | context.cash_rate = 0.6 # 计算可用资金比例的分子,利益大于0的股票越多,比例越小 75 | 76 | # 确保月初调仓 77 | days = get_trading_days('SSE', '2016-01-01', '2018-09-30') 78 | months = np.vectorize(lambda x: x.month)(days) 79 | month_begin = days[pd.Series(months) != pd.Series(months).shift(1)] 80 | context.month_begin = pd.Series(month_begin).dt.strftime('%Y-%m-%d').tolist() 81 | 82 | 83 | def on_data(context): 84 | context.Num = context.Num + 1 85 | if context.Num < context.Len: # 如果交易日个数小于Len+1,则进入下一个交易日进行回测 86 | return 87 | if datetime.datetime.strftime(context.now, '%Y-%m-%d') not in context.month_begin: # 调仓频率为月,月初开始调仓 88 | return 89 | 90 | # 获取数据: 91 | KData = get_reg_kdata(reg_idx=context.reg_kdata[0], length=context.Len, fill_up=True, df=True) 92 | FData = get_reg_factor(reg_idx=context.reg_factor[0], target_indices=[x for x in range(300)], length=context.Len, 93 | df=True) # 获取因子数据 94 | 95 | # 特征构建: 96 | Fcode = context.FactorCode # 标签不需要代号了 97 | 98 | # 数据存储变量: 99 | # Close 字段为标签,Fcode 为标签 100 | FactorDataTest = pd.DataFrame(columns=(['idx'] + Fcode)) # 存储预测特征样本 101 | 102 | # K线数据序号对齐 103 | tempIdx = KData[KData['time'] == KData['time'][0]]['target_idx'].reset_index(drop=True) 104 | 105 | # 按标的处理数据: 106 | for i in range(300): 107 | # 存储预测特征样本 108 | FactorDataTest0 = pd.DataFrame(np.full([1, len(Fcode) + 1], np.nan), columns=(['idx'] + Fcode)) 109 | # 因子数据 序号对齐, 提取当前标的的因子数据 110 | FData0 = FData[FData['target_idx'] == tempIdx[i]].reset_index(drop=True) 111 | # 预测特征集构建:建立标的索引 112 | FactorDataTest0['idx'] = tempIdx[i] 113 | # 按特征处理数据,过程同建立训练特征 114 | for FC in context.FactorCode: 115 | FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True) 116 | FactorDataTest0[FC] = FCData[context.Len - 1] 117 | 118 | # 合并测试数据 119 | FactorDataTest = FactorDataTest.append(FactorDataTest0, ignore_index=True) 120 | 121 | """ 122 | 训练集和测试集的表头字段如下 123 | FactorData DataFrame: 124 | idx | benefit | Factor 1 | Factor 2| .... 125 | benefit 作为标签,上月初Factor作为特征,此处是单因子测试,只有一个特征 126 | FactorDataTest DataFrame: 127 | idx | Factor 1 | Factor 2 | ... 128 | 本月初的因子作为预测特征 129 | """ 130 | 131 | # 数据清洗: 132 | FactorDataTest = FactorDataTest.dropna(axis=0, how='any').reset_index(drop=True) # 清洗数据 133 | Idx = FactorDataTest['idx'] # 剩余标的序号 134 | 135 | # 按特征进行预处理 136 | for Factor in context.FactorCode: 137 | FactorDataTest = filter_MAD(FactorDataTest, Factor, 5) # 中位数去极值法 138 | FactorDataTest[Factor] = preprocessing.scale(FactorDataTest[Factor]) # 标准化 139 | 140 | # print(FactorData.head(1)) 141 | # print(FactorDataTest.head(1)) 142 | 143 | # 预测特征构建:# 行(样本数)* 列(特征数) 144 | Xtest = np.ones([FactorDataTest.shape[0], len(Fcode)]) 145 | 146 | # 循环填充特征到numpy数组中 147 | for i in range(Xtest.shape[1]): 148 | Xtest[:, i] = FactorDataTest[Fcode[i]] 149 | 150 | y = np.average(Xtest, axis=1) / len(Fcode) # 对每一行的因子序列取均值 151 | 152 | # 交易设置: 153 | positions = context.account().positions['volume_long'] # 多头持仓数量 154 | valid_cash = context.account(account_idx=0).cash['valid_cash'][0] # 可用资金 155 | 156 | P = context.cash_rate / (sum(y > 0) + 1) # 设置每只标的可用资金比例 + 1 防止分母为0 157 | 158 | # 获取收益率的高分位数和低分位数 159 | low_return, high_return = np.percentile(y, [context.down_pos, context.upper_pos]) 160 | 161 | for i in range(len(Idx)): 162 | position = positions.iloc[Idx[i]] 163 | # if position == 0 and y[i] == True and valid_cash > 0: # 若预测结果为true(收益率>0),买入 164 | # print('开仓') 165 | if position == 0 and y[i] > high_return and valid_cash > 0 and y[i] > 0: # 当前无仓,且该股票收益大于高70%分位数,则开仓,买入 166 | # 开仓数量 + 1防止分母为0 167 | # print(valid_cash, P, KData['close'][Idx[i]]) # 这里的数目可考虑减少一点,,有时太多有时太少 168 | Num = int(math.floor(valid_cash * P / 100 / (KData['close'][Idx[i]] + 1)) * 100) 169 | 170 | # 控制委托量,不要过大或过小,需要保证是100的倍数 171 | if Num < 1000: 172 | Num *= 10 173 | if Num > 100000: 174 | Num = int(Num / 10) 175 | Num -= Num % 100 176 | if Num <= 0: # 不开仓 177 | continue 178 | 179 | print("开仓数量为:{}".format(Num)) 180 | order_id = order_volume(account_idx=0, target_idx=int(Idx[i]), volume=Num, side=1, position_effect=1, order_type=2, 181 | price=0) # 指定委托量开仓 182 | # 对订单号为order_id的委托单设置止损,止损距离10个整数点,触发时,委托的方式用市价委托 183 | # stop_loss_by_order(target_order_id=order_id, stop_type=1, stop_gap=10, order_type=2) 184 | # elif position > 0 and y[i] == False: #预测结果为false(收益率<0),卖出 185 | elif position > 0 and y[i] < low_return: # 当前持仓,且该股票收益小于低30%分位数,则平仓,卖出 186 | print("平仓,数量为: {}".format(position / 10)) 187 | order_volume(account_idx=0, target_idx=int(Idx[i]), volume=int(position / 10), 188 | side=2, position_effect=2, order_type=2, price=0) # 指定委托量平仓 189 | 190 | 191 | if __name__ == '__main__': 192 | 193 | file_path = 'same_weight_model.py' 194 | block = 'hs300' 195 | 196 | begin_date = '2016-01-01' 197 | end_date = '2018-09-30' 198 | 199 | strategy_name = 'same-weight-model' 200 | 201 | run_backtest(strategy_name=strategy_name, file_path=file_path, 202 | target_list=list(get_code_list('hs300', date=begin_date)['code']), 203 | frequency='day', fre_num=1, begin_date=begin_date, end_date=end_date, fq=1) 204 | -------------------------------------------------------------------------------- /single_factor_test.py: -------------------------------------------------------------------------------- 1 | """ 2 | ------------------------------------------------------- 3 | 策略思路: 4 | 1. 回测标的:沪深300成分股 5 | 2. 回测时间段:2016-01-01 至 2018-09-30 6 | 3. 特征选择:待测单因子 7 | 4. 单因子回归测试模型思路: 8 | 1. 先获得 21 天以上的K线数据和因子数据,预处理 9 | 2. 使用上月初因子和上月收益率进行线性回归 10 | 3. 使用单变量线性模型进行训练 11 | 4. 回到当前时间点,使用本月初的因子作为预测样本特征,预测本月的各股票平均收益率的大小。 12 | 5. 选股逻辑: 13 | 将符合预测结果的股票按均等分配可用资金进行下单交易。持有一个月后 ,再次进行调仓,训练预测。 14 | 6. 交易逻辑: 15 | 每次调仓时,若当前有持仓,并且符合选股条件,则仓位不动; 16 | 若不符合选股条件,则对收益低的标的进行仓位平仓; 17 | 若当前无仓,并且符合选股条件,则多开仓,对收益高的标的进行开仓; 18 | 若不符合选股条件,则不开仓,无需操作。 19 | 20 | --------------------------------------------------------- 21 | 运行方法: 22 | 1. 在 main 中定义同一类的因子列表。 23 | 2. 逐个因子执行回测。 24 | 3. 获取回测报告ID,通过ID获取绩效报告字段。 25 | 4. 保留字段到CSV文件中。 26 | """ 27 | 28 | from atrader import * 29 | import pandas as pd 30 | import numpy as np 31 | from sklearn.linear_model import LinearRegression 32 | import math 33 | from sklearn import preprocessing 34 | import datetime 35 | import sys 36 | 37 | # 作为全局变量进行测试 38 | factor = sys.argv[1] 39 | FactorCode = [factor] 40 | print("传入因子参数为" + factor) 41 | 42 | 43 | # 中位数去极值法 44 | def filter_MAD(df, factor, n=3): 45 | """ 46 | :param df: 去极值的因子序列 47 | :param factor: 待去极值的因子 48 | :param n: 中位数偏差值的上下界倍数 49 | :return: 经过处理的因子dataframe 50 | """ 51 | median = df[factor].quantile(0.5) 52 | new_median = ((df[factor] - median).abs()).quantile(0.5) 53 | max_range = median + n * new_median 54 | min_range = median - n * new_median 55 | 56 | for i in range(df.shape[0]): 57 | if df.loc[i, factor] > max_range: 58 | df.loc[i, factor] = max_range 59 | elif df.loc[i, factor] < min_range: 60 | df.loc[i, factor] = min_range 61 | return df 62 | 63 | 64 | def init(context): 65 | # 账号设置:设置初始资金为 10000000 元 66 | set_backtest(initial_cash=10000000, future_cost_fee=1.0, stock_cost_fee=30, margin_rate=1.0, slide_price=0.0, 67 | price_loc=1, deal_type=0, limit_type=0) 68 | # 注册数据:日频数据 69 | reg_kdata('day', 1) 70 | global FactorCode # 全局单因子代号 71 | reg_factor(factor=FactorCode) 72 | print("init 函数, 注册因子为{}".format(FactorCode[0])) 73 | context.FactorCode = FactorCode # 74 | 75 | # 超参数设置: 76 | context.Len = 21 # 时间长度: 当交易日个数小于该事件长度时,跳过该交易日,假设平均每个月 21 个交易日左右 250/12 77 | context.Num = 0 # 记录当前交易日个数 78 | 79 | # 较敏感的超参数,需要调节 80 | context.upper_pos = 80 # 股票预测收益率的上分位数,高于则买入 81 | context.down_pos = 20 # 股票预测收益率的下分位数,低于则卖出 82 | context.cash_rate = 0.6 # 计算可用资金比例的分子,利益大于0的股票越多,比例越小 83 | 84 | # 确保月初调仓 85 | days = get_trading_days('SSE', '2016-01-01', '2018-09-30') 86 | months = np.vectorize(lambda x: x.month)(days) 87 | month_begin = days[pd.Series(months) != pd.Series(months).shift(1)] 88 | context.month_begin = pd.Series(month_begin).dt.strftime('%Y-%m-%d').tolist() 89 | 90 | 91 | 92 | def on_data(context): 93 | context.Num = context.Num + 1 94 | if context.Num < context.Len: # 如果交易日个数小于Len+1,则进入下一个交易日进行回测 95 | return 96 | if datetime.datetime.strftime(context.now, '%Y-%m-%d') not in context.month_begin: # 调仓频率为月,月初开始调仓 97 | return 98 | 99 | # 获取数据: 100 | KData = get_reg_kdata(reg_idx=context.reg_kdata[0], length=context.Len, fill_up=True, df=True) 101 | FData = get_reg_factor(reg_idx=context.reg_factor[0], target_indices=[x for x in range(300)], length=context.Len, 102 | df=True) # 获取因子数据 103 | 104 | # 特征构建: 105 | Fcode = context.FactorCode # 标签不需要代号了 106 | 107 | # 数据存储变量: 108 | # Close 字段为标签,Fcode 为标签 109 | FactorData = pd.DataFrame(columns=(['idx', 'benefit'] + Fcode)) # 存储训练特征及标签样本 110 | FactorDataTest = pd.DataFrame(columns=(['idx'] + Fcode)) # 存储预测特征样本 111 | 112 | # K线数据序号对齐 113 | tempIdx = KData[KData['time'] == KData['time'][0]]['target_idx'].reset_index(drop=True) 114 | 115 | # 按标的处理数据: 116 | for i in range(300): 117 | # 训练特征集及训练标签构建: 118 | # 临时数据存储变量: 119 | FactorData0 = pd.DataFrame(np.full([1, len(Fcode) + 2], np.nan), 120 | columns=(['idx', 'benefit'] + Fcode)) 121 | # 存储预测特征样本 122 | FactorDataTest0 = pd.DataFrame(np.full([1, len(Fcode) + 1], np.nan), columns=(['idx'] + Fcode)) 123 | 124 | # 因子数据 序号对齐, 提取当前标的的因子数据 125 | FData0 = FData[FData['target_idx'] == tempIdx[i]].reset_index(drop=True) 126 | 127 | # 按特征处理数据: 128 | for FC in context.FactorCode: 129 | # 提取当前标的中与当前因子FC相同的部分 130 | FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True) 131 | FactorData0[FC] = FCData[0] # 存储上一个月初的股票因子数据 132 | 133 | # 按标签处理数据: 134 | # 提取当前标的的前一个月的K线面板数据 135 | close = np.array(KData[KData['target_idx'] == tempIdx[i]]['close']) 136 | # 计算当前标的在上一个月的收益率 137 | benefit = (close[context.Len - 1] - close[0]) / close[0] 138 | 139 | FactorData0['benefit'] = benefit 140 | # idx: 建立当前标的在训练样本集中的索引 141 | FactorData0['idx'] = tempIdx[i] 142 | # 合并数据:组成训练样本 143 | FactorData = FactorData.append(FactorData0, ignore_index=True) 144 | 145 | # 预测特征集构建:建立标的索引 146 | FactorDataTest0['idx'] = tempIdx[i] 147 | # 按特征处理数据,过程同建立训练特征 148 | for FC in context.FactorCode: 149 | FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True) 150 | FactorDataTest0[FC] = FCData[context.Len - 1] 151 | 152 | # 合并测试数据 153 | FactorDataTest = FactorDataTest.append(FactorDataTest0, ignore_index=True) 154 | 155 | """ 156 | 训练集和测试集的表头字段如下 157 | FactorData DataFrame: 158 | idx | benefit | Factor 1 | Factor 2| .... 159 | benefit 作为标签,上月初Factor作为特征,此处是单因子测试,只有一个特征 160 | FactorDataTest DataFrame: 161 | idx | Factor 1 | Factor 2 | ... 162 | 本月初的因子作为预测特征 163 | """ 164 | 165 | # 数据清洗: 166 | FactorData = FactorData.dropna(axis=0, how='any').reset_index(drop=True) # 清洗数据 167 | FactorDataTest = FactorDataTest.dropna(axis=0, how='any').reset_index(drop=True) # 清洗数据 168 | Idx = FactorDataTest['idx'] # 剩余标的序号 169 | 170 | # 按特征进行预处理 171 | for Factor in context.FactorCode: 172 | FactorData = filter_MAD(FactorData, Factor, 5) # 中位数去极值法 173 | FactorData[Factor] = preprocessing.scale(FactorData[Factor]) # 标准化 174 | 175 | FactorDataTest = filter_MAD(FactorDataTest, Factor, 5) # 中位数去极值法 176 | FactorDataTest[Factor] = preprocessing.scale(FactorDataTest[Factor]) # 标准化 177 | 178 | # print(FactorData.head(1)) 179 | # print(FactorDataTest.head(1)) 180 | 181 | # 训练和预测特征构建:# 行(样本数)* 列(特征数) 182 | X = np.ones([FactorData.shape[0], len(Fcode)]) 183 | Xtest = np.ones([FactorDataTest.shape[0], len(Fcode)]) 184 | 185 | # 循环填充特征到numpy数组中 186 | for i in range(X.shape[1]): 187 | X[:, i] = FactorData[Fcode[i]] 188 | Xtest[:, i] = FactorDataTest[Fcode[i]] 189 | 190 | # 训练样本的标签,为浮点数的收益率 191 | Y = np.array(FactorData['benefit']).astype(float) 192 | 193 | # 构建模型: 194 | LRModel = LinearRegression(normalize=True) 195 | 196 | # 模型训练: 197 | LRModel.fit(X, Y) 198 | 199 | # LR分类预测: 200 | y = LRModel.predict(Xtest) 201 | 202 | # 交易设置: 203 | positions = context.account().positions['volume_long'] # 多头持仓数量 204 | valid_cash = context.account(account_idx=0).cash['valid_cash'][0] # 可用资金 205 | 206 | P = context.cash_rate / (sum(y > 0) + 1) # 设置每只标的可用资金比例 + 1 防止分母为0 207 | 208 | # 获取收益率的高分位数和低分位数 209 | low_return, high_return = np.percentile(y, [context.down_pos, context.upper_pos]) 210 | 211 | for i in range(len(Idx)): 212 | position = positions.iloc[Idx[i]] 213 | if position == 0 and y[i] > high_return and valid_cash > 0: # 当前无仓,且该股票收益大于高80%分位数,则开仓,买入 214 | # 开仓数量 + 1防止分母为0 215 | # print(valid_cash, P, KData['close'][Idx[i]]) # 这里的数目可考虑减少一点,,有时太多有时太少 216 | Num = int(math.floor(valid_cash * P / 100 / (KData['close'][Idx[i]] + 1)) * 100) 217 | 218 | # 控制委托量,不要过大或过小,需要保证是100的倍数 219 | if Num < 1000: 220 | Num *= 10 221 | if Num > 100000: 222 | Num = int(Num / 10) 223 | Num -= Num % 100 224 | if Num <= 0: # 不开仓 225 | continue 226 | 227 | print("开仓数量为:{}".format(Num)) 228 | order_id = order_volume(account_idx=0, target_idx=int(Idx[i]), volume=Num, side=1, position_effect=1, order_type=2, 229 | price=0) # 指定委托量开仓 230 | # 对订单号为order_id的委托单设置止损,止损距离10个整数点,触发时,委托的方式用市价委托 231 | # stop_loss_by_order(target_order_id=order_id, stop_type=1, stop_gap=10, order_type=2) 232 | 233 | elif position > 0 and y[i] < low_return: # 当前持仓,且该股票收益小于低60%分位数,则平仓,卖出 234 | #print("平仓") 235 | order_volume(account_idx=0, target_idx=int(Idx[i]), volume=int(position), side=2, position_effect=2, 236 | order_type=2, price=0) # 指定委托量平仓 237 | 238 | 239 | if __name__ == '__main__': 240 | 241 | file_path = 'single_factor_test.py' 242 | block = 'hs300' 243 | 244 | begin_date = '2016-01-01' 245 | end_date = '2018-09-30' 246 | 247 | strategy_name = factor 248 | 249 | run_backtest(strategy_name=strategy_name, file_path=file_path, 250 | target_list=list(get_code_list('hs300', date=begin_date)['code']), 251 | frequency='day', fre_num=1, begin_date=begin_date, end_date=end_date, fq=1) 252 | 253 | -------------------------------------------------------------------------------- /svm.py: -------------------------------------------------------------------------------- 1 | """ 2 | ---------------------------------------------------------- 3 | 策略思路: 4 | 1. 回测标的:沪深300成分股 5 | 2. 回测时间段:2016-01-01 至 2018-09-30 6 | 3. 特征选择:每个大类夏普率最高的因子+夏普率高于1.5的因子 7 | - 质量类:ROIC, CashToCurrentLiability 8 | - 特色技术指标:STDDEV 9 | - 收益风险:DDNCR 10 | - 情绪类:TVMA20 11 | - 每股指标类:EnterpriseFCFPS 12 | - 价值类:PS 13 | - 基础类:AdminExpenseTTM, FinanExpenseTTM, NetIntExpense, GrossProfit 14 | - 行业分析师:FY12P 15 | - 动量类:TotalAssetGrowRate 16 | - 成长类:TotalAssetGrowRate 17 | - 常用技术类:MA120 18 | ... 其余逻辑参照single_factor_test.py 19 | ---------------------------------------------------------- 20 | """ 21 | from atrader import * 22 | import pandas as pd 23 | import numpy as np 24 | from sklearn import svm 25 | import math 26 | from sklearn import preprocessing 27 | import datetime 28 | from sklearn.decomposition import PCA 29 | from sklearn import linear_model 30 | from sklearn.preprocessing import OneHotEncoder 31 | from sklearn.ensemble import GradientBoostingRegressor 32 | 33 | # 作为全局变量进行测试 34 | FactorCode = ['ROIC', 'CashToCurrentLiability', 'STDDEV', 'DDNCR', 'PVI', 'EnterpriseFCFPS', 35 | 'PS', 'AdminExpenseTTM', 'FinanExpenseTTM', 'NetIntExpense', 'NIAP', 'FY12P', 36 | 'AD', 'TotalAssetGrowRate', 'MA120'] 37 | 38 | 39 | # 中位数去极值法 40 | def filter_MAD(df, factor, n=3): 41 | """ 42 | :param df: 去极值的因子序列 43 | :param factor: 待去极值的因子 44 | :param n: 中位数偏差值的上下界倍数 45 | :return: 经过处理的因子dataframe 46 | """ 47 | median = df[factor].quantile(0.5) 48 | new_median = ((df[factor] - median).abs()).quantile(0.5) 49 | max_range = median + n * new_median 50 | min_range = median - n * new_median 51 | 52 | for i in range(df.shape[0]): 53 | if df.loc[i, factor] > max_range: 54 | df.loc[i, factor] = max_range 55 | elif df.loc[i, factor] < min_range: 56 | df.loc[i, factor] = min_range 57 | return df 58 | 59 | 60 | def init(context): 61 | # 账号设置:设置初始资金为 10000000 元 62 | set_backtest(initial_cash=10000000, future_cost_fee=1.0, stock_cost_fee=30, margin_rate=1.0, slide_price=0.0, 63 | price_loc=1, deal_type=0, limit_type=0) 64 | # 注册数据:日频数据 65 | reg_kdata('day', 1) 66 | global FactorCode # 全局单因子代号 67 | reg_factor(factor=FactorCode) 68 | context.FactorCode = FactorCode # 69 | 70 | # 超参数设置: 71 | context.Len = 21 # 时间长度: 当交易日个数小于该事件长度时,跳过该交易日,假设平均每个月 21 个交易日左右 250/12 72 | context.Num = 0 # 记录当前交易日个数 73 | 74 | # 较敏感的超参数,需要调节 75 | context.upper_pos = 80 # 股票预测收益率的上分位数,高于则买入 76 | context.down_pos = 20 # 股票预测收益率的下分位数,低于则卖出 77 | context.cash_rate = 0.6 # 计算可用资金比例的分子,利益大于0的股票越多,比例越小 78 | 79 | # 确保月初调仓 80 | days = get_trading_days('SSE', '2016-01-01', '2018-09-30') 81 | months = np.vectorize(lambda x: x.month)(days) 82 | month_begin = days[pd.Series(months) != pd.Series(months).shift(1)] 83 | context.month_begin = pd.Series(month_begin).dt.strftime('%Y-%m-%d').tolist() 84 | 85 | 86 | def on_data(context): 87 | context.Num = context.Num + 1 88 | if context.Num < context.Len: # 如果交易日个数小于Len+1,则进入下一个交易日进行回测 89 | return 90 | if datetime.datetime.strftime(context.now, '%Y-%m-%d') not in context.month_begin: # 调仓频率为月,月初开始调仓 91 | return 92 | 93 | # 获取数据: 94 | KData = get_reg_kdata(reg_idx=context.reg_kdata[0], length=context.Len, fill_up=True, df=True) 95 | FData = get_reg_factor(reg_idx=context.reg_factor[0], target_indices=[x for x in range(300)], length=context.Len, 96 | df=True) # 获取因子数据 97 | 98 | # 特征构建: 99 | Fcode = context.FactorCode # 标签不需要代号了 100 | 101 | # 数据存储变量: 102 | # Close 字段为标签,Fcode 为标签 103 | FactorData = pd.DataFrame(columns=(['idx', 'benefit'] + Fcode)) # 存储训练特征及标签样本 104 | FactorDataTest = pd.DataFrame(columns=(['idx'] + Fcode)) # 存储预测特征样本 105 | 106 | # K线数据序号对齐 107 | tempIdx = KData[KData['time'] == KData['time'][0]]['target_idx'].reset_index(drop=True) 108 | 109 | # 按标的处理数据: 110 | for i in range(300): 111 | # 训练特征集及训练标签构建: 112 | # 临时数据存储变量: 113 | FactorData0 = pd.DataFrame(np.full([1, len(Fcode) + 2], np.nan), 114 | columns=(['idx', 'benefit'] + Fcode)) 115 | # 存储预测特征样本 116 | FactorDataTest0 = pd.DataFrame(np.full([1, len(Fcode) + 1], np.nan), columns=(['idx'] + Fcode)) 117 | 118 | # 因子数据 序号对齐, 提取当前标的的因子数据 119 | FData0 = FData[FData['target_idx'] == tempIdx[i]].reset_index(drop=True) 120 | 121 | # 按特征处理数据: 122 | for FC in context.FactorCode: 123 | # 提取当前标的中与当前因子FC相同的部分 124 | FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True) 125 | FactorData0[FC] = FCData[0] # 存储上一个月初的股票因子数据 126 | 127 | # 按标签处理数据: 128 | # 提取当前标的的前一个月的K线面板数据 129 | close = np.array(KData[KData['target_idx'] == tempIdx[i]]['close']) 130 | # 计算当前标的在上一个月的收益率 131 | benefit = (close[context.Len - 1] - close[0]) / close[0] 132 | 133 | FactorData0['benefit'] = benefit 134 | # idx: 建立当前标的在训练样本集中的索引 135 | FactorData0['idx'] = tempIdx[i] 136 | # 合并数据:组成训练样本 137 | FactorData = FactorData.append(FactorData0, ignore_index=True) 138 | 139 | # 预测特征集构建:建立标的索引 140 | FactorDataTest0['idx'] = tempIdx[i] 141 | # 按特征处理数据,过程同建立训练特征 142 | for FC in context.FactorCode: 143 | FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True) 144 | FactorDataTest0[FC] = FCData[context.Len - 1] 145 | 146 | # 合并测试数据 147 | FactorDataTest = FactorDataTest.append(FactorDataTest0, ignore_index=True) 148 | 149 | """ 150 | 训练集和测试集的表头字段如下 151 | FactorData DataFrame: 152 | idx | benefit | Factor 1 | Factor 2| .... 153 | benefit 作为标签,上月初Factor作为特征,此处是单因子测试,只有一个特征 154 | FactorDataTest DataFrame: 155 | idx | Factor 1 | Factor 2 | ... 156 | 本月初的因子作为预测特征 157 | """ 158 | 159 | # 数据清洗: 160 | FactorData = FactorData.dropna(axis=0, how='any').reset_index(drop=True) # 清洗数据 161 | FactorDataTest = FactorDataTest.dropna(axis=0, how='any').reset_index(drop=True) # 清洗数据 162 | Idx = FactorDataTest['idx'] # 剩余标的序号 163 | 164 | # 按特征进行预处理 165 | for Factor in context.FactorCode: 166 | FactorData = filter_MAD(FactorData, Factor, 5) # 中位数去极值法 167 | FactorData[Factor] = preprocessing.scale(FactorData[Factor]) # 标准化 168 | 169 | FactorDataTest = filter_MAD(FactorDataTest, Factor, 5) # 中位数去极值法 170 | FactorDataTest[Factor] = preprocessing.scale(FactorDataTest[Factor]) # 标准化 171 | 172 | # print(FactorData.head(1)) 173 | # print(FactorDataTest.head(1)) 174 | 175 | # 训练和预测特征构建:# 行(样本数)* 列(特征数) 176 | X = np.ones([FactorData.shape[0], len(Fcode)]) 177 | Xtest = np.ones([FactorDataTest.shape[0], len(Fcode)]) 178 | 179 | # 循环填充特征到numpy数组中 180 | for i in range(X.shape[1]): 181 | X[:, i] = FactorData[Fcode[i]] 182 | Xtest[:, i] = FactorDataTest[Fcode[i]] 183 | 184 | # 训练样本的标签,为浮点数的收益率 185 | Y = (np.array(FactorData['benefit']).astype(float) > 0) 186 | 187 | SVM = svm.SVR(gamma='scale') 188 | 189 | gbr = GradientBoostingRegressor() 190 | gbr.fit(X, Y) 191 | enc = OneHotEncoder() 192 | enc.fit(gbr.apply(X)) 193 | 194 | new_X = enc.transform(gbr.apply(X)) 195 | new_X = new_X.toarray() 196 | 197 | X = new_X 198 | 199 | new_Xtest = enc.transform(gbr.apply(Xtest)) 200 | new_Xtest = new_Xtest.toarray() 201 | Xtest = new_Xtest 202 | 203 | # 模型训练: 204 | SVM.fit(X, Y) 205 | 206 | # LR分类预测: 207 | y = SVM.predict(Xtest) 208 | # 交易设置: 209 | positions = context.account().positions['volume_long'] # 多头持仓数量 210 | valid_cash = context.account(account_idx=0).cash['valid_cash'][0] # 可用资金 211 | 212 | P = context.cash_rate / (sum(y > 0) + 1) # 设置每只标的可用资金比例 + 1 防止分母为0 213 | 214 | # 获取收益率的高分位数和低分位数 215 | low_return, high_return = np.percentile(y, [context.down_pos, context.upper_pos]) 216 | 217 | for i in range(len(Idx)): 218 | position = positions.iloc[Idx[i]] 219 | #if position == 0 and y[i] == True and valid_cash > 0: # 若预测结果为true(收益率>0),买入 220 | # print('开仓') 221 | if position == 0 and y[i] > high_return and valid_cash > 0: 222 | # 开仓数量 + 1防止分母为0 223 | # print(valid_cash, P, KData['close'][Idx[i]]) # 这里的数目可考虑减少一点,,有时太多有时太少 224 | Num = int(math.floor(valid_cash * P / 100 / (KData['close'][Idx[i]] + 1)) * 100) 225 | 226 | # 控制委托量,不要过大或过小,需要保证是100的倍数 227 | if Num < 1000: 228 | Num *= 10 229 | if Num > 100000: 230 | Num = int(Num / 10) 231 | Num -= Num % 100 232 | if Num <= 0: # 不开仓 233 | continue 234 | 235 | print("开仓数量为:{}".format(Num)) 236 | order_id = order_volume(account_idx=0, target_idx=int(Idx[i]), volume=Num, side=1, position_effect=1, order_type=2, 237 | price=0) # 指定委托量开仓 238 | # 对订单号为order_id的委托单设置止损,止损距离10个整数点,触发时,委托的方式用市价委托 239 | # stop_loss_by_order(target_order_id=order_id, stop_type=1, stop_gap=10, order_type=2) 240 | # elif position > 0 and y[i] == False: #预测结果为false(收益率<0),卖出 241 | elif position > 0 and y[i] < low_return: # 当前持仓,且该股票收益小于低30%分位数,则平仓,卖出 242 | print("平仓,数量为: {}".format(position / 10)) 243 | order_volume(account_idx=0, target_idx=int(Idx[i]), volume=int(position / 10), 244 | side=2, position_effect=2, order_type=2, price=0) # 指定委托量平仓 245 | 246 | 247 | if __name__ == '__main__': 248 | 249 | file_path = 'svm.py' 250 | block = 'hs300' 251 | 252 | begin_date = '2016-01-01' 253 | end_date = '2018-09-30' 254 | 255 | strategy_name = 'svm' 256 | 257 | run_backtest(strategy_name=strategy_name, file_path=file_path, 258 | target_list=list(get_code_list('hs300', date=begin_date)['code']), 259 | frequency='day', fre_num=1, begin_date=begin_date, end_date=end_date, fq=1) 260 | -------------------------------------------------------------------------------- /time_roll_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | ---------------------------------------------------------- 3 | 策略思路: 4 | 1. 回测标的:沪深300成分股 5 | 2. 回测时间段:2016-01-01 至 2018-09-30 6 | 3. 特征选择:每个大类夏普率最高的因子+夏普率高于1.5的因子 7 | - 质量类:ROIC, CashToCurrentLiability 8 | - 特色技术指标:STDDEV 9 | - 收益风险:DDNCR 10 | - 情绪类:TVMA20 11 | - 每股指标类:EnterpriseFCFPS 12 | - 价值类:PS TA2EV 13 | - 基础类:AdminExpenseTTM, FinanExpenseTTM, NetIntExpense 14 | - 行业分析师:FY12P 15 | - 成长类:TotalAssetGrowRate 16 | - 常用技术类:MA120 17 | ... 其余逻辑参照single_factor_test.py 18 | ---------------------------------------------------------- 19 | 20 | 时间窗口滚动模型: 21 | 在原来的基础上增加了滚动选项。 22 | 原来的时间窗口固定为一个,即前20天为一个时间窗口。 23 | 现在支持时间窗口向前滚动获取数据,有:时间窗口第一天的因子值,时间窗口内各股票的平均收益率; 24 | 25 | """ 26 | from atrader import * 27 | import pandas as pd 28 | import numpy as np 29 | from sklearn import svm 30 | import math 31 | from sklearn import preprocessing 32 | import datetime 33 | from xgboost.sklearn import XGBRegressor 34 | from sklearn.ensemble import RandomForestRegressor 35 | 36 | # 中位数去极值法 37 | def filter_MAD(df, factor, n=3): 38 | """ 39 | :param df: 去极值的因子序列 40 | :param factor: 待去极值的因子 41 | :param n: 中位数偏差值的上下界倍数 42 | :return: 经过处理的因子dataframe 43 | """ 44 | median = df[factor].quantile(0.5) 45 | new_median = ((df[factor] - median).abs()).quantile(0.5) 46 | max_range = median + n * new_median 47 | min_range = median - n * new_median 48 | 49 | for i in range(df.shape[0]): 50 | if df.loc[i, factor] > max_range: 51 | df.loc[i, factor] = max_range 52 | elif df.loc[i, factor] < min_range: 53 | df.loc[i, factor] = min_range 54 | return df 55 | 56 | 57 | def init(context): 58 | 59 | # 账号设置:设置初始资金为 10000000 元 60 | set_backtest(initial_cash=10000000, future_cost_fee=1.0, stock_cost_fee=30, margin_rate=1.0, slide_price=0.0, 61 | price_loc=1, deal_type=0, limit_type=0) 62 | 63 | # 注册数据:日频数据 64 | reg_kdata('day', 1) 65 | FactorCode = ['ROIC', 'CashToCurrentLiability', 'STDDEV', 'DDNCR', 'PVI', 'EnterpriseFCFPS', 66 | 'PS', 'AdminExpenseTTM', 'FinanExpenseTTM', 'NetIntExpense', 'NIAP', 'FY12P', 67 | 'AD', 'TotalAssetGrowRate', 'MA120'] 68 | reg_factor(factor=FactorCode) 69 | context.FactorCode = FactorCode 70 | 71 | # 参数设置: 72 | context.LEN = 21 # 时间窗口滑动最大范围 73 | context.N1 = 20 # 时间窗口中的训练/预测特征部分 74 | context.Num = 0 # 记录当前交易日个数,保证交易日个数需要大于时间窗口滑动的最大范围 75 | 76 | # 较敏感的超参数,需要调节 77 | context.upper_pos = 80 # 股票预测收益率的上分位数,高于则买入 78 | context.down_pos = 20 # 股票预测收益率的下分位数,低于则卖出 79 | context.cash_rate = 0.6 # 计算可用资金比例的分子, 80 | 81 | # 确保月初调仓 82 | days = get_trading_days('SZSE', '2016-01-01', '2018-09-30') 83 | months = np.vectorize(lambda x: x.month)(days) 84 | month_begin = days[pd.Series(months) != pd.Series(months).shift(1)] 85 | context.month_begin = pd.Series(month_begin).dt.strftime('%Y-%m-%d').tolist() 86 | 87 | 88 | def on_data(context): 89 | context.Num = context.Num + 1 # 交易日数目+1 90 | if context.Num < context.LEN: # 如果交易日个数小于Len+1,则进入下一个交易日进行回测 91 | return 92 | if datetime.datetime.strftime(context.now, '%Y-%m-%d') not in context.month_begin: # 调仓频率为月,月初开始调仓 93 | return 94 | 95 | # -------------------------------------------- # 96 | # 获取 K线数据和因子数据 # 97 | # -------------------------------------------- # 98 | """ 99 | K 线数据 DataFrame结构: 100 | | target_idx | time | open | high | low | close | volume | amount | open_interest 101 | | 标的索引号 | 日期 | 开盘价 | 最高价 |最低价 | 收盘价 | 成交量 | 成交金额 | 持仓量 102 | 如果获取了 LEN 天的各股票对应的K线数据,那么行排列是: 103 | 0 至 LEN - 1 行先排第一个股票在LEN天内K线数据, 104 | 然后 LEN 至 2 LEN - 1行排第二个股票在LEN天内的K线数据。 105 | """ 106 | KData = get_reg_kdata(reg_idx=context.reg_kdata[0], length=context.LEN, fill_up=True, df=True) 107 | 108 | """ 109 | 因子数据 DataFrame结构: 110 | | target_idx | date | factor | value | 111 | | 标的序号 | 日期 | 因子名称 | 因子值| 112 | 行排列情况:先排一个股票在LEN天内的某一因子值,然后在排该股票下一个因子值,直到因子值排完, 113 | 然后再轮到下一个股票 114 | """ 115 | FData = get_reg_factor(reg_idx=context.reg_factor[0], target_indices=[x for x in range(300)], length=context.LEN, 116 | df=True) # 获取因子数据 117 | 118 | # ------------------------------------- # 119 | # 特征构建 # 120 | # ------------------------------------- # 121 | Fcode = list() 122 | # 此处构建因子列名,取时间窗的第一天因子作为训练/预测数据样本 123 | Fcode = context.FactorCode 124 | 125 | FactorData_list = [] # 存储多个时间窗口的训练样本和标签 126 | """ 127 | 用于训练的DataFrame,每一列的含义如下: 128 | idx | benefit | factor1 | factor1 | .... | factorm 129 | idx 表示沪深300股中股票的序号,范围从 0~299,我们可以通过该序号定位股票 130 | benefit 表示该股票在某时间窗口后 N2 天内的平均收益率,即涨幅情况 131 | factorm_n 表示在时间窗口内的第一天的第 m 个因子 132 | 我们使用所有的factor作为训练特征,benefit作为训练标签。 133 | """ 134 | for i in range(context.LEN - context.N1 + 1): # 时间窗口个数 135 | FactorData = pd.DataFrame(columns=(['idx', 'benefit'] + Fcode)) # 存储训练特征及标签样本 136 | FactorData_list.append(FactorData) # 将该时间窗的训练数据存入列表 137 | 138 | """ 139 | 用于预测的DataFrame,结构如下: 140 | idx | factor1_1 | factor2_1 | .... | factorm_n 141 | idx 表示沪深300股中股票的序号,范围从 0~299,我们可以通过该序号定位股票 142 | factorm_n 表示第 m 个因子在第 n 天的值 143 | 我们使用所有的factor作为预测特征,预测出未来 N2天的各股票的收益率情况 144 | """ 145 | FactorDataTest = pd.DataFrame(columns=(['idx'] + Fcode)) # 存储预测特征样本 146 | 147 | # K线数据序号对齐 148 | tempIdx = KData[KData['time'] == KData['time'][0]]['target_idx'].reset_index(drop=True) 149 | 150 | # ----------------------------------------- # 151 | # 按标的处理数据,提取训练特征和标签 # 152 | # ----------------------------------------- # 153 | for window in range(context.LEN - context.N1 + 1): # 滚动时间窗 154 | for i in range(300): # 按标的处理 155 | # 训练特征集及训练标签构建: 156 | FactorData0 = pd.DataFrame(np.full([1, len(Fcode) + 2], np.nan), columns=(['idx', 'benefit'] + Fcode)) 157 | 158 | # 因子数据 序号对齐, 提取当前标的的因子数据 159 | FData0 = FData[FData['target_idx'] == tempIdx[i]].reset_index(drop=True) 160 | 161 | # 按特征处理数据: 162 | for FC in context.FactorCode: 163 | # 提取当前标的中与当前因子FC相同的部分 164 | FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True) 165 | FactorData0[FC] = FCData[window] 166 | 167 | FactorData0['idx'] = i 168 | 169 | # 按标签处理数据: 170 | # 提取当前标的的前一个月的K线面板数据 171 | close = np.array(KData[KData['target_idx'] == tempIdx[i]]['close']) 172 | 173 | # 当前时间窗之后的N2天内的股票收益率情况 174 | benefit = (close[window + context.N1 - 1] - close[window]) / close[window] 175 | 176 | FactorData0['benefit'] = benefit 177 | FactorData_list[window] = FactorData_list[window].append(FactorData0, ignore_index=True) 178 | print("window:{}, stock :{} ".format(window, i)) 179 | print("pass this window: {}".format(window)) 180 | # ----------------------------------- # 181 | # 提取预测样本特征 # 182 | # ----------------------------------- # 183 | for i in range(300): 184 | # 存储预测特征样本 185 | FactorDataTest0 = pd.DataFrame(np.full([1, len(Fcode) + 1], np.nan), columns=(['idx'] + Fcode)) 186 | 187 | # 因子数据 序号对齐, 提取当前标的的因子数据 188 | FData0 = FData[FData['target_idx'] == tempIdx[i]].reset_index(drop=True) 189 | 190 | # 预测特征集构建:建立标的索引 191 | FactorDataTest0['idx'] = tempIdx[i] 192 | 193 | # 按特征处理数据,过程同建立训练特征 194 | for FC in context.FactorCode: 195 | FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True) 196 | FactorDataTest0[FC] = FCData[context.LEN - 1] 197 | 198 | # 合并测试数据 199 | FactorDataTest = FactorDataTest.append(FactorDataTest0, ignore_index=True) 200 | 201 | # 数据清洗: 202 | for i in range(len(FactorData_list)): 203 | FactorData_list[i] = FactorData_list[i].dropna(axis=0, how='any').reset_index(drop=True) # 清洗数据 204 | FactorDataTest = FactorDataTest.dropna(axis=0, how='any').reset_index(drop=True) # 清洗数据 205 | Idx = FactorDataTest['idx'] # 剩余标的序号 206 | 207 | # 按特征进行预处理 208 | for Factor in Fcode: 209 | # 处理多个时间窗口的训练数据。 210 | for window in range(len(FactorData_list)): 211 | FactorData_list[window] = filter_MAD(FactorData_list[window], Factor, 5) # 中位数去极值法 212 | FactorData_list[window][Factor] = preprocessing.scale(FactorData_list[window][Factor]) # 标准化 213 | 214 | FactorDataTest = filter_MAD(FactorDataTest, Factor, 5) # 中位数去极值法 215 | FactorDataTest[Factor] = preprocessing.scale(FactorDataTest[Factor]) # 标准化 216 | 217 | """ 218 | xgb_params = {'learning_rate': 0.01, 'n_estimators': 500, 'max_depth': 5, 'min_child_weight': 4, 'seed': 1000, 219 | 'subsample': 0.8, 'colsample_bytree': 0.8, 'gamma': 0.1, 'reg_alpha': 0, 'reg_lambda': 1} 220 | 221 | xgb_model = XGBRegressor(**xgb_params) 222 | """ 223 | RF = RandomForestRegressor(max_depth=5, n_estimators=50) 224 | 225 | # 训练和预测特征构建:# 行(样本数)* 列(特征数) 226 | for window in range(len(FactorData_list)): 227 | X = np.ones([FactorData_list[window].shape[0], len(Fcode)]) 228 | 229 | # 循环填充特征到numpy数组中 230 | for i in range(X.shape[1]): 231 | X[:, i] = FactorData_list[window][Fcode[i]] 232 | 233 | # 训练样本的标签,为浮点数的收益率 234 | Y = (np.array(FactorData_list[window]['benefit']).astype(float) > 0) 235 | 236 | # 模型训练: 237 | print("FITTING!") 238 | RF.fit(X, Y) 239 | 240 | Xtest = np.ones([FactorDataTest.shape[0], len(Fcode)]) 241 | for i in range(X.shape[1]): 242 | Xtest[:, i] = FactorDataTest[Fcode[i]] 243 | 244 | # 分类预测: 245 | y = RF.predict(Xtest) 246 | 247 | # 交易设置: 248 | positions = context.account().positions['volume_long'] # 多头持仓数量 249 | valid_cash = context.account(account_idx=0).cash['valid_cash'][0] # 可用资金 250 | 251 | P = context.cash_rate / (sum(y > 0) + 1) # 设置每只标的可用资金比例 + 1 防止分母为0 252 | 253 | # 获取收益率的高分位数和低分位数 254 | low_return, high_return = np.percentile(y, [context.down_pos, context.upper_pos]) 255 | 256 | for i in range(len(Idx)): 257 | position = positions.iloc[Idx[i]] 258 | if position == 0 and y[i] > high_return and valid_cash > 0: # 当前无仓,且该股票收益大于高70%分位数,则开仓,买入 259 | # 开仓数量 + 1防止分母为0 260 | Num = int(math.floor(valid_cash * P / 100 / (KData['close'][Idx[i]] + 1)) * 100) 261 | 262 | # 控制委托量,不要过大或过小,需要保证是100的倍数 263 | if Num < 1000: 264 | Num *= 10 265 | if Num > 100000: 266 | Num = int(Num / 10) 267 | Num -= Num % 100 268 | if Num <= 0: # 不开仓 269 | continue 270 | 271 | print("开仓数量为:{}".format(Num)) 272 | order_id = order_volume(account_idx=0, target_idx=int(Idx[i]), volume=Num, side=1, position_effect=1, 273 | order_type=2, price=0) # 指定委托量开仓 274 | # 对订单号为order_id的委托单设置止损,止损距离10个整数点,触发时,委托的方式用市价委托 275 | stop_loss_by_order(target_order_id=order_id, stop_type=1, stop_gap=10, order_type=2) 276 | # elif position > 0 and y[i] == False: #预测结果为false(收益率<0),卖出 277 | elif position > 0 and y[i] < low_return: # 当前持仓,且该股票收益小于低30%分位数,则平仓,卖出 278 | print("平仓,数量为: {}".format(position / 10)) 279 | order_volume(account_idx=0, target_idx=int(Idx[i]), volume=int(position / 10), 280 | side=2, position_effect=2, order_type=2, price=0) # 指定委托量平仓 281 | 282 | 283 | if __name__ == '__main__': 284 | 285 | file_path = 'time_roll_model.py' 286 | block = 'hs300' 287 | 288 | begin_date = '2016-01-01' 289 | end_date = '2018-09-30' 290 | 291 | strategy_name = 'random_forest' 292 | 293 | run_backtest(strategy_name=strategy_name, file_path=file_path, 294 | target_list=list(get_code_list('hs300', date=begin_date)['code']), 295 | frequency='day', fre_num=1, begin_date=begin_date, end_date=end_date, fq=1) 296 | -------------------------------------------------------------------------------- /xgb_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | ---------------------------------------------------------- 3 | 策略思路: 4 | 1. 回测标的:沪深300成分股 5 | 2. 回测时间段:2016-01-01 至 2018-09-30 6 | 3. 特征选择:每个大类夏普率最高的因子+夏普率高于1.5的因子 7 | - 质量类:ROIC, CashToCurrentLiability 8 | - 特色技术指标:STDDEV 9 | - 收益风险:DDNCR 10 | - 情绪类:TVMA20 11 | - 每股指标类:EnterpriseFCFPS 12 | - 价值类:PS 13 | - 基础类:AdminExpenseTTM, FinanExpenseTTM, NetIntExpense, GrossProfit 14 | - 行业分析师:FY12P 15 | - 动量类:TotalAssetGrowRate 16 | - 成长类:TotalAssetGrowRate 17 | - 常用技术类:MA120 18 | ... 其余逻辑参照single_factor_test.py 19 | ---------------------------------------------------------- 20 | """ 21 | from atrader import * 22 | import pandas as pd 23 | import numpy as np 24 | from sklearn import svm 25 | import math 26 | from sklearn import preprocessing 27 | import datetime 28 | from xgboost.sklearn import XGBRegressor 29 | 30 | # 作为全局变量进行测试 31 | FactorCode = ['ROIC', 'CashToCurrentLiability', 'STDDEV', 'DDNCR', 'PVI', 'EnterpriseFCFPS', 32 | 'PS', 'AdminExpenseTTM', 'FinanExpenseTTM', 'NetIntExpense', 'NIAP', 'FY12P', 33 | 'AD', 'TotalAssetGrowRate', 'MA120'] 34 | 35 | 36 | # 中位数去极值法 37 | def filter_MAD(df, factor, n=3): 38 | """ 39 | :param df: 去极值的因子序列 40 | :param factor: 待去极值的因子 41 | :param n: 中位数偏差值的上下界倍数 42 | :return: 经过处理的因子dataframe 43 | """ 44 | median = df[factor].quantile(0.5) 45 | new_median = ((df[factor] - median).abs()).quantile(0.5) 46 | max_range = median + n * new_median 47 | min_range = median - n * new_median 48 | 49 | for i in range(df.shape[0]): 50 | if df.loc[i, factor] > max_range: 51 | df.loc[i, factor] = max_range 52 | elif df.loc[i, factor] < min_range: 53 | df.loc[i, factor] = min_range 54 | return df 55 | 56 | 57 | def init(context): 58 | # 账号设置:设置初始资金为 10000000 元 59 | set_backtest(initial_cash=10000000, future_cost_fee=1.0, stock_cost_fee=30, margin_rate=1.0, slide_price=0.0, 60 | price_loc=1, deal_type=0, limit_type=0) 61 | # 注册数据:日频数据 62 | reg_kdata('day', 1) 63 | global FactorCode # 全局单因子代号 64 | reg_factor(factor=FactorCode) 65 | 66 | context.FactorCode = FactorCode # 67 | 68 | # 超参数设置: 69 | context.Len = 21 # 时间长度: 当交易日个数小于该事件长度时,跳过该交易日,假设平均每个月 21 个交易日左右 250/12 70 | context.Num = 0 # 记录当前交易日个数 71 | 72 | # 较敏感的超参数,需要调节 73 | context.upper_pos = 80 # 股票预测收益率的上分位数,高于则买入 74 | context.down_pos = 20 # 股票预测收益率的下分位数,低于则卖出 75 | context.cash_rate = 0.6 # 计算可用资金比例的分子,利益大于0的股票越多,比例越小 76 | 77 | # 确保月初调仓 78 | days = get_trading_days('SSE', '2016-01-01', '2018-09-30') 79 | months = np.vectorize(lambda x: x.month)(days) 80 | month_begin = days[pd.Series(months) != pd.Series(months).shift(1)] 81 | context.month_begin = pd.Series(month_begin).dt.strftime('%Y-%m-%d').tolist() 82 | 83 | 84 | def on_data(context): 85 | context.Num = context.Num + 1 86 | if context.Num < context.Len: # 如果交易日个数小于Len+1,则进入下一个交易日进行回测 87 | return 88 | if datetime.datetime.strftime(context.now, '%Y-%m-%d') not in context.month_begin: # 调仓频率为月,月初开始调仓 89 | return 90 | 91 | # 获取数据: 92 | KData = get_reg_kdata(reg_idx=context.reg_kdata[0], length=context.Len, fill_up=True, df=True) 93 | FData = get_reg_factor(reg_idx=context.reg_factor[0], target_indices=[x for x in range(300)], length=context.Len, 94 | df=True) # 获取因子数据 95 | 96 | # 特征构建: 97 | Fcode = context.FactorCode # 标签不需要代号了 98 | 99 | # 数据存储变量: 100 | # Close 字段为标签,Fcode 为标签 101 | FactorData = pd.DataFrame(columns=(['idx', 'benefit'] + Fcode)) # 存储训练特征及标签样本 102 | FactorDataTest = pd.DataFrame(columns=(['idx'] + Fcode)) # 存储预测特征样本 103 | 104 | # K线数据序号对齐 105 | tempIdx = KData[KData['time'] == KData['time'][0]]['target_idx'].reset_index(drop=True) 106 | 107 | # 按标的处理数据: 108 | for i in range(300): 109 | # 训练特征集及训练标签构建: 110 | # 临时数据存储变量: 111 | FactorData0 = pd.DataFrame(np.full([1, len(Fcode) + 2], np.nan), 112 | columns=(['idx', 'benefit'] + Fcode)) 113 | # 存储预测特征样本 114 | FactorDataTest0 = pd.DataFrame(np.full([1, len(Fcode) + 1], np.nan), columns=(['idx'] + Fcode)) 115 | 116 | # 因子数据 序号对齐, 提取当前标的的因子数据 117 | FData0 = FData[FData['target_idx'] == tempIdx[i]].reset_index(drop=True) 118 | 119 | # 按特征处理数据: 120 | for FC in context.FactorCode: 121 | # 提取当前标的中与当前因子FC相同的部分 122 | FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True) 123 | FactorData0[FC] = FCData[0] # 存储上一个月初的股票因子数据 124 | 125 | # 按标签处理数据: 126 | # 提取当前标的的前一个月的K线面板数据 127 | close = np.array(KData[KData['target_idx'] == tempIdx[i]]['close']) 128 | # 计算当前标的在上一个月的收益率 129 | benefit = (close[context.Len - 1] - close[0]) / close[0] 130 | 131 | FactorData0['benefit'] = benefit 132 | # idx: 建立当前标的在训练样本集中的索引 133 | FactorData0['idx'] = tempIdx[i] 134 | # 合并数据:组成训练样本 135 | FactorData = FactorData.append(FactorData0, ignore_index=True) 136 | 137 | # 预测特征集构建:建立标的索引 138 | FactorDataTest0['idx'] = tempIdx[i] 139 | # 按特征处理数据,过程同建立训练特征 140 | for FC in context.FactorCode: 141 | FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True) 142 | FactorDataTest0[FC] = FCData[context.Len - 1] 143 | 144 | # 合并测试数据 145 | FactorDataTest = FactorDataTest.append(FactorDataTest0, ignore_index=True) 146 | 147 | """ 148 | 训练集和测试集的表头字段如下 149 | FactorData DataFrame: 150 | idx | benefit | Factor 1 | Factor 2| .... 151 | benefit 作为标签,上月初Factor作为特征,此处是单因子测试,只有一个特征 152 | FactorDataTest DataFrame: 153 | idx | Factor 1 | Factor 2 | ... 154 | 本月初的因子作为预测特征 155 | """ 156 | 157 | # 数据清洗: 158 | FactorData = FactorData.dropna(axis=0, how='any').reset_index(drop=True) # 清洗数据 159 | FactorDataTest = FactorDataTest.dropna(axis=0, how='any').reset_index(drop=True) # 清洗数据 160 | Idx = FactorDataTest['idx'] # 剩余标的序号 161 | 162 | # 按特征进行预处理 163 | for Factor in context.FactorCode: 164 | FactorData = filter_MAD(FactorData, Factor, 5) # 中位数去极值法 165 | FactorData[Factor] = preprocessing.scale(FactorData[Factor]) # 标准化 166 | 167 | FactorDataTest = filter_MAD(FactorDataTest, Factor, 5) # 中位数去极值法 168 | FactorDataTest[Factor] = preprocessing.scale(FactorDataTest[Factor]) # 标准化 169 | 170 | # 训练和预测特征构建:# 行(样本数)* 列(特征数) 171 | X = np.ones([FactorData.shape[0], len(Fcode)]) 172 | Xtest = np.ones([FactorDataTest.shape[0], len(Fcode)]) 173 | 174 | # 循环填充特征到numpy数组中 175 | for i in range(X.shape[1]): 176 | X[:, i] = FactorData[Fcode[i]] 177 | Xtest[:, i] = FactorDataTest[Fcode[i]] 178 | 179 | # 训练样本的标签,为浮点数的收益率 180 | Y = (np.array(FactorData['benefit']).astype(float) > 0) 181 | 182 | xgb_params = {'learning_rate': 0.01, 'n_estimators': 50, 'max_depth': 10, 'min_child_weight': 5, 'seed': 1000, 183 | 'subsample': 0.8, 'colsample_bytree': 0.8, 'gamma': 0.1, 'reg_alpha': 0, 'reg_lambda': 1} 184 | 185 | xgb_reg = XGBRegressor(**xgb_params) 186 | 187 | # 模型训练: 188 | xgb_reg.fit(X, Y) 189 | 190 | # 分类预测: 191 | y = xgb_reg.predict(Xtest) 192 | 193 | # 交易设置: 194 | positions = context.account().positions['volume_long'] # 多头持仓数量 195 | valid_cash = context.account(account_idx=0).cash['valid_cash'][0] # 可用资金 196 | 197 | P = context.cash_rate / (sum(y > 0) + 1) # 设置每只标的可用资金比例 + 1 防止分母为0 198 | 199 | # 获取收益率的高分位数和低分位数 200 | low_return, high_return = np.percentile(y, [context.down_pos, context.upper_pos]) 201 | 202 | for i in range(len(Idx)): 203 | position = positions.iloc[Idx[i]] 204 | # if position == 0 and y[i] == True and valid_cash > 0: # 若预测结果为true(收益率>0),买入 205 | # print('开仓') 206 | if position == 0 and y[i] > high_return and valid_cash > 0: 207 | # 开仓数量 + 1防止分母为0 208 | # print(valid_cash, P, KData['close'][Idx[i]]) # 这里的数目可考虑减少一点,,有时太多有时太少 209 | Num = int(math.floor(valid_cash * P / 100 / (KData['close'][Idx[i]] + 1)) * 100) 210 | 211 | # 控制委托量,不要过大或过小,需要保证是100的倍数 212 | if Num < 1000: 213 | Num *= 10 214 | if Num > 100000: 215 | Num = int(Num / 10) 216 | Num -= Num % 100 217 | if Num <= 0: # 不开仓 218 | continue 219 | 220 | print("开仓数量为:{}".format(Num)) 221 | order_id = order_volume(account_idx=0, target_idx=int(Idx[i]), volume=Num, side=1, position_effect=1, order_type=2, 222 | price=0) # 指定委托量开仓 223 | # 对订单号为order_id的委托单设置止损,止损距离10个整数点,触发时,委托的方式用市价委托 224 | # stop_loss_by_order(target_order_id=order_id, stop_type=1, stop_gap=10, order_type=2) 225 | # elif position > 0 and y[i] == False: #预测结果为false(收益率<0),卖出 226 | elif position > 0 and y[i] < low_return: # 当前持仓,且该股票收益小于低30%分位数,则平仓,卖出 227 | print("平仓,数量为: {}".format(position / 10 + 100)) 228 | order_volume(account_idx=0, target_idx=int(Idx[i]), volume=int(position / 10), 229 | side=2, position_effect=2, order_type=2, price=0) # 指定委托量平仓 230 | 231 | 232 | if __name__ == '__main__': 233 | 234 | file_path = 'xgb_model.py' 235 | block = 'hs300' 236 | 237 | begin_date = '2016-01-01' 238 | end_date = '2018-09-30' 239 | 240 | strategy_name = 'xgb' 241 | 242 | run_backtest(strategy_name=strategy_name, file_path=file_path, 243 | target_list=list(get_code_list('hs300', date=begin_date)['code']), 244 | frequency='day', fre_num=1, begin_date=begin_date, end_date=end_date, fq=1) 245 | -------------------------------------------------------------------------------- /价值类.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JoshuaQYH/TIDIBEI/bae9a29662ecec55a9a9f1a9aa75c1c4de053a8c/价值类.png -------------------------------------------------------------------------------- /基础类.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JoshuaQYH/TIDIBEI/bae9a29662ecec55a9a9f1a9aa75c1c4de053a8c/基础类.png -------------------------------------------------------------------------------- /情绪类.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JoshuaQYH/TIDIBEI/bae9a29662ecec55a9a9f1a9aa75c1c4de053a8c/情绪类.png -------------------------------------------------------------------------------- /每股指标类.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JoshuaQYH/TIDIBEI/bae9a29662ecec55a9a9f1a9aa75c1c4de053a8c/每股指标类.png -------------------------------------------------------------------------------- /特色技术指标类.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JoshuaQYH/TIDIBEI/bae9a29662ecec55a9a9f1a9aa75c1c4de053a8c/特色技术指标类.png -------------------------------------------------------------------------------- /行业分析师类.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JoshuaQYH/TIDIBEI/bae9a29662ecec55a9a9f1a9aa75c1c4de053a8c/行业分析师类.png -------------------------------------------------------------------------------- /质量类.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JoshuaQYH/TIDIBEI/bae9a29662ecec55a9a9f1a9aa75c1c4de053a8c/质量类.png --------------------------------------------------------------------------------