├── .gitignore
├── .vscode
    └── settings.json
├── A.docx
├── GBDT.py
├── MLP.py
├── README.md
├── RF_line3.py
├── adaboost_model.py
├── data_exploration.ipynb
├── factor_analysis.py
├── find_factor.py
├── get_factor_report.py
├── lstm.py
├── multi_factor_lr.py
├── random_forest_reg.py
├── references
    ├── 2011年金融工程研讨会专题报告系列之二：大浪淘金，Alpha因子何处寻？.pdf
    ├── A.pdf
    ├── A股Alpha策略及产品回顾与展望——2018年金融工程年度报告.pdf
    ├── A题—通过机器学习优化股票多因子模型解题指引.pdf
    ├── SA20190100000_36930159.pdf
    ├── 人工智能选股框架及经典算法简介.pdf
    ├── 华泰证券-多因子系列之一：华泰多因子模型体系初探-160921.pdf
    ├── 单因子测试.PDF
    ├── 收益预测模型.PDF
    └── 风险模型与组合优化.PDF
├── run_test.bat
├── same_weight_model.py
├── single_factor_test.py
├── svm.py
├── time_roll_model.py
├── xgb_model.py
├── 价值类.png
├── 基础类.png
├── 情绪类.png
├── 每股指标类.png
├── 特色技术指标类.png
├── 行业分析师类.png
└── 质量类.png


/.gitignore:
--------------------------------------------------------------------------------
1 | venv
2 | .idea
3 | 


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 |     "python.pythonPath": "C:\\Users\\Qiuyh\\AppData\\Local\\Programs\\Python\\Python37\\python.exe"
3 | }


--------------------------------------------------------------------------------
/A.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JoshuaQYH/TIDIBEI/bae9a29662ecec55a9a9f1a9aa75c1c4de053a8c/A.docx


--------------------------------------------------------------------------------
/GBDT.py:
--------------------------------------------------------------------------------
  1 | """
  2 | ----------------------------------------------------------
  3 | 策略思路：
  4 | 1. 回测标的：沪深300成分股
  5 | 2. 回测时间段：2016-01-01 至 2018-09-30
  6 | 3. 特征选择：每个大类夏普率最高的因子+夏普率高于1.5的因子
  7 |     - 质量类：ROIC, CashToCurrentLiability
  8 |     - 特色技术指标：STDDEV
  9 |     - 收益风险：DDNCR
 10 |     - 情绪类：TVMA20
 11 |     - 每股指标类：EnterpriseFCFPS
 12 |     - 价值类：PS
 13 |     - 基础类：AdminExpenseTTM, FinanExpenseTTM, NetIntExpense, GrossProfit
 14 |     - 行业分析师：FY12P
 15 |     - 动量类：TotalAssetGrowRate
 16 |     - 成长类：TotalAssetGrowRate
 17 |     - 常用技术类：MA120
 18 | ... 其余逻辑参照single_factor_test.py
 19 | ----------------------------------------------------------
 20 | """
 21 | from atrader import *
 22 | import pandas as pd
 23 | import numpy as np
 24 | from sklearn import svm
 25 | import math
 26 | from sklearn import preprocessing
 27 | import datetime
 28 | from sklearn.ensemble import AdaBoostRegressor
 29 | from sklearn.ensemble import GradientBoostingRegressor
 30 | # 作为全局变量进行测试
 31 | 
 32 | 
 33 | FactorCode = ['ROIC', 'CashToCurrentLiability', 'STDDEV', 'DDNCR', 'PVI', 'EnterpriseFCFPS',
 34 |               'PS', 'AdminExpenseTTM', 'FinanExpenseTTM', 'NetIntExpense', 'NIAP', 'FY12P',
 35 |               'AD', 'TotalAssetGrowRate', 'MA120']
 36 | 
 37 | 
 38 | # 中位数去极值法
 39 | def filter_MAD(df, factor, n=3):
 40 |     """
 41 |     :param df: 去极值的因子序列
 42 |     :param factor: 待去极值的因子
 43 |     :param n: 中位数偏差值的上下界倍数
 44 |     :return: 经过处理的因子dataframe
 45 |     """
 46 |     median = df[factor].quantile(0.5)
 47 |     new_median = ((df[factor] - median).abs()).quantile(0.5)
 48 |     max_range = median + n * new_median
 49 |     min_range = median - n * new_median
 50 | 
 51 |     for i in range(df.shape[0]):
 52 |         if df.loc[i, factor] > max_range:
 53 |             df.loc[i, factor] = max_range
 54 |         elif df.loc[i, factor] < min_range:
 55 |             df.loc[i, factor] = min_range
 56 |     return df
 57 | 
 58 | 
 59 | def init(context):
 60 |     # 账号设置：设置初始资金为 10000000 元
 61 |     set_backtest(initial_cash=10000000, future_cost_fee=1.0, stock_cost_fee=30, margin_rate=1.0, slide_price=0.0,
 62 |                  price_loc=1, deal_type=0, limit_type=0)
 63 |     # 注册数据：日频数据
 64 |     reg_kdata('day', 1)
 65 |     global FactorCode  # 全局单因子代号
 66 |     reg_factor(factor=FactorCode)
 67 |     print("init 函数, 注册因子为{}".format(FactorCode[0]))
 68 |     context.FactorCode = FactorCode  #
 69 | 
 70 |     # 超参数设置：
 71 |     context.Len = 21    # 时间长度: 当交易日个数小于该事件长度时，跳过该交易日，假设平均每个月 21 个交易日左右  250/12
 72 |     context.Num = 0   # 记录当前交易日个数
 73 | 
 74 |     # 较敏感的超参数，需要调节
 75 |     context.upper_pos = 75  # 股票预测收益率的上分位数，高于则买入
 76 |     context.down_pos = 25   # 股票预测收益率的下分位数，低于则卖出
 77 |     context.cash_rate = 0.5  # 计算可用资金比例的分子，利益大于0的股票越多，比例越小
 78 | 
 79 |     # 确保月初调仓
 80 |     days = get_trading_days('SSE', '2016-01-01', '2018-09-30')
 81 |     months = np.vectorize(lambda x: x.month)(days)
 82 |     month_begin = days[pd.Series(months) != pd.Series(months).shift(1)]
 83 |     context.month_begin = pd.Series(month_begin).dt.strftime('%Y-%m-%d').tolist()
 84 | 
 85 | 
 86 | def on_data(context):
 87 |     context.Num = context.Num + 1
 88 |     if context.Num < context.Len:  # 如果交易日个数小于Len+1，则进入下一个交易日进行回测
 89 |         return
 90 |     if datetime.datetime.strftime(context.now, '%Y-%m-%d') not in context.month_begin:  # 调仓频率为月,月初开始调仓
 91 |         return
 92 | 
 93 |     # 获取数据：
 94 |     KData = get_reg_kdata(reg_idx=context.reg_kdata[0], length=context.Len, fill_up=True, df=True)
 95 |     FData = get_reg_factor(reg_idx=context.reg_factor[0], target_indices=[x for x in range(300)], length=context.Len,
 96 |                            df=True)  # 获取因子数据
 97 | 
 98 |     # 特征构建：
 99 |     Fcode = context.FactorCode  # 标签不需要代号了
100 | 
101 |     # 数据存储变量：
102 |     # Close 字段为标签，Fcode 为标签
103 |     FactorData = pd.DataFrame(columns=(['idx', 'benefit'] + Fcode))  # 存储训练特征及标签样本
104 |     FactorDataTest = pd.DataFrame(columns=(['idx'] + Fcode))       # 存储预测特征样本
105 | 
106 |     # K线数据序号对齐
107 |     tempIdx = KData[KData['time'] == KData['time'][0]]['target_idx'].reset_index(drop=True)
108 | 
109 |     # 按标的处理数据：
110 |     for i in range(300):
111 |         # 训练特征集及训练标签构建：
112 |         # 临时数据存储变量:
113 |         FactorData0 = pd.DataFrame(np.full([1, len(Fcode) + 2], np.nan),
114 |             columns=(['idx', 'benefit'] + Fcode))
115 |         # 存储预测特征样本
116 |         FactorDataTest0 = pd.DataFrame(np.full([1, len(Fcode) + 1], np.nan), columns=(['idx'] + Fcode))
117 | 
118 |         # 因子数据 序号对齐, 提取当前标的的因子数据
119 |         FData0 = FData[FData['target_idx'] == tempIdx[i]].reset_index(drop=True)
120 | 
121 |         # 按特征处理数据：
122 |         for FC in context.FactorCode:
123 |             # 提取当前标的中与当前因子FC相同的部分
124 |             FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True)
125 |             FactorData0[FC] = FCData[0]  # 存储上一个月初的股票因子数据
126 | 
127 |         # 按标签处理数据：
128 |         # 提取当前标的的前一个月的K线面板数据
129 |         close = np.array(KData[KData['target_idx'] == tempIdx[i]]['close'])
130 |         # 计算当前标的在上一个月的收益率
131 |         benefit = (close[context.Len - 1] - close[0]) / close[0]
132 | 
133 |         FactorData0['benefit'] = benefit
134 |         # idx: 建立当前标的在训练样本集中的索引
135 |         FactorData0['idx'] = tempIdx[i]
136 |         # 合并数据：组成训练样本
137 |         FactorData = FactorData.append(FactorData0, ignore_index=True)
138 | 
139 |         # 预测特征集构建：建立标的索引
140 |         FactorDataTest0['idx'] = tempIdx[i]
141 |         # 按特征处理数据，过程同建立训练特征
142 |         for FC in context.FactorCode:
143 |             FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True)
144 |             FactorDataTest0[FC] = FCData[context.Len - 1]
145 | 
146 |         # 合并测试数据
147 |         FactorDataTest = FactorDataTest.append(FactorDataTest0, ignore_index=True)
148 | 
149 |     """
150 |     训练集和测试集的表头字段如下
151 |     FactorData DataFrame:
152 |     idx  |  benefit |  Factor 1 | Factor 2| ....
153 |     benefit 作为标签，上月初Factor作为特征，此处是单因子测试，只有一个特征
154 |     FactorDataTest DataFrame: 
155 |     idx | Factor 1 | Factor 2 | ...
156 |     本月初的因子作为预测特征
157 |     """
158 | 
159 |     # 数据清洗：
160 |     FactorData = FactorData.dropna(axis=0, how='any').reset_index(drop=True)  # 清洗数据
161 |     FactorDataTest = FactorDataTest.dropna(axis=0, how='any').reset_index(drop=True)  # 清洗数据
162 |     Idx = FactorDataTest['idx']  # 剩余标的序号
163 | 
164 |     # 按特征进行预处理
165 |     for Factor in context.FactorCode:
166 |         FactorData = filter_MAD(FactorData, Factor, 5)  # 中位数去极值法
167 |         FactorData[Factor] = preprocessing.scale(FactorData[Factor])  # 标准化
168 | 
169 |         FactorDataTest = filter_MAD(FactorDataTest, Factor, 5)  # 中位数去极值法
170 |         FactorDataTest[Factor] = preprocessing.scale(FactorDataTest[Factor])  # 标准化
171 | 
172 | 
173 |     # 训练和预测特征构建：# 行（样本数）* 列（特征数）
174 |     X = np.ones([FactorData.shape[0], len(Fcode)])
175 |     Xtest = np.ones([FactorDataTest.shape[0], len(Fcode)])
176 | 
177 |     # 循环填充特征到numpy数组中
178 |     for i in range(X.shape[1]):
179 |         X[:, i] = FactorData[Fcode[i]]
180 |         Xtest[:, i] = FactorDataTest[Fcode[i]]
181 | 
182 |     # 训练样本的标签，为浮点数的收益率
183 |     Y = (np.array(FactorData['benefit']).astype(float) > 0)
184 | 
185 |     gbdt_reg = GradientBoostingRegressor(n_estimators=50, learning_rate=0.1,
186 |                              max_depth=8, random_state=1, loss='ls')
187 | 
188 |     # 模型训练：
189 |     gbdt_reg.fit(X, Y)
190 | 
191 |     # LR分类预测：
192 |     y = gbdt_reg.predict(Xtest)
193 | 
194 |     # 交易设置：
195 |     positions = context.account().positions['volume_long']  # 多头持仓数量
196 |     valid_cash = context.account(account_idx=0).cash['valid_cash'][0]  # 可用资金
197 | 
198 |     P = context.cash_rate / (sum(y > 0) + 1)  # 设置每只标的可用资金比例 + 1 防止分母为0
199 | 
200 |     # 获取收益率的高分位数和低分位数
201 |     low_return, high_return = np.percentile(y, [context.down_pos, context.upper_pos])
202 | 
203 |     for i in range(len(Idx)):
204 |         position = positions.iloc[Idx[i]]
205 |         # if position == 0 and y[i] == True and valid_cash > 0:  # 若预测结果为true(收益率>0)，买入
206 |             # print('开仓')
207 |         if position == 0 and y[i] > high_return and valid_cash > 0 :
208 |             # 开仓数量 + 1防止分母为0
209 |             # print(valid_cash, P, KData['close'][Idx[i]])  # 这里的数目可考虑减少一点，，有时太多有时太少
210 |             Num = int(math.floor(valid_cash * P / 100 / (KData['close'][Idx[i]] + 1)) * 100)
211 | 
212 |             # 控制委托量，不要过大或过小,需要保证是100的倍数
213 |             if Num < 1000:
214 |                 Num *= 10
215 |             if Num > 100000:
216 |                 Num = int(Num / 10)
217 |                 Num -= Num % 100
218 |             if Num <= 0:  # 不开仓
219 |                 continue
220 | 
221 |             print("开仓数量为：{}".format(Num))
222 |             order_id = order_volume(account_idx=0, target_idx=int(Idx[i]), volume=Num, side=1, position_effect=1, order_type=2,
223 |                          price=0)  # 指定委托量开仓
224 |             # 对订单号为order_id的委托单设置止损，止损距离10个整数点，触发时，委托的方式用市价委托
225 |             # stop_loss_by_order(target_order_id=order_id, stop_type=1, stop_gap=10, order_type=2)
226 |         # elif position > 0 and y[i] == False: #预测结果为false(收益率<0)，卖出
227 |         elif position > 0 and y[i] < low_return:  # 当前持仓，且该股票收益小于低30%分位数，则平仓，卖出
228 |             print("平仓，数量为: {}".format(position / 10))
229 |             order_volume(account_idx=0, target_idx=int(Idx[i]), volume=int(position / 10),
230 |                          side=2, position_effect=2, order_type=2, price=0)  # 指定委托量平仓
231 | 
232 | 
if __name__ == '__main__':
    # Backtest configuration for the GBDT strategy.
    file_path = 'GBDT.py'
    block = 'hs300'  # index whose constituents form the target universe

    begin_date = '2016-01-01'
    end_date = '2018-09-30'

    strategy_name = 'GBDT'

    # The universe is the constituent list of `block` as of the start date;
    # `block` is now actually used instead of repeating the literal.
    run_backtest(strategy_name=strategy_name, file_path=file_path,
                 target_list=list(get_code_list(block, date=begin_date)['code']),
                 frequency='day', fre_num=1, begin_date=begin_date, end_date=end_date, fq=1)
245 | 


--------------------------------------------------------------------------------
/MLP.py:
--------------------------------------------------------------------------------
  1 | """
  2 | ----------------------------------------------------------
  3 | 策略思路：
  4 | 1. 回测标的：沪深300成分股
  5 | 2. 回测时间段：2016-01-01 至 2018-09-30
  6 | 3. 特征选择：每个大类夏普率最高的因子+夏普率高于1.5的因子
  7 |     - 质量类：ROIC, CashToCurrentLiability
  8 |     - 特色技术指标：STDDEV
  9 |     - 收益风险：DDNCR
 10 |     - 情绪类：TVMA20
 11 |     - 每股指标类：EnterpriseFCFPS
 12 |     - 价值类：PS
 13 |     - 基础类：AdminExpenseTTM, FinanExpenseTTM, NetIntExpense, GrossProfit
 14 |     - 行业分析师：FY12P
 15 |     - 动量类：TotalAssetGrowRate
 16 |     - 成长类：TotalAssetGrowRate
 17 |     - 常用技术类：MA120
 18 | ... 其余逻辑参照single_factor_test.py
 19 | ----------------------------------------------------------
 20 | """
 21 | from atrader import *
 22 | import pandas as pd
 23 | import numpy as np
 24 | from sklearn import svm
 25 | import math
 26 | from sklearn import preprocessing
 27 | import datetime
 28 | from sklearn.neural_network import MLPRegressor
 29 | 
 30 | # 作为全局变量进行测试
 31 | 
 32 | FactorCode = ['ROIC', 'CashToCurrentLiability', 'STDDEV', 'DDNCR', 'TVMA20', 'EnterpriseFCFPS',
 33 |               'PS', 'AdminExpenseTTM', 'FinanExpenseTTM', 'NetIntExpense', 'GrossProfit', 'FY12P',
 34 |               'AD', 'TotalAssetGrowRate', 'MA120']
 35 | 
 36 | 
 37 | # 中位数去极值法
 38 | def filter_MAD(df, factor, n=3):
 39 |     """
 40 |     :param df: 去极值的因子序列
 41 |     :param factor: 待去极值的因子
 42 |     :param n: 中位数偏差值的上下界倍数
 43 |     :return: 经过处理的因子dataframe
 44 |     """
 45 |     median = df[factor].quantile(0.5)
 46 |     new_median = ((df[factor] - median).abs()).quantile(0.5)
 47 |     max_range = median + n * new_median
 48 |     min_range = median - n * new_median
 49 | 
 50 |     for i in range(df.shape[0]):
 51 |         if df.loc[i, factor] > max_range:
 52 |             df.loc[i, factor] = max_range
 53 |         elif df.loc[i, factor] < min_range:
 54 |             df.loc[i, factor] = min_range
 55 |     return df
 56 | 
 57 | 
 58 | def init(context):
 59 |     # 账号设置：设置初始资金为 10000000 元
 60 |     set_backtest(initial_cash=10000000, future_cost_fee=1.0, stock_cost_fee=30, margin_rate=1.0, slide_price=0.0,
 61 |                  price_loc=1, deal_type=0, limit_type=0)
 62 |     # 注册数据：日频数据
 63 |     reg_kdata('day', 1)
 64 |     global FactorCode  # 全局单因子代号
 65 |     reg_factor(factor=FactorCode)
 66 |     context.FactorCode = FactorCode  #
 67 | 
 68 |     # 超参数设置：
 69 |     context.Len = 21    # 时间长度: 当交易日个数小于该事件长度时，跳过该交易日，假设平均每个月 21 个交易日左右  250/12
 70 |     context.Num = 0   # 记录当前交易日个数
 71 | 
 72 |     # 较敏感的超参数，需要调节
 73 |     context.upper_pos = 85  # 股票预测收益率的上分位数，高于则买入
 74 |     context.down_pos = 20   # 股票预测收益率的下分位数，低于则卖出
 75 |     context.cash_rate = 0.6  # 计算可用资金比例的分子，利益大于0的股票越多，比例越小
 76 | 
 77 |     # 确保月初调仓
 78 |     days = get_trading_days('SSE', '2016-01-01', '2018-09-30')
 79 |     months = np.vectorize(lambda x: x.month)(days)
 80 |     month_begin = days[pd.Series(months) != pd.Series(months).shift(1)]
 81 |     context.month_begin = pd.Series(month_begin).dt.strftime('%Y-%m-%d').tolist()
 82 | 
 83 | 
 84 | def on_data(context):
 85 |     context.Num = context.Num + 1
 86 |     if context.Num < context.Len:  # 如果交易日个数小于Len+1，则进入下一个交易日进行回测
 87 |         return
 88 |     if datetime.datetime.strftime(context.now, '%Y-%m-%d') not in context.month_begin:  # 调仓频率为月,月初开始调仓
 89 |         return
 90 | 
 91 |     # 获取数据：
 92 |     KData = get_reg_kdata(reg_idx=context.reg_kdata[0], length=context.Len, fill_up=True, df=True)
 93 |     FData = get_reg_factor(reg_idx=context.reg_factor[0], target_indices=[x for x in range(300)], length=context.Len,
 94 |                            df=True)  # 获取因子数据
 95 | 
 96 |     # 特征构建：
 97 |     Fcode = context.FactorCode  # 标签不需要代号了
 98 | 
 99 |     # 数据存储变量：
100 |     # Close 字段为标签，Fcode 为标签
101 |     FactorData = pd.DataFrame(columns=(['idx', 'benefit'] + Fcode))  # 存储训练特征及标签样本
102 |     FactorDataTest = pd.DataFrame(columns=(['idx'] + Fcode))       # 存储预测特征样本
103 | 
104 |     # K线数据序号对齐
105 |     tempIdx = KData[KData['time'] == KData['time'][0]]['target_idx'].reset_index(drop=True)
106 | 
107 |     # 按标的处理数据：
108 |     for i in range(300):
109 |         # 训练特征集及训练标签构建：
110 |         # 临时数据存储变量:
111 |         FactorData0 = pd.DataFrame(np.full([1, len(Fcode) + 2], np.nan),
112 |             columns=(['idx', 'benefit'] + Fcode))
113 |         # 存储预测特征样本
114 |         FactorDataTest0 = pd.DataFrame(np.full([1, len(Fcode) + 1], np.nan), columns=(['idx'] + Fcode))
115 | 
116 |         # 因子数据 序号对齐, 提取当前标的的因子数据
117 |         FData0 = FData[FData['target_idx'] == tempIdx[i]].reset_index(drop=True)
118 | 
119 |         # 按特征处理数据：
120 |         for FC in context.FactorCode:
121 |             # 提取当前标的中与当前因子FC相同的部分
122 |             FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True)
123 |             FactorData0[FC] = FCData[0]  # 存储上一个月初的股票因子数据
124 | 
125 |         # 按标签处理数据：
126 |         # 提取当前标的的前一个月的K线面板数据
127 |         close = np.array(KData[KData['target_idx'] == tempIdx[i]]['close'])
128 |         # 计算当前标的在上一个月的收益率
129 |         benefit = (close[context.Len - 1] - close[0]) / close[0]
130 | 
131 |         FactorData0['benefit'] = benefit
132 |         # idx: 建立当前标的在训练样本集中的索引
133 |         FactorData0['idx'] = tempIdx[i]
134 |         # 合并数据：组成训练样本
135 |         FactorData = FactorData.append(FactorData0, ignore_index=True)
136 | 
137 |         # 预测特征集构建：建立标的索引
138 |         FactorDataTest0['idx'] = tempIdx[i]
139 |         # 按特征处理数据，过程同建立训练特征
140 |         for FC in context.FactorCode:
141 |             FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True)
142 |             FactorDataTest0[FC] = FCData[context.Len - 1]
143 | 
144 |         # 合并测试数据
145 |         FactorDataTest = FactorDataTest.append(FactorDataTest0, ignore_index=True)
146 | 
147 |     """
148 |     训练集和测试集的表头字段如下
149 |     FactorData DataFrame:
150 |     idx  |  benefit |  Factor 1 | Factor 2| ....
151 |     benefit 作为标签，上月初Factor作为特征，此处是单因子测试，只有一个特征
152 |     FactorDataTest DataFrame: 
153 |     idx | Factor 1 | Factor 2 | ...
154 |     本月初的因子作为预测特征
155 |     """
156 | 
157 |     # 数据清洗：
158 |     FactorData = FactorData.dropna(axis=0, how='any').reset_index(drop=True)  # 清洗数据
159 |     FactorDataTest = FactorDataTest.dropna(axis=0, how='any').reset_index(drop=True)  # 清洗数据
160 |     Idx = FactorDataTest['idx']  # 剩余标的序号
161 | 
162 |     # 按特征进行预处理
163 |     for Factor in context.FactorCode:
164 |         FactorData = filter_MAD(FactorData, Factor, 5)  # 中位数去极值法
165 |         FactorData[Factor] = preprocessing.scale(FactorData[Factor])  # 标准化
166 | 
167 |         FactorDataTest = filter_MAD(FactorDataTest, Factor, 5)  # 中位数去极值法
168 |         FactorDataTest[Factor] = preprocessing.scale(FactorDataTest[Factor])  # 标准化
169 | 
170 |     # 训练和预测特征构建：# 行（样本数）* 列（特征数）
171 |     X = np.ones([FactorData.shape[0], len(Fcode)])
172 |     Xtest = np.ones([FactorDataTest.shape[0], len(Fcode)])
173 | 
174 |     # 循环填充特征到numpy数组中
175 |     for i in range(X.shape[1]):
176 |         X[:, i] = FactorData[Fcode[i]]
177 |         Xtest[:, i] = FactorDataTest[Fcode[i]]
178 | 
179 |     # 训练样本的标签，为浮点数的收益率
180 |     Y = (np.array(FactorData['benefit']).astype(float) > 0)
181 | 
182 |     mlp = MLPRegressor(hidden_layer_sizes=4, activation='logistic', solver='adam',
183 |                         max_iter=50)
184 | 
185 |     # 模型训练：
186 |     mlp.fit(X, Y)
187 | 
188 |     # LR分类预测：
189 |     y = mlp.predict(Xtest)
190 | 
191 |     # 交易设置：
192 |     positions = context.account().positions['volume_long']  # 多头持仓数量
193 |     valid_cash = context.account(account_idx=0).cash['valid_cash'][0]  # 可用资金
194 | 
195 |     P = context.cash_rate / (sum(y > 0) + 1)  # 设置每只标的可用资金比例 + 1 防止分母为0
196 | 
197 |     # 获取收益率的高分位数和低分位数
198 |     low_return,high_return = np.percentile(y, [context.down_pos, context.upper_pos])
199 | 
200 |     for i in range(len(Idx)):
201 |         position = positions.iloc[Idx[i]]
202 |         # if position == 0 and y[i] == True and valid_cash > 0:  # 若预测结果为true(收益率>0)，买入
203 |             # print('开仓')
204 |         if position == 0 and y[i] > high_return and valid_cash > 0: # 当前无仓，且该股票收益大于高70%分位数，则开仓，买入
205 |             # 开仓数量 + 1防止分母为0
206 |             # print(valid_cash, P, KData['close'][Idx[i]])  # 这里的数目可考虑减少一点，，有时太多有时太少
207 |             Num = int(math.floor(valid_cash * P / 100 / (KData['close'][Idx[i]] + 1)) * 100)
208 | 
209 |             # 控制委托量，不要过大或过小,需要保证是100的倍数
210 |             if Num < 1000:
211 |                 Num *= 10
212 |             if Num > 100000:
213 |                 Num = int(Num / 10)
214 |                 Num -= Num % 100
215 |             if Num <= 0:  # 不开仓
216 |                 continue
217 | 
218 |             print("开仓数量为：{}".format(Num))
219 |             order_id = order_volume(account_idx=0, target_idx=int(Idx[i]), volume=Num, side=1, position_effect=1, order_type=2,
220 |                          price=0)  # 指定委托量开仓
221 |             # 对订单号为order_id的委托单设置止损，止损距离10个整数点，触发时，委托的方式用市价委托
222 |             # stop_loss_by_order(target_order_id=order_id, stop_type=1, stop_gap=10, order_type=2)
223 |         # elif position > 0 and y[i] == False: #预测结果为false(收益率<0)，卖出
224 |         elif position > 0 and y[i] < low_return:  # 当前持仓，且该股票收益小于低30%分位数，则平仓，卖出
225 |             print("平仓，数量为: {}".format(position / 10))
226 |             order_volume(account_idx=0, target_idx=int(Idx[i]), volume=int(position / 10),
227 |                          side=2, position_effect=2, order_type=2, price=0)  # 指定委托量平仓
228 | 
229 | 
if __name__ == '__main__':
    # Backtest configuration for the MLP strategy.
    file_path = 'MLP.py'
    block = 'hs300'  # index whose constituents form the target universe

    begin_date = '2016-01-01'
    end_date = '2018-09-30'

    strategy_name = 'MLP'

    # The universe is the constituent list of `block` as of the start date;
    # `block` is now actually used instead of repeating the literal.
    run_backtest(strategy_name=strategy_name, file_path=file_path,
                 target_list=list(get_code_list(block, date=begin_date)['code']),
                 frequency='day', fre_num=1, begin_date=begin_date, end_date=end_date, fq=1)
243 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | <!--
 2 |  * @Author: your name
 3 |  * @Date: 2020-04-10 00:48:27
 4 |  * @LastEditTime: 2020-04-10 00:49:57
 5 |  * @LastEditors: Please set LastEditors
 6 |  * @Description: In User Settings Edit
 7 |  * @FilePath: \TIDIBEI\README.md
 8 |  -->
 9 | # Tidy-QuantTrading
10 | 泰迪杯数据挖掘比赛协作仓库。——基于机器学习方法构建多因子选股模型。
11 | > Group Members：XiaoRu Chen，Xiaoling Ling，Yihao Qiu
12 | 
13 | ## 成果
14 | 最优的随机森林模型：累计收益60%左右，经择时策略风险控制后，最大回撤率控制在9%左右，夏普率为0.9左右。
15 | 
16 | ### 单因子测试
17 | 
18 | 确定一个单因子测试文件，定义待测因子列表，执行多次单因子runtest。
19 | - 保留回测报告，获取字段，保存在CSV文件。
20 | - 结果可视化。
21 | - 筛选得到最优因子。
22 | - 因子做共线性分析，获取最终因子。
23 | 
24 | > _**官方提示：**_ 单因子问题，可以手动实现一下，或者可以弱化一下，通过间接的方法去实现这个问题，并不一定要实现一个完整的回测框架才能解决单因子分析的问题
25 | 
26 | ### 选用机器学习模型回测
27 | 
28 | - 特征和标签构建。
29 | - 等权重线性模型。
30 | - 建立baseline models，尝试使用多种模型。SVR，RNN(LSTM)，xgboost, random_forest，adaboost...
31 | - 交易逻辑确定。
32 | - 回测结果记录，分析。
33 | 
34 | 
35 | #### 关于模型的一些设想
36 | - 可参考论文[GBDT提取特征 + SVM二分类的方法](https://github.com/JoshuaQYH/TIDIBEI/blob/master/references/SA20190100000_36930159.pdf)
37 | - [LSTM进行选股](https://qiniu-images.datayes.com/huatai9.pdf)（在月频数据较少，可能效果不好）
38 | - [Adaboost](http://pg.jrj.com.cn/acc/Res/CN_RES/INVEST/2016/5/31/ed36ae43-0f6e-4051-bb9c-2e9a67632d74.pdf)、random forest、svm（[启发式](http://or.nsfc.gov.cn/bitstream/00001903-5/353458/1/1000008947591.pdf)）、[xgboost](https://cloud.tencent.com/developer/article/1137060) 等模型进行集成，例如 [Stacking](https://cloud.tencent.com/developer/article/1137060)、bagging。
39 | 
40 | ### 风险控制
41 | - 风险模型：barra模型
42 | - 择时模型：三均线择时策略。
43 | 
44 | ## 文件说明
45 | - `data_exploration.ipynb`: atrader API调用测试文件。
46 | - `get_factor_report.py`: 当单因子回测结束之后，执行文件，得到策略字段。
47 | - `single_factor_test.py`: 单因子测试文件。
48 | - `find_factor.py`: 自实现的因子绩效分析文件(**已弃用**)
49 | - `run_test.bat`: 脚本自动化运行python程序，实现多次执行策略。
50 | - `factor_analysis.py`: 类内因子共线性分析文件，绘制相关系数矩阵。
51 | - 以模型名标识模型回测文件。
52 | 
53 | ## LINK 
54 | - [AutoTrader 官方API文档](https://www.digquant.com.cn/documents/17#h1-u5FEBu901Fu5F00u59CB-0)
55 | - [股票交易名词解释: 多头，空头，平仓，持仓，调仓....](http://stock.hexun.com/menu/stepbystep/step3.html)
56 | - [头寸解释](https://wiki.mbalib.com/wiki/%E5%A4%B4%E5%AF%B8)
57 | - [阮一峰常用git命令清单](http://www.ruanyifeng.com/blog/2015/12/git-cheat-sheet.html)
58 | - [点宽因子数据字典](https://www.digquant.com.cn/documents/23)
59 | - [名词解释：IC/IR](https://xueqiu.com/1652627245/108835836)
60 | - [名词解释：alpha值/beta值](https://blog.csdn.net/yezi113yezi/article/details/81078128)
61 | - [A题华师现场解读](https://edu.tipdm.org/)
62 | - [人工智能阿尔法策略框架-对282个因子的分析](https://www.jiqizhixin.com/articles/2019-01-26-5)
63 | 


--------------------------------------------------------------------------------
/RF_line3.py:
--------------------------------------------------------------------------------
  1 | """
  2 | ----------------------------------------------------------
  3 | 策略思路：
  4 | 1. 回测标的：沪深300成分股
  5 | 2. 回测时间段：2016-01-01 至 2018-09-30
  6 | 3. 特征选择：每个大类夏普率最高的因子+夏普率高于1.5的因子
  7 |     - 质量类：ROIC, CashToCurrentLiability
  8 |     - 特色技术指标：STDDEV
  9 |     - 收益风险：DDNCR
 10 |     - 情绪类：TVMA20
 11 |     - 每股指标类：EnterpriseFCFPS
 12 |     - 价值类：PS
 13 |     - 基础类：AdminExpenseTTM, FinanExpenseTTM, NetIntExpense, GrossProfit
 14 |     - 行业分析师：FY12P
 15 |     - 动量类：TotalAssetGrowRate
 16 |     - 成长类：TotalAssetGrowRate
 17 |     - 常用技术类：MA120
 18 | ... 其余逻辑参照single_factor_test.py
 19 | 
 20 | ----------------------------------------------------------
 21 | """
 22 | from atrader import *
 23 | import pandas as pd
 24 | import numpy as np
 25 | from sklearn.ensemble import RandomForestRegressor
 26 | import math
 27 | from sklearn import preprocessing
 28 | import datetime
 29 | 
 30 | # 作为全局变量进行测试
 31 | FactorCode = ['ROIC', 'CashToCurrentLiability', 'STDDEV', 'DDNCR', 'PVI', 'EnterpriseFCFPS',
 32 |               'PS', 'AdminExpenseTTM', 'FinanExpenseTTM', 'NetIntExpense', 'NIAP', 'FY12P',
 33 |               'AD', 'TotalAssetGrowRate', 'MA120']
 34 | 
 35 | # 中位数去极值法
 36 | def filter_MAD(df, factor, n=3):
 37 |     """
 38 |     :param df: 去极值的因子序列
 39 |     :param factor: 待去极值的因子
 40 |     :param n: 中位数偏差值的上下界倍数
 41 |     :return: 经过处理的因子dataframe
 42 |     """
 43 |     median = df[factor].quantile(0.5)
 44 |     new_median = ((df[factor] - median).abs()).quantile(0.5)
 45 |     max_range = median + n * new_median
 46 |     min_range = median - n * new_median
 47 | 
 48 |     for i in range(df.shape[0]):
 49 |         if df.loc[i, factor] > max_range:
 50 |             df.loc[i, factor] = max_range
 51 |         elif df.loc[i, factor] < min_range:
 52 |             df.loc[i, factor] = min_range
 53 |     return df
 54 | 
 55 | 
 56 | def init(context):
 57 | 
 58 |     # context.SVM = svm.SVC(gamma='scale')
 59 |     # 账号设置：设置初始资金为 10000000 元
 60 |     set_backtest(initial_cash=10000000, future_cost_fee=1.0, stock_cost_fee=30, margin_rate=1.0, slide_price=0.0,
 61 |                  price_loc=1, deal_type=0, limit_type=0)
 62 |     # 注册数据：日频数据
 63 |     reg_kdata('day', 1)
 64 |     global FactorCode  # 全局单因子代号
 65 |     reg_factor(factor=FactorCode)
 66 | 
 67 |     context.FactorCode = FactorCode  #
 68 | 
 69 |     # 超参数设置：
 70 |     context.Len = 21    # 时间长度: 当交易日个数小于该事件长度时，跳过该交易日，假设平均每个月 21 个交易日左右  250/12
 71 |     context.Num = 0   # 记录当前交易日个数
 72 | 
 73 |     # 较敏感的超参数，需要调节
 74 |     context.upper_pos = 80  # 股票预测收益率的上分位数，高于则买入
 75 |     context.down_pos = 40   # 股票预测收益率的下分位数，低于则卖出
 76 |     context.cash_rate = 0.6  # 计算可用资金比例的分子，利益大于0的股票越多，比例越小
 77 | 
 78 |     # 确保月初调仓
 79 |     days = get_trading_days('SSE', '2016-01-01', '2018-09-30')
 80 |     months = np.vectorize(lambda x: x.month)(days)
 81 |     month_begin = days[pd.Series(months) != pd.Series(months).shift(1)]
 82 |     context.month_begin = pd.Series(month_begin).dt.strftime('%Y-%m-%d').tolist()
 83 | 
 84 |     # 三均线择时策略
 85 |     # 无持仓的情况下，5日和20日均线都大于60日均线，买入，等价于5日和20日均线上穿60日均线，买入；
 86 |     # 有持仓的情况下，5日和20日均线都小于60日均线，卖出，等价于5日和20日均线上穿60日均线，买入；
 87 |     context.win = 61  # 计算所需总数据长度
 88 |     context.win5 = 5  # 5日均线参数
 89 |     context.win20 = 20  # 20日均线参数
 90 |     context.win60 = 60  # 60日均线参数
 91 | 
 92 | def on_data(context):
 93 |     context.Num = context.Num + 1
 94 |     if context.Num < context.win:  # 如果交易日个数小于win，则进入下一个交易日进行回测
 95 |         return
 96 |     if datetime.datetime.strftime(context.now, '%Y-%m-%d') not in context.month_begin:  # 调仓频率为月,月初开始调仓
 97 |         return
 98 | 
 99 |     # 获取数据：
100 |     KData = get_reg_kdata(reg_idx=context.reg_kdata[0], length=context.Len, fill_up=True, df=True)
101 |     FData = get_reg_factor(reg_idx=context.reg_factor[0], target_indices=[x for x in range(300)], length=context.Len,
102 |                            df=True)  # 获取因子数据
103 | 
104 |     # 特征构建：
105 |     Fcode = context.FactorCode  # 标签不需要代号了
106 | 
107 |     # 数据存储变量：
108 |     # Close 字段为标签，Fcode 为标签
109 |     FactorData = pd.DataFrame(columns=(['idx', 'benefit'] + Fcode))  # 存储训练特征及标签样本
110 |     FactorDataTest = pd.DataFrame(columns=(['idx'] + Fcode))       # 存储预测特征样本
111 | 
112 |     # K线数据序号对齐
113 |     tempIdx = KData[KData['time'] == KData['time'][0]]['target_idx'].reset_index(drop=True)
114 | 
115 |     # 按标的处理数据：
116 |     for i in range(300):
117 |         # 训练特征集及训练标签构建：
118 |         # 临时数据存储变量:
119 |         FactorData0 = pd.DataFrame(np.full([1, len(Fcode) + 2], np.nan),
120 |             columns=(['idx', 'benefit'] + Fcode))
121 |         # 存储预测特征样本
122 |         FactorDataTest0 = pd.DataFrame(np.full([1, len(Fcode) + 1], np.nan), columns=(['idx'] + Fcode))
123 | 
124 |         # 因子数据 序号对齐, 提取当前标的的因子数据
125 |         FData0 = FData[FData['target_idx'] == tempIdx[i]].reset_index(drop=True)
126 | 
127 |         # 按特征处理数据：
128 |         for FC in context.FactorCode:
129 |             # 提取当前标的中与当前因子FC相同的部分
130 |             FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True)
131 |             FactorData0[FC] = FCData[0]  # 存储上一个月初的股票因子数据
132 | 
133 |         # 按标签处理数据：
134 |         # 提取当前标的的前一个月的K线面板数据
135 |         close = np.array(KData[KData['target_idx'] == tempIdx[i]]['close'])
136 |         # 计算当前标的在上一个月的收益率
137 |         benefit = (close[context.Len - 1] - close[0]) / close[0]
138 | 
139 |         FactorData0['benefit'] = benefit
140 |         # idx: 建立当前标的在训练样本集中的索引
141 |         FactorData0['idx'] = tempIdx[i]
142 |         # 合并数据：组成训练样本
143 |         FactorData = FactorData.append(FactorData0, ignore_index=True)
144 | 
145 |         # 预测特征集构建：建立标的索引
146 |         FactorDataTest0['idx'] = tempIdx[i]
147 |         # 按特征处理数据，过程同建立训练特征
148 |         for FC in context.FactorCode:
149 |             FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True)
150 |             FactorDataTest0[FC] = FCData[context.Len - 1]
151 | 
152 |         # 合并测试数据
153 |         FactorDataTest = FactorDataTest.append(FactorDataTest0, ignore_index=True)
154 | 
155 |     """
156 |     训练集和测试集的表头字段如下
157 |     FactorData DataFrame:
158 |     idx  |  benefit |  Factor 1 | Factor 2| ....
159 |     benefit 作为标签，上月初Factor作为特征，此处是单因子测试，只有一个特征
160 |     FactorDataTest DataFrame: 
161 |     idx | Factor 1 | Factor 2 | ...
162 |     本月初的因子作为预测特征
163 |     """
164 | 
165 |     # 数据清洗：
166 |     FactorData = FactorData.dropna(axis=0, how='any').reset_index(drop=True)  # 清洗数据
167 |     FactorDataTest = FactorDataTest.dropna(axis=0, how='any').reset_index(drop=True)  # 清洗数据
168 |     Idx = FactorDataTest['idx']  # 剩余标的序号
169 | 
170 |     # 按特征进行预处理
171 |     for Factor in context.FactorCode:
172 |         FactorData = filter_MAD(FactorData, Factor, 5)  # 中位数去极值法
173 |         FactorData[Factor] = preprocessing.scale(FactorData[Factor])  # 标准化
174 | 
175 |         FactorDataTest = filter_MAD(FactorDataTest, Factor, 5)  # 中位数去极值法
176 |         FactorDataTest[Factor] = preprocessing.scale(FactorDataTest[Factor])  # 标准化
177 | 
178 |     # 训练和预测特征构建：# 行（样本数）* 列（特征数）
179 |     X = np.ones([FactorData.shape[0], len(Fcode)])
180 |     Xtest = np.ones([FactorDataTest.shape[0], len(Fcode)])
181 | 
182 |     # 循环填充特征到numpy数组中
183 |     for i in range(X.shape[1]):
184 |         X[:, i] = FactorData[Fcode[i]]
185 |         Xtest[:, i] = FactorDataTest[Fcode[i]]
186 | 
187 |     # 训练样本的标签，为浮点数的收益率
188 |     Y = np.array(FactorData['benefit']).astype(float)
189 | 
190 |     random_forest = RandomForestRegressor(max_depth=5, n_estimators=50)
191 | 
192 |     # 模型训练：
193 |     random_forest.fit(X, Y)
194 | 
195 |     # LR分类预测：
196 |     y = random_forest.predict(Xtest)
197 |     # 交易设置：
198 |     positions = context.account().positions['volume_long']  # 多头持仓数量
199 |     valid_cash = context.account(account_idx=0).cash['valid_cash'][0]  # 可用资金
200 | 
201 |     P = context.cash_rate / (sum(y > 0) + 1)  # 设置每只标的可用资金比例 + 1 防止分母为0
202 | 
203 |     # 获取收益率的高分位数和低分位数
204 |     low_return, high_return = np.percentile(y, [context.down_pos, context.upper_pos])
205 | 
206 |     # 进行择时准备
207 |     # 获取前61天的数据
208 |     data = get_reg_kdata(reg_idx=context.reg_kdata[0], length=context.win, fill_up=True,
209 |                          df=True)  # data值为数据帧DataFrame类型，存储所有标的的K线行情数据。
210 |     # 获取收盘价数据
211 |     close = data.close.values.reshape(-1, context.win).astype(float)  # 从data行情数据中获取收盘价，并转为ndarray数据类型
212 |     # 计算均线值：
213 |     ma5 = close[:, -context.win5:].mean(axis=1)    # 5日均线
214 |     ma20 = close[:, -context.win20:].mean(axis=1)  # 20日均线
215 |     ma60 = close[:, -context.win60:].mean(axis=1)  # 60日均线
216 | 
217 |     # 获取标的序号：从0~299
218 |     target = np.array(range(300))
219 |     positions_val = context.account().positions['volume_long'].values  # 多头持仓数量
220 |     # 计算买入信号：
221 |     buy_signal = np.logical_and(positions_val == 0, ma5 > ma60,
222 |                                 ma20 > ma60)  # 无持仓的情况下，5日和20日均线都大于60日均线，买入，等价于5日和20日均线上穿60日均线，买入；
223 |     # 计算卖出信号：
224 |     sell_signal = np.logical_and(positions_val > 0, ma5 < ma60,
225 |                                  ma20 < ma60)  # 有持仓的情况下，5日和20日均线都小于60日均线，卖出，等价于5日和20日均线上穿60日均线，买入；
226 |     # 获取买入信号标的的序号
227 |     target_buy = target[buy_signal].tolist()  # 一个记录了标的是否要买
228 |     # 获取卖出信号标的的序号
229 |     target_sell = target[sell_signal].tolist() # 同上
230 |     for i in range(len(Idx)):
231 |         position = positions.iloc[Idx[i]]
232 | 
233 |         # 当前无仓，且该股票收益大于高80%分位数，且5日和20日均线都大于或等于60日均线 则开仓，买入
234 |         if position == 0 and y[i] > high_return and valid_cash > 0 and Idx[i] in target_buy:
235 |             Num = int(math.floor(valid_cash * P / 100 / (KData['close'][Idx[i]] + 1)) * 100)
236 |             # 控制委托量，不要过大或过小,需要保证是100的倍数
237 |             if Num < 1000:
238 |                 Num *= 10
239 |             if Num > 100000:
240 |                 Num = int(Num / 10)
241 |                 Num -= Num % 100
242 |             if Num <= 0:  # 不开仓
243 |                 continue
244 |             print("开仓数量为：{}".format(Num))
245 |             order_id = order_volume(account_idx=0, target_idx=int(Idx[i]), volume=Num, side=1, position_effect=1, order_type=2,
246 |                          price=0)  # 指定委托量开仓
247 |             # 对订单号为order_id的委托单设置止损，止损距离10个整数点，触发时，委托的方式用市价委托
248 |             stop_loss_by_order(target_order_id=order_id, stop_type=1, stop_gap=15, order_type=2)
249 | 
250 |         # 当前持仓，且该股票收益小于低20%分位数，5日和20日均线都小于60日均线 则平仓，卖出
251 |         elif position > 0 and y[i] < low_return and Idx[i] in target_sell:
252 |             print("平仓，数量为: {}".format(position))
253 |             order_volume(account_idx=0, target_idx=int(Idx[i]), volume=int(position),
254 |                          side=2, position_effect=2, order_type=2, price=0)  # 指定委托量平仓
255 | 
256 | 
if __name__ == '__main__':
    # Backtest configuration for this strategy file.
    strategy_name = 'RF_line3'
    file_path = 'RF_line3.py'
    block = 'hs300'  # index block whose constituents form the target universe
    begin_date = '2016-01-01'
    end_date = '2018-09-30'

    # Target universe: constituent codes of the block as of the backtest start date.
    constituents = get_code_list(block, date=begin_date)
    target_list = list(constituents['code'])

    # Daily bars (frequency='day', fre_num=1); fq=1 is passed through unchanged.
    run_backtest(strategy_name=strategy_name, file_path=file_path, target_list=target_list,
                 frequency='day', fre_num=1, begin_date=begin_date, end_date=end_date, fq=1)
269 | 


--------------------------------------------------------------------------------
/adaboost_model.py:
--------------------------------------------------------------------------------
  1 | """
  2 | ----------------------------------------------------------
  3 | 策略思路：
  4 | 1. 回测标的：沪深300成分股
  5 | 2. 回测时间段：2016-01-01 至 2018-09-30
  6 | 3. 特征选择：每个大类夏普率最高的因子+夏普率高于1.5的因子
  7 |     - 质量类：ROIC, CashToCurrentLiability
  8 |     - 特色技术指标：STDDEV
  9 |     - 收益风险：DDNCR
 10 |     - 情绪类：TVMA20/PVI
 11 |     - 每股指标类：EnterpriseFCFPS
 12 |     - 价值类：PS
 13 |     - 基础类：AdminExpenseTTM, FinanExpenseTTM, NetIntExpense, GrossProfit/NIAP
 14 |     - 行业分析师：FY12P
 15 |     - 动量类：AD
 16 |     - 成长类：TotalAssetGrowRate
 17 |     - 常用技术类：MA120
 18 | ... 其余逻辑参照single_factor_test.py
 19 | ----------------------------------------------------------
 20 | """
 21 | from atrader import *
 22 | import pandas as pd
 23 | import numpy as np
 24 | from sklearn import svm
 25 | import math
 26 | from sklearn import preprocessing
 27 | import datetime
 28 | from sklearn.tree import DecisionTreeRegressor
 29 | from sklearn.ensemble import AdaBoostRegressor
 30 | from sklearn.decomposition import PCA
 31 | 
 32 | # 作为全局变量进行测试
 33 | FactorCode = ['ROIC', 'CashToCurrentLiability', 'STDDEV', 'DDNCR', 'PVI', 'EnterpriseFCFPS',
 34 |               'PS', 'AdminExpenseTTM', 'FinanExpenseTTM', 'NetIntExpense', 'NIAP', 'FY12P',
 35 |               'AD', 'TotalAssetGrowRate', 'MA120']
 36 | 
 37 | # 中位数去极值法
 38 | def filter_MAD(df, factor, n=3):
 39 |     """
 40 |     :param df: 去极值的因子序列
 41 |     :param factor: 待去极值的因子
 42 |     :param n: 中位数偏差值的上下界倍数
 43 |     :return: 经过处理的因子dataframe
 44 |     """
 45 |     median = df[factor].quantile(0.5)
 46 |     new_median = ((df[factor] - median).abs()).quantile(0.5)
 47 |     max_range = median + n * new_median
 48 |     min_range = median - n * new_median
 49 | 
 50 |     for i in range(df.shape[0]):
 51 |         if df.loc[i, factor] > max_range:
 52 |             df.loc[i, factor] = max_range
 53 |         elif df.loc[i, factor] < min_range:
 54 |             df.loc[i, factor] = min_range
 55 |     return df
 56 | 
 57 | 
 58 | def init(context):
 59 |     # 账号设置：设置初始资金为 10000000 元
 60 |     set_backtest(initial_cash=10000000, future_cost_fee=1.0, stock_cost_fee=30, margin_rate=1.0, slide_price=0.0,
 61 |                  price_loc=1, deal_type=0, limit_type=0)
 62 |     # 注册数据：日频数据
 63 |     reg_kdata('day', 1)
 64 |     global FactorCode  # 全局单因子代号
 65 |     reg_factor(factor=FactorCode)
 66 |     print("init 函数, 注册因子为{}".format(FactorCode[0]))
 67 |     context.FactorCode = FactorCode  #
 68 | 
 69 |     # 超参数设置：
 70 |     context.Len = 21    # 时间长度: 当交易日个数小于该事件长度时，跳过该交易日，假设平均每个月 21 个交易日左右  250/12
 71 |     context.Num = 0   # 记录当前交易日个数
 72 | 
 73 |     # 较敏感的超参数，需要调节
 74 |     context.upper_pos = 80  # 股票预测收益率的上分位数，高于则买入
 75 |     context.down_pos = 20   # 股票预测收益率的下分位数，低于则卖出
 76 |     context.cash_rate = 0.6  # 计算可用资金比例的分子，利益大于0的股票越多，比例越小
 77 | 
 78 |     # 确保月初调仓
 79 |     days = get_trading_days('SSE', '2016-01-01', '2018-09-30')
 80 |     months = np.vectorize(lambda x: x.month)(days)
 81 |     month_begin = days[pd.Series(months) != pd.Series(months).shift(1)]
 82 |     context.month_begin = pd.Series(month_begin).dt.strftime('%Y-%m-%d').tolist()
 83 | 
 84 | 
 85 | def on_data(context):
 86 |     context.Num = context.Num + 1
 87 |     if context.Num < context.Len:  # 如果交易日个数小于Len+1，则进入下一个交易日进行回测
 88 |         return
 89 |     if datetime.datetime.strftime(context.now, '%Y-%m-%d') not in context.month_begin:  # 调仓频率为月,月初开始调仓
 90 |         return
 91 | 
 92 |     # 获取数据：
 93 |     KData = get_reg_kdata(reg_idx=context.reg_kdata[0], length=context.Len, fill_up=True, df=True)
 94 |     FData = get_reg_factor(reg_idx=context.reg_factor[0], target_indices=[x for x in range(300)], length=context.Len,
 95 |                            df=True)  # 获取因子数据
 96 | 
 97 |     # 特征构建：
 98 |     Fcode = context.FactorCode  # 标签不需要代号了
 99 | 
100 |     # 数据存储变量：
101 |     # Close 字段为标签，Fcode 为标签
102 |     FactorData = pd.DataFrame(columns=(['idx', 'benefit'] + Fcode))  # 存储训练特征及标签样本
103 |     FactorDataTest = pd.DataFrame(columns=(['idx'] + Fcode))       # 存储预测特征样本
104 | 
105 |     # K线数据序号对齐
106 |     tempIdx = KData[KData['time'] == KData['time'][0]]['target_idx'].reset_index(drop=True)
107 | 
108 |     # 按标的处理数据：
109 |     for i in range(300):
110 |         # 训练特征集及训练标签构建：
111 |         # 临时数据存储变量:
112 |         FactorData0 = pd.DataFrame(np.full([1, len(Fcode) + 2], np.nan),
113 |             columns=(['idx', 'benefit'] + Fcode))
114 |         # 存储预测特征样本
115 |         FactorDataTest0 = pd.DataFrame(np.full([1, len(Fcode) + 1], np.nan), columns=(['idx'] + Fcode))
116 | 
117 |         # 因子数据 序号对齐, 提取当前标的的因子数据
118 |         FData0 = FData[FData['target_idx'] == tempIdx[i]].reset_index(drop=True)
119 | 
120 |         # 按特征处理数据：
121 |         for FC in context.FactorCode:
122 |             # 提取当前标的中与当前因子FC相同的部分
123 |             FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True)
124 |             FactorData0[FC] = FCData[0]  # 存储上一个月初的股票因子数据
125 | 
126 |         # 按标签处理数据：
127 |         # 提取当前标的的前一个月的K线面板数据
128 |         close = np.array(KData[KData['target_idx'] == tempIdx[i]]['close'])
129 |         # 计算当前标的在上一个月的收益率
130 |         benefit = (close[context.Len - 1] - close[0]) / close[0]
131 | 
132 |         FactorData0['benefit'] = benefit
133 |         # idx: 建立当前标的在训练样本集中的索引
134 |         FactorData0['idx'] = tempIdx[i]
135 |         # 合并数据：组成训练样本
136 |         FactorData = FactorData.append(FactorData0, ignore_index=True)
137 | 
138 |         # 预测特征集构建：建立标的索引
139 |         FactorDataTest0['idx'] = tempIdx[i]
140 |         # 按特征处理数据，过程同建立训练特征
141 |         for FC in context.FactorCode:
142 |             FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True)
143 |             FactorDataTest0[FC] = FCData[context.Len - 1]
144 | 
145 |         # 合并测试数据
146 |         FactorDataTest = FactorDataTest.append(FactorDataTest0, ignore_index=True)
147 | 
148 |     """
149 |     训练集和测试集的表头字段如下
150 |     FactorData DataFrame:
151 |     idx  |  benefit |  Factor 1 | Factor 2| ....
152 |     benefit 作为标签，上月初Factor作为特征，此处是单因子测试，只有一个特征
153 |     FactorDataTest DataFrame: 
154 |     idx | Factor 1 | Factor 2 | ...
155 |     本月初的因子作为预测特征
156 |     """
157 | 
158 |     # 数据清洗：
159 |     FactorData = FactorData.dropna(axis=0, how='any').reset_index(drop=True)  # 清洗数据
160 |     FactorDataTest = FactorDataTest.dropna(axis=0, how='any').reset_index(drop=True)  # 清洗数据
161 |     Idx = FactorDataTest['idx']  # 剩余标的序号
162 | 
163 |     # 按特征进行预处理
164 |     for Factor in context.FactorCode:
165 |         FactorData = filter_MAD(FactorData, Factor, 5)  # 中位数去极值法
166 |         FactorData[Factor] = preprocessing.scale(FactorData[Factor])  # 标准化
167 | 
168 |         FactorDataTest = filter_MAD(FactorDataTest, Factor, 5)  # 中位数去极值法
169 |         FactorDataTest[Factor] = preprocessing.scale(FactorDataTest[Factor])  # 标准化
170 | 
171 |     # 训练和预测特征构建：# 行（样本数）* 列（特征数）
172 |     X = np.ones([FactorData.shape[0], len(Fcode)])
173 |     Xtest = np.ones([FactorDataTest.shape[0], len(Fcode)])
174 | 
175 |     # 循环填充特征到numpy数组中
176 |     for i in range(X.shape[1]):
177 |         X[:, i] = FactorData[Fcode[i]]
178 |         Xtest[:, i] = FactorDataTest[Fcode[i]]
179 | 
180 |     # 训练样本的标签，为浮点数的收益率
181 |     Y = (np.array(FactorData['benefit']).astype(float) > 0)
182 | 
183 |     rng = np.random.RandomState(1)
184 |     adaboost = AdaBoostRegressor(DecisionTreeRegressor(max_depth=9),
185 |                                n_estimators=60, random_state=rng)
186 | 
187 |     pca = PCA(n_components=5)
188 | 
189 |     X_pca = pca.fit_transform(X)
190 |     Xtest_pca = pca.fit_transform(Xtest)
191 | 
192 |     # 模型训练：
193 |     adaboost.fit(X_pca, Y)
194 | 
195 |     # 分类预测：
196 |     y = adaboost.predict(Xtest_pca)
197 | 
198 |     # 交易设置：
199 |     positions = context.account().positions['volume_long']  # 多头持仓数量
200 |     valid_cash = context.account(account_idx=0).cash['valid_cash'][0]  # 可用资金
201 | 
202 |     P = context.cash_rate / (sum(y > 0) + 1)  # 设置每只标的可用资金比例 + 1 防止分母为0
203 | 
204 |     # 获取收益率的高分位数和低分位数
205 |     low_return, high_return = np.percentile(y, [context.down_pos, context.upper_pos])
206 | 
207 |     for i in range(len(Idx)):
208 |         position = positions.iloc[Idx[i]]
209 |         # if position == 0 and y[i] == True and valid_cash > 0:  # 若预测结果为true(收益率>0)，买入
210 |             # print('开仓')
211 |         if position == 0 and y[i] > high_return and valid_cash > 0: # 当前无仓，且该股票收益大于高70%分位数，则开仓，买入
212 |             # 开仓数量 + 1防止分母为0
213 |             # print(valid_cash, P, KData['close'][Idx[i]])  # 这里的数目可考虑减少一点，，有时太多有时太少
214 |             Num = int(math.floor(valid_cash * P / 100 / (KData['close'][Idx[i]] + 1)) * 100)
215 | 
216 |             # 控制委托量，不要过大或过小,需要保证是100的倍数
217 |             if Num < 1000:
218 |                 Num *= 10
219 |             if Num > 100000:
220 |                 Num = int(Num / 10)
221 |                 Num -= Num % 100
222 |             if Num <= 0:  # 不开仓
223 |                 continue
224 | 
225 |             print("开仓数量为：{}".format(Num))
226 |             order_id = order_volume(account_idx=0, target_idx=int(Idx[i]), volume=Num, side=1, position_effect=1, order_type=2,
227 |                          price=0)  # 指定委托量开仓
228 |             # 对订单号为order_id的委托单设置止损，止损距离10个整数点，触发时，委托的方式用市价委托
229 |             # stop_loss_by_order(target_order_id=order_id, stop_type=1, stop_gap=10, order_type=2)
230 |         # elif position > 0 and y[i] == False: #预测结果为false(收益率<0)，卖出
231 |         elif position > 0 and y[i] < low_return:  # 当前持仓，且该股票收益小于低30%分位数，则平仓，卖出
232 |             print("平仓，数量为: {}".format(position / 10 ))
233 |             order_volume(account_idx=0, target_idx=int(Idx[i]), volume=int(position / 10),
234 |                          side=2, position_effect=2, order_type=2, price=0)  # 指定委托量平仓
235 | 
236 | 
if __name__ == '__main__':
    # Backtest configuration for this strategy file.
    strategy_name = 'adaboost'
    file_path = 'adaboost_model.py'
    block = 'hs300'  # index block whose constituents form the target universe
    begin_date = '2016-01-01'
    end_date = '2018-09-30'

    # Target universe: constituent codes of the block as of the backtest start date.
    constituents = get_code_list(block, date=begin_date)
    target_list = list(constituents['code'])

    # Daily bars (frequency='day', fre_num=1); fq=1 is passed through unchanged.
    run_backtest(strategy_name=strategy_name, file_path=file_path, target_list=target_list,
                 frequency='day', fre_num=1, begin_date=begin_date, end_date=end_date, fq=1)
250 | 


--------------------------------------------------------------------------------
/factor_analysis.py:
--------------------------------------------------------------------------------
 1 | """
 2 | author: qiuyihao
 3 | date: 2019-04-22
 4 | description: 同类因子进行共线性分析,绘制相关系数矩阵
 5 |             获取每一类因子中的历史序列，该序列每一个因子由同时期股票的非空因子平均求得。
 6 |             计算相关序列的相关系数，绘制相关系数矩阵
 7 | """
 8 | import numpy as np
 9 | import pandas as pd
10 | import atrader as at
11 | import seaborn as sns
12 | import matplotlib.pyplot as plt
13 | 
14 | 
def draw_heatmap(df, filename):
    """Draw the pairwise correlation matrix of ``df`` as an annotated heatmap.

    The figure is saved to ``filename`` and then displayed.

    :param df: DataFrame whose columns are correlated against each other
    :param filename: path passed to ``plt.savefig``
    """
    correlations = df.corr()
    _, axes = plt.subplots(figsize=(13, 13))
    # Colour scale is pinned to [0, 1]; each cell is annotated with its value.
    sns.heatmap(correlations, ax=axes, annot=True, vmax=1, vmin=0, square=True, cmap='Blues')
    plt.savefig(filename)
    plt.show()
21 | 
22 | 
def analysis_factor(factor_list, code_list, filename):
    """Average the factor histories over all stocks and plot their correlations.

    For every stock in ``code_list`` the factor history (2016-01-01 to
    2018-09-30) is fetched; stocks with any missing value are skipped. The
    remaining frames are summed element-wise, divided by the number of stocks
    kept, and handed to ``draw_heatmap``.

    :param factor_list: factor names to compare
    :param code_list: stock codes to average over
    :param filename: output file name for the heatmap
    """
    print(factor_list, code_list[0])
    date_range = dict(begin_date='2016-01-01', end_date='2018-09-30')

    # The first stock seeds the running sum.
    factor_data = at.get_factor_by_code(factor_list=factor_list, target=code_list[0], **date_range)
    factor_data = factor_data.drop(['date'], axis=1)

    # Starts at the total number of stocks and is decremented for each skipped
    # one, so it ends up as the number of stocks that contributed.
    not_full_num = len(code_list)

    if factor_data.isnull().any().any():
        # First stock is incomplete: start the sum from zeros of the same shape.
        factor_data = pd.DataFrame(np.full(factor_data.shape, 0.0), columns=factor_list)
        not_full_num -= 1

    factor_data.columns = factor_list

    for code in code_list[1:]:
        tmp_data = at.get_factor_by_code(factor_list, target=code, **date_range)
        tmp_data = tmp_data.drop(['date'], axis=1)

        if tmp_data.isnull().any().any():
            not_full_num -= 1
            print("NAN... pass ")
            continue

        # Some factors are very large in magnitude; shrink the whole frame when
        # the first column's mean is huge.
        # NOTE(review): this rescaling is never applied to the first stock.
        if tmp_data.iloc[:, 0].mean() >= 10000000:
            tmp_data /= 100000
        # NOTE(review): relies on tmp_data having the same column labels and row
        # index as the running sum — confirm against the data API.
        factor_data = factor_data + tmp_data
        print("add ... ")

    factor_data /= not_full_num
    draw_heatmap(factor_data, filename)
59 | 
60 | 
if __name__ == '__main__':
    # Constituent codes of the hs300 block as of 2016-01-01.
    code_list = at.get_code_list('hs300', date='2016-01-01')['code'].tolist()

    # Output file names, one per factor group below (same order).
    file_name_list = ["Q1_基础类", "Q1_质量类"]
    # Disabled groups:
    #   "情绪类", "价值类", "每股指标类", "行业分析师类", "特色技术指标类"

    factor_list = [
        ['AdminExpenseTTM', 'NIAP', 'FinanExpenseTTM', 'NetIntExpense'],  # basic
        ['DebtEquityRatio', 'SuperQuickRatio'],  # quality
        # ['TVMA20', 'VOL20', 'OBV20', 'JDQS20'],  # sentiment
        # ['PE', 'PB', 'PS', 'NLSIZE', 'TA2EV', 'CTOP'],  # growth factors
        # ['BasicEPS', 'EPS', 'EnterpriseFCFPS'],  # per-share indicators
        # ['RSTR24', 'FY12P', 'SFY12P', 'PEIndu', 'EPIBS'],  # industry analyst
        # ['AVGPRICE', 'BOP', 'KAMA', 'LINEARREG', 'STDDEV']  # special technical indicators
    ]

    # One correlation heatmap per factor group.
    for file_name, factor in zip(file_name_list, factor_list):
        print(file_name)
        analysis_factor(factor, code_list, file_name)
83 | 
84 | 


--------------------------------------------------------------------------------
/find_factor.py:
--------------------------------------------------------------------------------
  1 | """
  2 | author: qiuyihao
  3 | date: 2019/04/13 - 04-15
  4 | description: 单因子测试
  5 | """
  6 | import pandas as pd
  7 | import numpy as np
  8 | import atrader as at
  9 | from sklearn import preprocessing
 10 | from sklearn import linear_model
 11 | import time
 12 | from scipy.stats import pearsonr
 13 | import datetime
 14 | 
 15 | 
 16 | # 中位数去极值法
 17 | def filter_MAD(df, factor, n=5):
 18 |     """
 19 |     :param df: 去极值的因子序列
 20 |     :param factor: 待去极值的因子
 21 |     :param n: 中位数偏差值的上下界倍数
 22 |     :return: 经过处理的因子dataframe
 23 |     """
 24 |     # print(df)
 25 | 
 26 |     median = df[factor].quantile(0.5)
 27 |     new_median = ((df[factor] - median).abs()).quantile(0.5)
 28 |     max_range = median + n * new_median
 29 |     min_range = median - n * new_median
 30 | 
 31 |     for i in range(df.shape[0]):
 32 |         if df.loc[i, factor] > max_range:
 33 |             df.loc[i, factor] = max_range
 34 |         elif df.loc[i, factor] < min_range:
 35 |             df.loc[i, factor] = min_range
 36 |     return df
 37 | 
 38 | 
 39 | # 判断某一个日期是否为周末，如果为周末，需要返回一个非周末的字符串。
 40 | # 当时间是月末时，时间需要向前，当时间时月初是，时间需要向后
 41 | # 采用递归实现,最终返回一个非周末的时间串
 42 | # (其实这个函数的作用就是帮助减少几次获取因子而已，，，事后发现还不如直接靠get_factor_by_day判断
 43 | def find_day_str(day_str):
 44 |     """
 45 |     :param day_str: 要求标准的时间串 如 2016-01-01
 46 |     :return: 返回一个合适的时间串
 47 |     """
 48 |     year = int(day_str[0:4])
 49 |     month = int(day_str[5:7])
 50 |     day = int(day_str[8:10])
 51 |     any_day = datetime.datetime(year, month, day).strftime("%w")
 52 |     result_str = day_str
 53 |     if any_day == '6' or any_day == '0':
 54 |         if day < 15:
 55 |             day += 1
 56 |             if day < 10:
 57 |                 day = '0' + str(day)
 58 |             else:
 59 |                 day = str(day)
 60 |         elif day > 15:
 61 |             day -= 1
 62 |             day = str(day)
 63 |         result_str = find_day_str(day_str[0:8] + day)
 64 |     return result_str
 65 | 
 66 | 
 67 | # 生成起始日期对
 68 | def create_date(begin_date, end_date):
 69 |     """
 70 |     :param begin_date: 开始日期 指明起始年月  如 '2018-01'
 71 |     :param end_date: 结束日期 指明结束年月    如 '2018-10'
 72 |     :return: 一个起始年月日列表,一个结束年月日列表
 73 |      以一个月的第一天和最后一天作为一对日期 如 ['2018-01-01',..] ['2018-01-31',..]
 74 |      注：需要排斥这两天为周末或者法定假期的时候
 75 |     """
 76 |     # 解析字符串
 77 |     begin_year = int(begin_date[0:4])
 78 |     begin_month = int(begin_date[5:7])
 79 |     end_year = int(end_date[0:4])
 80 |     end_month = int(end_date[5:7])
 81 | 
 82 |     # 待拼接的年日月
 83 |     year = begin_year
 84 |     month = begin_month
 85 | 
 86 |     begin_date_list = []
 87 |     end_date_list = []
 88 | 
 89 |     big_month = [1, 3, 5, 7, 8, 10, 12]
 90 |     small_month = [4, 6, 9, 11]   # 二月另外判断
 91 |     while year <= end_year and month <= end_month:
 92 |         start = ''
 93 |         end = ''
 94 | 
 95 |         if month >= 10:
 96 |             start = str(year) + '-' + str(month) + '-' + '01'
 97 |             end = str(year) + '-' + str(month) + '-'
 98 |         else:
 99 |             start = str(year) + '-0' + str(month) + '-' + '01'
100 |             end = str(year) + '-0' + str(month) + '-'
101 | 
102 |         # 避免出现节假日或者周末,若出现则往后推一天
103 |         while at.get_factor_by_day(factor_list=["PE"], target_list=["SZSE.000001"], date=start) is None:
104 |             start_day = int(start[8:10]) + 1
105 |             if start_day < 10:
106 |                 start = start[0:8] + '0' + str(start_day)
107 |             else:
108 |                 start = start[0:8] + str(start_day)
109 | 
110 |         begin_date_list.append(start)  # 插入一个非周末非法定假期的开始时间串
111 | 
112 |         # 判断月为大，为小
113 |         if month in big_month:
114 |             end = end + '31'
115 |         elif month in small_month:
116 |             end = end + '30'
117 |         elif month == 2:
118 |             if year % 4 == 0 and year % 100 != 0 or year % 400 == 0:
119 |                 end = end + '29'
120 |             else:
121 |                 end = end + '28'
122 | 
123 |         while at.get_factor_by_day(factor_list=["PE"], target_list=["SZSE.000001"], date=end) is None:
124 |             end_day = int(end[8:10]) - 1
125 |             end = end[0:8] + str(end_day)
126 | 
127 |         end_date_list.append(end)  # 插入一个非周末，非法定假期的结束时间串
128 | 
129 |         month += 1
130 |         if month == 13:
131 |             year += 1
132 |             month = 1
133 |     return begin_date_list, end_date_list
134 | 
135 | 
# Return of a single stock over one period
def cal_yield_rate(code, begin_date, end_date):
    """Compute ``(last close - first close) / first close`` for one stock.

    :param code: stock code
    :param begin_date: first day of the K-line window (month start)
    :param end_date: last day of the K-line window (month end)
    :return: the period return, or ``-1`` when it cannot be computed
        (callers treat ``-1`` as the failure marker)
    """
    day_data = at.get_kdata(target_list=[code], frequency='day', fre_num=1, begin_date=begin_date,
                            end_date=end_date, fill_up=False, df=True, fq=1, sort_by_date=True)
    try:
        closes = day_data['close']
        last_close = closes[len(day_data) - 1]
        first_close = closes[0]
        return (last_close - first_close) / first_close
    except Exception:
        # Any failure (for example no rows returned) is reported as -1.
        return -1
152 | 
153 | 
# Stock layering: split the pool by circulating market value (small / mid / large)
def stock_layered(code_list, sign = 0):
    """Return the part of ``code_list`` that belongs to the requested layer.

    Only ``sign == 0`` (no layering) is implemented.

    :param code_list: un-layered targets
    :param sign: 0 = no layering; 1 = small cap; 2 = mid cap; 3 = large cap
    :return: ``code_list`` unchanged when ``sign == 0``
    :raises NotImplementedError: for ``sign`` 1, 2 or 3 (layering not written yet)
    :raises ValueError: for any other ``sign``
    """
    if sign == 0:
        return code_list
    # Fail loudly instead of silently returning None, which only surfaced later
    # as an unrelated "'NoneType' object is not subscriptable" in the caller.
    if sign in (1, 2, 3):
        raise NotImplementedError(
            "market-value layering (sign={}) is not implemented; use sign=0".format(sign))
    raise ValueError("sign must be 0, 1, 2 or 3, got {!r}".format(sign))
164 | 
# Single-factor test
def test_factor(factor, block, begin_date_list, end_date_list, layer_sign = 0):
    """Run a month-by-month single-factor regression test.

    For every period the month-start factor values are cleaned (mean fill, MAD
    clipping, z-score), correlated with the realised period returns (IC), and
    regressed against them; the slope is recorded as the factor return and the
    weighted fitted return as the pool return.

    :param factor: name of the factor under test
    :param block: index block whose constituents form the stock pool
    :param begin_date_list: first trading day of each period
    :param end_date_list: last trading day of each period (same length)
    :param layer_sign: layering mode forwarded to ``stock_layered``
    :return: dict with annualised Sharpe ratio, return variance, factor-return
        mean / std / share above zero, and IC mean / std / share above zero
    :raises ValueError: if ``begin_date_list`` is empty
    """
    if len(begin_date_list) == 0:
        raise ValueError("begin_date_list is empty: there is no period to test")

    yield_rate_list = []         # weighted pool return per period
    single_yield_rate_list = []  # fitted per-stock returns per period
    factor_return_list = []      # regression slope per period
    IC_list = []                 # IC per period

    # One iteration per month; rebalancing happens at month start.
    for i in range(len(begin_date_list)):
        period_begin = begin_date_list[i]
        period_end = end_date_list[i]

        # --------------------------------------------- #
        # 1. Fetch the stock pool and the factor values
        # --------------------------------------------- #
        print("{} - {}: 获取K线数据！".format(period_begin, period_end))
        code_list = at.get_code_list(block, date=period_begin)
        code_list = stock_layered(code_list, layer_sign)

        target_list = code_list['code'].tolist()               # pool for this period
        weight_list = np.array(code_list['weight'].tolist())   # index weights

        print("{} - {}: 获取因子数据！".format(period_begin, period_end))
        factor_data = at.get_factor_by_day(factor_list=[factor], target_list=target_list,
                                           date=period_begin)

        # ----------------------------------------------- #
        # 2. Pre-processing: mean fill, MAD clip, z-score
        # ----------------------------------------------- #
        factor_data = factor_data.fillna(factor_data[factor].mean())
        factor_data = filter_MAD(factor_data, factor, n=5)
        factor_data[factor] = preprocessing.scale(factor_data[factor])
        factor_data = np.array(factor_data[factor].tolist())

        # ------------------------------------------------- #
        # 3. Realised returns, valid-stock filtering and IC
        # ------------------------------------------------- #
        # One query per stock; -1 marks a return that could not be computed.
        rates = np.array([cal_yield_rate(target, period_begin, period_end)
                          for target in target_list], dtype=float)

        # Drop failed stocks from all three aligned arrays with one boolean
        # mask. Deleting by the original loop index from arrays that had already
        # shrunk removed the wrong elements after the first deletion.
        # NOTE(review): assumes factor rows come back in target_list order.
        valid = rates != -1
        yield_rate = rates[valid]
        weight_list = weight_list[valid]
        factor_data = factor_data[valid]

        IC = pearsonr(yield_rate, factor_data)[0]
        IC_list.append(IC)

        weight_list = weight_list / weight_list.sum()  # renormalise the weights
        weight_list = weight_list.reshape(-1, 1)
        factor_data = factor_data.reshape(-1, 1)
        yield_rate = yield_rate.reshape(-1, 1)

        # ----------------------------------------------- #
        # 4. Regress period returns on month-start factor
        # ----------------------------------------------- #
        print("{} - {}: 开始拟合！".format(period_begin, period_end))
        LR = linear_model.LinearRegression()
        LR.fit(factor_data, yield_rate)

        # Single feature, single target: the only coefficient is the factor return.
        factor_return_list.append(float(np.ravel(LR.coef_)[0]))

        # -------------------------------------------------- #
        # 5. Fitted per-stock returns and weighted pool return
        # -------------------------------------------------- #
        print("{} - {}: 开始预测！".format(period_begin, period_end))
        pred_yield_rate = LR.predict(factor_data)

        # Keep every stock's fitted return (two decimals), not just the first row.
        rate_list = [round(float(r), 2) for r in np.ravel(pred_yield_rate)]
        single_yield_rate_list.append(rate_list)

        mean_yield_rate = (pred_yield_rate * weight_list).sum()
        yield_rate_list.append(round(float(mean_yield_rate), 2))  # two decimals

        print("{} - {}: 股票平均收益率拟合完毕！".format(period_begin, period_end))

    # --------------------------------------------------- #
    # Aggregate over all periods
    # --------------------------------------------------- #
    yield_rate_array = np.array(yield_rate_list)

    # Excess return over the per-period risk-free rate of 0.004.
    over_rate = yield_rate_array - 0.004
    mean_over_rate = over_rate.mean()
    std_over_rate = over_rate.std()

    # Per-period Sharpe ratio, annualised for monthly periods.
    sharp_ratio = np.sqrt(12) * (mean_over_rate / std_over_rate)

    var_yield_rate = np.var(yield_rate_array)

    factor_return_array = np.array(factor_return_list)
    average_factor_return = np.mean(factor_return_array)
    std_factor_return = np.std(factor_return_array)
    factor_greater_than_zero = sum(1 for r in factor_return_list if r > 0) / len(factor_return_list)

    average_IC = np.mean(np.array(IC_list))
    std_IC = np.std(np.array(IC_list))
    IC_greater_than_zero = sum(1 for ic in IC_list if ic > 0) / len(IC_list)

    test_result_dict = dict()
    test_result_dict["年化夏普率"] = sharp_ratio
    test_result_dict["波动率"] = var_yield_rate
    test_result_dict["因子收益均值"] = average_factor_return
    test_result_dict["因子收益标准差"] = std_factor_return
    test_result_dict["因子收益>0概率"] = factor_greater_than_zero
    test_result_dict["IC均值"] = average_IC
    test_result_dict["IC标准差"] = std_IC
    test_result_dict["IC>0概率"] = IC_greater_than_zero

    return test_result_dict
323 | 
324 | 
# Test several factors in one go and collect the results in a DataFrame
def test_all_factors(factor_list, block, begin_date, end_date, layer_sign=0):
    """Run ``test_factor`` for every factor over the same monthly periods.

    :param factor_list: names of the factors to test
    :param block: index block whose constituents form the stock pool
    :param begin_date: first month of the range, ``YYYY-MM``
    :param end_date: last month of the range, ``YYYY-MM``
    :param layer_sign: layering mode forwarded to ``test_factor``
    :return: DataFrame with one row per factor (indexed by factor name) and one
        column per test metric
    """
    # The period boundaries are computed once and shared by all factors.
    period_starts, period_ends = create_date(begin_date, end_date)
    metrics_per_factor = [
        test_factor(name, block, period_starts, period_ends, layer_sign)
        for name in factor_list
    ]
    return pd.DataFrame(metrics_per_factor, index=factor_list)
341 | 
342 | 
# Module-level run: test the factors below on hs300 from 2016-01 to 2018-09
# and write the metrics table to CSV.
factors_to_test = ["NLSIZE", "MktValue", "BIAS10", "NegMktValue", "CurrentAssetsRatio",
                   "MLEV", "Variance20", "ROAEBIT"]

# layer_sign: 0 = no layering, 1 = low / 2 = mid / 3 = high circulating market value
result = test_all_factors(factors_to_test, 'hs300', '2016-01', '2018-09', layer_sign=0)

result.to_csv("single_factor_test.csv", sep=',')
349 | 
350 | 
351 | 
352 | 


--------------------------------------------------------------------------------
/get_factor_report.py:
--------------------------------------------------------------------------------
 1 | import atrader as at
 2 | import pandas as pd
 3 | import numpy as np
 4 | import sys
 5 | """
 6 | 运行之前！！！！！！！！！！！
 7 | 修改输出的csv文件名！！！！！！
 8 | 注意不要重复，可能会覆盖原来的文件！！！
 9 | """
10 | 
# Output file for the collected report; change it before each run so that an
# earlier report is not overwritten.
csv_file = "final_Q1_ChengZhang_ChangYongJiShuZhiBiao_DongLiang_factors.csv"
strategy_dicts = at.get_strategy_id()

# (output column, performance-report field) pairs, in output column order.
report_columns = (
    ("测试因子", 'strategy_name'),
    ('年化收益率', 'annu_return'),
    ('年化夏普率', 'sharpe_ratio'),
    ('最大回撤率', 'max_drawback_rate'),
    ('alpha', 'alpha'),
    ('beta', 'beta'),
    ('信息比率', 'info_ratio'),
)
save_dict = {column: [] for column, _ in report_columns}

for strategy in strategy_dicts:
    strategy_id = strategy["strategy_id"]
    result = at.get_performance(strategy_id)
    for column, field in report_columns:
        value = result[field]
        if column == '年化夏普率':
            # The reported Sharpe ratio is scaled by sqrt(12)
            # (presumably monthly -> annualized; confirm against the atrader report).
            value = value * np.sqrt(12)
        save_dict[column].append(value)

df = pd.DataFrame(save_dict)
df.to_csv(csv_file, sep=',')
print(df)


--------------------------------------------------------------------------------
/lstm.py:
--------------------------------------------------------------------------------
  1 | """
  2 | ----------------------------------------------------------
  3 | 策略思路：
  4 | 1. 回测标的：沪深300成分股
  5 | 2. 回测时间段：2016-01-01 至 2018-09-30
  6 | 3. 特征选择：
  7 | 	- 基础类：AdminExpenseTTM, FinanExpenseTTM, NetIntExpense, GrossProfit
  8 |     - 质量类：ROIC, CashToCurrentLiability
  9 |     - 收益风险类：DDNCR    
 10 |     - 情绪类：PVI
 11 | 	- 成长类：TotalAssetGrowRate
 12 | 	- 常用技术指标类：MA120
 13 | 	- 动量类：AD
 14 | 	- 价值类：PS
 15 |     - 每股指标类：EnterpriseFCFPS
 16 |     - 行业分析师：FY12P
 17 |     - 特色技术指标：STDDEV
 18 | 4. 单因子回归测试模型思路：
 19 |     1. 先获得 21 天以上的K线数据和因子数据，预处理
 20 |     2. 使用上月初的多个因子和上月收益率进行线性回归
 21 |     3. 使用【LSTM模型】进行训练
 22 |     4. 回到当前时间点，使用本月初的因子作为预测样本特征，预测本月的各股票平均收益率的大小。
 23 | 5. 选股逻辑：
 24 |     将符合预测结果的股票按均等分配可用资金进行下单交易。持有一个月后 ，再次进行调仓，训练预测。
 25 | 6. 交易逻辑：
 26 |     每次调仓时，若当前有持仓，并且符合选股条件，则仓位不动；
 27 |                               若不符合选股条件，则对收益低的标的进行仓位平仓；
 28 |                 若当前无仓，并且符合选股条件，则多开仓，对收益高的标的进行开仓；
 29 |                             若不符合选股条件，则不开仓，无需操作。
 30 | ----------------------------------------------------------
 31 | """
 32 | from atrader import *
 33 | import pandas as pd
 34 | import numpy as np
 35 | from sklearn import svm
 36 | import math
 37 | from sklearn import preprocessing
 38 | import datetime
 39 | import torch
 40 | from torch import nn
 41 | from torch.autograd import Variable
 42 | import torchvision.datasets as dsets
 43 | import torch.utils.data as Data
 44 | import matplotlib.pyplot as plt
 45 | import torchvision
 46 | 
# Module-level factor codes: init() registers them with reg_factor and the
# lstm class uses their count as the network input size.

FactorCode = ['ROIC', 'CashToCurrentLiability', 'STDDEV', 'DDNCR', 'PVI', 'EnterpriseFCFPS',
              'PS', 'AdminExpenseTTM', 'FinanExpenseTTM', 'NetIntExpense', 'GrossProfit', 'FY12P',
              'AD', 'TotalAssetGrowRate', 'MA120']
 52 | class lstm(nn.Module):
 53 |     def __init__(self):
 54 |         super(lstm, self).__init__()
 55 | 
 56 |         self.rnn = nn.LSTM(
 57 |             input_size=len(FactorCode),
 58 |             hidden_size=64,
 59 |             num_layers=2,
 60 |             batch_first=True,
 61 |             dropout=0.2
 62 |         )
 63 | 
 64 |         self.out = nn.Linear(64, 1)
 65 | 
 66 |     def forward(self, x):
 67 |         r_out, (h_n, h_c) = self.rnn(x, None)
 68 | 
 69 |         # print(x.shape)
 70 |         # print(r_out.shape)
 71 | 
 72 |         out = self.out(r_out[:, -1, :])
 73 |         # print(r_out[:, -1, :].shape)
 74 |         # input()
 75 |         return out
# Training hyper-parameters copied onto the context in init().
LR = 0.01  # learning rate of the Adam optimizer
EPOCH = 3  # passes over the training set at each retraining
BATCH_SIZE = 5  # sequences per mini-batch in the training DataLoader
 79 | 
 80 | 
 81 | # 中位数去极值法
 82 | def filter_MAD(df, factor, n=3):
 83 |     """
 84 |     :param df: 去极值的因子序列
 85 |     :param factor: 待去极值的因子
 86 |     :param n: 中位数偏差值的上下界倍数
 87 |     :return: 经过处理的因子dataframe
 88 |     """
 89 |     median = df[factor].quantile(0.5)
 90 |     new_median = ((df[factor] - median).abs()).quantile(0.5)
 91 |     max_range = median + n * new_median
 92 |     min_range = median - n * new_median
 93 | 
 94 |     for i in range(df.shape[0]):
 95 |         if df.loc[i, factor] > max_range:
 96 |             df.loc[i, factor] = max_range
 97 |         elif df.loc[i, factor] < min_range:
 98 |             df.loc[i, factor] = min_range
 99 |     return df
100 | 
101 | 
def init(context):
    """Configure the backtest account, register data and create the LSTM training state.

    Everything on_data needs later is stored on ``context``: the factor list,
    the window length, the model with its optimizer and loss, the trading
    thresholds and the list of month-start trading days.
    """
    # NOTE(review): this SVC is not referenced by on_data in this file; it looks
    # like a leftover from the SVM strategy - confirm before removing.
    context.SVM = svm.SVC(gamma='scale')
    # Account setup: initial cash of 10,000,000.
    set_backtest(initial_cash=10000000, future_cost_fee=1.0, stock_cost_fee=30, margin_rate=1.0, slide_price=0.0,
                 price_loc=1, deal_type=0, limit_type=0)
    # Register daily-frequency K-line data.
    reg_kdata('day', 1)
    global FactorCode  # module-level factor code list (only read here)
    reg_factor(factor=FactorCode)
    # Only the first factor code is printed, although the whole list is registered.
    print("init 函数, 注册因子为{}".format(FactorCode[0]))
    context.FactorCode = FactorCode  # on_data reads the factor list from the context

    # Hyper-parameters:
    context.Len = 21*2    # window length in trading days (two months of about 21 days, 250/12); on_data returns early until this many bars were seen
    context.Num = 0   # number of bars seen so far

    # LSTM model, optimizer and loss used by on_data for the monthly retraining
    context.lstm = lstm()
    context.optimizer = torch.optim.Adam(context.lstm.parameters(), lr=LR)
    context.loss_func = nn.MSELoss()
    context.EPOCH = EPOCH
    context.BATCH_SIZE = BATCH_SIZE

    # Sensitive hyper-parameters that need tuning
    context.upper_pos = 80  # upper percentile of predicted returns; stocks above it are bought
    context.down_pos = 20   # lower percentile of predicted returns; held stocks below it are sold
    context.cash_rate = 0.6  # numerator of the per-stock cash fraction; the more stocks with positive predicted return, the smaller the fraction

    # Rebalance only on the first trading day of each month: keep the days
    # whose month differs from the previous trading day's month.
    days = get_trading_days('SSE', '2016-01-01', '2018-09-30')
    months = np.vectorize(lambda x: x.month)(days)
    month_begin = days[pd.Series(months) != pd.Series(months).shift(1)]
    context.month_begin = pd.Series(month_begin).dt.strftime('%Y-%m-%d').tolist()
135 | 
136 | 
def on_data(context):
    """Retrain the LSTM at each month start and rebalance on its predictions.

    The function runs on every bar but only acts once ``context.Len`` bars
    have been seen and the current date is listed in ``context.month_begin``.
    The ``context.Len``-day window is split into two halves per stock:

    * training sample: factor sequence of the first half, labelled with the
      return from the last close of the first half to the latest close;
    * prediction sample: factor sequence of the second half.

    Stocks with any missing factor value in either half are dropped.  Flat
    stocks whose prediction is positive and above the ``context.upper_pos``
    percentile are bought; held stocks whose prediction is below the
    ``context.down_pos`` percentile are sold completely.

    :param context: atrader strategy context prepared by ``init``
    :return: None
    """
    context.Num = context.Num + 1
    if context.Num < context.Len:  # not enough history yet
        return
    if datetime.datetime.strftime(context.now, '%Y-%m-%d') not in context.month_begin:
        return  # rebalance only on month-start trading days

    half = int(context.Len / 2)  # length of the training and of the prediction half-window

    # Fetch the registered K-line and factor data for the whole window.
    KData = get_reg_kdata(reg_idx=context.reg_kdata[0], length=context.Len, fill_up=True, df=True)
    FData = get_reg_factor(reg_idx=context.reg_factor[0], target_indices=[x for x in range(300)], length=context.Len,
                           df=True)

    Fcode = context.FactorCode

    # Target indices present on the first bar of the window, used to align
    # K-line rows and factor rows per stock.
    tempIdx = KData[KData['time'] == KData['time'][0]]['target_idx'].reset_index(drop=True)

    train_frames = []  # per-stock training rows: idx | benefit | factors
    test_frames = []   # per-stock prediction rows: idx | factors
    latest_close = {}  # target index -> most recent close, used for order sizing

    for i in range(300):
        FactorData0 = pd.DataFrame(np.full([half, len(Fcode) + 2], np.nan),
                                   columns=(['idx', 'benefit'] + Fcode))
        FactorDataTest0 = pd.DataFrame(np.full([half, len(Fcode) + 1], np.nan), columns=(['idx'] + Fcode))

        # Factor rows of the current stock.
        FData0 = FData[FData['target_idx'] == tempIdx[i]].reset_index(drop=True)

        for FC in Fcode:
            FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True)
            FactorData0[FC] = FCData[:half]  # first half -> training features
            FactorDataTest0[FC] = FCData[half:].reset_index(drop=True)  # second half -> prediction features

        # Label: return of the stock over the second half of the window.
        close = np.array(KData[KData['target_idx'] == tempIdx[i]]['close'])
        benefit = (close[-1] - close[half - 1]) / close[half - 1]
        latest_close[int(tempIdx[i])] = close[-1]

        FactorData0['benefit'] = benefit
        FactorData0['idx'] = tempIdx[i]
        train_frames.append(FactorData0)

        FactorDataTest0['idx'] = tempIdx[i]
        test_frames.append(FactorDataTest0)

    # A single concat is linear in the number of stocks and does not rely on
    # DataFrame.append, which was removed in pandas 2.0.
    FactorData = pd.concat(train_frames, ignore_index=True)
    FactorDataTest = pd.concat(test_frames, ignore_index=True)

    # Data cleaning: drop rows with any missing value ...
    FactorData = FactorData.dropna(axis=0, how='any').reset_index(drop=True)
    FactorDataTest = FactorDataTest.dropna(axis=0, how='any').reset_index(drop=True)

    # ... and keep only stocks that still have a complete sequence in both sets.
    count1 = FactorData.groupby('idx').count().reset_index()
    remain1 = count1[count1[count1.columns[1]] == half]['idx']
    count2 = FactorDataTest.groupby('idx').count().reset_index()
    remain2 = count2[count2[count2.columns[1]] == half]['idx']
    remain = pd.merge(remain1, remain2, on=['idx']).reset_index(drop=True)
    Idx = remain['idx']  # indices of the remaining stocks
    if len(Idx) == 0:
        return  # nothing to train on or to trade this month

    FactorData = FactorData[FactorData['idx'].isin(remain['idx'])].reset_index(drop=True)
    FactorDataTest = FactorDataTest[FactorDataTest['idx'].isin(remain['idx'])].reset_index(drop=True)

    # Per-factor preprocessing: MAD winsorization followed by standardization.
    for Factor in Fcode:
        FactorData = filter_MAD(FactorData, Factor, 5)
        FactorData[Factor] = preprocessing.scale(FactorData[Factor])

        FactorDataTest = filter_MAD(FactorDataTest, Factor, 5)
        FactorDataTest[Factor] = preprocessing.scale(FactorDataTest[Factor])

    # Shape the rows into (stocks, half, factors) sequences.
    # NOTE(review): sequences follow the tempIdx order while Y and Idx come out
    # of groupby/merge sorted by idx; this assumes tempIdx is ascending - confirm.
    X = np.ascontiguousarray(np.asarray(FactorData[Fcode], dtype=float)).reshape(-1, half, len(Fcode))
    Xtest = np.ascontiguousarray(np.asarray(FactorDataTest[Fcode], dtype=float)).reshape(-1, half, len(Fcode))
    Xtest = torch.from_numpy(Xtest).float()

    # One label (the return) per stock.
    Y = FactorData[['idx', 'benefit']]
    Y = Y.groupby('idx').mean().reset_index(drop=True)
    Y = np.array(Y['benefit']).astype(float)

    # Model training.
    train_loader = Data.DataLoader(
        dataset=Data.TensorDataset(torch.from_numpy(X).float(), torch.from_numpy(Y).float()),
        batch_size=context.BATCH_SIZE, shuffle=True)

    context.lstm.train()  # enable dropout for training
    for epoch in range(context.EPOCH):
        for b_x, b_y in train_loader:
            # The network returns (batch, 1) while the labels are (batch,).
            # Without the squeeze MSELoss broadcasts both to (batch, batch)
            # and compares every prediction with every label.
            output = context.lstm(b_x).squeeze(-1)
            loss = context.loss_func(output, b_y)
            context.optimizer.zero_grad()
            loss.backward()
            context.optimizer.step()

    # Prediction: evaluation mode switches dropout off so the forecasts are
    # deterministic; no gradients are needed.
    context.lstm.eval()
    with torch.no_grad():
        y = context.lstm(Xtest).numpy().reshape((-1))

    # Trading setup.
    positions = context.account().positions['volume_long']  # long volume per target
    valid_cash = context.account(account_idx=0).cash['valid_cash'][0]  # available cash

    # Cash fraction per stock; the + 1 keeps the denominator non-zero.
    P = context.cash_rate / (sum(y > 0) + 1)
    high_return, low_return = np.percentile(y, [context.upper_pos, context.down_pos])

    for target, predicted in zip(Idx, y):
        target = int(target)
        position = positions.iloc[target]
        if position == 0 and predicted > high_return and predicted > 0 and valid_cash > 0:
            # Size the order with this stock's latest close.  A fixed
            # 21-rows-per-stock offset into KData does not match the
            # context.Len-row window and reads another bar's price.
            # The + 1 keeps the denominator non-zero.
            Num = int(math.floor(valid_cash * P / 100 / (latest_close[target] + 1)) * 100)

            # Keep the volume within bounds; it must stay a multiple of 100.
            if Num < 1000:
                Num *= 10
            if Num > 100000:
                Num = int(Num / 10)
                Num -= Num % 100
            if Num <= 0:  # do not open a position
                continue

            print("开仓数量为：{}".format(Num))
            # Open a long position with the given volume.
            order_volume(account_idx=0, target_idx=target, volume=Num, side=1, position_effect=1, order_type=2,
                         price=0)

        elif position > 0 and predicted < low_return:
            # Close the whole position.
            order_volume(account_idx=0, target_idx=target, volume=int(position), side=2, position_effect=2,
                         order_type=2, price=0)
346 | 
347 | 
if __name__ == '__main__':
    # Backtest configuration for the LSTM strategy.
    strategy_name = 'lstm'
    file_path = 'lstm.py'
    block = 'hs300'
    begin_date = '2016-01-01'
    end_date = '2018-09-30'

    # Universe: codes returned for the block as of the backtest start date.
    target_list = list(get_code_list(block, date=begin_date)['code'])

    run_backtest(strategy_name=strategy_name, file_path=file_path, target_list=target_list,
                 frequency='day', fre_num=1, begin_date=begin_date, end_date=end_date, fq=1)
361 | 


--------------------------------------------------------------------------------
/multi_factor_lr.py:
--------------------------------------------------------------------------------
  1 | """
  2 | -------------------------------------------------------
  3 | 策略思路：
  4 | 1. 回测标的：沪深300成分股
  5 | 2. 回测时间段：2016-01-01 至 2018-09-30
  6 | 3. 特征选择：每个大类夏普率最高的因子+夏普率高于1.5的因子
  7 |     - 质量类：ROIC, CashToCurrentLiability
  8 |     - 特色技术指标：STDDEV
  9 |     - 收益风险：DDNCR
 10 |     - 情绪类：TVMA20
 11 |     - 每股指标类：EnterpriseFCFPS
 12 |     - 价值类：PS
 13 |     - 基础类：AdminExpenseTTM, FinanExpenseTTM, NetIntExpense, GrossProfit
 14 |     - 行业分析师：FY12P
 15 |     - 动量类：AD
 16 |     - 成长类：TotalAssetGrowRate
 17 |     - 常用技术类：MA120
 18 | 4. 回归测试模型思路：
 19 |     1. 先获得 21 天以上的K线数据和因子数据，预处理
 20 |     2. 使用上月初因子和上月收益率进行线性回归
    3. 使用多元线性回归模型进行训练
 22 |     4. 回到当前时间点，使用本月初的因子作为预测样本特征，预测本月的各股票平均收益率的大小。
 23 | 5. 选股逻辑：
 24 |     将符合预测结果的股票按均等分配可用资金进行下单交易。持有一个月后 ，再次进行调仓，训练预测。
 25 | 6. 交易逻辑：
 26 |     每次调仓时，若当前有持仓，并且符合选股条件，则仓位不动；
 27 |                               若不符合选股条件，则对收益低的标的进行仓位平仓；
 28 |                 若当前无仓，并且符合选股条件，则多开仓，对收益高的标的进行开仓；
 29 |                             若不符合选股条件，则不开仓，无需操作。
 30 | ---------------------------------------------------------
 31 | 运行方法：
 32 | 1. 在 main 中定义同一类的因子列表。
 33 | 2. 逐个因子执行回测。
 34 | 3. 获取回测报告ID，通过ID获取绩效报告字段。
 35 | 4. 保留字段到CSV文件中。
 36 | """
 37 | 
 38 | from atrader import *
 39 | import pandas as pd
 40 | import numpy as np
 41 | from sklearn.linear_model import LinearRegression
 42 | import math
 43 | from sklearn import preprocessing
 44 | import datetime
 45 | import sys
 46 | 
# Module-level factor codes: init() registers them with reg_factor and stores
# the list on the context as the regression feature set.
FactorCode = ['ROIC', 'CashToCurrentLiability', 'STDDEV', 'DDNCR', 'PVI', 'EnterpriseFCFPS',
              'PS', 'AdminExpenseTTM', 'FinanExpenseTTM', 'NetIntExpense', 'NIAP', 'FY12P',
              'AD', 'TotalAssetGrowRate', 'MA120']
 51 | 
 52 | # 中位数去极值法
 53 | def filter_MAD(df, factor, n=3):
 54 |     """
 55 |     :param df: 去极值的因子序列
 56 |     :param factor: 待去极值的因子
 57 |     :param n: 中位数偏差值的上下界倍数
 58 |     :return: 经过处理的因子dataframe
 59 |     """
 60 |     median = df[factor].quantile(0.5)
 61 |     new_median = ((df[factor] - median).abs()).quantile(0.5)
 62 |     max_range = median + n * new_median
 63 |     min_range = median - n * new_median
 64 | 
 65 |     for i in range(df.shape[0]):
 66 |         if df.loc[i, factor] > max_range:
 67 |             df.loc[i, factor] = max_range
 68 |         elif df.loc[i, factor] < min_range:
 69 |             df.loc[i, factor] = min_range
 70 |     return df
 71 | 
 72 | 
def init(context):
    """Configure the backtest account, register data and store the strategy parameters.

    Everything on_data needs later is stored on ``context``: the factor list,
    the window length, the trading thresholds and the list of month-start
    trading days.
    """
    # Account setup: initial cash of 10,000,000.
    set_backtest(initial_cash=10000000, future_cost_fee=1.0, stock_cost_fee=30, margin_rate=1.0, slide_price=0.0,
                 price_loc=1, deal_type=0, limit_type=0)
    # Register daily-frequency K-line data.
    reg_kdata('day', 1)
    global FactorCode  # module-level factor code list (only read here)
    reg_factor(factor=FactorCode)
    # Only the first factor code is printed, although the whole list is registered.
    print("init 函数, 注册因子为{}".format(FactorCode[0]))
    context.FactorCode = FactorCode  # on_data reads the factor list from the context

    # Hyper-parameters:
    context.Len = 21    # window length in trading days (about one month, 250/12); on_data returns early until this many bars were seen
    context.Num = 0   # number of bars seen so far

    # Sensitive hyper-parameters that need tuning
    context.upper_pos = 80  # upper percentile of predicted returns; stocks above it are bought
    context.down_pos = 20   # lower percentile of predicted returns; held stocks below it are sold
    context.cash_rate = 0.6  # numerator of the per-stock cash fraction; the more stocks with positive predicted return, the smaller the fraction

    # Rebalance only on the first trading day of each month: keep the days
    # whose month differs from the previous trading day's month.
    days = get_trading_days('SSE', '2016-01-01', '2018-09-30')
    months = np.vectorize(lambda x: x.month)(days)
    month_begin = days[pd.Series(months) != pd.Series(months).shift(1)]
    context.month_begin = pd.Series(month_begin).dt.strftime('%Y-%m-%d').tolist()
 98 | 
 99 | 
def on_data(context):
    """Refit the linear regression at each month start and rebalance on its predictions.

    The function runs on every bar but only acts once ``context.Len`` bars
    have been seen and the current date is listed in ``context.month_begin``.
    Per stock, the factor values on the first bar of the window are the
    training features and the return over the window is the label; the
    factor values on the last bar are the prediction features.

    Flat stocks whose predicted return is above the ``context.upper_pos``
    percentile are bought; for held stocks below the ``context.down_pos``
    percentile one tenth of the position is sold.

    :param context: atrader strategy context prepared by ``init``
    :return: None
    """
    context.Num = context.Num + 1
    if context.Num < context.Len:  # not enough history yet
        return
    if datetime.datetime.strftime(context.now, '%Y-%m-%d') not in context.month_begin:
        return  # rebalance only on month-start trading days

    # Fetch the registered K-line and factor data for the whole window.
    KData = get_reg_kdata(reg_idx=context.reg_kdata[0], length=context.Len, fill_up=True, df=True)
    FData = get_reg_factor(reg_idx=context.reg_factor[0], target_indices=[x for x in range(300)], length=context.Len,
                           df=True)

    Fcode = context.FactorCode

    # Target indices present on the first bar of the window, used to align
    # K-line rows and factor rows per stock.
    tempIdx = KData[KData['time'] == KData['time'][0]]['target_idx'].reset_index(drop=True)

    train_frames = []  # per-stock training row: idx | benefit | factors
    test_frames = []   # per-stock prediction row: idx | factors
    latest_close = {}  # target index -> most recent close, used for order sizing

    for i in range(300):
        FactorData0 = pd.DataFrame(np.full([1, len(Fcode) + 2], np.nan),
                                   columns=(['idx', 'benefit'] + Fcode))
        FactorDataTest0 = pd.DataFrame(np.full([1, len(Fcode) + 1], np.nan), columns=(['idx'] + Fcode))

        # Factor rows of the current stock.
        FData0 = FData[FData['target_idx'] == tempIdx[i]].reset_index(drop=True)

        for FC in Fcode:
            FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True)
            FactorData0[FC] = FCData[0]  # first bar of the window -> training feature
            FactorDataTest0[FC] = FCData[context.Len - 1]  # last bar of the window -> prediction feature

        # Label: return of the stock over the window.
        close = np.array(KData[KData['target_idx'] == tempIdx[i]]['close'])
        benefit = (close[context.Len - 1] - close[0]) / close[0]
        latest_close[int(tempIdx[i])] = close[context.Len - 1]

        FactorData0['benefit'] = benefit
        FactorData0['idx'] = tempIdx[i]
        train_frames.append(FactorData0)

        FactorDataTest0['idx'] = tempIdx[i]
        test_frames.append(FactorDataTest0)

    # A single concat is linear in the number of stocks and does not rely on
    # DataFrame.append, which was removed in pandas 2.0.
    FactorData = pd.concat(train_frames, ignore_index=True)
    FactorDataTest = pd.concat(test_frames, ignore_index=True)

    # Data cleaning: drop stocks with any missing value.
    FactorData = FactorData.dropna(axis=0, how='any').reset_index(drop=True)
    FactorDataTest = FactorDataTest.dropna(axis=0, how='any').reset_index(drop=True)
    if FactorData.empty or FactorDataTest.empty:
        return  # nothing to fit or to trade this month
    Idx = FactorDataTest['idx']  # indices of the stocks that can be predicted

    # Per-factor preprocessing: MAD winsorization followed by standardization.
    for Factor in Fcode:
        FactorData = filter_MAD(FactorData, Factor, 5)
        FactorData[Factor] = preprocessing.scale(FactorData[Factor])

        FactorDataTest = filter_MAD(FactorDataTest, Factor, 5)
        FactorDataTest[Factor] = preprocessing.scale(FactorDataTest[Factor])

    # Feature matrices: rows are samples, columns are factors.
    X = np.asarray(FactorData[Fcode], dtype=float)
    Xtest = np.asarray(FactorDataTest[Fcode], dtype=float)

    # Training labels: the returns as floats.
    Y = np.array(FactorData['benefit']).astype(float)

    LRModel = LinearRegression(normalize=True)

    # Model fitting and return prediction.
    LRModel.fit(X, Y)
    y = LRModel.predict(Xtest)

    # Trading setup.
    positions = context.account().positions['volume_long']  # long volume per target
    valid_cash = context.account(account_idx=0).cash['valid_cash'][0]  # available cash

    # Cash fraction per stock; the + 1 keeps the denominator non-zero.
    P = context.cash_rate / (sum(y > 0) + 1)

    low_return, high_return = np.percentile(y, [context.down_pos, context.upper_pos])

    for target, predicted in zip(Idx, y):
        target = int(target)
        position = positions.iloc[target]
        if position == 0 and predicted > high_return and valid_cash > 0:
            # Size the order with this stock's latest close rather than with
            # the KData row whose position happens to equal the target index.
            # The + 1 keeps the denominator non-zero.
            Num = int(math.floor(valid_cash * P / 100 / (latest_close[target] + 1)) * 100)

            # Keep the volume within bounds; it must stay a multiple of 100.
            if Num < 1000:
                Num *= 10
            if Num > 100000:
                Num = int(Num / 10)
                Num -= Num % 100
            if Num <= 0:  # do not open a position
                continue

            print("开仓数量为：{}".format(Num))
            # Open a long position with the given volume.
            order_volume(account_idx=0, target_idx=target, volume=Num, side=1, position_effect=1, order_type=2,
                         price=0)
        elif position > 0 and predicted < low_return:
            # NOTE(review): only a tenth of the position is sold here - confirm
            # that a partial reduction, not a full close, is intended.
            sell_volume = int(position / 10)
            # Log the volume that is actually submitted.
            print("平仓，数量为: {}".format(sell_volume))
            order_volume(account_idx=0, target_idx=target, volume=sell_volume,
                         side=2, position_effect=2, order_type=2, price=0)
240 | 
241 | 
if __name__ == '__main__':
    # Backtest configuration for the multi-factor linear regression strategy.
    strategy_name = 'multi_factor_lr'
    file_path = 'multi_factor_lr.py'
    block = 'hs300'
    begin_date = '2016-01-01'
    end_date = '2018-09-30'

    # Universe: codes returned for the block as of the backtest start date.
    target_list = list(get_code_list(block, date=begin_date)['code'])

    run_backtest(strategy_name=strategy_name, file_path=file_path, target_list=target_list,
                 frequency='day', fre_num=1, begin_date=begin_date, end_date=end_date, fq=1)
255 | 
256 | 


--------------------------------------------------------------------------------
/random_forest_reg.py:
--------------------------------------------------------------------------------
  1 | """
  2 | ----------------------------------------------------------
  3 | 策略思路：
  4 | 1. 回测标的：沪深300成分股
  5 | 2. 回测时间段：2016-01-01 至 2018-09-30
  6 | 3. 特征选择：每个大类夏普率最高的因子+夏普率高于1.5的因子
  7 |     - 质量类：ROIC, CashToCurrentLiability
  8 |     - 特色技术指标：STDDEV
  9 |     - 收益风险：DDNCR
 10 |     - 情绪类：TVMA20
 11 |     - 每股指标类：EnterpriseFCFPS
 12 |     - 价值类：PS
 13 |     - 基础类：AdminExpenseTTM, FinanExpenseTTM, NetIntExpense, GrossProfit
 14 |     - 行业分析师：FY12P
    - 动量类：AD
 16 |     - 成长类：TotalAssetGrowRate
 17 |     - 常用技术类：MA120
 18 | ... 其余逻辑参照single_factor_test.py
 19 | 
 20 | ----------------------------------------------------------
 21 | """
 22 | from atrader import *
 23 | import pandas as pd
 24 | import numpy as np
 25 | from sklearn.ensemble import RandomForestRegressor
 26 | import math
 27 | from sklearn import preprocessing
 28 | import datetime
 29 | 
# Module-level factor codes: init() registers them with reg_factor and stores
# the list on the context as the feature set.
FactorCode = ['ROIC', 'CashToCurrentLiability', 'STDDEV', 'DDNCR', 'PVI', 'EnterpriseFCFPS',
              'PS', 'AdminExpenseTTM', 'FinanExpenseTTM', 'NetIntExpense', 'NIAP', 'FY12P',
              'AD', 'TotalAssetGrowRate', 'MA120']
 34 | 
 35 | # 中位数去极值法
 36 | def filter_MAD(df, factor, n=3):
 37 |     """
 38 |     :param df: 去极值的因子序列
 39 |     :param factor: 待去极值的因子
 40 |     :param n: 中位数偏差值的上下界倍数
 41 |     :return: 经过处理的因子dataframe
 42 |     """
 43 |     median = df[factor].quantile(0.5)
 44 |     new_median = ((df[factor] - median).abs()).quantile(0.5)
 45 |     max_range = median + n * new_median
 46 |     min_range = median - n * new_median
 47 | 
 48 |     for i in range(df.shape[0]):
 49 |         if df.loc[i, factor] > max_range:
 50 |             df.loc[i, factor] = max_range
 51 |         elif df.loc[i, factor] < min_range:
 52 |             df.loc[i, factor] = min_range
 53 |     return df
 54 | 
 55 | 
def init(context):
    """Configure the backtest account, register data and store the strategy parameters.

    Everything on_data needs later is stored on ``context``: the factor list,
    the window length, the trading thresholds and the list of month-start
    trading days.
    """

    # Account setup: initial cash of 10,000,000.
    set_backtest(initial_cash=10000000, future_cost_fee=1.0, stock_cost_fee=30, margin_rate=1.0, slide_price=0.0,
                 price_loc=1, deal_type=0, limit_type=0)
    # Register daily-frequency K-line data.
    reg_kdata('day', 1)
    global FactorCode  # module-level factor code list (only read here)
    reg_factor(factor=FactorCode)
    # Only the first factor code is printed, although the whole list is registered.
    print("init 函数, 注册因子为{}".format(FactorCode[0]))
    context.FactorCode = FactorCode  # on_data reads the factor list from the context

    # Hyper-parameters:
    context.Len = 21    # window length in trading days (about one month, 250/12); on_data returns early until this many bars were seen
    context.Num = 0   # number of bars seen so far

    # Sensitive hyper-parameters that need tuning
    context.upper_pos = 80  # upper percentile of predicted returns; stocks above it are bought
    context.down_pos = 20   # lower percentile of predicted returns; held stocks below it are sold
    context.cash_rate = 0.6  # numerator of the per-stock cash fraction; the more stocks with positive predicted return, the smaller the fraction

    # Rebalance only on the first trading day of each month: keep the days
    # whose month differs from the previous trading day's month.
    days = get_trading_days('SSE', '2016-01-01', '2018-09-30')
    months = np.vectorize(lambda x: x.month)(days)
    month_begin = days[pd.Series(months) != pd.Series(months).shift(1)]
    context.month_begin = pd.Series(month_begin).dt.strftime('%Y-%m-%d').tolist()
 83 | 
 84 | 
 85 | def on_data(context):
 86 |     context.Num = context.Num + 1
 87 |     if context.Num < context.Len:  # 如果交易日个数小于Len+1，则进入下一个交易日进行回测
 88 |         return
 89 |     if datetime.datetime.strftime(context.now, '%Y-%m-%d') not in context.month_begin:  # 调仓频率为月,月初开始调仓
 90 |         return
 91 | 
 92 |     # 获取数据：
 93 |     KData = get_reg_kdata(reg_idx=context.reg_kdata[0], length=context.Len, fill_up=True, df=True)
 94 |     FData = get_reg_factor(reg_idx=context.reg_factor[0], target_indices=[x for x in range(300)], length=context.Len,
 95 |                            df=True)  # 获取因子数据
 96 | 
 97 |     # 特征构建：
 98 |     Fcode = context.FactorCode  # 标签不需要代号了
 99 | 
100 |     # 数据存储变量：
101 |     # Close 字段为标签，Fcode 为标签
102 |     FactorData = pd.DataFrame(columns=(['idx', 'benefit'] + Fcode))  # 存储训练特征及标签样本
103 |     FactorDataTest = pd.DataFrame(columns=(['idx'] + Fcode))       # 存储预测特征样本
104 | 
105 |     # K线数据序号对齐
106 |     tempIdx = KData[KData['time'] == KData['time'][0]]['target_idx'].reset_index(drop=True)
107 | 
108 |     # 按标的处理数据：
109 |     for i in range(300):
110 |         # 训练特征集及训练标签构建：
111 |         # 临时数据存储变量:
112 |         FactorData0 = pd.DataFrame(np.full([1, len(Fcode) + 2], np.nan),
113 |             columns=(['idx', 'benefit'] + Fcode))
114 |         # 存储预测特征样本
115 |         FactorDataTest0 = pd.DataFrame(np.full([1, len(Fcode) + 1], np.nan), columns=(['idx'] + Fcode))
116 | 
117 |         # 因子数据 序号对齐, 提取当前标的的因子数据
118 |         FData0 = FData[FData['target_idx'] == tempIdx[i]].reset_index(drop=True)
119 | 
120 |         # 按特征处理数据：
121 |         for FC in context.FactorCode:
122 |             # 提取当前标的中与当前因子FC相同的部分
123 |             FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True)
124 |             FactorData0[FC] = FCData[0]  # 存储上一个月初的股票因子数据
125 | 
126 |         # 按标签处理数据：
127 |         # 提取当前标的的前一个月的K线面板数据
128 |         close = np.array(KData[KData['target_idx'] == tempIdx[i]]['close'])
129 |         # 计算当前标的在上一个月的收益率
130 |         benefit = (close[context.Len - 1] - close[0]) / close[0]
131 | 
132 |         FactorData0['benefit'] = benefit
133 |         # idx: 建立当前标的在训练样本集中的索引
134 |         FactorData0['idx'] = tempIdx[i]
135 |         # 合并数据：组成训练样本
136 |         FactorData = FactorData.append(FactorData0, ignore_index=True)
137 | 
138 |         # 预测特征集构建：建立标的索引
139 |         FactorDataTest0['idx'] = tempIdx[i]
140 |         # 按特征处理数据，过程同建立训练特征
141 |         for FC in context.FactorCode:
142 |             FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True)
143 |             FactorDataTest0[FC] = FCData[context.Len - 1]
144 | 
145 |         # 合并测试数据
146 |         FactorDataTest = FactorDataTest.append(FactorDataTest0, ignore_index=True)
147 | 
148 |     """
149 |     训练集和测试集的表头字段如下
150 |     FactorData DataFrame:
151 |     idx  |  benefit |  Factor 1 | Factor 2| ....
152 |     benefit 作为标签，上月初Factor作为特征，此处是单因子测试，只有一个特征
153 |     FactorDataTest DataFrame: 
154 |     idx | Factor 1 | Factor 2 | ...
155 |     本月初的因子作为预测特征
156 |     """
157 | 
158 |     # 数据清洗：
159 |     FactorData = FactorData.dropna(axis=0, how='any').reset_index(drop=True)  # 清洗数据
160 |     FactorDataTest = FactorDataTest.dropna(axis=0, how='any').reset_index(drop=True)  # 清洗数据
161 |     Idx = FactorDataTest['idx']  # 剩余标的序号
162 | 
163 |     # 按特征进行预处理
164 |     for Factor in context.FactorCode:
165 |         FactorData = filter_MAD(FactorData, Factor, 5)  # 中位数去极值法
166 |         FactorData[Factor] = preprocessing.scale(FactorData[Factor])  # 标准化
167 | 
168 |         FactorDataTest = filter_MAD(FactorDataTest, Factor, 5)  # 中位数去极值法
169 |         FactorDataTest[Factor] = preprocessing.scale(FactorDataTest[Factor])  # 标准化
170 | 
171 |     # print(FactorData.head(1))
172 |     # print(FactorDataTest.head(1))
173 | 
174 |     # 训练和预测特征构建：# 行（样本数）* 列（特征数）
175 |     X = np.ones([FactorData.shape[0], len(Fcode)])
176 |     Xtest = np.ones([FactorDataTest.shape[0], len(Fcode)])
177 | 
178 |     # 循环填充特征到numpy数组中
179 |     for i in range(X.shape[1]):
180 |         X[:, i] = FactorData[Fcode[i]]
181 |         Xtest[:, i] = FactorDataTest[Fcode[i]]
182 | 
183 |     # 训练样本的标签，为浮点数的收益率
184 |     Y = np.array(FactorData['benefit']).astype(float)
185 | 
186 |     random_forest = RandomForestRegressor(max_depth=5, n_estimators=50)
187 | 
188 |     # 模型训练：
189 |     random_forest.fit(X, Y)
190 | 
191 |     # LR分类预测：
192 |     y = random_forest.predict(Xtest)
193 |     # 交易设置：
194 |     positions = context.account().positions['volume_long']  # 多头持仓数量
195 |     valid_cash = context.account(account_idx=0).cash['valid_cash'][0]  # 可用资金
196 | 
197 |     P = context.cash_rate / (sum(y > 0) + 1)  # 设置每只标的可用资金比例 + 1 防止分母为0
198 | 
199 |     # 获取收益率的高分位数和低分位数
200 |     low_return, high_return = np.percentile(y, [context.down_pos, context.upper_pos])
201 | 
202 |     for i in range(len(Idx)):
203 |         position = positions.iloc[Idx[i]]
204 |         if position == 0 and y[i] > high_return and valid_cash > 0:  # 当前无仓，且该股票收益大于高70%分位数，则开仓，买入
205 |             # 开仓数量 + 1防止分母为0
206 |             # print(valid_cash, P, KData['close'][Idx[i]])  # 这里的数目可考虑减少一点，，有时太多有时太少
207 |             Num = int(math.floor(valid_cash * P / 100 / (KData['close'][Idx[i]] + 1)) * 100)
208 | 
209 |             # 控制委托量，不要过大或过小,需要保证是100的倍数
210 |             if Num < 1000:
211 |                 Num *= 10
212 |             if Num > 100000:
213 |                 Num = int(Num / 10)
214 |                 Num -= Num % 100
215 |             if Num <= 0:  # 不开仓
216 |                 continue
217 | 
218 |             print("开仓数量为：{}".format(Num))
219 |             order_id = order_volume(account_idx=0, target_idx=int(Idx[i]), volume=Num, side=1, position_effect=1, order_type=2,
220 |                          price=0)  # 指定委托量开仓
221 |             # 对订单号为order_id的委托单设置止损，止损距离10个整数点，触发时，委托的方式用市价委托
222 |             # stop_loss_by_order(target_order_id=order_id, stop_type=1, stop_gap=10, order_type=2)
223 |         # elif position > 0 and y[i] == False: #预测结果为false(收益率<0)，卖出
224 |         elif position > 0 and y[i] < low_return:  # 当前持仓，且该股票收益小于低30%分位数，则平仓，卖出
225 |             print("平仓，数量为: {}".format(position / 10))
226 |             order_volume(account_idx=0, target_idx=int(Idx[i]), volume=int(position/10),
227 |                          side=2, position_effect=2, order_type=2, price=0)  # 指定委托量平仓
228 | 
229 | 
if __name__ == '__main__':
    # Backtest entry point for the random-forest regression strategy.
    file_path = 'random_forest_reg.py'
    # Index block whose constituents form the backtest universe.  The
    # strategy body in this file only processes target indices 0..299
    # (``range(300)``), so the universe is taken from this variable; the
    # previous call hard-coded 'zz500' and left ``block`` unused.
    block = 'hs300'

    begin_date = '2016-01-01'
    end_date = '2018-09-30'

    strategy_name = 'random_forest_reg'

    # Constituent list is resolved as of the first backtest day.
    run_backtest(strategy_name=strategy_name, file_path=file_path,
                 target_list=list(get_code_list(block, date=begin_date)['code']),
                 frequency='day', fre_num=1, begin_date=begin_date, end_date=end_date, fq=1)
242 | 


--------------------------------------------------------------------------------
/references/2011年金融工程研讨会专题报告系列之二：大浪淘金，Alpha因子何处寻？.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JoshuaQYH/TIDIBEI/bae9a29662ecec55a9a9f1a9aa75c1c4de053a8c/references/2011年金融工程研讨会专题报告系列之二：大浪淘金，Alpha因子何处寻？.pdf


--------------------------------------------------------------------------------
/references/A.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JoshuaQYH/TIDIBEI/bae9a29662ecec55a9a9f1a9aa75c1c4de053a8c/references/A.pdf


--------------------------------------------------------------------------------
/references/A股Alpha策略及产品回顾与展望——2018年金融工程年度报告.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JoshuaQYH/TIDIBEI/bae9a29662ecec55a9a9f1a9aa75c1c4de053a8c/references/A股Alpha策略及产品回顾与展望——2018年金融工程年度报告.pdf


--------------------------------------------------------------------------------
/references/A题—通过机器学习优化股票多因子模型解题指引.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JoshuaQYH/TIDIBEI/bae9a29662ecec55a9a9f1a9aa75c1c4de053a8c/references/A题—通过机器学习优化股票多因子模型解题指引.pdf


--------------------------------------------------------------------------------
/references/SA20190100000_36930159.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JoshuaQYH/TIDIBEI/bae9a29662ecec55a9a9f1a9aa75c1c4de053a8c/references/SA20190100000_36930159.pdf


--------------------------------------------------------------------------------
/references/人工智能选股框架及经典算法简介.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JoshuaQYH/TIDIBEI/bae9a29662ecec55a9a9f1a9aa75c1c4de053a8c/references/人工智能选股框架及经典算法简介.pdf


--------------------------------------------------------------------------------
/references/华泰证券-多因子系列之一：华泰多因子模型体系初探-160921.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JoshuaQYH/TIDIBEI/bae9a29662ecec55a9a9f1a9aa75c1c4de053a8c/references/华泰证券-多因子系列之一：华泰多因子模型体系初探-160921.pdf


--------------------------------------------------------------------------------
/references/单因子测试.PDF:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JoshuaQYH/TIDIBEI/bae9a29662ecec55a9a9f1a9aa75c1c4de053a8c/references/单因子测试.PDF


--------------------------------------------------------------------------------
/references/收益预测模型.PDF:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JoshuaQYH/TIDIBEI/bae9a29662ecec55a9a9f1a9aa75c1c4de053a8c/references/收益预测模型.PDF


--------------------------------------------------------------------------------
/references/风险模型与组合优化.PDF:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JoshuaQYH/TIDIBEI/bae9a29662ecec55a9a9f1a9aa75c1c4de053a8c/references/风险模型与组合优化.PDF


--------------------------------------------------------------------------------
/run_test.bat:
--------------------------------------------------------------------------------
  1 | :: 添加因子ID，进行单因子回测
  2 | 
  3 | :: 基础科目衍生类（全部） --------------------------------------
  4 | ::python single_factor_test.py NetWorkingCapital
  5 | ::python single_factor_test.py NetDebt
  6 | ::python single_factor_test.py RetainedEarnings
  7 | ::python single_factor_test.py GrossProfit
  8 | ::python single_factor_test.py FCFF
  9 | ::python single_factor_test.py TotalPaidinCapital
 10 | ::python single_factor_test.py IntFreeNCL
 11 | ::python single_factor_test.py IntFreeCL
 12 | ::python single_factor_test.py EBIAT
 13 | ::python single_factor_test.py EBIT
 14 | ::python single_factor_test.py EBITDA
 15 | ::python single_factor_test.py NIAPCut
 16 | ::python single_factor_test.py WorkingCapital
 17 | ::python single_factor_test.py IntDebt
 18 | ::python single_factor_test.py IntCL
 19 | ::python single_factor_test.py NRProfitLoss
 20 | ::python single_factor_test.py FCFE
 21 | ::python single_factor_test.py TotalFixedAssets
 22 | ::python single_factor_test.py ValueChgProfit
 23 | ::python single_factor_test.py OperateNetIncome
 24 | ::python single_factor_test.py DA
 25 | ::python single_factor_test.py NetIntExpense
 26 | ::python single_factor_test.py NetTangibleAssets
 27 | ::python single_factor_test.py TEAP
 28 | ::python single_factor_test.py ASSI
 29 | ::python single_factor_test.py TotalAssets
 30 | ::python single_factor_test.py CostTTM
 31 | ::python single_factor_test.py OperateProfitTTM
 32 | ::python single_factor_test.py RevenueTTM
 33 | ::python single_factor_test.py TCostTTM
 34 | ::python single_factor_test.py TRevenueTTM
 35 | ::python single_factor_test.py AssetImpairLossTTM
 36 | ::python single_factor_test.py TProfitTTM
 37 | ::python single_factor_test.py NonOperatingNPTTM
 38 | ::python single_factor_test.py GrossProfitTTM
 39 | ::python single_factor_test.py AdminExpenseTTM
 40 | ::python single_factor_test.py NetProfitAPTTM
 41 | ::python single_factor_test.py NetProfitTTM
 42 | ::python single_factor_test.py FinanExpenseTTM
 43 | ::python single_factor_test.py SalesExpenseTTM
 44 | ::python single_factor_test.py NetInvestCFTTM
 45 | ::python single_factor_test.py NetOperateCFTTM
 46 | ::python single_factor_test.py NetFinanceCFTTM
 47 | ::python single_factor_test.py SaleServiceRenderCashTTM
 48 | ::python single_factor_test.py NIAP
 49 | ::python single_factor_test.py COperAdelpct
 50 | ::python single_factor_test.py COperAdelQpct
 51 | ::python single_factor_test.py COperApct
 52 | ::python single_factor_test.py COperATTMpct
 53 | ::python single_factor_test.py COperDdelpct
 54 | ::python single_factor_test.py COperDdelQpct
 55 | ::python single_factor_test.py COperDpct
 56 | ::python single_factor_test.py COperDTTMpct
 57 | ::python single_factor_test.py COnonperDdelpct
 58 | ::python single_factor_test.py COnonperDdelQpct
 59 | ::python single_factor_test.py COnonperDpct
 60 | ::python single_factor_test.py COnonperDTTMpct
 61 | ::python single_factor_test.py COnonperAdelpct
 62 | ::python single_factor_test.py COnonperAdelQpct
 63 | ::python single_factor_test.py COnonperApct
 64 | ::python single_factor_test.py COnonperATTMpct
 65 | 
 66 | 
 67 | 
 68 | :: 质量类（全部） ---------------------------------------------
 69 | ::python single_factor_test.py DebtEquityRatio
 70 | ::python single_factor_test.py SuperQuickRatio
 71 | ::python single_factor_test.py NonCurrentAssetsRatio
 72 | ::python single_factor_test.py EquityToAsset
 73 | ::python single_factor_test.py EquityFixedAssetRatio
 74 | ::python single_factor_test.py FixAssetRatio
 75 | ::python single_factor_test.py CurrentRatio
 76 | ::python single_factor_test.py CurrentAssetsRatio
 77 | ::python single_factor_test.py QuickRatio
 78 | ::python single_factor_test.py IntangibleAssetRatio
 79 | ::python single_factor_test.py BondsPayableToAsset
 80 | ::python single_factor_test.py DebtsAssetRatio
 81 | ::python single_factor_test.py LongDebtToWorkingCapital
 82 | ::python single_factor_test.py LongTermDebtToAsset
 83 | ::python single_factor_test.py LongDebtToAsset
 84 | ::python single_factor_test.py BLEV
 85 | ::python single_factor_test.py DebtTangibleEquityRatio
 86 | ::python single_factor_test.py CashToCurrentLiability
 87 | ::python single_factor_test.py OperCashInToCurrentLiability
 88 | ::python single_factor_test.py CurrentAssetsTRate
 89 | ::python single_factor_test.py AccountsPayablesTRate
 90 | ::python single_factor_test.py ROA
 91 | ::python single_factor_test.py NOCFToTLiability
 92 | ::python single_factor_test.py OperCashInToAsset
 93 | ::python single_factor_test.py MLEV
 94 | ::python single_factor_test.py TSEPToTotalCapital
 95 | ::python single_factor_test.py TotalAssetsTRate
 96 | ::python single_factor_test.py EquityTRate
 97 | ::python single_factor_test.py FinancialExpenseRate
 98 | ::python single_factor_test.py TotalProfitCostRatio
 99 | ::python single_factor_test.py AdminiExpenseRate
100 | ::python single_factor_test.py NPToTOR
101 | ::python single_factor_test.py SalesCostRatio
102 | ::python single_factor_test.py NetProfitRatio
103 | ::python single_factor_test.py GrossIncomeRatio
104 | ::python single_factor_test.py TaxRatio
105 | ::python single_factor_test.py OperatingExpenseRate
106 | ::python single_factor_test.py OperatingProfitRatio
107 | ::python single_factor_test.py OperatingProfitToTOR
108 | ::python single_factor_test.py EBITToTOR
109 | ::python single_factor_test.py NetNonOIToTP
110 | ::python single_factor_test.py ROAEBITTTM
111 | ::python single_factor_test.py ROE
112 | ::python single_factor_test.py InventoryTRate
113 | ::python single_factor_test.py FixedAssetsTRate
114 | ::python single_factor_test.py NOCFToOperatingNI
115 | ::python single_factor_test.py CashRateOfSales
116 | ::python single_factor_test.py SaleServiceCashToOR
117 | ::python single_factor_test.py SalesServiceCashToORLatest
118 | ::python single_factor_test.py CashRateOfSalesLatest
119 | ::python single_factor_test.py NetNonOIToTPLatest
120 | ::python single_factor_test.py PeriodCostsRate
121 | ::python single_factor_test.py InvestRAssociatesToTP
122 | ::python single_factor_test.py InvestRAssociatesToTPLatest
123 | ::python single_factor_test.py DividendCover
124 | ::python single_factor_test.py OperatingNIToTPLatest
125 | ::python single_factor_test.py NPCutToNP
126 | ::python single_factor_test.py OperatingNIToTP
127 | ::python single_factor_test.py DividendPaidRatio
128 | ::python single_factor_test.py RetainedEarningRatio
129 | ::python single_factor_test.py DEGM
130 | ::python single_factor_test.py ACCA
131 | ::python single_factor_test.py CFO2EV
132 | ::python single_factor_test.py NOCFToOperatingNILatest
133 | ::python single_factor_test.py NOCFToNetDebt
134 | ::python single_factor_test.py NetProfitCashCover
135 | ::python single_factor_test.py InventoryTDays
136 | ::python single_factor_test.py OperatingCycle
137 | ::python single_factor_test.py AccountsPayablesTDays
138 | ::python single_factor_test.py ARTRate
139 | ::python single_factor_test.py ARTDays
140 | ::python single_factor_test.py CashConversionCycle
141 | ::python single_factor_test.py InteBearDebtToTotalCapital
142 | ::python single_factor_test.py TangibleAToInteBearDebt
143 | ::python single_factor_test.py TangibleAToNetDebt
144 | ::python single_factor_test.py TSEPToInterestBearDebt
145 | ::python single_factor_test.py NOCFToInterestBearDebt
146 | ::python single_factor_test.py InterestCover
147 | ::python single_factor_test.py ROIC
148 | ::python single_factor_test.py ROEDiluted
149 | ::python single_factor_test.py ROEAvg
150 | ::python single_factor_test.py ROECut
151 | ::python single_factor_test.py ROECutWeighted
152 | ::python single_factor_test.py ROEWeighted
153 | ::python single_factor_test.py ROAEBIT
154 | ::python single_factor_test.py ROE5
155 | ::python single_factor_test.py ROA5
156 | 
157 | 
158 | 
159 | :: 收益风险类（部分） ------------------------------------
160 | :: 60日方差
161 | ::python single_factor_test.py Variance60
162 | :: 股价偏度
163 | ::python single_factor_test.py Skewness20
164 | :: 历史波动
165 | ::python single_factor_test.py HSIGMA
166 | :: 20日信息比率
167 | ::python single_factor_test.py InformationRatio20
168 | :: 20日夏普率
169 | ::python single_factor_test.py Sharperatio20
170 | :: 超额收益标准差
171 | ::python single_factor_test.py DASTD
172 | :: 股权向后复权因子
173 | ::python single_factor_test.py BackwardADJ
174 | :: 个股收益的120日峰度
175 | ::python single_factor_test.py Kurtosis120
176 | :: 个股收益的20日峰度
177 | ::python single_factor_test.py Kurtosis20
178 | :: 下跌贝塔
179 | ::python single_factor_test.py DDNBT
180 | :: 下跌相关系数
181 | ::python single_factor_test.py DDNCR
182 | :: 下跌波动
183 | ::python single_factor_test.py DDNSR
184 | :: 60日信息比率
185 | ::python single_factor_test.py InformationRatio60
186 | :: 60日夏普率
187 | ::python single_factor_test.py Sharperatio60
188 | 
189 | 
190 | 
191 | :: 情绪类（部分） --------------------------------
192 | :: 20日成交金额的移动平均值
193 | ::python single_factor_test.py TVMA20
194 | :: 20日平均换手率
195 | ::python single_factor_test.py VOL20
196 | :: 20日成交量标准差
197 | ::python single_factor_test.py VSTD20
198 | :: 正成交量指标
199 | ::python single_factor_test.py PVI
200 | :: 成交量比率
201 | ::python single_factor_test.py VR
202 | :: 20日资金流量
203 | ::python single_factor_test.py MONEYFLOW20
204 | :: 20日收集派发指标
205 | ::python single_factor_test.py ACD20
206 | :: 人气指标
207 | ::python single_factor_test.py AR
208 | :: 20日能量潮指标
209 | ::python single_factor_test.py OBV20
210 | :: 阶段强势指标
211 | ::python single_factor_test.py JDQS20
212 | :: 资本利得突出量
213 | ::python single_factor_test.py CGO_10
214 | :: 显著性因子 20
215 | ::python single_factor_test.py ST_20
216 | :: 综合效用因子 20
217 | ::python single_factor_test.py TK_20
218 | :: 抢跑因子
219 | ::python single_factor_test.py FR_pure
220 | 
221 | 
222 | 
223 | ::成长类因子（部分） --------------------------------
224 | ::python single_factor_test.py OperatingRevenueGrowRate
225 | ::python single_factor_test.py TotalAssetGrowRate
226 | ::python single_factor_test.py EGRO
227 | ::python single_factor_test.py FinancingCashGrowRate
228 | ::python single_factor_test.py NPParentCompanyGrowRate
229 | ::python single_factor_test.py NetProfitGrowRate
230 | ::python single_factor_test.py SGRO
231 | ::python single_factor_test.py TotalProfitGrowRate
232 | 
233 | 
234 | 
235 | ::常用技术指标因子（部分） --------------------------------
236 | ::python single_factor_test.py MA10
237 | ::python single_factor_test.py MA120
238 | ::python single_factor_test.py MTM
239 | ::python single_factor_test.py DBCD
240 | ::python single_factor_test.py EMA12
241 | ::python single_factor_test.py CR20
242 | ::python single_factor_test.py UOS
243 | ::python single_factor_test.py CHAIKINVOLATILITY
244 | 
245 | 
246 | 
247 | ::动量类因子（部分） --------------------------------
248 | ::python single_factor_test.py BIAS20
249 | ::python single_factor_test.py CMO
250 | ::python single_factor_test.py PVT
251 | ::python single_factor_test.py CCI5
252 | ::python single_factor_test.py SRMI
253 | ::python single_factor_test.py CMOSD
254 | ::python single_factor_test.py BEARPOWER
255 | ::python single_factor_test.py AD
256 | 
257 | 
258 | 
259 | ::价值类因子（全部） -----------------------------------
260 | ::python single_factor_test.py NegMktValue
261 | ::python single_factor_test.py PE
262 | ::python single_factor_test.py PB
263 | ::python single_factor_test.py PS
264 | ::python single_factor_test.py MktValue
265 | ::python single_factor_test.py PCF
266 | ::python single_factor_test.py LFLO
267 | ::python single_factor_test.py LCAP
268 | ::python single_factor_test.py NLSIZE
269 | ::python single_factor_test.py ForwardPE
270 | ::python single_factor_test.py StaticPE
271 | ::python single_factor_test.py ETOP
272 | ::python single_factor_test.py CETOP
273 | ::python single_factor_test.py PEG3Y
274 | ::python single_factor_test.py PEG5Y
275 | ::python single_factor_test.py CTOP
276 | ::python single_factor_test.py TA2EV
277 | ::python single_factor_test.py ETP5
278 | ::python single_factor_test.py CTP5
279 | 
280 | 
281 | :: 模式识别类（部分） -----------------------------------
282 | :: 藏婴吞没（CDLCONCEALBABYSWALL）
283 | ::python single_factor_test.py CDLCONCEALBABYSWALL
284 | :: 射击之星（CDLSHOOTINGSTAR）
285 | ::python single_factor_test.py CDLSHOOTINGSTAR
286 | :: 十字暮星（CDLEVENINGDOJISTAR）
287 | ::python single_factor_test.py CDLEVENINGDOJISTAR
288 | :: 吞噬模式（CDLENGULFING）
289 | ::python single_factor_test.py CDLENGULFING
290 | :: 刺透形态（CDLPIERCING）
291 | ::python single_factor_test.py CDLPIERCING
292 | :: 倒锤头（CDLINVERTEDHAMMER）
293 | ::python single_factor_test.py CDLINVERTEDHAMMER
294 | 
295 | 
296 | :: 每股指标类（部分） ----------------------------------
297 | :: 基本每股收益（BasicEPS）
298 | ::python single_factor_test.py BasicEPS
299 | :: 每股收益TTM值（EPS）
300 | ::python single_factor_test.py EPS
301 | :: 每股净资产（NetAssetPS）
302 | ::python single_factor_test.py NetAssetPS
303 | :: 每股营业总收入（TORPS）
304 | ::python single_factor_test.py TORPS
305 | :: 每股营业利润（OperatingProfitPS）
306 | ::python single_factor_test.py OperatingProfitPS
307 | :: 每股息税前利润（EBITPS）
308 | ::python single_factor_test.py EBITPS
309 | :: 每股现金流量净额（CashFlowPS）
310 | ::python single_factor_test.py CashFlowPS
311 | :: 每股企业自由现金流量（EnterpriseFCFPS）
312 | ::python single_factor_test.py EnterpriseFCFPS
313 | 
314 | 
315 | :: 行业与分析师类（部分） ------------------------------
316 | :: 12月相对强势(RSTR12)
317 | ::python single_factor_test.py RSTR12
318 | :: 24月相对强势(RSTR24)
319 | ::python single_factor_test.py RSTR24
320 | :: 分析师盈利预测（FY12P）
321 | ::python single_factor_test.py FY12P
322 | :: 分析师营收预测（SFY12P）
323 | ::python single_factor_test.py SFY12P
324 | :: （PB–PB的行业均值）/PB的行业标准差（PBIndu）
325 | ::python single_factor_test.py PBIndu
326 | :: （PCF–PCF的行业均值）/PCF的行业标准差（PCFIndu）
327 | ::python single_factor_test.py PCFIndu
328 | ::（PE–PE的行业均值）/PE的行业标准差（PEIndu）
329 | ::python single_factor_test.py PEIndu
330 | :: （PS–PS的行业均值）/PS的行业标准差（PSIndu）
331 | ::python single_factor_test.py PSIndu
332 | :: 投资回报率预测（EPIBS）
333 | ::python single_factor_test.py EPIBS
334 | :: 未来预期盈利增长（FEARNG）
335 | ::python single_factor_test.py FEARNG
336 | :: 未来预期盈收增长（FSALESG）
337 | ::python single_factor_test.py FSALESG
338 | :: 长期盈利增长预测（EgibsLong）
339 | ::python single_factor_test.py EgibsLong
340 | 
341 | ::python get_factor_report.py 行业与分析师类
342 | 
343 | :: 特色技术指标（部分） --------------------------------
344 | :: 绝对价格振荡器(APO)
345 | ::python single_factor_test.py APO
346 | :: 平均价格(AVGPRICE)
347 | ::python single_factor_test.py AVGPRICE
348 | :: 均势指标(BOP)
349 | ::python single_factor_test.py BOP
350 | :: 考夫曼自适应移动平均线（KAMA）
351 | ::python single_factor_test.py KAMA
352 | :: 线性回归（LINEARREG）
353 | ::python single_factor_test.py LINEARREG
354 | :: 标准差(STDDEV)
355 | ::python single_factor_test.py STDDEV
356 | :: 时间序列预测（TSF）
357 | ::python single_factor_test.py TSF


--------------------------------------------------------------------------------
/same_weight_model.py:
--------------------------------------------------------------------------------
  1 | """
  2 | ----------------------------------------------------------
  3 | 策略思路：
  4 | 1. 回测标的：沪深300成分股
  5 | 2. 回测时间段：2016-01-01 至 2018-09-30
  6 | 3. 特征选择：每个大类夏普率最高的因子+夏普率高于1.5的因子
  7 |     - 质量类：ROIC, CashToCurrentLiability
  8 |     - 特色技术指标：STDDEV
  9 |     - 收益风险：DDNCR
 10 |     - 情绪类：TVMA20/PVI
 11 |     - 每股指标类：EnterpriseFCFPS
 12 |     - 价值类：PS
 13 |     - 基础类：AdminExpenseTTM, FinanExpenseTTM, NetIntExpense, GrossProfit/NIAP
 14 |     - 行业分析师：FY12P
 15 |     - 动量类：AD
 16 |     - 成长类：TotalAssetGrowRate
 17 |     - 常用技术类：MA120
 18 | ... 其余逻辑参照single_factor_test.py
 19 | 
 20 | ----------------------------------------------------------
 21 | """
 22 | from atrader import *
 23 | import pandas as pd
 24 | import numpy as np
 25 | from sklearn.ensemble import RandomForestRegressor
 26 | import math
 27 | from sklearn import preprocessing
 28 | import datetime
 29 | 
 30 | # 作为全局变量进行测试
 31 | FactorCode = ['ROIC', 'CashToCurrentLiability', 'STDDEV', 'DDNCR', 'PVI', 'EnterpriseFCFPS',
 32 |               'PS', 'AdminExpenseTTM', 'FinanExpenseTTM', 'NetIntExpense', 'NIAP', 'FY12P',
 33 |               'AD', 'TotalAssetGrowRate', 'MA120']
 34 | 
 35 | # 中位数去极值法
 36 | def filter_MAD(df, factor, n=3):
 37 |     """
 38 |     :param df: 去极值的因子序列
 39 |     :param factor: 待去极值的因子
 40 |     :param n: 中位数偏差值的上下界倍数
 41 |     :return: 经过处理的因子dataframe
 42 |     """
 43 |     median = df[factor].quantile(0.5)
 44 |     new_median = ((df[factor] - median).abs()).quantile(0.5)
 45 |     max_range = median + n * new_median
 46 |     min_range = median - n * new_median
 47 | 
 48 |     for i in range(df.shape[0]):
 49 |         if df.loc[i, factor] > max_range:
 50 |             df.loc[i, factor] = max_range
 51 |         elif df.loc[i, factor] < min_range:
 52 |             df.loc[i, factor] = min_range
 53 |     return df
 54 | 
 55 | 
 56 | def init(context):
 57 |     # 账号设置：设置初始资金为 10000000 元
 58 |     set_backtest(initial_cash=10000000, future_cost_fee=1.0, stock_cost_fee=30, margin_rate=1.0, slide_price=0.0,
 59 |                  price_loc=1, deal_type=0, limit_type=0)
 60 |     # 注册数据：日频数据
 61 |     reg_kdata('day', 1)
 62 |     global FactorCode  # 全局单因子代号
 63 |     reg_factor(factor=FactorCode)
 64 | 
 65 |     context.FactorCode = FactorCode  #
 66 | 
 67 |     # 超参数设置：
 68 |     context.Len = 21  # 时间长度: 当交易日个数小于该事件长度时，跳过该交易日，假设平均每个月 21 个交易日左右  250/12
 69 |     context.Num = 0  # 记录当前交易日个数
 70 | 
 71 |     # 较敏感的超参数，需要调节
 72 |     context.upper_pos = 85  # 股票预测收益率的上分位数，高于则买入
 73 |     context.down_pos = 10   # 股票预测收益率的下分位数，低于则卖出
 74 |     context.cash_rate = 0.6  # 计算可用资金比例的分子，利益大于0的股票越多，比例越小
 75 | 
 76 |     # 确保月初调仓
 77 |     days = get_trading_days('SSE', '2016-01-01', '2018-09-30')
 78 |     months = np.vectorize(lambda x: x.month)(days)
 79 |     month_begin = days[pd.Series(months) != pd.Series(months).shift(1)]
 80 |     context.month_begin = pd.Series(month_begin).dt.strftime('%Y-%m-%d').tolist()
 81 | 
 82 | 
def on_data(context):
    """
    Monthly rebalancing step of the equal-weight multi-factor strategy.

    On a month-start trading day (and once at least ``context.Len`` bars have
    been seen) the function collects the latest value of every registered
    factor for each of the 300 targets, winsorizes and standardizes the
    factors, scores each stock by the average of its standardized factors,
    and then buys unheld stocks whose score is above the upper percentile
    and sells part of held stocks whose score is below the lower percentile.

    :param context: strategy context object; reads the attributes set in ``init``
    :return: None
    """
    context.Num = context.Num + 1
    if context.Num < context.Len:  # fewer than Len bars seen so far: wait for the next bar
        return
    if datetime.datetime.strftime(context.now, '%Y-%m-%d') not in context.month_begin:  # rebalance monthly, on month-start days only
        return

    # Fetch data:
    KData = get_reg_kdata(reg_idx=context.reg_kdata[0], length=context.Len, fill_up=True, df=True)
    FData = get_reg_factor(reg_idx=context.reg_factor[0], target_indices=[x for x in range(300)], length=context.Len,
                           df=True)  # factor data

    # Feature construction:
    Fcode = context.FactorCode  # factor codes used as feature columns

    # Storage: one row per target, columns are 'idx' plus the factor codes
    FactorDataTest = pd.DataFrame(columns=(['idx'] + Fcode))       # prediction feature samples

    # Align target indices with the K-line data: targets present at the first timestamp
    tempIdx = KData[KData['time'] == KData['time'][0]]['target_idx'].reset_index(drop=True)

    # Process target by target:
    for i in range(300):
        # One-row frame holding this target's prediction features
        FactorDataTest0 = pd.DataFrame(np.full([1, len(Fcode) + 1], np.nan), columns=(['idx'] + Fcode))
        # Extract the factor rows belonging to the current target
        FData0 = FData[FData['target_idx'] == tempIdx[i]].reset_index(drop=True)
        # Record the target index
        FactorDataTest0['idx'] = tempIdx[i]
        # For every factor take the last value of the window (position Len - 1)
        for FC in context.FactorCode:
            FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True)
            FactorDataTest0[FC] = FCData[context.Len - 1]

        # Append this target's row
        # NOTE(review): DataFrame.append is not available in pandas >= 2.0 — confirm the pinned pandas version
        FactorDataTest = FactorDataTest.append(FactorDataTest0, ignore_index=True)

    # The bare string below is a no-op note (in Chinese) describing the column
    # layout of training/prediction frames; only FactorDataTest exists in this
    # function — there is no FactorData or 'benefit' label here.
    """
    训练集和测试集的表头字段如下
    FactorData DataFrame:
    idx  |  benefit |  Factor 1 | Factor 2| ....
    benefit 作为标签，上月初Factor作为特征，此处是单因子测试，只有一个特征
    FactorDataTest DataFrame: 
    idx | Factor 1 | Factor 2 | ...
    本月初的因子作为预测特征
    """

    # Data cleaning: drop targets with any missing factor
    FactorDataTest = FactorDataTest.dropna(axis=0, how='any').reset_index(drop=True)  # clean data
    Idx = FactorDataTest['idx']  # indices of the remaining targets

    # Per-factor preprocessing
    for Factor in context.FactorCode:
        FactorDataTest = filter_MAD(FactorDataTest, Factor, 5)  # MAD winsorization with 5 MADs
        FactorDataTest[Factor] = preprocessing.scale(FactorDataTest[Factor])  # standardization

    # print(FactorData.head(1))
    # print(FactorDataTest.head(1))

    # Prediction feature matrix: rows (samples) x columns (factors)
    Xtest = np.ones([FactorDataTest.shape[0], len(Fcode)])

    # Copy each factor column into the numpy array
    for i in range(Xtest.shape[1]):
        Xtest[:, i] = FactorDataTest[Fcode[i]]

    # Row-wise mean of the standardized factors, divided by len(Fcode) once more.
    # NOTE(review): the extra division only rescales the score by a positive
    # constant; the sign and the percentile ranking used below are unaffected.
    y = np.average(Xtest, axis=1) / len(Fcode)  # average of each row's factor values

    # Trading setup:
    positions = context.account().positions['volume_long']  # long position volumes
    valid_cash = context.account(account_idx=0).cash['valid_cash'][0]  # available cash

    P = context.cash_rate / (sum(y > 0) + 1)  # cash fraction per target; + 1 avoids a zero denominator

    # Lower and upper percentile thresholds of the score
    low_return, high_return = np.percentile(y, [context.down_pos, context.upper_pos])

    for i in range(len(Idx)):
        position = positions.iloc[Idx[i]]
        # if position == 0 and y[i] == True and valid_cash > 0:
            # print('open')
        if position == 0 and y[i] > high_return and valid_cash > 0 and y[i] > 0:  # no holding, positive score above the upper percentile: buy
            # Order size in lots of 100; + 1 on the price avoids a zero denominator
            # NOTE(review): KData['close'][Idx[i]] selects by row label Idx[i] of KData,
            # not by target_idx — confirm that row is the intended target's latest close.
            # print(valid_cash, P, KData['close'][Idx[i]])
            Num = int(math.floor(valid_cash * P / 100 / (KData['close'][Idx[i]] + 1)) * 100)

            # Keep the order volume from being too small or too large while staying a multiple of 100
            if Num < 1000:
                Num *= 10
            if Num > 100000:
                Num = int(Num / 10)
                Num -= Num % 100
            if Num <= 0:  # do not open a position
                continue

            print("开仓数量为：{}".format(Num))
            order_id = order_volume(account_idx=0, target_idx=int(Idx[i]), volume=Num, side=1, position_effect=1, order_type=2,
                         price=0)  # open with the given volume
            # Optional stop-loss on the order just placed (disabled):
            # stop_loss_by_order(target_order_id=order_id, stop_type=1, stop_gap=10, order_type=2)
        # elif position > 0 and y[i] == False:
        elif position > 0 and y[i] < low_return:  # holding and score below the lower percentile: sell
            # NOTE(review): only one tenth of the held volume is sold here, not the whole position
            print("平仓，数量为: {}".format(position / 10))
            order_volume(account_idx=0, target_idx=int(Idx[i]), volume=int(position / 10),
                         side=2, position_effect=2, order_type=2, price=0)  # close with the given volume
189 | 
190 | 
if __name__ == '__main__':
    # Launch the equal-weight multi-factor backtest.
    strategy_name = 'same-weight-model'
    file_path = 'same_weight_model.py'

    # Universe: constituents of this index block as of the first backtest day.
    block = 'hs300'
    begin_date = '2016-01-01'
    end_date = '2018-09-30'

    run_backtest(
        strategy_name=strategy_name,
        file_path=file_path,
        target_list=list(get_code_list(block, date=begin_date)['code']),
        frequency='day',
        fre_num=1,
        begin_date=begin_date,
        end_date=end_date,
        fq=1,
    )
204 | 


--------------------------------------------------------------------------------
/single_factor_test.py:
--------------------------------------------------------------------------------
  1 | """
  2 | -------------------------------------------------------
  3 | 策略思路：
  4 | 1. 回测标的：沪深300成分股
  5 | 2. 回测时间段：2016-01-01 至 2018-09-30
  6 | 3. 特征选择：待测单因子
  7 | 4. 单因子回归测试模型思路：
  8 |     1. 先获得 21 天以上的K线数据和因子数据，预处理
  9 |     2. 使用上月初因子和上月收益率进行线性回归
 10 |     3. 使用单变量线性模型进行训练
 11 |     4. 回到当前时间点，使用本月初的因子作为预测样本特征，预测本月的各股票平均收益率的大小。
 12 | 5. 选股逻辑：
 13 |     将符合预测结果的股票按均等分配可用资金进行下单交易。持有一个月后 ，再次进行调仓，训练预测。
 14 | 6. 交易逻辑：
 15 |     每次调仓时，若当前有持仓，并且符合选股条件，则仓位不动；
 16 |                               若不符合选股条件，则对收益低的标的进行仓位平仓；
 17 |                 若当前无仓，并且符合选股条件，则多开仓，对收益高的标的进行开仓；
 18 |                             若不符合选股条件，则不开仓，无需操作。
 19 | 
 20 | ---------------------------------------------------------
 21 | 运行方法：
 22 | 1. 在 main 中定义同一类的因子列表。
 23 | 2. 逐个因子执行回测。
 24 | 3. 获取回测报告ID，通过ID获取绩效报告字段。
 25 | 4. 保留字段到CSV文件中。
 26 | """
 27 | 
 28 | from atrader import *
 29 | import pandas as pd
 30 | import numpy as np
 31 | from sklearn.linear_model import LinearRegression
 32 | import math
 33 | from sklearn import preprocessing
 34 | import datetime
 35 | import sys
 36 | 
 37 | # 作为全局变量进行测试
 38 | factor = sys.argv[1]
 39 | FactorCode = [factor]
 40 | print("传入因子参数为" + factor)
 41 | 
 42 | 
 43 | # 中位数去极值法
 44 | def filter_MAD(df, factor, n=3):
 45 |     """
 46 |     :param df: 去极值的因子序列
 47 |     :param factor: 待去极值的因子
 48 |     :param n: 中位数偏差值的上下界倍数
 49 |     :return: 经过处理的因子dataframe
 50 |     """
 51 |     median = df[factor].quantile(0.5)
 52 |     new_median = ((df[factor] - median).abs()).quantile(0.5)
 53 |     max_range = median + n * new_median
 54 |     min_range = median - n * new_median
 55 | 
 56 |     for i in range(df.shape[0]):
 57 |         if df.loc[i, factor] > max_range:
 58 |             df.loc[i, factor] = max_range
 59 |         elif df.loc[i, factor] < min_range:
 60 |             df.loc[i, factor] = min_range
 61 |     return df
 62 | 
 63 | 
 64 | def init(context):
 65 |     # 账号设置：设置初始资金为 10000000 元
 66 |     set_backtest(initial_cash=10000000, future_cost_fee=1.0, stock_cost_fee=30, margin_rate=1.0, slide_price=0.0,
 67 |                  price_loc=1, deal_type=0, limit_type=0)
 68 |     # 注册数据：日频数据
 69 |     reg_kdata('day', 1)
 70 |     global FactorCode  # 全局单因子代号
 71 |     reg_factor(factor=FactorCode)
 72 |     print("init 函数, 注册因子为{}".format(FactorCode[0]))
 73 |     context.FactorCode = FactorCode  #
 74 | 
 75 |     # 超参数设置：
 76 |     context.Len = 21    # 时间长度: 当交易日个数小于该事件长度时，跳过该交易日，假设平均每个月 21 个交易日左右  250/12
 77 |     context.Num = 0   # 记录当前交易日个数
 78 | 
 79 |     # 较敏感的超参数，需要调节
 80 |     context.upper_pos = 80  # 股票预测收益率的上分位数，高于则买入
 81 |     context.down_pos = 20   # 股票预测收益率的下分位数，低于则卖出
 82 |     context.cash_rate = 0.6  # 计算可用资金比例的分子，利益大于0的股票越多，比例越小
 83 | 
 84 |     # 确保月初调仓
 85 |     days = get_trading_days('SSE', '2016-01-01', '2018-09-30')
 86 |     months = np.vectorize(lambda x: x.month)(days)
 87 |     month_begin = days[pd.Series(months) != pd.Series(months).shift(1)]
 88 |     context.month_begin = pd.Series(month_begin).dt.strftime('%Y-%m-%d').tolist()
 89 | 
 90 |     
 91 | 
 92 | def on_data(context):
 93 |     context.Num = context.Num + 1
 94 |     if context.Num < context.Len:  # 如果交易日个数小于Len+1，则进入下一个交易日进行回测
 95 |         return
 96 |     if datetime.datetime.strftime(context.now, '%Y-%m-%d') not in context.month_begin:  # 调仓频率为月,月初开始调仓
 97 |         return
 98 | 
 99 |     # 获取数据：
100 |     KData = get_reg_kdata(reg_idx=context.reg_kdata[0], length=context.Len, fill_up=True, df=True)
101 |     FData = get_reg_factor(reg_idx=context.reg_factor[0], target_indices=[x for x in range(300)], length=context.Len,
102 |                            df=True)  # 获取因子数据
103 | 
104 |     # 特征构建：
105 |     Fcode = context.FactorCode  # 标签不需要代号了
106 | 
107 |     # 数据存储变量：
108 |     # Close 字段为标签，Fcode 为标签
109 |     FactorData = pd.DataFrame(columns=(['idx', 'benefit'] + Fcode))  # 存储训练特征及标签样本
110 |     FactorDataTest = pd.DataFrame(columns=(['idx'] + Fcode))       # 存储预测特征样本
111 | 
112 |     # K线数据序号对齐
113 |     tempIdx = KData[KData['time'] == KData['time'][0]]['target_idx'].reset_index(drop=True)
114 | 
115 |     # 按标的处理数据：
116 |     for i in range(300):
117 |         # 训练特征集及训练标签构建：
118 |         # 临时数据存储变量:
119 |         FactorData0 = pd.DataFrame(np.full([1, len(Fcode) + 2], np.nan),
120 |             columns=(['idx', 'benefit'] + Fcode))
121 |         # 存储预测特征样本
122 |         FactorDataTest0 = pd.DataFrame(np.full([1, len(Fcode) + 1], np.nan), columns=(['idx'] + Fcode))
123 | 
124 |         # 因子数据 序号对齐, 提取当前标的的因子数据
125 |         FData0 = FData[FData['target_idx'] == tempIdx[i]].reset_index(drop=True)
126 | 
127 |         # 按特征处理数据：
128 |         for FC in context.FactorCode:
129 |             # 提取当前标的中与当前因子FC相同的部分
130 |             FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True)
131 |             FactorData0[FC] = FCData[0]  # 存储上一个月初的股票因子数据
132 | 
133 |         # 按标签处理数据：
134 |         # 提取当前标的的前一个月的K线面板数据
135 |         close = np.array(KData[KData['target_idx'] == tempIdx[i]]['close'])
136 |         # 计算当前标的在上一个月的收益率
137 |         benefit = (close[context.Len - 1] - close[0]) / close[0]
138 | 
139 |         FactorData0['benefit'] = benefit
140 |         # idx: 建立当前标的在训练样本集中的索引
141 |         FactorData0['idx'] = tempIdx[i]
142 |         # 合并数据：组成训练样本
143 |         FactorData = FactorData.append(FactorData0, ignore_index=True)
144 | 
145 |         # 预测特征集构建：建立标的索引
146 |         FactorDataTest0['idx'] = tempIdx[i]
147 |         # 按特征处理数据，过程同建立训练特征
148 |         for FC in context.FactorCode:
149 |             FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True)
150 |             FactorDataTest0[FC] = FCData[context.Len - 1]
151 | 
152 |         # 合并测试数据
153 |         FactorDataTest = FactorDataTest.append(FactorDataTest0, ignore_index=True)
154 | 
155 |     """
156 |     训练集和测试集的表头字段如下
157 |     FactorData DataFrame:
158 |     idx  |  benefit |  Factor 1 | Factor 2| ....
159 |     benefit 作为标签，上月初Factor作为特征，此处是单因子测试，只有一个特征
160 |     FactorDataTest DataFrame: 
161 |     idx | Factor 1 | Factor 2 | ...
162 |     本月初的因子作为预测特征
163 |     """
164 | 
165 |     # 数据清洗：
166 |     FactorData = FactorData.dropna(axis=0, how='any').reset_index(drop=True)  # 清洗数据
167 |     FactorDataTest = FactorDataTest.dropna(axis=0, how='any').reset_index(drop=True)  # 清洗数据
168 |     Idx = FactorDataTest['idx']  # 剩余标的序号
169 | 
170 |     # 按特征进行预处理
171 |     for Factor in context.FactorCode:
172 |         FactorData = filter_MAD(FactorData, Factor, 5)  # 中位数去极值法
173 |         FactorData[Factor] = preprocessing.scale(FactorData[Factor])  # 标准化
174 | 
175 |         FactorDataTest = filter_MAD(FactorDataTest, Factor, 5)  # 中位数去极值法
176 |         FactorDataTest[Factor] = preprocessing.scale(FactorDataTest[Factor])  # 标准化
177 | 
178 |     # print(FactorData.head(1))
179 |     # print(FactorDataTest.head(1))
180 | 
181 |     # 训练和预测特征构建：# 行（样本数）* 列（特征数）
182 |     X = np.ones([FactorData.shape[0], len(Fcode)])
183 |     Xtest = np.ones([FactorDataTest.shape[0], len(Fcode)])
184 | 
185 |     # 循环填充特征到numpy数组中
186 |     for i in range(X.shape[1]):
187 |         X[:, i] = FactorData[Fcode[i]]
188 |         Xtest[:, i] = FactorDataTest[Fcode[i]]
189 | 
190 |     # 训练样本的标签，为浮点数的收益率
191 |     Y = np.array(FactorData['benefit']).astype(float)
192 | 
193 |     # 构建模型：
194 |     LRModel = LinearRegression(normalize=True)
195 | 
196 |     # 模型训练：
197 |     LRModel.fit(X, Y)
198 | 
199 |     # LR分类预测：
200 |     y = LRModel.predict(Xtest)
201 | 
202 |     # 交易设置：
203 |     positions = context.account().positions['volume_long']  # 多头持仓数量
204 |     valid_cash = context.account(account_idx=0).cash['valid_cash'][0]  # 可用资金
205 | 
206 |     P = context.cash_rate / (sum(y > 0) + 1)  # 设置每只标的可用资金比例 + 1 防止分母为0
207 | 
208 |     # 获取收益率的高分位数和低分位数
209 |     low_return, high_return = np.percentile(y, [context.down_pos, context.upper_pos])
210 | 
211 |     for i in range(len(Idx)):
212 |         position = positions.iloc[Idx[i]]
213 |         if position == 0 and y[i] > high_return and valid_cash > 0: # 当前无仓，且该股票收益大于高80%分位数，则开仓，买入
214 |             # 开仓数量 + 1防止分母为0
215 |             # print(valid_cash, P, KData['close'][Idx[i]])  # 这里的数目可考虑减少一点，，有时太多有时太少
216 |             Num = int(math.floor(valid_cash * P / 100 / (KData['close'][Idx[i]] + 1)) * 100)
217 | 
218 |             # 控制委托量，不要过大或过小,需要保证是100的倍数
219 |             if Num < 1000:
220 |                 Num *= 10
221 |             if Num > 100000:
222 |                 Num = int(Num / 10)
223 |                 Num -= Num % 100
224 |             if Num <= 0:  # 不开仓
225 |                 continue
226 | 
227 |             print("开仓数量为：{}".format(Num))
228 |             order_id = order_volume(account_idx=0, target_idx=int(Idx[i]), volume=Num, side=1, position_effect=1, order_type=2,
229 |                          price=0)  # 指定委托量开仓
230 |             # 对订单号为order_id的委托单设置止损，止损距离10个整数点，触发时，委托的方式用市价委托
231 |             # stop_loss_by_order(target_order_id=order_id, stop_type=1, stop_gap=10, order_type=2)
232 | 
233 |         elif position > 0 and y[i] < low_return:  # 当前持仓，且该股票收益小于低60%分位数，则平仓，卖出
234 |             #print("平仓")
235 |             order_volume(account_idx=0, target_idx=int(Idx[i]), volume=int(position), side=2, position_effect=2,
236 |                          order_type=2, price=0)  # 指定委托量平仓
237 | 
238 | 
if __name__ == '__main__':
    # Script entry point: run this strategy file as a daily-frequency backtest
    # over the constituents of `block` as listed on the backtest start date.
    # The strategy is named after the factor given on the command line.
    strategy_name = factor
    file_path = 'single_factor_test.py'
    block = 'hs300'
    begin_date, end_date = '2016-01-01', '2018-09-30'

    target_list = list(get_code_list(block, date=begin_date)['code'])
    run_backtest(strategy_name=strategy_name, file_path=file_path, target_list=target_list,
                 frequency='day', fre_num=1, begin_date=begin_date, end_date=end_date, fq=1)
252 | 
253 | 


--------------------------------------------------------------------------------
/svm.py:
--------------------------------------------------------------------------------
  1 | """
  2 | ----------------------------------------------------------
  3 | 策略思路：
  4 | 1. 回测标的：沪深300成分股
  5 | 2. 回测时间段：2016-01-01 至 2018-09-30
  6 | 3. 特征选择：每个大类夏普率最高的因子+夏普率高于1.5的因子
  7 |     - 质量类：ROIC, CashToCurrentLiability
  8 |     - 特色技术指标：STDDEV
  9 |     - 收益风险：DDNCR
 10 |     - 情绪类：TVMA20
 11 |     - 每股指标类：EnterpriseFCFPS
 12 |     - 价值类：PS
 13 |     - 基础类：AdminExpenseTTM, FinanExpenseTTM, NetIntExpense, GrossProfit
 14 |     - 行业分析师：FY12P
 15 |     - 动量类：TotalAssetGrowRate
 16 |     - 成长类：TotalAssetGrowRate
 17 |     - 常用技术类：MA120
 18 | ... 其余逻辑参照single_factor_test.py
 19 | ----------------------------------------------------------
 20 | """
 21 | from atrader import *
 22 | import pandas as pd
 23 | import numpy as np
 24 | from sklearn import svm
 25 | import math
 26 | from sklearn import preprocessing
 27 | import datetime
 28 | from sklearn.decomposition import PCA
 29 | from sklearn import linear_model
 30 | from sklearn.preprocessing import OneHotEncoder
 31 | from sklearn.ensemble import GradientBoostingRegressor
 32 | 
 33 | # 作为全局变量进行测试
 34 | FactorCode = ['ROIC', 'CashToCurrentLiability', 'STDDEV', 'DDNCR', 'PVI', 'EnterpriseFCFPS',
 35 |               'PS', 'AdminExpenseTTM', 'FinanExpenseTTM', 'NetIntExpense', 'NIAP', 'FY12P',
 36 |               'AD', 'TotalAssetGrowRate', 'MA120']
 37 | 
 38 | 
 39 | # 中位数去极值法
 40 | def filter_MAD(df, factor, n=3):
 41 |     """
 42 |     :param df: 去极值的因子序列
 43 |     :param factor: 待去极值的因子
 44 |     :param n: 中位数偏差值的上下界倍数
 45 |     :return: 经过处理的因子dataframe
 46 |     """
 47 |     median = df[factor].quantile(0.5)
 48 |     new_median = ((df[factor] - median).abs()).quantile(0.5)
 49 |     max_range = median + n * new_median
 50 |     min_range = median - n * new_median
 51 | 
 52 |     for i in range(df.shape[0]):
 53 |         if df.loc[i, factor] > max_range:
 54 |             df.loc[i, factor] = max_range
 55 |         elif df.loc[i, factor] < min_range:
 56 |             df.loc[i, factor] = min_range
 57 |     return df
 58 | 
 59 | 
 60 | def init(context):
 61 |     # 账号设置：设置初始资金为 10000000 元
 62 |     set_backtest(initial_cash=10000000, future_cost_fee=1.0, stock_cost_fee=30, margin_rate=1.0, slide_price=0.0,
 63 |                  price_loc=1, deal_type=0, limit_type=0)
 64 |     # 注册数据：日频数据
 65 |     reg_kdata('day', 1)
 66 |     global FactorCode  # 全局单因子代号
 67 |     reg_factor(factor=FactorCode)
 68 |     context.FactorCode = FactorCode  #
 69 | 
 70 |     # 超参数设置：
 71 |     context.Len = 21    # 时间长度: 当交易日个数小于该事件长度时，跳过该交易日，假设平均每个月 21 个交易日左右  250/12
 72 |     context.Num = 0   # 记录当前交易日个数
 73 | 
 74 |     # 较敏感的超参数，需要调节
 75 |     context.upper_pos = 80  # 股票预测收益率的上分位数，高于则买入
 76 |     context.down_pos = 20   # 股票预测收益率的下分位数，低于则卖出
 77 |     context.cash_rate = 0.6  # 计算可用资金比例的分子，利益大于0的股票越多，比例越小
 78 | 
 79 |     # 确保月初调仓
 80 |     days = get_trading_days('SSE', '2016-01-01', '2018-09-30')
 81 |     months = np.vectorize(lambda x: x.month)(days)
 82 |     month_begin = days[pd.Series(months) != pd.Series(months).shift(1)]
 83 |     context.month_begin = pd.Series(month_begin).dt.strftime('%Y-%m-%d').tolist()
 84 | 
 85 | 
 86 | def on_data(context):
 87 |     context.Num = context.Num + 1
 88 |     if context.Num < context.Len:  # 如果交易日个数小于Len+1，则进入下一个交易日进行回测
 89 |         return
 90 |     if datetime.datetime.strftime(context.now, '%Y-%m-%d') not in context.month_begin:  # 调仓频率为月,月初开始调仓
 91 |         return
 92 | 
 93 |     # 获取数据：
 94 |     KData = get_reg_kdata(reg_idx=context.reg_kdata[0], length=context.Len, fill_up=True, df=True)
 95 |     FData = get_reg_factor(reg_idx=context.reg_factor[0], target_indices=[x for x in range(300)], length=context.Len,
 96 |                            df=True)  # 获取因子数据
 97 | 
 98 |     # 特征构建：
 99 |     Fcode = context.FactorCode  # 标签不需要代号了
100 | 
101 |     # 数据存储变量：
102 |     # Close 字段为标签，Fcode 为标签
103 |     FactorData = pd.DataFrame(columns=(['idx', 'benefit'] + Fcode))  # 存储训练特征及标签样本
104 |     FactorDataTest = pd.DataFrame(columns=(['idx'] + Fcode))       # 存储预测特征样本
105 | 
106 |     # K线数据序号对齐
107 |     tempIdx = KData[KData['time'] == KData['time'][0]]['target_idx'].reset_index(drop=True)
108 | 
109 |     # 按标的处理数据：
110 |     for i in range(300):
111 |         # 训练特征集及训练标签构建：
112 |         # 临时数据存储变量:
113 |         FactorData0 = pd.DataFrame(np.full([1, len(Fcode) + 2], np.nan),
114 |             columns=(['idx', 'benefit'] + Fcode))
115 |         # 存储预测特征样本
116 |         FactorDataTest0 = pd.DataFrame(np.full([1, len(Fcode) + 1], np.nan), columns=(['idx'] + Fcode))
117 | 
118 |         # 因子数据 序号对齐, 提取当前标的的因子数据
119 |         FData0 = FData[FData['target_idx'] == tempIdx[i]].reset_index(drop=True)
120 | 
121 |         # 按特征处理数据：
122 |         for FC in context.FactorCode:
123 |             # 提取当前标的中与当前因子FC相同的部分
124 |             FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True)
125 |             FactorData0[FC] = FCData[0]  # 存储上一个月初的股票因子数据
126 | 
127 |         # 按标签处理数据：
128 |         # 提取当前标的的前一个月的K线面板数据
129 |         close = np.array(KData[KData['target_idx'] == tempIdx[i]]['close'])
130 |         # 计算当前标的在上一个月的收益率
131 |         benefit = (close[context.Len - 1] - close[0]) / close[0]
132 | 
133 |         FactorData0['benefit'] = benefit
134 |         # idx: 建立当前标的在训练样本集中的索引
135 |         FactorData0['idx'] = tempIdx[i]
136 |         # 合并数据：组成训练样本
137 |         FactorData = FactorData.append(FactorData0, ignore_index=True)
138 | 
139 |         # 预测特征集构建：建立标的索引
140 |         FactorDataTest0['idx'] = tempIdx[i]
141 |         # 按特征处理数据，过程同建立训练特征
142 |         for FC in context.FactorCode:
143 |             FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True)
144 |             FactorDataTest0[FC] = FCData[context.Len - 1]
145 | 
146 |         # 合并测试数据
147 |         FactorDataTest = FactorDataTest.append(FactorDataTest0, ignore_index=True)
148 | 
149 |     """
150 |     训练集和测试集的表头字段如下
151 |     FactorData DataFrame:
152 |     idx  |  benefit |  Factor 1 | Factor 2| ....
153 |     benefit 作为标签，上月初Factor作为特征，此处是单因子测试，只有一个特征
154 |     FactorDataTest DataFrame: 
155 |     idx | Factor 1 | Factor 2 | ...
156 |     本月初的因子作为预测特征
157 |     """
158 | 
159 |     # 数据清洗：
160 |     FactorData = FactorData.dropna(axis=0, how='any').reset_index(drop=True)  # 清洗数据
161 |     FactorDataTest = FactorDataTest.dropna(axis=0, how='any').reset_index(drop=True)  # 清洗数据
162 |     Idx = FactorDataTest['idx']  # 剩余标的序号
163 | 
164 |     # 按特征进行预处理
165 |     for Factor in context.FactorCode:
166 |         FactorData = filter_MAD(FactorData, Factor, 5)  # 中位数去极值法
167 |         FactorData[Factor] = preprocessing.scale(FactorData[Factor])  # 标准化
168 | 
169 |         FactorDataTest = filter_MAD(FactorDataTest, Factor, 5)  # 中位数去极值法
170 |         FactorDataTest[Factor] = preprocessing.scale(FactorDataTest[Factor])  # 标准化
171 | 
172 |     # print(FactorData.head(1))
173 |     # print(FactorDataTest.head(1))
174 | 
175 |     # 训练和预测特征构建：# 行（样本数）* 列（特征数）
176 |     X = np.ones([FactorData.shape[0], len(Fcode)])
177 |     Xtest = np.ones([FactorDataTest.shape[0], len(Fcode)])
178 | 
179 |     # 循环填充特征到numpy数组中
180 |     for i in range(X.shape[1]):
181 |         X[:, i] = FactorData[Fcode[i]]
182 |         Xtest[:, i] = FactorDataTest[Fcode[i]]
183 | 
184 |     # 训练样本的标签，为浮点数的收益率
185 |     Y = (np.array(FactorData['benefit']).astype(float) > 0)
186 | 
187 |     SVM = svm.SVR(gamma='scale')
188 | 
189 |     gbr = GradientBoostingRegressor()
190 |     gbr.fit(X, Y)
191 |     enc = OneHotEncoder()
192 |     enc.fit(gbr.apply(X))
193 | 
194 |     new_X = enc.transform(gbr.apply(X))
195 |     new_X = new_X.toarray()
196 | 
197 |     X = new_X
198 | 
199 |     new_Xtest = enc.transform(gbr.apply(Xtest))
200 |     new_Xtest = new_Xtest.toarray()
201 |     Xtest = new_Xtest
202 | 
203 |     # 模型训练：
204 |     SVM.fit(X, Y)
205 | 
206 |     # LR分类预测：
207 |     y = SVM.predict(Xtest)
208 |     # 交易设置：
209 |     positions = context.account().positions['volume_long']  # 多头持仓数量
210 |     valid_cash = context.account(account_idx=0).cash['valid_cash'][0]  # 可用资金
211 | 
212 |     P = context.cash_rate / (sum(y > 0) + 1)  # 设置每只标的可用资金比例 + 1 防止分母为0
213 | 
214 |     # 获取收益率的高分位数和低分位数
215 |     low_return, high_return = np.percentile(y, [context.down_pos, context.upper_pos])
216 | 
217 |     for i in range(len(Idx)):
218 |         position = positions.iloc[Idx[i]]
219 |         #if position == 0 and y[i] == True and valid_cash > 0:  # 若预测结果为true(收益率>0)，买入
220 |             # print('开仓')
221 |         if position == 0 and y[i] > high_return and valid_cash > 0:
222 |             # 开仓数量 + 1防止分母为0
223 |             # print(valid_cash, P, KData['close'][Idx[i]])  # 这里的数目可考虑减少一点，，有时太多有时太少
224 |             Num = int(math.floor(valid_cash * P / 100 / (KData['close'][Idx[i]] + 1)) * 100)
225 | 
226 |             # 控制委托量，不要过大或过小,需要保证是100的倍数
227 |             if Num < 1000:
228 |                 Num *= 10
229 |             if Num > 100000:
230 |                 Num = int(Num / 10)
231 |                 Num -= Num % 100
232 |             if Num <= 0:  # 不开仓
233 |                 continue
234 | 
235 |             print("开仓数量为：{}".format(Num))
236 |             order_id = order_volume(account_idx=0, target_idx=int(Idx[i]), volume=Num, side=1, position_effect=1, order_type=2,
237 |                          price=0)  # 指定委托量开仓
238 |             # 对订单号为order_id的委托单设置止损，止损距离10个整数点，触发时，委托的方式用市价委托
239 |             # stop_loss_by_order(target_order_id=order_id, stop_type=1, stop_gap=10, order_type=2)
240 |         # elif position > 0 and y[i] == False: #预测结果为false(收益率<0)，卖出
241 |         elif position > 0 and y[i] < low_return:  # 当前持仓，且该股票收益小于低30%分位数，则平仓，卖出
242 |             print("平仓，数量为: {}".format(position / 10))
243 |             order_volume(account_idx=0, target_idx=int(Idx[i]), volume=int(position / 10),
244 |                          side=2, position_effect=2, order_type=2, price=0)  # 指定委托量平仓
245 | 
246 | 
if __name__ == '__main__':
    # Script entry point: run this strategy file as a daily-frequency backtest
    # over the constituents of `block` as listed on the backtest start date.
    strategy_name = 'svm'
    file_path = 'svm.py'
    block = 'hs300'
    begin_date, end_date = '2016-01-01', '2018-09-30'

    target_list = list(get_code_list(block, date=begin_date)['code'])
    run_backtest(strategy_name=strategy_name, file_path=file_path, target_list=target_list,
                 frequency='day', fre_num=1, begin_date=begin_date, end_date=end_date, fq=1)
260 | 


--------------------------------------------------------------------------------
/time_roll_model.py:
--------------------------------------------------------------------------------
  1 | """
  2 | ----------------------------------------------------------
  3 | 策略思路：
  4 | 1. 回测标的：沪深300成分股
  5 | 2. 回测时间段：2016-01-01 至 2018-09-30
  6 | 3. 特征选择：每个大类夏普率最高的因子+夏普率高于1.5的因子
  7 |     - 质量类：ROIC, CashToCurrentLiability
  8 |     - 特色技术指标：STDDEV
  9 |     - 收益风险：DDNCR
 10 |     - 情绪类：TVMA20
 11 |     - 每股指标类：EnterpriseFCFPS
 12 |     - 价值类：PS TA2EV
 13 |     - 基础类：AdminExpenseTTM, FinanExpenseTTM, NetIntExpense
 14 |     - 行业分析师：FY12P
 15 |     - 成长类：TotalAssetGrowRate
 16 |     - 常用技术类：MA120
 17 | ... 其余逻辑参照single_factor_test.py
 18 | ----------------------------------------------------------
 19 | 
 20 | 时间窗口滚动模型：
 21 | 在原来的基础上增加了滚动选项。
 22 | 原来的时间窗口固定为一个，即前20天为一个时间窗口。
 23 | 现在支持时间窗口向前滚动获取数据，有：时间窗口第一天的因子值，时间窗口内各股票的平均收益率；
 24 | 
 25 | """
 26 | from atrader import *
 27 | import pandas as pd
 28 | import numpy as np
 29 | from sklearn import svm
 30 | import math
 31 | from sklearn import preprocessing
 32 | import datetime
 33 | from xgboost.sklearn import XGBRegressor
 34 | from sklearn.ensemble import RandomForestRegressor
 35 | 
 36 | # 中位数去极值法
 37 | def filter_MAD(df, factor, n=3):
 38 |     """
 39 |     :param df: 去极值的因子序列
 40 |     :param factor: 待去极值的因子
 41 |     :param n: 中位数偏差值的上下界倍数
 42 |     :return: 经过处理的因子dataframe
 43 |     """
 44 |     median = df[factor].quantile(0.5)
 45 |     new_median = ((df[factor] - median).abs()).quantile(0.5)
 46 |     max_range = median + n * new_median
 47 |     min_range = median - n * new_median
 48 | 
 49 |     for i in range(df.shape[0]):
 50 |         if df.loc[i, factor] > max_range:
 51 |             df.loc[i, factor] = max_range
 52 |         elif df.loc[i, factor] < min_range:
 53 |             df.loc[i, factor] = min_range
 54 |     return df
 55 | 
 56 | 
 57 | def init(context):
 58 | 
 59 |     # 账号设置：设置初始资金为 10000000 元
 60 |     set_backtest(initial_cash=10000000, future_cost_fee=1.0, stock_cost_fee=30, margin_rate=1.0, slide_price=0.0,
 61 |                  price_loc=1, deal_type=0, limit_type=0)
 62 | 
 63 |     # 注册数据：日频数据
 64 |     reg_kdata('day', 1)
 65 |     FactorCode = ['ROIC', 'CashToCurrentLiability', 'STDDEV', 'DDNCR', 'PVI', 'EnterpriseFCFPS',
 66 |                   'PS', 'AdminExpenseTTM', 'FinanExpenseTTM', 'NetIntExpense', 'NIAP', 'FY12P',
 67 |                   'AD', 'TotalAssetGrowRate', 'MA120']
 68 |     reg_factor(factor=FactorCode)
 69 |     context.FactorCode = FactorCode
 70 | 
 71 |     # 参数设置：
 72 |     context.LEN = 21   # 时间窗口滑动最大范围
 73 |     context.N1 = 20    # 时间窗口中的训练/预测特征部分
 74 |     context.Num = 0    # 记录当前交易日个数，保证交易日个数需要大于时间窗口滑动的最大范围
 75 | 
 76 |     # 较敏感的超参数，需要调节
 77 |     context.upper_pos = 80   # 股票预测收益率的上分位数，高于则买入
 78 |     context.down_pos = 20    # 股票预测收益率的下分位数，低于则卖出
 79 |     context.cash_rate = 0.6  # 计算可用资金比例的分子，
 80 | 
 81 |     # 确保月初调仓
 82 |     days = get_trading_days('SZSE', '2016-01-01', '2018-09-30')
 83 |     months = np.vectorize(lambda x: x.month)(days)
 84 |     month_begin = days[pd.Series(months) != pd.Series(months).shift(1)]
 85 |     context.month_begin = pd.Series(month_begin).dt.strftime('%Y-%m-%d').tolist()
 86 | 
 87 | 
 88 | def on_data(context):
 89 |     context.Num = context.Num + 1  # 交易日数目+1
 90 |     if context.Num < context.LEN:  # 如果交易日个数小于Len+1，则进入下一个交易日进行回测
 91 |         return
 92 |     if datetime.datetime.strftime(context.now, '%Y-%m-%d') not in context.month_begin:  # 调仓频率为月,月初开始调仓
 93 |         return
 94 | 
 95 |     # -------------------------------------------- #
 96 |     #  获取 K线数据和因子数据                      #
 97 |     # -------------------------------------------- #
 98 |     """
 99 |     K 线数据 DataFrame结构：
100 |     |  target_idx | time | open   | high   | low   |  close | volume | amount   | open_interest
101 |     | 标的索引号  | 日期 | 开盘价 | 最高价 |最低价 | 收盘价 | 成交量 | 成交金额 | 持仓量
102 |     如果获取了 LEN 天的各股票对应的K线数据，那么行排列是：
103 |     ０ 至 LEN - 1 行先排第一个股票在LEN天内K线数据，
104 |     然后 LEN 至 2 LEN - 1行排第二个股票在LEN天内的K线数据。
105 |     """
106 |     KData = get_reg_kdata(reg_idx=context.reg_kdata[0], length=context.LEN, fill_up=True, df=True)
107 | 
108 |     """
109 |     因子数据 DataFrame结构：
110 |     | target_idx | date | factor   | value | 
111 |     | 标的序号   | 日期 | 因子名称 | 因子值|
112 |     行排列情况：先排一个股票在LEN天内的某一因子值，然后在排该股票下一个因子值，直到因子值排完，
113 |     然后再轮到下一个股票
114 |     """
115 |     FData = get_reg_factor(reg_idx=context.reg_factor[0], target_indices=[x for x in range(300)], length=context.LEN,
116 |                            df=True)  # 获取因子数据
117 | 
118 |     # ------------------------------------- #
119 |     #  特征构建                             #
120 |     # ------------------------------------- #
121 |     Fcode = list()
122 |     # 此处构建因子列名，取时间窗的第一天因子作为训练/预测数据样本
123 |     Fcode = context.FactorCode
124 | 
125 |     FactorData_list = []  # 存储多个时间窗口的训练样本和标签
126 |     """
127 |     用于训练的DataFrame，每一列的含义如下：
128 |     idx  | benefit |  factor1  | factor1 | .... | factorm
129 |     idx 表示沪深300股中股票的序号，范围从 0~299，我们可以通过该序号定位股票
130 |     benefit 表示该股票在某时间窗口后 N2 天内的平均收益率，即涨幅情况
131 |     factorm_n 表示在时间窗口内的第一天的第 m 个因子
132 |     我们使用所有的factor作为训练特征，benefit作为训练标签。
133 |     """
134 |     for i in range(context.LEN - context.N1 + 1):  # 时间窗口个数
135 |         FactorData = pd.DataFrame(columns=(['idx', 'benefit'] + Fcode))  # 存储训练特征及标签样本
136 |         FactorData_list.append(FactorData)   # 将该时间窗的训练数据存入列表
137 | 
138 |     """
139 |      用于预测的DataFrame，结构如下：
140 |      idx  | factor1_1  | factor2_1 | .... | factorm_n
141 |      idx 表示沪深300股中股票的序号，范围从 0~299，我们可以通过该序号定位股票
142 |      factorm_n 表示第 m 个因子在第 n 天的值
143 |      我们使用所有的factor作为预测特征，预测出未来 N2天的各股票的收益率情况
144 |     """
145 |     FactorDataTest = pd.DataFrame(columns=(['idx'] + Fcode))  # 存储预测特征样本
146 | 
147 |     # K线数据序号对齐
148 |     tempIdx = KData[KData['time'] == KData['time'][0]]['target_idx'].reset_index(drop=True)
149 | 
150 |     # ----------------------------------------- #
151 |     #  按标的处理数据，提取训练特征和标签       #
152 |     # ----------------------------------------- #
153 |     for window in range(context.LEN - context.N1 + 1):  # 滚动时间窗
154 |         for i in range(300):  # 按标的处理
155 |             # 训练特征集及训练标签构建：
156 |             FactorData0 = pd.DataFrame(np.full([1, len(Fcode) + 2], np.nan), columns=(['idx', 'benefit'] + Fcode))
157 | 
158 |             # 因子数据 序号对齐, 提取当前标的的因子数据
159 |             FData0 = FData[FData['target_idx'] == tempIdx[i]].reset_index(drop=True)
160 | 
161 |             # 按特征处理数据：
162 |             for FC in context.FactorCode:
163 |                 # 提取当前标的中与当前因子FC相同的部分
164 |                 FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True)
165 |                 FactorData0[FC] = FCData[window]
166 | 
167 |             FactorData0['idx'] = i
168 | 
169 |             # 按标签处理数据：
170 |             # 提取当前标的的前一个月的K线面板数据
171 |             close = np.array(KData[KData['target_idx'] == tempIdx[i]]['close'])
172 | 
173 |             # 当前时间窗之后的N2天内的股票收益率情况
174 |             benefit = (close[window + context.N1 - 1] - close[window]) / close[window]
175 | 
176 |             FactorData0['benefit'] = benefit
177 |             FactorData_list[window] = FactorData_list[window].append(FactorData0, ignore_index=True)
178 |             print("window:{}, stock :{} ".format(window, i))
179 |         print("pass this window: {}".format(window))
180 |     # ----------------------------------- #
181 |     # 提取预测样本特征                    #
182 |     # ----------------------------------- #
183 |     for i in range(300):
184 |         # 存储预测特征样本
185 |         FactorDataTest0 = pd.DataFrame(np.full([1, len(Fcode) + 1], np.nan), columns=(['idx'] + Fcode))
186 | 
187 |         # 因子数据 序号对齐, 提取当前标的的因子数据
188 |         FData0 = FData[FData['target_idx'] == tempIdx[i]].reset_index(drop=True)
189 | 
190 |         # 预测特征集构建：建立标的索引
191 |         FactorDataTest0['idx'] = tempIdx[i]
192 | 
193 |         # 按特征处理数据，过程同建立训练特征
194 |         for FC in context.FactorCode:
195 |             FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True)
196 |             FactorDataTest0[FC] = FCData[context.LEN - 1]
197 | 
198 |         # 合并测试数据
199 |         FactorDataTest = FactorDataTest.append(FactorDataTest0, ignore_index=True)
200 | 
201 |     # 数据清洗：
202 |     for i in range(len(FactorData_list)):
203 |         FactorData_list[i] = FactorData_list[i].dropna(axis=0, how='any').reset_index(drop=True)  # 清洗数据
204 |     FactorDataTest = FactorDataTest.dropna(axis=0, how='any').reset_index(drop=True)  # 清洗数据
205 |     Idx = FactorDataTest['idx']  # 剩余标的序号
206 | 
207 |     # 按特征进行预处理
208 |     for Factor in Fcode:
209 |         # 处理多个时间窗口的训练数据。
210 |         for window in range(len(FactorData_list)):
211 |             FactorData_list[window] = filter_MAD(FactorData_list[window], Factor, 5)  # 中位数去极值法
212 |             FactorData_list[window][Factor] = preprocessing.scale(FactorData_list[window][Factor])  # 标准化
213 | 
214 |         FactorDataTest = filter_MAD(FactorDataTest, Factor, 5)  # 中位数去极值法
215 |         FactorDataTest[Factor] = preprocessing.scale(FactorDataTest[Factor])  # 标准化
216 | 
217 |     """
218 |     xgb_params = {'learning_rate': 0.01, 'n_estimators': 500, 'max_depth': 5, 'min_child_weight': 4, 'seed': 1000,
219 |                   'subsample': 0.8, 'colsample_bytree': 0.8, 'gamma': 0.1, 'reg_alpha': 0, 'reg_lambda': 1}
220 | 
221 |     xgb_model = XGBRegressor(**xgb_params)    
222 |     """
223 |     RF = RandomForestRegressor(max_depth=5, n_estimators=50)
224 | 
225 |     # 训练和预测特征构建：# 行（样本数）* 列（特征数）
226 |     for window in range(len(FactorData_list)):
227 |         X = np.ones([FactorData_list[window].shape[0], len(Fcode)])
228 | 
229 |         # 循环填充特征到numpy数组中
230 |         for i in range(X.shape[1]):
231 |             X[:, i] = FactorData_list[window][Fcode[i]]
232 | 
233 |         # 训练样本的标签，为浮点数的收益率
234 |         Y = (np.array(FactorData_list[window]['benefit']).astype(float) > 0)
235 | 
236 |         # 模型训练：
237 |         print("FITTING!")
238 |         RF.fit(X, Y)
239 | 
240 |     Xtest = np.ones([FactorDataTest.shape[0], len(Fcode)])
241 |     for i in range(X.shape[1]):
242 |         Xtest[:, i] = FactorDataTest[Fcode[i]]
243 | 
244 |     # 分类预测：
245 |     y = RF.predict(Xtest)
246 | 
247 |     # 交易设置：
248 |     positions = context.account().positions['volume_long']  # 多头持仓数量
249 |     valid_cash = context.account(account_idx=0).cash['valid_cash'][0]  # 可用资金
250 | 
251 |     P = context.cash_rate / (sum(y > 0) + 1)  # 设置每只标的可用资金比例 + 1 防止分母为0
252 | 
253 |     # 获取收益率的高分位数和低分位数
254 |     low_return, high_return = np.percentile(y, [context.down_pos, context.upper_pos])
255 | 
256 |     for i in range(len(Idx)):
257 |         position = positions.iloc[Idx[i]]
258 |         if position == 0 and y[i] > high_return and valid_cash > 0: # 当前无仓，且该股票收益大于高70%分位数，则开仓，买入
259 |             # 开仓数量 + 1防止分母为0
260 |             Num = int(math.floor(valid_cash * P / 100 / (KData['close'][Idx[i]] + 1)) * 100)
261 | 
262 |             # 控制委托量，不要过大或过小,需要保证是100的倍数
263 |             if Num < 1000:
264 |                 Num *= 10
265 |             if Num > 100000:
266 |                 Num = int(Num / 10)
267 |                 Num -= Num % 100
268 |             if Num <= 0:  # 不开仓
269 |                 continue
270 | 
271 |             print("开仓数量为：{}".format(Num))
272 |             order_id = order_volume(account_idx=0, target_idx=int(Idx[i]), volume=Num, side=1, position_effect=1,
273 |                                     order_type=2, price=0)  # 指定委托量开仓
274 |             # 对订单号为order_id的委托单设置止损，止损距离10个整数点，触发时，委托的方式用市价委托
275 |             stop_loss_by_order(target_order_id=order_id, stop_type=1, stop_gap=10, order_type=2)
276 |         # elif position > 0 and y[i] == False: #预测结果为false(收益率<0)，卖出
277 |         elif position > 0 and y[i] < low_return:  # 当前持仓，且该股票收益小于低30%分位数，则平仓，卖出
278 |             print("平仓，数量为: {}".format(position / 10))
279 |             order_volume(account_idx=0, target_idx=int(Idx[i]), volume=int(position / 10),
280 |                          side=2, position_effect=2, order_type=2, price=0)  # 指定委托量平仓
281 | 
282 | 
if __name__ == '__main__':
    # Backtest configuration for this strategy script.
    strategy_name = 'random_forest'
    file_path = 'time_roll_model.py'

    # Stock universe (block code) and the backtest date range.
    block = 'hs300'
    begin_date = '2016-01-01'
    end_date = '2018-09-30'

    # Constituents of the block as of the first backtest day.
    target_list = list(get_code_list(block, date=begin_date)['code'])

    # Daily bars, one bar per step.
    run_backtest(
        strategy_name=strategy_name,
        file_path=file_path,
        target_list=target_list,
        frequency='day',
        fre_num=1,
        begin_date=begin_date,
        end_date=end_date,
        fq=1,
    )
296 | 


--------------------------------------------------------------------------------
/xgb_model.py:
--------------------------------------------------------------------------------
  1 | """
  2 | ----------------------------------------------------------
  3 | 策略思路：
  4 | 1. 回测标的：沪深300成分股
  5 | 2. 回测时间段：2016-01-01 至 2018-09-30
  6 | 3. 特征选择：每个大类夏普率最高的因子+夏普率高于1.5的因子
  7 |     - 质量类：ROIC, CashToCurrentLiability
  8 |     - 特色技术指标：STDDEV
  9 |     - 收益风险：DDNCR
 10 |     - 情绪类：TVMA20
 11 |     - 每股指标类：EnterpriseFCFPS
 12 |     - 价值类：PS
 13 |     - 基础类：AdminExpenseTTM, FinanExpenseTTM, NetIntExpense, GrossProfit
 14 |     - 行业分析师：FY12P
 15 |     - 动量类：TotalAssetGrowRate
 16 |     - 成长类：TotalAssetGrowRate
 17 |     - 常用技术类：MA120
 18 | ... 其余逻辑参照single_factor_test.py
 19 | ----------------------------------------------------------
 20 | """
 21 | from atrader import *
 22 | import pandas as pd
 23 | import numpy as np
 24 | from sklearn import svm
 25 | import math
 26 | from sklearn import preprocessing
 27 | import datetime
 28 | from xgboost.sklearn import XGBRegressor
 29 | 
 30 | # 作为全局变量进行测试
 31 | FactorCode = ['ROIC', 'CashToCurrentLiability', 'STDDEV', 'DDNCR', 'PVI', 'EnterpriseFCFPS',
 32 |               'PS', 'AdminExpenseTTM', 'FinanExpenseTTM', 'NetIntExpense', 'NIAP', 'FY12P',
 33 |               'AD', 'TotalAssetGrowRate', 'MA120']
 34 | 
 35 | 
 36 | # 中位数去极值法
 37 | def filter_MAD(df, factor, n=3):
 38 |     """
 39 |     :param df: 去极值的因子序列
 40 |     :param factor: 待去极值的因子
 41 |     :param n: 中位数偏差值的上下界倍数
 42 |     :return: 经过处理的因子dataframe
 43 |     """
 44 |     median = df[factor].quantile(0.5)
 45 |     new_median = ((df[factor] - median).abs()).quantile(0.5)
 46 |     max_range = median + n * new_median
 47 |     min_range = median - n * new_median
 48 | 
 49 |     for i in range(df.shape[0]):
 50 |         if df.loc[i, factor] > max_range:
 51 |             df.loc[i, factor] = max_range
 52 |         elif df.loc[i, factor] < min_range:
 53 |             df.loc[i, factor] = min_range
 54 |     return df
 55 | 
 56 | 
 57 | def init(context):
 58 |     # 账号设置：设置初始资金为 10000000 元
 59 |     set_backtest(initial_cash=10000000, future_cost_fee=1.0, stock_cost_fee=30, margin_rate=1.0, slide_price=0.0,
 60 |                  price_loc=1, deal_type=0, limit_type=0)
 61 |     # 注册数据：日频数据
 62 |     reg_kdata('day', 1)
 63 |     global FactorCode  # 全局单因子代号
 64 |     reg_factor(factor=FactorCode)
 65 | 
 66 |     context.FactorCode = FactorCode  #
 67 | 
 68 |     # 超参数设置：
 69 |     context.Len = 21    # 时间长度: 当交易日个数小于该事件长度时，跳过该交易日，假设平均每个月 21 个交易日左右  250/12
 70 |     context.Num = 0   # 记录当前交易日个数
 71 | 
 72 |     # 较敏感的超参数，需要调节
 73 |     context.upper_pos = 80  # 股票预测收益率的上分位数，高于则买入
 74 |     context.down_pos = 20   # 股票预测收益率的下分位数，低于则卖出
 75 |     context.cash_rate = 0.6  # 计算可用资金比例的分子，利益大于0的股票越多，比例越小
 76 | 
 77 |     # 确保月初调仓
 78 |     days = get_trading_days('SSE', '2016-01-01', '2018-09-30')
 79 |     months = np.vectorize(lambda x: x.month)(days)
 80 |     month_begin = days[pd.Series(months) != pd.Series(months).shift(1)]
 81 |     context.month_begin = pd.Series(month_begin).dt.strftime('%Y-%m-%d').tolist()
 82 | 
 83 | 
 84 | def on_data(context):
 85 |     context.Num = context.Num + 1
 86 |     if context.Num < context.Len:  # 如果交易日个数小于Len+1，则进入下一个交易日进行回测
 87 |         return
 88 |     if datetime.datetime.strftime(context.now, '%Y-%m-%d') not in context.month_begin:  # 调仓频率为月,月初开始调仓
 89 |         return
 90 | 
 91 |     # 获取数据：
 92 |     KData = get_reg_kdata(reg_idx=context.reg_kdata[0], length=context.Len, fill_up=True, df=True)
 93 |     FData = get_reg_factor(reg_idx=context.reg_factor[0], target_indices=[x for x in range(300)], length=context.Len,
 94 |                            df=True)  # 获取因子数据
 95 | 
 96 |     # 特征构建：
 97 |     Fcode = context.FactorCode  # 标签不需要代号了
 98 | 
 99 |     # 数据存储变量：
100 |     # Close 字段为标签，Fcode 为标签
101 |     FactorData = pd.DataFrame(columns=(['idx', 'benefit'] + Fcode))  # 存储训练特征及标签样本
102 |     FactorDataTest = pd.DataFrame(columns=(['idx'] + Fcode))       # 存储预测特征样本
103 | 
104 |     # K线数据序号对齐
105 |     tempIdx = KData[KData['time'] == KData['time'][0]]['target_idx'].reset_index(drop=True)
106 | 
107 |     # 按标的处理数据：
108 |     for i in range(300):
109 |         # 训练特征集及训练标签构建：
110 |         # 临时数据存储变量:
111 |         FactorData0 = pd.DataFrame(np.full([1, len(Fcode) + 2], np.nan),
112 |             columns=(['idx', 'benefit'] + Fcode))
113 |         # 存储预测特征样本
114 |         FactorDataTest0 = pd.DataFrame(np.full([1, len(Fcode) + 1], np.nan), columns=(['idx'] + Fcode))
115 | 
116 |         # 因子数据 序号对齐, 提取当前标的的因子数据
117 |         FData0 = FData[FData['target_idx'] == tempIdx[i]].reset_index(drop=True)
118 | 
119 |         # 按特征处理数据：
120 |         for FC in context.FactorCode:
121 |             # 提取当前标的中与当前因子FC相同的部分
122 |             FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True)
123 |             FactorData0[FC] = FCData[0]  # 存储上一个月初的股票因子数据
124 | 
125 |         # 按标签处理数据：
126 |         # 提取当前标的的前一个月的K线面板数据
127 |         close = np.array(KData[KData['target_idx'] == tempIdx[i]]['close'])
128 |         # 计算当前标的在上一个月的收益率
129 |         benefit = (close[context.Len - 1] - close[0]) / close[0]
130 | 
131 |         FactorData0['benefit'] = benefit
132 |         # idx: 建立当前标的在训练样本集中的索引
133 |         FactorData0['idx'] = tempIdx[i]
134 |         # 合并数据：组成训练样本
135 |         FactorData = FactorData.append(FactorData0, ignore_index=True)
136 | 
137 |         # 预测特征集构建：建立标的索引
138 |         FactorDataTest0['idx'] = tempIdx[i]
139 |         # 按特征处理数据，过程同建立训练特征
140 |         for FC in context.FactorCode:
141 |             FCData = FData0[FData0['factor'] == FC]['value'].reset_index(drop=True)
142 |             FactorDataTest0[FC] = FCData[context.Len - 1]
143 | 
144 |         # 合并测试数据
145 |         FactorDataTest = FactorDataTest.append(FactorDataTest0, ignore_index=True)
146 | 
147 |     """
148 |     训练集和测试集的表头字段如下
149 |     FactorData DataFrame:
150 |     idx  |  benefit |  Factor 1 | Factor 2| ....
151 |     benefit 作为标签，上月初Factor作为特征，此处是单因子测试，只有一个特征
152 |     FactorDataTest DataFrame: 
153 |     idx | Factor 1 | Factor 2 | ...
154 |     本月初的因子作为预测特征
155 |     """
156 | 
157 |     # 数据清洗：
158 |     FactorData = FactorData.dropna(axis=0, how='any').reset_index(drop=True)  # 清洗数据
159 |     FactorDataTest = FactorDataTest.dropna(axis=0, how='any').reset_index(drop=True)  # 清洗数据
160 |     Idx = FactorDataTest['idx']  # 剩余标的序号
161 | 
162 |     # 按特征进行预处理
163 |     for Factor in context.FactorCode:
164 |         FactorData = filter_MAD(FactorData, Factor, 5)  # 中位数去极值法
165 |         FactorData[Factor] = preprocessing.scale(FactorData[Factor])  # 标准化
166 | 
167 |         FactorDataTest = filter_MAD(FactorDataTest, Factor, 5)  # 中位数去极值法
168 |         FactorDataTest[Factor] = preprocessing.scale(FactorDataTest[Factor])  # 标准化
169 | 
170 |     # 训练和预测特征构建：# 行（样本数）* 列（特征数）
171 |     X = np.ones([FactorData.shape[0], len(Fcode)])
172 |     Xtest = np.ones([FactorDataTest.shape[0], len(Fcode)])
173 | 
174 |     # 循环填充特征到numpy数组中
175 |     for i in range(X.shape[1]):
176 |         X[:, i] = FactorData[Fcode[i]]
177 |         Xtest[:, i] = FactorDataTest[Fcode[i]]
178 | 
179 |     # 训练样本的标签，为浮点数的收益率
180 |     Y = (np.array(FactorData['benefit']).astype(float) > 0)
181 | 
182 |     xgb_params = {'learning_rate': 0.01, 'n_estimators': 50, 'max_depth': 10, 'min_child_weight': 5, 'seed': 1000,
183 |                   'subsample': 0.8, 'colsample_bytree': 0.8, 'gamma': 0.1, 'reg_alpha': 0, 'reg_lambda': 1}
184 | 
185 |     xgb_reg = XGBRegressor(**xgb_params)
186 | 
187 |     # 模型训练：
188 |     xgb_reg.fit(X, Y)
189 | 
190 |     # 分类预测：
191 |     y = xgb_reg.predict(Xtest)
192 | 
193 |     # 交易设置：
194 |     positions = context.account().positions['volume_long']  # 多头持仓数量
195 |     valid_cash = context.account(account_idx=0).cash['valid_cash'][0]  # 可用资金
196 | 
197 |     P = context.cash_rate / (sum(y > 0) + 1)  # 设置每只标的可用资金比例 + 1 防止分母为0
198 | 
199 |     # 获取收益率的高分位数和低分位数
200 |     low_return, high_return = np.percentile(y, [context.down_pos, context.upper_pos])
201 | 
202 |     for i in range(len(Idx)):
203 |         position = positions.iloc[Idx[i]]
204 |         # if position == 0 and y[i] == True and valid_cash > 0:  # 若预测结果为true(收益率>0)，买入
205 |             # print('开仓')
206 |         if position == 0 and y[i] > high_return and valid_cash > 0:
207 |             # 开仓数量 + 1防止分母为0
208 |             # print(valid_cash, P, KData['close'][Idx[i]])  # 这里的数目可考虑减少一点，，有时太多有时太少
209 |             Num = int(math.floor(valid_cash * P / 100 / (KData['close'][Idx[i]] + 1)) * 100)
210 | 
211 |             # 控制委托量，不要过大或过小,需要保证是100的倍数
212 |             if Num < 1000:
213 |                 Num *= 10
214 |             if Num > 100000:
215 |                 Num = int(Num / 10)
216 |                 Num -= Num % 100
217 |             if Num <= 0:  # 不开仓
218 |                 continue
219 | 
220 |             print("开仓数量为：{}".format(Num))
221 |             order_id = order_volume(account_idx=0, target_idx=int(Idx[i]), volume=Num, side=1, position_effect=1, order_type=2,
222 |                          price=0)  # 指定委托量开仓
223 |             # 对订单号为order_id的委托单设置止损，止损距离10个整数点，触发时，委托的方式用市价委托
224 |             # stop_loss_by_order(target_order_id=order_id, stop_type=1, stop_gap=10, order_type=2)
225 |         # elif position > 0 and y[i] == False: #预测结果为false(收益率<0)，卖出
226 |         elif position > 0 and y[i] < low_return:  # 当前持仓，且该股票收益小于低30%分位数，则平仓，卖出
227 |             print("平仓，数量为: {}".format(position / 10 + 100))
228 |             order_volume(account_idx=0, target_idx=int(Idx[i]), volume=int(position / 10),
229 |                          side=2, position_effect=2, order_type=2, price=0)  # 指定委托量平仓
230 | 
231 | 
if __name__ == '__main__':
    # Backtest configuration for this strategy script.
    strategy_name = 'xgb'
    file_path = 'xgb_model.py'

    # Stock universe (block code) and the backtest date range.
    block = 'hs300'
    begin_date = '2016-01-01'
    end_date = '2018-09-30'

    # Constituents of the block as of the first backtest day.
    target_list = list(get_code_list(block, date=begin_date)['code'])

    # Daily bars, one bar per step.
    run_backtest(
        strategy_name=strategy_name,
        file_path=file_path,
        target_list=target_list,
        frequency='day',
        fre_num=1,
        begin_date=begin_date,
        end_date=end_date,
        fq=1,
    )
245 | 


--------------------------------------------------------------------------------
/价值类.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JoshuaQYH/TIDIBEI/bae9a29662ecec55a9a9f1a9aa75c1c4de053a8c/价值类.png


--------------------------------------------------------------------------------
/基础类.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JoshuaQYH/TIDIBEI/bae9a29662ecec55a9a9f1a9aa75c1c4de053a8c/基础类.png


--------------------------------------------------------------------------------
/情绪类.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JoshuaQYH/TIDIBEI/bae9a29662ecec55a9a9f1a9aa75c1c4de053a8c/情绪类.png


--------------------------------------------------------------------------------
/每股指标类.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JoshuaQYH/TIDIBEI/bae9a29662ecec55a9a9f1a9aa75c1c4de053a8c/每股指标类.png


--------------------------------------------------------------------------------
/特色技术指标类.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JoshuaQYH/TIDIBEI/bae9a29662ecec55a9a9f1a9aa75c1c4de053a8c/特色技术指标类.png


--------------------------------------------------------------------------------
/行业分析师类.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JoshuaQYH/TIDIBEI/bae9a29662ecec55a9a9f1a9aa75c1c4de053a8c/行业分析师类.png


--------------------------------------------------------------------------------
/质量类.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JoshuaQYH/TIDIBEI/bae9a29662ecec55a9a9f1a9aa75c1c4de053a8c/质量类.png


--------------------------------------------------------------------------------