├── 01OLS-AC.py ├── 02Lasso_AC.py ├── 03Ridge_AC.py ├── 04ENet-AC.py ├── 05PCA_AC.py ├── 06RPPCA_AC.py ├── 07RNN-AC.py ├── 08LSTM-AC.py ├── 09DFN-AC.py ├── 10result_pro.py ├── 11get_weight_result.py ├── README.md ├── data ├── CH_3_update_20211231.csv └── CH_4_fac_update_20211231.csv └── pytorchtools_change.py /01OLS-AC.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Fri Dec 2 09:33:11 2022 4 | 5 | @author: whufi 6 | """ 7 | 8 | import os 9 | import numpy as np 10 | import random 11 | import pandas as pd 12 | import warnings 13 | random.seed(135) 14 | warnings.filterwarnings("ignore") 15 | 16 | # os.chdir(r'E:\02实验\98ML-AC-code') ##设置文件路径 17 | 18 | #%% 主要代码 19 | 20 | 21 | ##获取等权投资组合 22 | def get_equal_weight(ret): 23 | ret_m=pd.melt(ret.reset_index(),id_vars=ret.reset_index().columns[0]) 24 | ret_m.columns=['date','permno','ret'] 25 | ret_m['month']=ret_m['date'].apply(lambda x:str(x)[:6]) 26 | ret_m=ret_m.dropna(axis=0).astype(float) 27 | count=ret_m.groupby(['date'])['ret'].count().reset_index() 28 | retd=pd.merge(ret_m,count,on=['date'],how='left') 29 | 30 | retd['weight']=1/retd['ret_y'] 31 | weight1=pd.pivot(retd,index='date',columns='permno',values='weight') 32 | weight2=pd.concat([pd.DataFrame(columns=ret.columns.astype(float)),weight1],axis=0,join='outer') 33 | weight2=pd.concat([pd.DataFrame(index=ret.index),weight2],axis=1,join='outer') 34 | weight2.columns=weight2.columns.astype(float) 35 | 36 | return weight2 37 | 38 | 39 | 40 | def get_tc(j,theta0,rets,chs,weights,cr=0.005): 41 | ''' 42 | j:从1开始,假设第一期到第二期无交易费用 43 | cr:表示费率,一般取0.005 44 | ''' 45 | 46 | r=rets.iloc[j:j+1,:].values 47 | c=chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values 48 | wbar=weights.iloc[j:j+1,:].fillna(0).values 49 | w0bar=weights.iloc[j-1:j,:].fillna(0).values 50 | c0=chs[chs.date==rets.index[j-1]].sort_values('permno').iloc[:,2:].fillna(0).values 51 | r0=rets.iloc[j-1:j,:].values 52 | r00=rets.iloc[j-1:j,:].fillna(0).values 53 | 54 | wp=np.multiply(w0bar + np.dot(theta0.T,c0.T)/np.sum(~np.isnan(r0),axis=1),(1+r00)) 55 | w=wbar + np.dot(theta0.T,c.T)/np.sum(~np.isnan(r),axis=1) 56 | 57 | ##固定比列的交易成本 58 | lc= c/np.sum(~np.isnan(r),axis=1) - np.multiply(c0/np.sum(~np.isnan(r0),axis=1), 1+np.repeat(r00,c0.shape[1],axis=0).T) 59 | tc1=cr*np.dot(np.sign(w-wp),lc) 60 | 61 | return tc1.T 62 | 63 | 64 | 65 | def power_utility(r,gamma=5): ##越大越好 66 | return np.power(1+r,1-gamma)/(1-gamma) 67 | 68 | 69 | 70 | #根据资产的特征和收益数据,获得特征的权重系数theta 71 | def train(rets,chs,weights,gamma,lambda1,rho,cr, allow_short_selling,utility_function,cost_type): ##rho=1,L1; rho=0,L2 ; cr:cost rate费率 ; allow_short_selling默认无卖空约束 72 | 73 | retsv=rets.fillna(0).values 74 | weightsv=weights.fillna(0).values 75 | rcs=np.hstack([np.dot(retsv[j:j+1,:],chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values).T/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values for j in range(len(rets)) ]) 76 | rbs=np.hstack([np.dot(retsv[j:j+1,:],weightsv[j:j+1,:].T) for j in range(len(rets))]) 77 | sigmac=np.cov(rcs) 78 | 79 | cmean=np.mean(rcs,axis=1) 80 | cmeanm=np.vstack([cmean for x in range(rcs.shape[1])]).T 81 | sigmabc=np.dot(rbs-np.mean(rbs), (rcs-cmeanm).T)/(rcs.shape[1]-1) 82 | 83 | if rcs.shape[0]==1: 84 | theta=np.dot(1/sigmac,(cmean.reshape(len(cmean),1)/gamma-sigmabc.reshape(len(cmean),1) )) 85 | else: ##inv求逆函数要求矩阵是二维的 86 | theta=np.dot(np.linalg.inv(sigmac),(cmean.reshape(len(cmean),1)/gamma-sigmabc.reshape(len(cmean),1) )) 87 | 88 | return theta 89 | 90 | 91 | 92 | 93 | 94 | #根据特征的权重系数theta,进而获得投资组合权重w 95 | def test(theta,rett,weightt,chs,allow_short_selling): 96 | 97 | rets=rett 98 | weightsv=weightt.fillna(0).values 99 | 100 | #是否允许卖空 101 | if allow_short_selling==True: #允许卖空 102 | w=weightsv + np.vstack([np.dot(theta.T,chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values.T)/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values for j in range(len(rets)) ]) 103 | # r= rbs+ np.dot(theta.T,rcs) 104 | elif allow_short_selling==False: #不允许卖空 105 | w=weightsv + np.vstack([np.dot(theta.T,chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values.T)/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values for j in range(len(rets)) ]) 106 | w[w<0]=0 107 | wsum=np.sum(w,axis=1).reshape(w.shape[0],1).repeat(w.shape[1],axis=1) 108 | w=w/wsum 109 | 110 | return w 111 | 112 | 113 | 114 | 115 | def get_weights(i,ret,ch,weight0,gamma,rho,cr,allow_short_selling,utility_function,cost_type): 116 | trw=7 117 | viw=3 118 | 119 | #样本集 120 | weights=weight0.iloc[i:i+12*(trw+viw),:] ##市值加权的投资组合 121 | 122 | rets1=pd.melt(ret.iloc[i:i+12*(trw+viw+1),:].reset_index(),id_vars='date') 123 | rets1.columns=['date','permno','ret'] 124 | rets1=rets1.sort_values(by=['date']).astype(float) 125 | chs=pd.merge(rets1[['date','permno']],ch,how='left',on=['date','permno']) ##训练验证测试所用的特征 126 | 127 | retm=ret.iloc[i:i+12*(trw+viw),:] 128 | lambdaopt=0 129 | 130 | theta=train(ret.iloc[i:i+12*(trw+viw),:],chs,weight0.iloc[i:i+12*(trw+viw),:],gamma,lambdaopt,rho,cr, allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type) 131 | print(theta) 132 | ##测试集 133 | rett=ret.iloc[i+12*(trw+viw):i+12*(trw+viw+1),:] 134 | weightt=weight0.iloc[i+12*(trw+viw):i+12*(trw+viw+1),:] 135 | wp=test(theta,rett,weightt,chs,allow_short_selling) 136 | wp=pd.DataFrame(wp,index=weightt.index,columns=weightt.columns) 137 | return wp 138 | 139 | 140 | def get_result(ret,ch,methodname,gamma,rho,cr, allow_short_selling,utility_function,cost_type): 141 | trww=7*12 142 | viww=3*12 143 | teww=1*12 144 | weight0= get_equal_weight(ret) 145 | weight=pd.DataFrame() 146 | for i in range(0,len(ret)-trww-viww-teww+1,12): 147 | print(i) 148 | w=get_weights(i,ret,ch,weight0,gamma,rho,cr, allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type) 149 | weight=pd.concat([weight,w],axis=0,join='outer') 150 | weight.to_csv('result/middle/weight_all/weight_'+methodname+'_'+str(rho)+'_'+str(gamma)+'_'+str(allow_short_selling)+'_'+str(utility_function)+'_'+str(cost_type)+'.csv') 151 | 152 | 153 | 154 | #%%#所需数据,代码调用 155 | ret=pd.read_csv('data/ret_clean.csv',index_col=0).astype(float)/100 #读取收益 156 | ret.index.name='date' 157 | ch=pd.read_csv('data/char.csv',index_col=0).astype(float) #读取特征 158 | ch=ch.sort_values(by=['date','permno'],ascending= True) 159 | 160 | 161 | # %%单个特征检验的权重输出 162 | # for i in range(ch.shape[1]-2): 163 | # para=[ch.columns[2+i]]+[0,5,0.005,True,'MV',False] 164 | # chm=pd.concat([ch.iloc[:,:2],ch.iloc[:,i+2]],axis=1) 165 | # get_result(ret,chm,methodname=para[0],rho=para[1],gamma=para[2],cr=para[3], allow_short_selling=para[4],utility_function=para[5],cost_type=para[6]) 166 | 167 | 168 | #1.主结果的投资组合权重 2.卖空约束下的投资组合权重 3. 风险厌恶系数等于10的投资组合权重 169 | #%%#OLS-AC的权重输出 170 | para_list=[['OLS',0,5,0.005,True,'MV',False],['OLS',0,5,0.005,False,'MV',False],['OLS',0,10,0.005,True,'MV',False]] #, 171 | for para in para_list: 172 | print(para) 173 | get_result(ret,ch,methodname=para[0],rho=para[1],gamma=para[2],cr=para[3], allow_short_selling=para[4],utility_function=para[5],cost_type=para[6]) 174 | 175 | 176 | #%%#OLS-5C的权重输出 177 | ch=ch[['date','permno','01_size', '19_mom12', '29_BM', '59_ROE', '43_AG']] 178 | para_list=[['ff5',0,5,0.005,True,'MV',False],['ff5',0,5,0.005,False,'MV',False],['ff5',0,10,0.005,True,'MV',False]] 179 | for para in para_list: 180 | get_result(ret,ch,methodname=para[0],rho=para[1],gamma=para[2],cr=para[3], allow_short_selling=para[4],utility_function=para[5],cost_type=para[6]) 181 | 182 | 183 | #%%#OLS-3C的权重输出 184 | ch1=ch[['date','permno','01_size', '19_mom12', '29_BM']] 185 | para_list=[['ff3',0,5,0.005,True,'MV',False],['ff3',0,5,0.005,False,'MV',False],['ff3',0,10,0.005,True,'MV',False]] 186 | for para in para_list: 187 | get_result(ret,ch1,methodname=para[0],rho=para[1],gamma=para[2],cr=para[3], allow_short_selling=para[4],utility_function=para[5],cost_type=para[6]) 188 | 189 | 190 | 191 | 192 | 193 | 194 | -------------------------------------------------------------------------------- /02Lasso_AC.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun Nov 13 22:01:15 2022 4 | 5 | @author: whufi 6 | """ 7 | import os 8 | import numpy as np 9 | import random 10 | import pandas as pd 11 | import warnings 12 | from sklearn.model_selection import KFold 13 | random.seed(135) 14 | warnings.filterwarnings("ignore") 15 | 16 | # os.chdir(r'E:\02实验\98ML-AC-code') ##设置文件路径 17 | #%%#等权投资组合 18 | def get_equal_weight(ret): 19 | ret_m=pd.melt(ret.reset_index(),id_vars=ret.reset_index().columns[0]) 20 | ret_m.columns=['date','permno','ret'] 21 | ret_m['month']=ret_m['date'].apply(lambda x:str(x)[:6]) 22 | ret_m=ret_m.dropna(axis=0).astype(float) 23 | 24 | count=ret_m.groupby(['date'])['ret'].count().reset_index() 25 | retd=pd.merge(ret_m,count,on=['date'],how='left') 26 | 27 | retd['weight']=1/retd['ret_y'] 28 | weight1=pd.pivot(retd,index='date',columns='permno',values='weight') 29 | weight2=pd.concat([pd.DataFrame(columns=ret.columns.astype(float)),weight1],axis=0,join='outer') 30 | weight2=pd.concat([pd.DataFrame(index=ret.index),weight2],axis=1,join='outer') 31 | 32 | weight2.columns=weight2.columns.astype(float) 33 | 34 | return weight2 35 | 36 | 37 | 38 | 39 | def get_tc(j,theta0,rets,chs,weights,cr=0.005): 40 | ''' 41 | j:从1开始,假设第一期到第二期无交易费用 42 | cr:表示费率,一般取0.005 43 | ''' 44 | 45 | r=rets.iloc[j:j+1,:].values 46 | c=chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values 47 | 48 | wbar=weights.iloc[j:j+1,:].fillna(0).values 49 | 50 | w0bar=weights.iloc[j-1:j,:].fillna(0).values 51 | c0=chs[chs.date==rets.index[j-1]].sort_values('permno').iloc[:,2:].fillna(0).values 52 | r0=rets.iloc[j-1:j,:].values 53 | r00=rets.iloc[j-1:j,:].fillna(0).values 54 | 55 | wp=np.multiply(w0bar + np.dot(theta0.T,c0.T)/np.sum(~np.isnan(r0),axis=1),(1+r00)) 56 | w=wbar + np.dot(theta0.T,c.T)/np.sum(~np.isnan(r),axis=1) 57 | 58 | ##固定比列的交易成本 59 | lc= c/np.sum(~np.isnan(r),axis=1) - np.multiply(c0/np.sum(~np.isnan(r0),axis=1), 1+np.repeat(r00,c0.shape[1],axis=0).T) 60 | tc1=cr*np.dot(np.sign(w-wp),lc) 61 | 62 | return tc1.T 63 | 64 | 65 | def power_utility(r,gamma=5): ##越大越好 66 | return np.power(1+r,1-gamma)/(1-gamma) 67 | 68 | 69 | def loss(rets,chs,weights,theta,gamma,lambda1,rho,cr, allow_short_selling,utility_function,cost_type): 70 | 71 | retsv=rets.fillna(0).values 72 | weightsv=weights.fillna(0).values 73 | 74 | #是否允许卖空 75 | w=weightsv + np.vstack([np.dot(theta.T,chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values.T)/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values for j in range(len(rets)) ]) 76 | retw=w*retsv 77 | r= np.sum(retw ,axis=1) #.reshape(1,len(wsum)) 78 | 79 | #是否更换效用函数 80 | if utility_function=='crra': 81 | utility=-np.mean(power_utility(r,gamma))+lambda1*rho*np.linalg.norm(theta,1)+lambda1*(1-rho)/2*np.linalg.norm(theta,2) 82 | elif utility_function=='MV': 83 | sigma=np.std(r,ddof=1) 84 | utility= gamma/2*sigma-np.mean(r)+lambda1*rho*np.linalg.norm(theta,1)+lambda1*(1-rho)/2*np.linalg.norm(theta,2) 85 | 86 | if cost_type==False: 87 | utility=utility 88 | elif cost_type==True: 89 | wp=w*(1+retsv) 90 | tc=np.mean(np.sum(np.abs(w[1:,:]-wp[:-1,:]),axis=1)) ##计算交易成本 91 | utility=utility+cr*tc 92 | 93 | return utility 94 | 95 | 96 | def train(rets,chs,weights,gamma,lambda1,rho,cr, allow_short_selling,utility_function,cost_type): ##rho=1,L1; rho=0,L2 ; cr:cost rate费率 ; allow_short_selling默认无卖空约束 97 | 98 | retsv=rets.fillna(0).values 99 | weightsv=weights.fillna(0).values 100 | rcs=np.hstack([np.dot(retsv[j:j+1,:],chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values).T/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values for j in range(len(rets)) ]) 101 | rbs=np.hstack([np.dot(retsv[j:j+1,:],weightsv[j:j+1,:].T) for j in range(len(rets))]) 102 | sigmac=np.cov(rcs) 103 | 104 | cmean=np.mean(rcs,axis=1) 105 | cmeanm=np.vstack([cmean for x in range(rcs.shape[1])]).T 106 | sigmabc=np.dot(rbs-np.mean(rbs), (rcs-cmeanm).T)/(rcs.shape[1]-1) 107 | uc=cmean 108 | 109 | k=len(ch.columns)-2 ##特征的个数 110 | theta0=np.ones((k,1))*1.5 111 | eps=10**(-8) 112 | 113 | t=1 114 | beta1=0.9 115 | beta2=0.999 116 | alpha=0.2 ##学习率0.1 117 | 118 | 119 | utility0=100 120 | 121 | 122 | for u in range(100): 123 | print('batch',u) 124 | 125 | batch_size=2 ##如何设置 126 | batch_starts=[start for start in range(1,len(rets)-batch_size,batch_size)] 127 | random.shuffle(batch_starts) 128 | 129 | m0=0 130 | v0=0 131 | 132 | for p in batch_starts: 133 | print('第多少轮次梯度',t) 134 | 135 | if utility_function=='crra': 136 | gra=-np.power(1+rbs+ np.dot(theta0.T,rcs),-gamma).dot(rcs.T).T + lambda1*rho*np.sign(theta0)+lambda1*(1-rho)*theta0 ##梯度 137 | elif utility_function=='MV': 138 | gra=gamma * np.dot(sigmac,theta0) + gamma * sigmabc.T -uc.reshape(len(uc),1)+ lambda1*rho*np.sign(theta0)+lambda1*(1-rho)*theta0 ##梯度 139 | 140 | 141 | if cost_type==False: #不考虑交易成本 142 | gra=gra ##梯度 143 | elif cost_type==True: #考虑交易成本 144 | tc=0 145 | for j in range(p,p+batch_size): 146 | # print('成本',j) 147 | tc1=get_tc(j,theta0,rets,chs,weights,cr) 148 | tc=tc+tc1 149 | gra=gra+tc ##梯度 150 | 151 | m=beta1*m0 +(1-beta1)*gra 152 | v=beta2*v0+(1-beta2)*np.dot(gra.T,gra) 153 | 154 | beta1t=beta1**t 155 | beta2t=beta2**t 156 | 157 | mh=m/(1-beta1t) 158 | vh=v/(1-beta2t) 159 | 160 | theta= theta0 -alpha*mh/(np.sqrt(vh)+eps) 161 | 162 | utility=loss(rets,chs,weights,theta,gamma,lambda1,rho,cr, allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type) 163 | 164 | 165 | print('utility',utility) 166 | if utility>utility0: 167 | break 168 | 169 | if np.linalg.norm(theta-theta0) <= 10**(-5) or np.linalg.norm(utility-utility0)<= 10**(-5) : 170 | print(np.linalg.norm(utility-utility0)) 171 | break 172 | 173 | theta0=theta 174 | utility0=utility 175 | t=t+1 176 | 177 | return theta 178 | 179 | 180 | 181 | 182 | def test(theta,rett,weightt,chs,allow_short_selling): 183 | 184 | rets=rett 185 | weightsv=weightt.fillna(0).values 186 | 187 | #是否允许卖空 188 | if allow_short_selling==True: #允许卖空 189 | w=weightsv + np.vstack([np.dot(theta.T,chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values.T)/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values for j in range(len(rets)) ]) 190 | 191 | elif allow_short_selling==False: #不允许卖空 192 | w=weightsv + np.vstack([np.dot(theta.T,chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values.T)/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values for j in range(len(rets)) ]) 193 | w[w<0]=0 194 | wsum=np.sum(w,axis=1).reshape(w.shape[0],1).repeat(w.shape[1],axis=1) 195 | w=w/wsum 196 | 197 | return w 198 | 199 | 200 | 201 | 202 | 203 | def get_weights(i,ret,ch,weight0,gamma,rho,cr,allow_short_selling,utility_function,cost_type): 204 | trw=7 205 | viw=3 206 | 207 | #样本集 208 | weights=weight0.iloc[i:i+12*(trw+viw),:] ##市值加权的投资组合 209 | 210 | rets1=pd.melt(ret.iloc[i:i+12*(trw+viw+1),:].reset_index(),id_vars='date') 211 | rets1.columns=['date','permno','ret'] 212 | rets1=rets1.sort_values(by=['date']).astype(float) 213 | chs=pd.merge(rets1[['date','permno']],ch,how='left',on=['date','permno']) ##训练验证测试所用的特征 214 | 215 | retm=ret.iloc[i:i+12*(trw+viw),:] 216 | # lambdaopt=0.01 217 | 218 | if i==0:##首先仅考虑对第一期进行超参数调整 219 | kf=KFold(n_splits=5) 220 | umax=10000000 #最小效用 221 | global lambdaopt 222 | lambdaopt=0 223 | lambda_list=[0.001,0.0001] # 正则化项的项的参数是待调参数,范围可以从 [0.001,0.0001] 224 | for lambda1 in lambda_list: 225 | print(lambda1) 226 | 227 | ##五折交叉验证进行优化lambda 228 | util_list=[] 229 | for rets_index,retv_index in kf.split(retm): 230 | print(rets_index.shape,retv_index.shape) 231 | theta=train(retm.iloc[rets_index,:],chs,weights.iloc[rets_index,:],gamma,lambda1,rho,cr, allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type) 232 | 233 | util=loss(retm.iloc[retv_index,:],chs,weights.iloc[retv_index,:],theta,gamma,lambda1,rho,cr, allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type) 234 | util_list.append(util) 235 | 236 | util_mean=np.mean(util_list) 237 | 238 | 239 | if util_meanutility0: 175 | break 176 | 177 | if np.linalg.norm(theta-theta0) <= 10**(-5) or np.linalg.norm(utility-utility0)<= 10**(-5) : 178 | print(np.linalg.norm(utility-utility0)) 179 | break 180 | 181 | theta0=theta 182 | utility0=utility 183 | t=t+1 184 | 185 | return theta 186 | 187 | 188 | 189 | 190 | def test(theta,rett,weightt,chs,allow_short_selling): 191 | 192 | rets=rett 193 | weightsv=weightt.fillna(0).values 194 | 195 | #是否允许卖空 196 | if allow_short_selling==True: #允许卖空 197 | w=weightsv + np.vstack([np.dot(theta.T,chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values.T)/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values for j in range(len(rets)) ]) 198 | elif allow_short_selling==False: #不允许卖空 199 | w=weightsv + np.vstack([np.dot(theta.T,chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values.T)/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values for j in range(len(rets)) ]) 200 | w[w<0]=0 201 | wsum=np.sum(w,axis=1).reshape(w.shape[0],1).repeat(w.shape[1],axis=1) 202 | w=w/wsum 203 | 204 | return w 205 | 206 | 207 | 208 | def get_weights(i,ret,ch,weight0,gamma,rho,cr,allow_short_selling,utility_function,cost_type): 209 | trw=7 210 | viw=3 211 | 212 | #样本集 213 | weights=weight0.iloc[i:i+12*(trw+viw),:] ##市值加权的投资组合 214 | 215 | rets1=pd.melt(ret.iloc[i:i+12*(trw+viw+1),:].reset_index(),id_vars='date') 216 | rets1.columns=['date','permno','ret'] 217 | rets1=rets1.sort_values(by=['date']).astype(float) 218 | chs=pd.merge(rets1[['date','permno']],ch,how='left',on=['date','permno']) ##训练验证测试所用的特征 219 | 220 | retm=ret.iloc[i:i+12*(trw+viw),:] 221 | 222 | if i==0:##首先仅考虑对第一期进行超参数调整 223 | kf=KFold(n_splits=5) 224 | umax=10000000 #最小效用 225 | global lambdaopt 226 | lambdaopt=0 227 | lambda_list=[0.001,0.0001]# 正则化项的项的参数是待调参数,范围可以从 [0.001,0.0001] 228 | for lambda1 in lambda_list: 229 | print(lambda1) 230 | 231 | ##五折交叉验证进行优化lambda 232 | util_list=[] 233 | for rets_index,retv_index in kf.split(retm): 234 | print(rets_index.shape,retv_index.shape) 235 | theta=train(retm.iloc[rets_index,:],chs,weights.iloc[rets_index,:],gamma,lambda1,rho,cr, allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type) 236 | 237 | util=loss(retm.iloc[retv_index,:],chs,weights.iloc[retv_index,:],theta,gamma,lambda1,rho,cr, allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type) 238 | util_list.append(util) 239 | 240 | util_mean=np.mean(util_list) 241 | if util_meanutility0: 174 | break 175 | 176 | if np.linalg.norm(theta-theta0) <= 10**(-5) or np.linalg.norm(utility-utility0)<= 10**(-5) : 177 | print(np.linalg.norm(utility-utility0)) 178 | break 179 | 180 | theta0=theta 181 | utility0=utility 182 | t=t+1 183 | 184 | return theta 185 | 186 | 187 | 188 | 189 | def test(theta,rett,weightt,chs,allow_short_selling): 190 | 191 | rets=rett 192 | weightsv=weightt.fillna(0).values 193 | 194 | #是否允许卖空 195 | if allow_short_selling==True: #允许卖空 196 | w=weightsv + np.vstack([np.dot(theta.T,chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values.T)/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values for j in range(len(rets)) ]) 197 | # r= rbs+ np.dot(theta.T,rcs) 198 | elif allow_short_selling==False: #不允许卖空 199 | w=weightsv + np.vstack([np.dot(theta.T,chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values.T)/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values for j in range(len(rets)) ]) 200 | w[w<0]=0 201 | wsum=np.sum(w,axis=1).reshape(w.shape[0],1).repeat(w.shape[1],axis=1) 202 | w=w/wsum 203 | 204 | return w 205 | 206 | 207 | 208 | def get_weights(i,ret,ch,weight0,gamma,rho,cr,allow_short_selling,utility_function,cost_type): 209 | trw=7 210 | viw=3 211 | 212 | #样本集 213 | weights=weight0.iloc[i:i+12*(trw+viw),:] ##市值加权的投资组合 214 | 215 | rets1=pd.melt(ret.iloc[i:i+12*(trw+viw+1),:].reset_index(),id_vars='date') 216 | rets1.columns=['date','permno','ret'] 217 | rets1=rets1.sort_values(by=['date']).astype(float) 218 | chs=pd.merge(rets1[['date','permno']],ch,how='left',on=['date','permno']) ##训练验证测试所用的特征 219 | 220 | retm=ret.iloc[i:i+12*(trw+viw),:] 221 | 222 | 223 | if i==0:##首先仅考虑对第一期进行超参数调整 224 | kf=KFold(n_splits=5) 225 | umax=10000000 #最小效用 226 | global lambdaopt 227 | lambdaopt=0 228 | lambda_list=[0.001,0.0001] # 正则化项的项的参数是待调参数,范围可以从 [0.001,0.0001] 229 | for lambda1 in lambda_list: 230 | print(lambda1) 231 | 232 | ##五折交叉验证进行优化lambda 233 | util_list=[] 234 | for rets_index,retv_index in kf.split(retm): 235 | print(rets_index.shape,retv_index.shape) 236 | theta=train(retm.iloc[rets_index,:],chs,weights.iloc[rets_index,:],gamma,lambda1,rho,cr, allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type) 237 | 238 | util=loss(retm.iloc[retv_index,:],chs,weights.iloc[retv_index,:],theta,gamma,lambda1,rho,cr, allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type) 239 | util_list.append(util) 240 | 241 | util_mean=np.mean(util_list) 242 | 243 | if util_meanutility0: 228 | break 229 | 230 | if np.linalg.norm(theta-theta0) <= 10**(-5) or np.linalg.norm(utility-utility0)<= 10**(-5) : 231 | print(np.linalg.norm(utility-utility0)) 232 | break 233 | 234 | theta0=theta 235 | utility0=utility 236 | t=t+1 237 | 238 | return theta 239 | 240 | 241 | 242 | 243 | def test(theta,rett,weightt,chs,allow_short_selling): 244 | 245 | rets=rett 246 | weightsv=weightt.fillna(0).values 247 | 248 | #是否允许卖空 249 | if allow_short_selling==True: #允许卖空 250 | w=weightsv + np.vstack([np.dot(theta.T,chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values.T)/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values for j in range(len(rets)) ]) 251 | 252 | elif allow_short_selling==False: #不允许卖空 253 | w=weightsv + np.vstack([np.dot(theta.T,chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values.T)/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values for j in range(len(rets)) ]) 254 | w[w<0]=0 255 | wsum=np.sum(w,axis=1).reshape(w.shape[0],1).repeat(w.shape[1],axis=1) 256 | w=w/wsum 257 | 258 | return w 259 | 260 | 261 | 262 | def get_weights(i,ret,ch,weight0,gamma,rho,cr,allow_short_selling,utility_function,cost_type): 263 | trw=7 264 | viw=3 265 | 266 | #样本集 267 | weights=weight0.iloc[i:i+12*(trw+viw),:] ##市值加权的投资组合 268 | 269 | rets1=pd.melt(ret.iloc[i:i+12*(trw+viw+1),:].reset_index(),id_vars='date') 270 | rets1.columns=['date','permno','ret'] 271 | rets1=rets1.sort_values(by=['date']).astype(float) 272 | chs=pd.merge(rets1[['date','permno']],ch,how='left',on=['date','permno']) ##训练验证测试所用的特征 273 | 274 | retm=ret.iloc[i:i+12*(trw+viw),:] 275 | lambdaopt=0 276 | 277 | theta=train(ret.iloc[i:i+12*(trw+viw),:],chs,weight0.iloc[i:i+12*(trw+viw),:],gamma,lambdaopt,rho,cr, allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type) 278 | print(theta) 279 | ##测试集 280 | rett=ret.iloc[i+12*(trw+viw):i+12*(trw+viw+1),:] 281 | weightt=weight0.iloc[i+12*(trw+viw):i+12*(trw+viw+1),:] 282 | wp=test(theta,rett,weightt,chs,allow_short_selling) 283 | wp=pd.DataFrame(wp,index=weightt.index,columns=weightt.columns) 284 | return wp 285 | 286 | 287 | def get_result(ret,ch,methodname,gamma,rho,cr, allow_short_selling,utility_function,cost_type): 288 | trww=7*12 289 | viww=3*12 290 | teww=1*12 291 | weight0= get_equal_weight(ret) 292 | weight=pd.DataFrame() 293 | for i in range(0,len(ret)-trww-viww-teww+1,12): 294 | print(i) 295 | w=get_weights(i,ret,ch,weight0,gamma,rho,cr, allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type) 296 | weight=pd.concat([weight,w],axis=0,join='outer') 297 | weight.to_csv('result/middle/weight_all/weight_'+methodname+'_'+str(rho)+'_'+str(gamma)+'_'+str(allow_short_selling)+'_'+str(utility_function)+'_'+str(cost_type)+'.csv') 298 | 299 | 300 | #%%#所需数据,代码调用 301 | ret=pd.read_csv('data/ret_clean.csv',index_col=0).astype(float)/100 #读取收益 302 | ret.index.name='date' 303 | ch=pd.read_csv('data/char_pca.csv',index_col=0).astype(float) #读取特征 304 | ch=ch.sort_values(by=['date','permno'],ascending= True) 305 | 306 | #1.主结果的投资组合权重 2.卖空约束下的投资组合权重 3. 风险厌恶系数等于10的投资组合权重 307 | para_list=[['PCA',0,5,0.005,True,'MV',False],['PCA',0,5,0.005,False,'MV',False],['PCA',0,10,0.005,True,'MV',False] ] 308 | for para in para_list: 309 | get_result(ret,ch,methodname=para[0],rho=para[1],gamma=para[2],cr=para[3], allow_short_selling=para[4],utility_function=para[5],cost_type=para[6]) 310 | 311 | 312 | 313 | -------------------------------------------------------------------------------- /06RPPCA_AC.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun Nov 13 15:56:43 2022 4 | 5 | @author: whufi 6 | """ 7 | 8 | import os 9 | import numpy as np 10 | import random 11 | import pandas as pd 12 | import warnings 13 | random.seed(135) 14 | warnings.filterwarnings("ignore") 15 | # os.chdir(r'E:\02实验\98ML-AC-code') ##设置文件路径 16 | 17 | #%% 读取数据 18 | ret=pd.read_csv('data/ret_clean.csv',index_col=0).astype(float)/100 #读取收益 19 | ret.index.name='date' 20 | ch=pd.read_csv('data/char.csv',index_col=0).astype(float) #读取特征 21 | ch=ch.sort_values(by=['date','permno'],ascending= True) 22 | 23 | #%%#RPPCA降维 24 | gamma=20 25 | num=39 ##pca的方差大于0.8,对应的方差个数39 26 | def RP_PCA(chara,gamma,num): 27 | ''' 28 | 计算方式参考:Factors That Fit the Time Series and Cross-Section of Stock Returns 29 | gamma:用来控制RP_PCA中对于一阶的均值考虑的比重 30 | num:用来控制主成分的个数 31 | chara:T*N,N为特征的个数,T为样本数 32 | 返回降维后的特征 33 | ''' 34 | T,N=chara.shape 35 | chmean=chara.mean() 36 | sigma=np.dot(chara.T,chara)/T+gamma* np.dot(np.array(chmean).reshape(N,1),np.array(chmean).reshape(1,N)) 37 | 38 | def eigen(A): 39 | ''' 40 | 用来计算矩阵的特征分解、并根绝特征值大小对特征向量进行排序 41 | ''' 42 | eigenValues, eigenVectors = np.linalg.eig(A) 43 | idx = eigenValues.argsort()[::-1] 44 | eigenValues = eigenValues[idx] 45 | eigenVectors = eigenVectors[:,idx] 46 | return (eigenValues, eigenVectors) 47 | 48 | vals,vecs =eigen(sigma) 49 | gam=vecs[:,:num] 50 | factor=np.dot(chara,gam).dot(np.linalg.inv(np.dot(gam.T,gam))) 51 | return factor 52 | 53 | 54 | chara=ch.iloc[:,2:] 55 | pc=RP_PCA(chara,gamma,num) 56 | 57 | #将PAC降维后的因子进行横截面归一化 58 | pcdf=pd.DataFrame(pc,index=ch['permno']) 59 | pcdf=pcdf.reset_index() 60 | pcdf.index=ch['date'] 61 | pcdf=pcdf.reset_index() 62 | 63 | rr=ret.reset_index() 64 | rm=pd.melt(rr,id_vars=rr.columns[0]) 65 | rm.columns=['date','permno','ret'] 66 | rm=rm.dropna() 67 | 68 | ic= rm[['date','permno']].astype(float) 69 | col=pd.DataFrame(columns=ret.columns.astype(float)) 70 | ind=pd.DataFrame(index=ret.index.astype(float)) 71 | 72 | data=rm[['date','permno']].astype(float) 73 | 74 | for i in range(2,pcdf.shape[1]): # 75 | print(i) 76 | fm=pcdf[['date','permno',pcdf.columns[i]]] 77 | fr=pd.merge(ic,fm,how='left',on=['date','permno']) 78 | fp=pd.pivot(fr,index='date',columns='permno') 79 | fp=fp.droplevel(None,axis=1) 80 | fp.columns=fp.columns.astype(float) 81 | 82 | f_dp=pd.concat([col,fp],axis=0,join='inner') 83 | f_dp=pd.concat([col,f_dp],axis=0,join='outer') 84 | 85 | 86 | f_s=f_dp.T.apply(lambda x: (x-x.mean())/x.std() if (x.min() !=x.max()) else x.min()-x.max() ) #将数据标准化到均值为0,方差为1 #截面上如果只有一个数据,则让他等于0 87 | f_f=f_s.T.reset_index() ##缺失值不填充,填充会降低权重 88 | fm=pd.melt(f_f,id_vars='index').astype(float) 89 | fm.columns=['date','permno',pcdf.columns[i]] 90 | 91 | data=pd.merge(data,fm,how='left',on=['date','permno']) 92 | 93 | data=data.fillna(0) ##有收益的股票,若其特征值缺失,则填充横截面均值0 94 | data.to_csv('data/char_rppca.csv') 95 | 96 | 97 | 98 | 99 | ##等权投资组合 100 | def get_equal_weight(ret): 101 | ret_m=pd.melt(ret.reset_index(),id_vars=ret.reset_index().columns[0]) 102 | ret_m.columns=['date','permno','ret'] 103 | ret_m['month']=ret_m['date'].apply(lambda x:str(x)[:6]) 104 | ret_m=ret_m.dropna(axis=0).astype(float) 105 | 106 | count=ret_m.groupby(['date'])['ret'].count().reset_index() 107 | retd=pd.merge(ret_m,count,on=['date'],how='left') 108 | 109 | retd['weight']=1/retd['ret_y'] 110 | weight1=pd.pivot(retd,index='date',columns='permno',values='weight') 111 | weight2=pd.concat([pd.DataFrame(columns=ret.columns.astype(float)),weight1],axis=0,join='outer') 112 | weight2=pd.concat([pd.DataFrame(index=ret.index),weight2],axis=1,join='outer') 113 | weight2.columns=weight2.columns.astype(float) 114 | 115 | return weight2 116 | 117 | 118 | def get_tc(j,theta0,rets,chs,weights,cr=0.005): 119 | ''' 120 | j:从1开始,假设第一期到第二期无交易费用 121 | cr:表示费率,一般取0.005 122 | ''' 123 | 124 | r=rets.iloc[j:j+1,:].values 125 | c=chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values 126 | 127 | wbar=weights.iloc[j:j+1,:].fillna(0).values 128 | 129 | w0bar=weights.iloc[j-1:j,:].fillna(0).values 130 | c0=chs[chs.date==rets.index[j-1]].sort_values('permno').iloc[:,2:].fillna(0).values 131 | r0=rets.iloc[j-1:j,:].values 132 | r00=rets.iloc[j-1:j,:].fillna(0).values 133 | 134 | wp=np.multiply(w0bar + np.dot(theta0.T,c0.T)/np.sum(~np.isnan(r0),axis=1),(1+r00)) 135 | w=wbar + np.dot(theta0.T,c.T)/np.sum(~np.isnan(r),axis=1) 136 | 137 | ##固定比列的交易成本 138 | lc= c/np.sum(~np.isnan(r),axis=1) - np.multiply(c0/np.sum(~np.isnan(r0),axis=1), 1+np.repeat(r00,c0.shape[1],axis=0).T) 139 | tc1=cr*np.dot(np.sign(w-wp),lc) 140 | 141 | return tc1.T 142 | 143 | 144 | 145 | def power_utility(r,gamma=5): ##越大越好 146 | return np.power(1+r,1-gamma)/(1-gamma) 147 | 148 | def loss(rets,chs,weights,theta,gamma,lambda1,rho,cr, allow_short_selling,utility_function,cost_type): 149 | 150 | retsv=rets.fillna(0).values 151 | weightsv=weights.fillna(0).values 152 | 153 | #是否允许卖空 154 | if allow_short_selling==True: #允许卖空 155 | w=weightsv + np.vstack([np.dot(theta.T,chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values.T)/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values for j in range(len(rets)) ]) 156 | 157 | elif allow_short_selling==False: #不允许卖空 158 | w=weightsv + np.vstack([np.dot(theta.T,chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values.T)/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values for j in range(len(rets)) ]) 159 | w[w<0]=0 160 | wsum=np.sum(w,axis=1).reshape(w.shape[0],1).repeat(w.shape[1],axis=1) 161 | w=w/wsum 162 | 163 | retw=w*retsv 164 | r= np.sum(retw ,axis=1) #.reshape(1,len(wsum)) 165 | 166 | 167 | 168 | #是否更换效用函数 169 | if utility_function=='crra': 170 | utility=-np.mean(power_utility(r,gamma))+lambda1*rho*np.linalg.norm(theta,1)+lambda1*(1-rho)/2*np.linalg.norm(theta,2) 171 | elif utility_function=='MV': 172 | sigma=np.std(r,ddof=1) 173 | utility= gamma/2*sigma-np.mean(r)+lambda1*rho*np.linalg.norm(theta,1)+lambda1*(1-rho)/2*np.linalg.norm(theta,2) 174 | 175 | if cost_type==False: 176 | utility=utility 177 | elif cost_type==True: 178 | wp=w*(1+retsv) 179 | tc=np.mean(np.sum(np.abs(w[1:,:]-wp[:-1,:]),axis=1)) ##计算交易成本 180 | utility=utility+cr*tc 181 | 182 | return utility 183 | 184 | 185 | def train(rets,chs,weights,gamma,lambda1,rho,cr, allow_short_selling,utility_function,cost_type): ##rho=1,L1; rho=0,L2 ; cr:cost rate费率 ; allow_short_selling默认无卖空约束 186 | 187 | retsv=rets.fillna(0).values 188 | weightsv=weights.fillna(0).values 189 | rcs=np.hstack([np.dot(retsv[j:j+1,:],chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values).T/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values for j in range(len(rets)) ]) 190 | rbs=np.hstack([np.dot(retsv[j:j+1,:],weightsv[j:j+1,:].T) for j in range(len(rets))]) 191 | sigmac=np.cov(rcs) 192 | 193 | cmean=np.mean(rcs,axis=1) 194 | cmeanm=np.vstack([cmean for x in range(rcs.shape[1])]).T 195 | sigmabc=np.dot(rbs-np.mean(rbs), (rcs-cmeanm).T)/(rcs.shape[1]-1) 196 | uc=cmean 197 | 198 | k=len(ch.columns)-2 ##特征的个数 199 | theta0=np.ones((k,1))*1.5 200 | eps=10**(-8) 201 | 202 | t=1 203 | beta1=0.9 204 | beta2=0.999 205 | alpha=0.2 ##学习率0.1 206 | 207 | 208 | utility0=100 209 | 210 | 211 | for u in range(100): 212 | print('batch',u) 213 | 214 | batch_size=2 ##如何设置 215 | 216 | batch_starts=[start for start in range(1,len(rets)-batch_size,batch_size)] 217 | random.shuffle(batch_starts) 218 | 219 | m0=0 220 | v0=0 221 | 222 | for p in batch_starts: 223 | print('第多少轮次梯度',t) 224 | 225 | 226 | if utility_function=='crra': 227 | gra=-np.power(1+rbs+ np.dot(theta0.T,rcs),-gamma).dot(rcs.T).T + lambda1*rho*np.sign(theta0)+lambda1*(1-rho)*theta0 ##梯度 228 | elif utility_function=='MV': 229 | gra=gamma * np.dot(sigmac,theta0) + gamma * sigmabc.T -uc.reshape(len(uc),1)+ lambda1*rho*np.sign(theta0)+lambda1*(1-rho)*theta0 ##梯度 230 | 231 | 232 | 233 | if cost_type==False: #不考虑交易成本 234 | gra=gra ##梯度 235 | elif cost_type==True: #考虑交易成本 236 | tc=0 237 | for j in range(p,p+batch_size): 238 | # print('成本',j) 239 | tc1=get_tc(j,theta0,rets,chs,weights,cr) 240 | tc=tc+tc1 241 | gra=gra+tc ##梯度 242 | 243 | m=beta1*m0 +(1-beta1)*gra 244 | v=beta2*v0+(1-beta2)*np.dot(gra.T,gra) 245 | 246 | beta1t=beta1**t 247 | beta2t=beta2**t 248 | 249 | mh=m/(1-beta1t) 250 | vh=v/(1-beta2t) 251 | 252 | theta= theta0 -alpha*mh/(np.sqrt(vh)+eps) 253 | 254 | utility=loss(rets,chs,weights,theta,gamma,lambda1,rho,cr, allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type) 255 | print('utility',utility) 256 | 257 | if utility>utility0: 258 | break 259 | 260 | if np.linalg.norm(theta-theta0) <= 10**(-5) or np.linalg.norm(utility-utility0)<= 10**(-5) : 261 | print(np.linalg.norm(utility-utility0)) 262 | break 263 | 264 | theta0=theta 265 | utility0=utility 266 | t=t+1 267 | 268 | return theta 269 | 270 | 271 | 272 | 273 | def test(theta,rett,weightt,chs,allow_short_selling): 274 | 275 | rets=rett 276 | weightsv=weightt.fillna(0).values 277 | 278 | #是否允许卖空 279 | if allow_short_selling==True: #允许卖空 280 | w=weightsv + np.vstack([np.dot(theta.T,chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values.T)/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values for j in range(len(rets)) ]) 281 | 282 | elif allow_short_selling==False: #不允许卖空 283 | w=weightsv + np.vstack([np.dot(theta.T,chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values.T)/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values for j in range(len(rets)) ]) 284 | w[w<0]=0 285 | wsum=np.sum(w,axis=1).reshape(w.shape[0],1).repeat(w.shape[1],axis=1) 286 | w=w/wsum 287 | 288 | return w 289 | 290 | 291 | 292 | def get_weights(i,ret,ch,weight0,gamma,rho,cr,allow_short_selling,utility_function,cost_type): 293 | trw=7 294 | viw=3 295 | 296 | #样本集 297 | weights=weight0.iloc[i:i+12*(trw+viw),:] ##市值加权的投资组合 298 | 299 | rets1=pd.melt(ret.iloc[i:i+12*(trw+viw+1),:].reset_index(),id_vars='date') 300 | rets1.columns=['date','permno','ret'] 301 | rets1=rets1.sort_values(by=['date']).astype(float) 302 | chs=pd.merge(rets1[['date','permno']],ch,how='left',on=['date','permno']) ##训练验证测试所用的特征 303 | 304 | retm=ret.iloc[i:i+12*(trw+viw),:] 305 | lambdaopt=0 306 | 307 | theta=train(ret.iloc[i:i+12*(trw+viw),:],chs,weight0.iloc[i:i+12*(trw+viw),:],gamma,lambdaopt,rho,cr, allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type) 308 | print(theta) 309 | ##测试集 310 | rett=ret.iloc[i+12*(trw+viw):i+12*(trw+viw+1),:] 311 | weightt=weight0.iloc[i+12*(trw+viw):i+12*(trw+viw+1),:] 312 | wp=test(theta,rett,weightt,chs,allow_short_selling) 313 | wp=pd.DataFrame(wp,index=weightt.index,columns=weightt.columns) 314 | return wp 315 | 316 | 317 | def get_result(ret,ch,methodname,gamma,rho,cr, allow_short_selling,utility_function,cost_type): 318 | trww=7*12 319 | viww=3*12 320 | teww=1*12 321 | weight0= get_equal_weight(ret) 322 | weight=pd.DataFrame() 323 | for i in range(0,len(ret)-trww-viww-teww+1,12): 324 | print(i) 325 | w=get_weights(i,ret,ch,weight0,gamma,rho,cr, allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type) 326 | weight=pd.concat([weight,w],axis=0,join='outer') 327 | weight.to_csv('result/middle/weight_all/weight_'+methodname+'_'+str(rho)+'_'+str(gamma)+'_'+str(allow_short_selling)+'_'+str(utility_function)+'_'+str(cost_type)+'.csv') 328 | 329 | 330 | #%%#所需数据,代码调用 331 | ret=pd.read_csv('data/ret_clean.csv',index_col=0).astype(float)/100 #读取收益 332 | ret.index.name='date' 333 | ch=pd.read_csv('data/char_rppca.csv',index_col=0).astype(float) #读取特征 334 | ch=ch.sort_values(by=['date','permno'],ascending= True) 335 | 336 | #1.主结果的投资组合权重 2.卖空约束下的投资组合权重 3. 风险厌恶系数等于10的投资组合权重 337 | para_list=[['RPPCA',0,5,0.005,True,'MV',False],['RPPCA',0,5,0.005,False,'MV',False],['RPPCA',0,10,0.005,True,'MV',False] ] # 338 | for para in para_list: 339 | get_result(ret,ch,methodname=para[0],rho=para[1],gamma=para[2],cr=para[3], allow_short_selling=para[4],utility_function=para[5],cost_type=para[6]) 340 | 341 | 342 | 343 | 344 | 345 | # para=['PCA',0,5,0.005,True,'crra',True] 346 | # get_result(ret,ch,methodname=para[0],rho=para[1],gamma=para[2],cr=para[3], allow_short_selling=para[4],utility_function=para[5],cost_type=para[6]) 347 | 348 | # methodname=para[0] 349 | # rho=para[1] 350 | # gamma=para[2] 351 | # cr=para[3] 352 | # allow_short_selling=para[4] 353 | # utility_function=para[5] 354 | # cost_type=para[6] 355 | 356 | 357 | 358 | # para=['PCA',0,0.005,False,'crra',False] 359 | # get_result(rho=para[0],cr=para[1], allow_short_selling=para[2],utility_function=para[3],cost_type=para[4]) 360 | 361 | # para=['PCA',0,0.005,True,'MV',False] 362 | # get_result(rho=para[0],cr=para[1], allow_short_selling=para[2],utility_function=para[3],cost_type=para[4]) 363 | 364 | # para=['PCA',0,0.005,True,'crra',True] 365 | # get_result(rho=para[0],cr=para[1], allow_short_selling=para[2],utility_function=para[3],cost_type=para[4]) 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | -------------------------------------------------------------------------------- /07RNN-AC.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Nov 7 15:26:45 2022 4 | 5 | @author: whufi 6 | """ 7 | ##RNN预测神经网络 8 | 9 | # from TCNmodel import TCN 10 | import torch 11 | from torch import nn 12 | from torch.nn import init 13 | import numpy as np 14 | import torch.utils.data as Data 15 | import random 16 | import pandas as pd 17 | import os 18 | import warnings 19 | from pytorchtools_change import EarlyStoppings ##点开pytorchtools,复制里面的代码,即可新建pytorchtools 20 | warnings.filterwarnings("ignore") 21 | seed = 100 22 | random.seed(seed) # random 23 | np.random.seed(seed) # numpy 24 | torch.manual_seed(seed) # torch+CPU 25 | torch.cuda.manual_seed(seed) # torch+GPU 26 | # os.chdir(r'E:\02实验\98ML-AC-code') ##设置文件路径 27 | 28 | 29 | #%%读取数据 30 | ret=pd.read_csv('data/ret_clean.csv',index_col=0).astype(float)/100 #读取收益 31 | ch=pd.read_csv('data/char.csv',index_col=0).astype(float) #读取特征 32 | 33 | 34 | def get_data(ret,ch): 35 | ''' 36 | 对收益、特征数据进行调整 37 | ''' 38 | ret.index.name='date' 39 | 40 | ##为了保证每一期的股票数量的位置相同,生成全部的date+permno 41 | new=pd.melt(ret.fillna(0).reset_index(),id_vars='date').astype(float) 42 | new.columns=['date','permno','ret'] 43 | new=new[['date','permno']] 44 | new=new.sort_values(by=['date','permno']) 45 | 46 | ret0=pd.melt(ret.reset_index(),id_vars='date').astype(float) 47 | ret0.columns=['date','permno','ret'] 48 | 49 | 50 | ch=ch.fillna(0) 51 | 52 | retch=pd.merge(new,ch,how='left',on=['date','permno']).fillna(0) 53 | retch=pd.merge(retch,ret0,how='left',on=['date','permno']) 54 | 55 | data_list=[] 56 | for i in ret.index: 57 | data_list.append(retch[retch.date==i]) 58 | 59 | return data_list 60 | 61 | data_list=get_data(ret,ch) 62 | 63 | 64 | 65 | 66 | 67 | # #获得数据迭代器 68 | def load_batch(data_arrays, batch_size, N, is_train=True): 69 | ''' 70 | 自定义生成训练集中的batch数据集,为了保持横截面的样本顺序 71 | ''' 72 | batch_starts=[start for start in range(0, int(data_arrays[0].shape[0]+1-batch_size),batch_size)] 73 | 74 | if is_train: 75 | random.shuffle(batch_starts) 76 | 77 | cr_list=[] 78 | 79 | for j in batch_starts : 80 | cr_list.append([data_arrays[0][j:(j+batch_size)],data_arrays[1][j:(j+batch_size)]]) 81 | 82 | if np.max(batch_starts)+batch_size < data_arrays[0].shape[0]: 83 | cr_list.append([data_arrays[0][(np.max(batch_starts)+batch_size):],data_arrays[1][(np.max(batch_starts)+batch_size):]]) 84 | 85 | return cr_list 86 | 87 | 88 | 89 | class RNN(nn.Module): 90 | def __init__(self, input_size, hidden_size,output_size,num_layers,dropout): 91 | super(RNN, self).__init__() 92 | 93 | # 这里构建LSTM 还可以构建RNN、GRU等方法类似 94 | self.rnn1 = nn.RNN( 95 | input_size=input_size, 96 | hidden_size=32, 97 | num_layers=1, 98 | # dropout=dropout, 99 | batch_first=True # 如果为True,输入输出数据格式是(batch, seq_len, feature) 100 | # 为False,输入输出数据格式是(seq_len, batch, feature), 101 | ) 102 | 103 | self.rnn2 = nn.RNN( 104 | input_size=32, 105 | hidden_size=16, 106 | num_layers=1, 107 | # dropout=dropout, 108 | batch_first=True # 如果为True,输入输出数据格式是(batch, seq_len, feature) 109 | # 为False,输入输出数据格式是(seq_len, batch, feature), 110 | ) 111 | self.fc = nn.Linear(16,output_size,bias=False) #拼接隐藏层 112 | 113 | def forward(self, x): 114 | r_out, _ = self.rnn1(x) 115 | r_out, _ = self.rnn2(r_out) 116 | out = self.fc(r_out) # 全连接层进行分类 117 | return out 118 | 119 | 120 | def loss(net,w_hat,r,gamma,N,allow_short_selling=True,utility_function='crra',cost_type=False): 121 | ''' 122 | what:上一层的输出,为权重的一部分,等价于y_pred 123 | r:对应的收益数据 124 | ''' 125 | 126 | weigh0=1/(~torch.isnan(r)).sum(axis=1) ##计算等权投资组合的权重 127 | w_hat_mul=torch.where(torch.isnan(r),torch.full_like(r, np.nan), w_hat) 128 | w_hatm=torch.where(torch.isnan(r),torch.full_like(r, 0), w_hat) 129 | 130 | ws=torch.sum(w_hatm,axis=1) 131 | num=torch.sum(~torch.isnan(r),axis=1) 132 | wn=ws/num 133 | 134 | 135 | w0=torch.empty(w_hat.shape) 136 | wh=torch.empty(w_hat.shape) 137 | 138 | for j in range(w_hat.shape[0]): 139 | w0[j]=torch.where(torch.isnan(r[j]),torch.full_like(r[j], 0), weigh0[j]) 140 | wh[j]=(w_hat_mul[j]-wn[j])/num[j] 141 | 142 | 143 | wh=torch.where(torch.isnan(wh),torch.full_like(wh, 0), wh) 144 | w=w0+wh 145 | if allow_short_selling==True: ##没有权重约束 146 | w=w 147 | elif allow_short_selling==False: ##卖空约束 148 | w=w.clamp(0,1) 149 | w=w/torch.sum(w,axis=1).repeat(1,w.shape[1]).reshape(w.shape) ##重新要求权重之和等于1 150 | 151 | r0=torch.where(torch.isnan(r),torch.full_like(r, 0), r) 152 | retw=r0.mul(w) 153 | 154 | mret=retw.sum(axis=1) 155 | if utility_function=='crra': #指数效用函数 156 | utility=-torch.pow(1+mret,1-gamma)/(1-gamma) 157 | utilitymean=torch.mean(utility) 158 | elif utility_function=='MV': #均值方差效用函数 159 | sigma=torch.var(mret) 160 | utilitymean=sigma.mul(gamma/2)-torch.mean(mret) 161 | 162 | tc=torch.mean(torch.sum(torch.abs(w[1:,:,:]-retw[:-1,:,:]),axis=1),axis=0) ##计算交易成本 163 | if cost_type==False: 164 | loss=utilitymean 165 | elif cost_type==True: 166 | loss=utilitymean+0.005*tc 167 | 168 | 169 | return loss 170 | 171 | 172 | #记录列表(list),存储训练集和测试集上经过每一轮次,loss的变化 173 | def train_model(net,train_iter,valid_iter,tc,tr,vc,vr,gamma,N,loss,input_size, output_size,dropout, 174 | num_epochs,batch_size,params=None,lr=None,optimizer=None,allow_short_selling=True,utility_function='crra',cost_type=False): 175 | train_loss=[] 176 | valid_loss=[] 177 | 178 | p_list='RNNPP'+str(allow_short_selling)+utility_function+str(cost_type) 179 | early_stopping = EarlyStoppings(para_list=p_list,patience=7, verbose=True) 180 | 181 | for epoch in range(num_epochs):#外循环控制循环轮次 182 | for c,r in train_iter:#内循环控制训练批次 183 | 184 | w_hat = net(c.to(torch.float32)) 185 | l = loss(net,w_hat,r.to(torch.float32),gamma,N,allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type) 186 | 187 | #梯度清零 188 | if optimizer is not None: 189 | optimizer.zero_grad() 190 | elif params is not None and params[0].grad is not None: 191 | for param in params: 192 | param.grad.data.zero_() 193 | 194 | 195 | #梯度回传 196 | l.backward() 197 | 198 | if optimizer is None: 199 | torch.optim.SGD(net.parameters(),lr,batch_size) 200 | # SGD(params,lr,batch_size) 201 | else: 202 | optimizer.step() 203 | 204 | train_loss.append((loss(net,net(tc.to(torch.float32)),tr.to(torch.float32),gamma,N,allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type)).item())#loss本身就默认了取平均值! 205 | valid_loss.append((loss(net,net(vc.to(torch.float32)),vr.to(torch.float32),gamma,N,allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type)).item()) 206 | 207 | print("epoch %d,train_loss %.6f,valid_loss %.6f"%(epoch+1,train_loss[epoch],valid_loss[epoch])) 208 | 209 | valid_lossave = np.average(valid_loss) 210 | 211 | early_stopping(valid_loss[epoch], net,para_list=p_list) 212 | 213 | if early_stopping.early_stop: 214 | print("Early stopping") 215 | break 216 | 217 | # load the last checkpoint with the best model 218 | net.load_state_dict(torch.load(p_list+'checkpoint.pt')) 219 | 220 | return net, train_loss, valid_loss 221 | 222 | 223 | 224 | 225 | def test_model(netopt,ec,er,N,allow_short_selling=True): 226 | w_hat=netopt(ec.to(torch.float32)) 227 | weigh0=1/(~torch.isnan(er)).sum(axis=1) ##计算等权投资组合的权重 228 | 229 | w_hat_mul=torch.where(torch.isnan(er),torch.full_like(er, np.nan), w_hat) 230 | w_hatm=torch.where(torch.isnan(er),torch.full_like(er, 0), w_hat) 231 | 232 | ws=torch.sum(w_hatm,axis=1) 233 | num=torch.sum(~torch.isnan(er),axis=1) 234 | wn=ws/num 235 | 236 | w0=torch.empty(w_hat.shape) 237 | wh=torch.empty(w_hat.shape) 238 | 239 | for j in range(w_hat.shape[0]): 240 | w0[j]=torch.where(torch.isnan(er[j]),torch.full_like(er[j], 0), weigh0[j]) 241 | wh[j]= (w_hat_mul[j]-wn[j])/num[j] #times 242 | 243 | 244 | wh=torch.where(torch.isnan(wh),torch.full_like(wh, 0), wh) 245 | w=w0+wh 246 | 247 | 248 | if allow_short_selling==True: ##没有权重约束 249 | w=w 250 | elif allow_short_selling==False: ##卖空约束 251 | w=w.clamp(0,1) 252 | w=w/torch.sum(w,axis=1).repeat(1,w.shape[1]).reshape(w.shape) ##重新要求权重之和等于1 253 | return w 254 | 255 | 256 | 257 | def get_weights(i,data_list,ret,gamma,dropout,allow_short_selling,utility_function,cost_type): 258 | 259 | ''' 260 | i:以12为倍数 261 | ''' 262 | 263 | lr=0.01 264 | batch_size = 10# 设置小批量大小 265 | num_epochs = 100 #100 266 | 267 | trw=7*12 268 | viw=3*12 269 | tew=1*12 270 | N=data_list[0].shape[0] ##股票数量 271 | 272 | weight_index=ret.index[i+trw+viw:i+trw+viw+tew] 273 | 274 | input_size=data_list[0].shape[1]-3 ##特征数量 275 | hidden_size=32 276 | output_size=1 277 | num_layers=2 ##LSTM的层数 278 | 279 | 280 | ##训练集 281 | tc=torch.tensor(np.stack([x.iloc[:,2:-1] for x in data_list[i:i+trw]])) 282 | tr=torch.tensor(np.stack([x.iloc[:,-1:] for x in data_list[i:i+trw]])) 283 | 284 | #验证集 285 | vc=torch.tensor(np.stack([x.iloc[:,2:-1] for x in data_list[i+trw:i+trw+viw]])) 286 | vr=torch.tensor(np.stack([x.iloc[:,-1:] for x in data_list[i+trw:i+trw+viw]])) 287 | 288 | 289 | #测试集 290 | ec=torch.tensor(np.stack([x.iloc[:,2:-1] for x in data_list[i+trw+viw:i+trw+viw+tew]])) 291 | er=torch.tensor(np.stack([x.iloc[:,-1:] for x in data_list[i+trw+viw:i+trw+viw+tew]])) 292 | 293 | ##形成batch数据 294 | train_iter = load_batch([tc,tr], batch_size, N, is_train=False) 295 | valid_iter = load_batch([vc,vr], batch_size, N, is_train=False) 296 | 297 | net = RNN(input_size, hidden_size,output_size,num_layers,dropout) 298 | optimizer =torch.optim.Adam(net.parameters(),lr=lr,betas=(0.9, 0.999),eps=1e-08,weight_decay=0,amsgrad=False) 299 | 300 | netopt, train_loss, valid_loss=train_model(net,train_iter,valid_iter,tc,tr,vc,vr,gamma,N,loss,input_size, output_size, dropout, 301 | num_epochs,batch_size,params=None,lr=lr,optimizer=optimizer,allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type) 302 | 303 | w=test_model(netopt,ec,er.to(torch.float32),N,allow_short_selling) 304 | path = 'result/weightRNN'+'_'+str(gamma)+'_'+str(allow_short_selling)+'_'+str(utility_function)+'_'+str(cost_type) 305 | if not os.path.exists(path): 306 | os.mkdir(path) 307 | pd.DataFrame(w.squeeze().detach().numpy(),index=weight_index,columns=ret.columns).to_csv(path+'/'+str(i)+'.csv') 308 | 309 | 310 | 311 | #%%#1.原始模型 2.卖空约束 3.更换效用函数 312 | para_list=[[5,True,'MV',False],[5,False,'MV',False],[10,True,'MV',False]] 313 | for para in para_list: 314 | trw=7*12 315 | viw=3*12 316 | tew=1*12 317 | for i in range(0,len(data_list)-trw-viw-tew+1,12): 318 | print(i) 319 | get_weights(i,data_list,ret,gamma=para[0],dropout=0,allow_short_selling=para[1],utility_function=para[2],cost_type=para[3] ) 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | -------------------------------------------------------------------------------- /08LSTM-AC.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Nov 7 15:26:45 2022 4 | 5 | @author: whufi 6 | """ 7 | import torch 8 | from torch import nn 9 | from torch.nn import init 10 | import numpy as np 11 | import torch.utils.data as Data 12 | import random 13 | import pandas as pd 14 | import os 15 | import warnings 16 | from pytorchtools_change import EarlyStoppings ##点开pytorchtools,复制里面的代码,即可新建pytorchtools 17 | warnings.filterwarnings("ignore") 18 | 19 | seed = 100 20 | random.seed(seed) # random 21 | np.random.seed(seed) # numpy 22 | torch.manual_seed(seed) # torch+CPU 23 | torch.cuda.manual_seed(seed) # torch+GPU 24 | # os.chdir(r'E:\02实验\98ML-AC-code') ##设置文件路径 25 | 26 | 27 | #%%读取数据 28 | ret=pd.read_csv('data/ret_clean.csv',index_col=0).astype(float)/100 #读取收益 29 | ch=pd.read_csv('data/char.csv',index_col=0).astype(float) #读取特征 30 | 31 | 32 | def get_data(ret,ch): 33 | ''' 34 | 对收益、特征数据进行调整 35 | ''' 36 | ret.index.name='date' 37 | 38 | ##为了保证每一期的股票数量的位置相同,生成全部的date+permno 39 | new=pd.melt(ret.fillna(0).reset_index(),id_vars='date').astype(float) 40 | new.columns=['date','permno','ret'] 41 | new=new[['date','permno']] 42 | new=new.sort_values(by=['date','permno']) 43 | 44 | ret0=pd.melt(ret.reset_index(),id_vars='date').astype(float) 45 | ret0.columns=['date','permno','ret'] 46 | ch=ch.fillna(0) 47 | 48 | retch=pd.merge(new,ch,how='left',on=['date','permno']).fillna(0) 49 | retch=pd.merge(retch,ret0,how='left',on=['date','permno']) 50 | 51 | data_list=[] 52 | for i in ret.index: 53 | data_list.append(retch[retch.date==i]) ##看看此处的数据有没有乱,没乱 54 | 55 | return data_list 56 | 57 | data_list=get_data(ret,ch) 58 | 59 | 60 | 61 | 62 | 63 | # #获得数据迭代器 64 | def load_batch(data_arrays, batch_size, N, is_train=True): 65 | ''' 66 | 自定义生成训练集中的batch数据集,为了保持横截面的样本顺序 67 | ''' 68 | batch_starts=[start for start in range(0, int(data_arrays[0].shape[0]+1-batch_size),batch_size)] 69 | 70 | if is_train: 71 | random.shuffle(batch_starts) 72 | 73 | cr_list=[] 74 | 75 | for j in batch_starts : 76 | cr_list.append([data_arrays[0][j:(j+batch_size)],data_arrays[1][j:(j+batch_size)]]) 77 | 78 | if np.max(batch_starts)+batch_size < data_arrays[0].shape[0]: 79 | cr_list.append([data_arrays[0][(np.max(batch_starts)+batch_size):],data_arrays[1][(np.max(batch_starts)+batch_size):]]) 80 | 81 | return cr_list 82 | 83 | 84 | 85 | class LSTM(nn.Module): 86 | def __init__(self, input_size, hidden_size,output_size,num_layers,dropout): 87 | super(LSTM, self).__init__() 88 | 89 | # 这里构建LSTM 还可以构建RNN、GRU等方法类似 90 | self.rnn1 = nn.LSTM( 91 | input_size=input_size, 92 | hidden_size=32, 93 | num_layers=1, 94 | batch_first=True # 如果为True,输入输出数据格式是(batch, seq_len, feature) 95 | # 为False,输入输出数据格式是(seq_len, batch, feature), 96 | ) 97 | 98 | self.rnn2 = nn.LSTM( 99 | input_size=32, 100 | hidden_size=16, 101 | num_layers=1, 102 | batch_first=True # 如果为True,输入输出数据格式是(batch, seq_len, feature) 103 | # 为False,输入输出数据格式是(seq_len, batch, feature), 104 | ) 105 | self.fc = nn.Linear(16,output_size,bias=False) #拼接隐藏层 106 | 107 | def forward(self, x): 108 | r_out, (h_s, h_c) = self.rnn1(x) 109 | r_out, (h_s, h_c) = self.rnn2(r_out) 110 | out = self.fc(r_out) # 全连接层进行分类 111 | return out 112 | 113 | 114 | def loss(net,w_hat,r,gamma,N,allow_short_selling=True,utility_function='crra',cost_type=False): 115 | ''' 116 | what:上一层的输出,为权重的一部分,等价于y_pred 117 | r:对应的收益数据 118 | ''' 119 | 120 | weigh0=1/(~torch.isnan(r)).sum(axis=1) ##计算等权投资组合的权重 121 | w_hat_mul=torch.where(torch.isnan(r),torch.full_like(r, np.nan), w_hat) 122 | w_hatm=torch.where(torch.isnan(r),torch.full_like(r, 0), w_hat) 123 | 124 | ws=torch.sum(w_hatm,axis=1) 125 | num=torch.sum(~torch.isnan(r),axis=1) 126 | wn=ws/num 127 | 128 | 129 | w0=torch.empty(w_hat.shape) 130 | wh=torch.empty(w_hat.shape) 131 | 132 | for j in range(w_hat.shape[0]): 133 | w0[j]=torch.where(torch.isnan(r[j]),torch.full_like(r[j], 0), weigh0[j]) 134 | wh[j]=(w_hat_mul[j]-wn[j])/num[j] 135 | 136 | 137 | wh=torch.where(torch.isnan(wh),torch.full_like(wh, 0), wh) 138 | w=w0+wh 139 | if allow_short_selling==True: ##没有权重约束 140 | w=w 141 | elif allow_short_selling==False: ##卖空约束 142 | w=w.clamp(0,1) 143 | w=w/torch.sum(w,axis=1).repeat(1,w.shape[1]).reshape(w.shape) ##重新要求权重之和等于1 144 | 145 | r0=torch.where(torch.isnan(r),torch.full_like(r, 0), r) 146 | retw=r0.mul(w) 147 | 148 | mret=retw.sum(axis=1) 149 | if utility_function=='crra': #指数效用函数 150 | utility=-torch.pow(1+mret,1-gamma)/(1-gamma) 151 | utilitymean=torch.mean(utility) 152 | elif utility_function=='MV': #均值方差效用函数 153 | sigma=torch.var(mret) 154 | utilitymean=sigma.mul(gamma/2)-torch.mean(mret) 155 | 156 | tc=torch.mean(torch.sum(torch.abs(w[1:,:,:]-retw[:-1,:,:]),axis=1),axis=0) ##计算交易成本 157 | if cost_type==False: 158 | loss=utilitymean 159 | elif cost_type==True: 160 | loss=utilitymean+0.005*tc 161 | 162 | return loss 163 | 164 | 165 | #记录列表(list),存储训练集和测试集上经过每一轮次,loss的变化 166 | def train_model(net,train_iter,valid_iter,tc,tr,vc,vr,gamma,N,loss,input_size, output_size,dropout, 167 | num_epochs,batch_size,params=None,lr=None,optimizer=None,allow_short_selling=True,utility_function='crra',cost_type=False): 168 | train_loss=[] 169 | valid_loss=[] 170 | 171 | p_list='LSTMPP'+str(allow_short_selling)+utility_function+str(cost_type) 172 | early_stopping = EarlyStoppings(para_list=p_list,patience=7, verbose=True) 173 | 174 | for epoch in range(num_epochs):#外循环控制循环轮次 175 | #step1在训练集上,进行小批量梯度下降更新参数 176 | 177 | for c,r in train_iter:#内循环控制训练批次 178 | 179 | w_hat = net(c.to(torch.float32)) 180 | l = loss(net,w_hat,r.to(torch.float32),gamma,N,allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type) 181 | 182 | #梯度清零 183 | if optimizer is not None: 184 | optimizer.zero_grad() 185 | elif params is not None and params[0].grad is not None: 186 | for param in params: 187 | param.grad.data.zero_() 188 | 189 | 190 | #梯度回传 191 | l.backward() 192 | 193 | if optimizer is None: 194 | torch.optim.SGD(net.parameters(),lr,batch_size) 195 | # SGD(params,lr,batch_size) 196 | else: 197 | optimizer.step() 198 | 199 | train_loss.append((loss(net,net(tc.to(torch.float32)),tr.to(torch.float32),gamma,N,allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type)).item())#loss本身就默认了取平均值! 200 | valid_loss.append((loss(net,net(vc.to(torch.float32)),vr.to(torch.float32),gamma,N,allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type)).item()) 201 | 202 | print("epoch %d,train_loss %.6f,valid_loss %.6f"%(epoch+1,train_loss[epoch],valid_loss[epoch])) 203 | 204 | valid_lossave = np.average(valid_loss) 205 | 206 | early_stopping(valid_loss[epoch], net,para_list=p_list) 207 | if early_stopping.early_stop: 208 | print("Early stopping") 209 | break 210 | 211 | 212 | 213 | # load the last checkpoint with the best model 214 | net.load_state_dict(torch.load(p_list+'checkpoint.pt')) 215 | 216 | return net, train_loss, valid_loss 217 | 218 | 219 | 220 | 221 | def test_model(netopt,ec,er,N,allow_short_selling=True): 222 | w_hat=netopt(ec.to(torch.float32)) 223 | 224 | weigh0=1/(~torch.isnan(er)).sum(axis=1) ##计算等权投资组合的权重 225 | w_hat_mul=torch.where(torch.isnan(er),torch.full_like(er, np.nan), w_hat) 226 | w_hatm=torch.where(torch.isnan(er),torch.full_like(er, 0), w_hat) 227 | 228 | ws=torch.sum(w_hatm,axis=1) 229 | num=torch.sum(~torch.isnan(er),axis=1) 230 | wn=ws/num 231 | 232 | 233 | w0=torch.empty(w_hat.shape) 234 | wh=torch.empty(w_hat.shape) 235 | 236 | for j in range(w_hat.shape[0]): 237 | w0[j]=torch.where(torch.isnan(er[j]),torch.full_like(er[j], 0), weigh0[j]) 238 | wh[j]= (w_hat_mul[j]-wn[j])/num[j] #times 239 | 240 | 241 | wh=torch.where(torch.isnan(wh),torch.full_like(wh, 0), wh) 242 | w=w0+wh 243 | 244 | if allow_short_selling==True: ##没有权重约束 245 | w=w 246 | elif allow_short_selling==False: ##卖空约束 247 | w=w.clamp(0,1) 248 | w=w/torch.sum(w,axis=1).repeat(1,w.shape[1]).reshape(w.shape) ##重新要求权重之和等于1 249 | return w 250 | 251 | 252 | def get_weights(i,data_list,ret,gamma,dropout,allow_short_selling,utility_function,cost_type): 253 | 254 | ''' 255 | i:以12为倍数 256 | ''' 257 | 258 | lr=0.01 259 | batch_size = 10# 设置小批量大小 260 | num_epochs = 100 #100 261 | 262 | trw=7*12 263 | viw=3*12 264 | tew=1*12 265 | N=data_list[0].shape[0] ##股票数量 266 | 267 | weight_index=ret.index[i+trw+viw:i+trw+viw+tew] 268 | 269 | input_size=data_list[0].shape[1]-3 ##特征数量 270 | hidden_size=32 271 | output_size=1 272 | num_layers=2 ##LSTM的层数 273 | 274 | ##训练集 275 | tc=torch.tensor(np.stack([x.iloc[:,2:-1] for x in data_list[i:i+trw]])) 276 | tr=torch.tensor(np.stack([x.iloc[:,-1:] for x in data_list[i:i+trw]])) 277 | 278 | #验证集 279 | vc=torch.tensor(np.stack([x.iloc[:,2:-1] for x in data_list[i+trw:i+trw+viw]])) 280 | vr=torch.tensor(np.stack([x.iloc[:,-1:] for x in data_list[i+trw:i+trw+viw]])) 281 | 282 | 283 | #测试集 284 | ec=torch.tensor(np.stack([x.iloc[:,2:-1] for x in data_list[i+trw+viw:i+trw+viw+tew]])) 285 | er=torch.tensor(np.stack([x.iloc[:,-1:] for x in data_list[i+trw+viw:i+trw+viw+tew]])) 286 | 287 | ##形成batch数据 288 | train_iter = load_batch([tc,tr], batch_size, N, is_train=False) 289 | valid_iter = load_batch([vc,vr], batch_size, N, is_train=False) 290 | 291 | 292 | net = LSTM(input_size, hidden_size,output_size,num_layers,dropout) 293 | 294 | optimizer =torch.optim.Adam(net.parameters(),lr=lr,betas=(0.9, 0.999),eps=1e-08,weight_decay=0,amsgrad=False) 295 | 296 | netopt, train_loss, valid_loss=train_model(net,train_iter,valid_iter,tc,tr,vc,vr,gamma,N,loss,input_size, output_size, dropout, 297 | num_epochs,batch_size,params=None,lr=lr,optimizer=optimizer,allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type) 298 | 299 | w=test_model(netopt,ec,er.to(torch.float32),N,allow_short_selling) 300 | path = 'result/weightLSTM'+'_'+str(gamma)+'_'+str(allow_short_selling)+'_'+str(utility_function)+'_'+str(cost_type) 301 | if not os.path.exists(path): 302 | os.mkdir(path) 303 | pd.DataFrame(w.squeeze().detach().numpy(),index=weight_index,columns=ret.columns).to_csv(path+'/'+str(i)+'.csv') 304 | 305 | 306 | 307 | #%%#1.原始模型 2.卖空约束 3.更换效用函数 308 | para_list=[[5,True,'MV',False],[5,False,'MV',False],[10,True,'MV',False]] 309 | for para in para_list: 310 | trw=7*12 311 | viw=3*12 312 | tew=1*12 313 | for i in range(0,len(data_list)-trw-viw-tew+1,12): 314 | print(i) 315 | get_weights(i,data_list,ret,gamma=para[0],dropout=0,allow_short_selling=para[1],utility_function=para[2],cost_type=para[3] ) 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | -------------------------------------------------------------------------------- /09DFN-AC.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Nov 5 16:51:01 2022 4 | @author: whufi 5 | """ 6 | 7 | 8 | import torch 9 | from torch import nn 10 | from torch.nn import init 11 | import numpy as np 12 | import torch.utils.data as Data 13 | 14 | import random 15 | import pandas as pd 16 | import os 17 | import warnings 18 | from pytorchtools_change import EarlyStoppings ##点开pytorchtools,复制里面的代码,即可新建pytorchtools 19 | import joblib 20 | from sklearn.model_selection import KFold 21 | warnings.filterwarnings("ignore") 22 | # os.chdir(r'E:\02实验\98ML-AC-code') ##设置文件路径 23 | 24 | #%%读取数据 25 | ret=pd.read_csv('data/ret_clean.csv',index_col=0).astype(float)/100 #读取收益 26 | ch=pd.read_csv('data/char.csv',index_col=0).astype(float) #读取特征 27 | 28 | 29 | def get_data(ret,ch): 30 | ''' 31 | 对收益、特征数据进行调整 32 | ''' 33 | ret.index.name='date' 34 | 35 | ##为了保证每一期的股票数量的位置相同,生成全部的date+permno 36 | new=pd.melt(ret.fillna(0).reset_index(),id_vars='date').astype(float) 37 | new.columns=['date','permno','ret'] 38 | new=new[['date','permno']] 39 | new=new.sort_values(by=['date','permno']) 40 | 41 | ret0=pd.melt(ret.reset_index(),id_vars='date').astype(float) 42 | ret0.columns=['date','permno','ret'] 43 | ch=ch.fillna(0) 44 | 45 | retch=pd.merge(new,ch,how='left',on=['date','permno']).fillna(0) 46 | retch=pd.merge(retch,ret0,how='left',on=['date','permno']) 47 | 48 | data_list=[] 49 | for i in ret.index: 50 | data_list.append(retch[retch.date==i]) ##看看此处的数据有没有乱,没乱 51 | 52 | return data_list 53 | 54 | data_list=get_data(ret,ch) 55 | 56 | 57 | 58 | 59 | 60 | # #获得数据迭代器 61 | def load_batch(data_arrays, batch_size, N, is_train=True): 62 | ''' 63 | 自定义生成训练集中的batch数据集,为了保持横截面的样本顺序 64 | ''' 65 | batch_starts=[start for start in range(0, int(data_arrays[0].shape[0]+1-batch_size),batch_size)] 66 | 67 | if is_train: 68 | random.shuffle(batch_starts) 69 | 70 | cr_list=[] 71 | 72 | for j in batch_starts : 73 | cr_list.append([data_arrays[0][j:(j+batch_size)],data_arrays[1][j:(j+batch_size)]]) 74 | 75 | if np.max(batch_starts)+batch_size < data_arrays[0].shape[0]: 76 | cr_list.append([data_arrays[0][(np.max(batch_starts)+batch_size):],data_arrays[1][(np.max(batch_starts)+batch_size):]]) 77 | 78 | return cr_list 79 | 80 | 81 | ###简化标准化过程 82 | 83 | class Normalization(nn.Module): 84 | ''' 85 | 标准化每一个截面 86 | ''' 87 | def __init__(self): 88 | super(Normalization, self).__init__() 89 | # self.r=r 90 | # self.N=N 91 | 92 | def forward(self,x): 93 | xs=torch.empty(x.shape) 94 | 95 | for i in range(x.shape[0]): 96 | mean=torch.mean(x[i,:,:],axis=0) 97 | std=torch.std(x[i,:,:],axis=0) 98 | xs[i,:,:]=(x[i,:,:]-mean)/std 99 | 100 | return xs 101 | 102 | 103 | 104 | # Neural Network Model 105 | class Net(nn.Module): 106 | #初始化网络结构 107 | def __init__(self, input_size=95, hidden_size=32, num_classes=1): 108 | super(Net, self).__init__() 109 | self.fc1 = nn.Linear(input_size, hidden_size) #输入层,线性(liner)关系 ,bias=False 110 | self.relu1 = nn.LeakyReLU(negative_slope=0.01) #隐藏层,使用ReLU函数 111 | self.norm1=Normalization() 112 | self.dropout1 = nn.Dropout(p=0.1) # dropout训练 113 | 114 | 115 | self.fc2 = nn.Linear(hidden_size, 16) #输入层,线性(liner)关系 ,bias=False 116 | self.relu2 = nn.LeakyReLU(negative_slope=0.01) #隐藏层,使用ReLU函数 117 | self.norm2=Normalization() 118 | self.dropout2 = nn.Dropout(p=0.1) # dropout训练 119 | self.fc4 = nn.Linear(16, num_classes,bias=False) #输出层,线性(liner)关系 ,bias=False 120 | 121 | 122 | #forword 参数传递函数,网络中数据的流动 123 | def forward(self, x): 124 | out = self.fc1(x) 125 | out = self.relu1(out) 126 | out = self.norm1(out) 127 | out=self.dropout1(out) 128 | 129 | 130 | out = self.fc2(out) 131 | out = self.relu2(out) 132 | out = self.norm2(out) 133 | out=self.dropout2(out) 134 | out = self.fc4(out) 135 | return out 136 | 137 | net=Net() 138 | 139 | 140 | def loss(net,w_hat,r,gamma,N,lambda1,allow_short_selling=True,utility_function='crra',cost_type=False): 141 | ''' 142 | w_hat:上一层的输出,为权重的一部分,等价于y_pred 143 | r:对应的收益数据 144 | ''' 145 | weigh0=1/(~torch.isnan(r)).sum(axis=1) ##计算等权投资组合的权重 146 | w_hat_mul=torch.where(torch.isnan(r),torch.full_like(r, np.nan), w_hat) 147 | w_hatm=torch.where(torch.isnan(r),torch.full_like(r, 0), w_hat) 148 | 149 | ws=torch.sum(w_hatm,axis=1) 150 | num=torch.sum(~torch.isnan(r),axis=1) 151 | wn=ws/num 152 | 153 | 154 | w0=torch.empty(w_hat.shape) 155 | wh=torch.empty(w_hat.shape) 156 | 157 | for j in range(w_hat.shape[0]): 158 | w0[j]=torch.where(torch.isnan(r[j]),torch.full_like(r[j], 0), weigh0[j]) 159 | wh[j]=(w_hat_mul[j]-wn[j])/num[j] 160 | wh=torch.where(torch.isnan(wh),torch.full_like(wh, 0), wh) 161 | 162 | w=w0+wh 163 | if allow_short_selling==True: ##没有权重约束 164 | w=w 165 | elif allow_short_selling==False: ##卖空约束 166 | w=w.clamp(0,1) 167 | w=w/torch.sum(w,axis=1).repeat(1,w.shape[1]).reshape(w.shape) ##重新要求权重之和等于1 168 | 169 | r0=torch.where(torch.isnan(r),torch.full_like(r, 0), r) 170 | retw=r0.mul(w) 171 | 172 | mret=retw.sum(axis=1) 173 | if utility_function=='crra': #指数效用函数 174 | utility=-torch.pow(1+mret,1-gamma)/(1-gamma) 175 | utilitymean=torch.mean(utility) 176 | 177 | elif utility_function=='MV': #均值方差效用函数 178 | sigma=torch.var(mret) 179 | utilitymean=sigma.mul(gamma/2)-torch.mean(mret) 180 | 181 | tc=torch.mean(torch.sum(torch.abs(w[1:,:,:]-retw[:-1,:,:]),axis=1),axis=0) ##计算交易成本 182 | if cost_type==False: 183 | loss=utilitymean 184 | elif cost_type==True: 185 | loss=utilitymean+0.005*tc 186 | 187 | 188 | ##加入正则化项 189 | l1_reg = torch.tensor(0.) 190 | for param in net.parameters(): 191 | # print(param) 192 | l1_reg += torch.sum(torch.abs(param)) 193 | loss = loss+ lambda1 * l1_reg 194 | 195 | return loss 196 | 197 | 198 | 199 | #记录列表(list),存储训练集和测试集上经过每一轮次,loss的变化 200 | def train_model(train_iter,valid_iter,tc,tr,vc,vr,gamma,N,loss,input_size,hidden_size,num_classes, 201 | num_epochs,batch_size,params=None,lr=None,lambda1=None,optimizer=None,allow_short_selling=True,utility_function='crra',cost_type=False,seed=100): 202 | train_loss=[] 203 | valid_loss=[] 204 | 205 | p_list='FDNPP'+str(allow_short_selling)+utility_function+str(cost_type)+str(seed) 206 | 207 | early_stopping = EarlyStoppings(para_list=p_list,patience=7, verbose=True) 208 | 209 | for epoch in range(num_epochs):#外循环控制循环轮次 210 | #step1在训练集上,进行小批量梯度下降更新参数 211 | 212 | for c,r in train_iter:#内循环控制训练批次 213 | w_hat = net(c.to(torch.float32)) 214 | l = loss(net,w_hat,r.to(torch.float32),gamma,N,lambda1,allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type) 215 | #梯度清零 216 | if optimizer is not None: 217 | optimizer.zero_grad() 218 | elif params is not None and params[0].grad is not None: 219 | for param in params: 220 | param.grad.data.zero_() 221 | 222 | #梯度回传 223 | l.backward() 224 | 225 | 226 | if optimizer is None: 227 | torch.optim.SGD(net.parameters(),lr,batch_size) 228 | # SGD(params,lr,batch_size) 229 | else: 230 | optimizer.step() 231 | 232 | train_loss.append((loss(net,net(tc.to(torch.float32)),tr.to(torch.float32),gamma,N,lambda1,allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type)).item())#loss本身就默认了取平均值! 233 | valid_loss.append((loss(net,net(vc.to(torch.float32)),vr.to(torch.float32),gamma,N,lambda1,allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type)).item()) 234 | 235 | print("epoch %d,train_loss %.6f,valid_loss %.6f"%(epoch+1,train_loss[epoch],valid_loss[epoch])) 236 | 237 | valid_lossave = np.average(valid_loss) 238 | early_stopping(valid_loss[epoch], net,para_list=p_list) 239 | 240 | if early_stopping.early_stop: 241 | print("Early stopping") 242 | break 243 | 244 | # load the last checkpoint with the best model 245 | net.load_state_dict(torch.load(p_list+'checkpoint.pt')) 246 | 247 | return net, train_loss, valid_loss 248 | 249 | 250 | 251 | 252 | def test_model(netopt,ec,er,N,allow_short_selling=True): 253 | w_hat=netopt(ec.to(torch.float32)) 254 | weigh0=1/(~torch.isnan(er)).sum(axis=1) ##计算等权投资组合的权重 255 | w_hat_mul=torch.where(torch.isnan(er),torch.full_like(er, np.nan), w_hat) 256 | w_hatm=torch.where(torch.isnan(er),torch.full_like(er, 0), w_hat) 257 | 258 | ws=torch.sum(w_hatm,axis=1) 259 | num=torch.sum(~torch.isnan(er),axis=1) 260 | wn=ws/num 261 | 262 | 263 | w0=torch.empty(w_hat.shape) 264 | wh=torch.empty(w_hat.shape) 265 | 266 | for j in range(w_hat.shape[0]): 267 | w0[j]=torch.where(torch.isnan(er[j]),torch.full_like(er[j], 0), weigh0[j]) 268 | wh[j]=(w_hat_mul[j]-wn[j])/num[j] #times 269 | 270 | 271 | wh=torch.where(torch.isnan(wh),torch.full_like(wh, 0), wh) 272 | w=w0+wh 273 | 274 | if allow_short_selling==True: ##没有权重约束 275 | w=w 276 | elif allow_short_selling==False: ##卖空约束 277 | w=w.clamp(0,1) 278 | w=w/torch.sum(w,axis=1).repeat(1,w.shape[1]).reshape(w.shape) ##重新要求权重之和等于1 279 | return w 280 | 281 | 282 | def get_weights(i,data_list,ret,gamma,allow_short_selling,utility_function,cost_type,seed): 283 | 284 | ''' 285 | i:以12为倍数 286 | ''' 287 | batch_size =10 # 设置小批量大小 288 | num_epochs = 100 #100 289 | 290 | trw=7*12 291 | viw=3*12 292 | tew=1*12 293 | N=data_list[0].shape[0] ##股票数量 294 | 295 | weight_index=ret.index[i+trw+viw:i+trw+viw+tew] 296 | 297 | input_size=data_list[0].shape[1]-3 ##特征数量 298 | 299 | hidden_size=32 300 | num_classes=1 301 | 302 | allow_short_selling=allow_short_selling 303 | utility_function=utility_function 304 | cost_type=cost_type 305 | 306 | 307 | ##训练集 308 | tc=torch.tensor(np.stack([x.iloc[:,2:-1] for x in data_list[i:i+trw]])) 309 | tr=torch.tensor(np.stack([x.iloc[:,-1:] for x in data_list[i:i+trw]])) 310 | 311 | #验证集 312 | vc=torch.tensor(np.stack([x.iloc[:,2:-1] for x in data_list[i+trw:i+trw+viw]])) 313 | vr=torch.tensor(np.stack([x.iloc[:,-1:] for x in data_list[i+trw:i+trw+viw]])) 314 | 315 | 316 | #测试集 317 | ec=torch.tensor(np.stack([x.iloc[:,2:-1] for x in data_list[i+trw+viw:i+trw+viw+tew]])) 318 | er=torch.tensor(np.stack([x.iloc[:,-1:] for x in data_list[i+trw+viw:i+trw+viw+tew]])) 319 | 320 | ##形成batch数据 321 | train_iter = load_batch([tc,tr], batch_size, N, is_train=False) 322 | valid_iter = load_batch([vc,vr], batch_size, N, is_train=False) 323 | 324 | 325 | 326 | if i==0:##首先仅考虑对第一期进行超参数调整 327 | data=data_list[i:i+trw+viw] ##用于训练和验证的数据 328 | 329 | kf=KFold(n_splits=5) 330 | umax=10000000 #最小效用 331 | global lambdaopt 332 | global lropt 333 | lambdaopt=0 334 | lropt=0.01 335 | 336 | lr_list=[0.01,0.001] # 正则化项的项的参数是待调参数,范围可以从 [0.01,0.1] 337 | lambda_list=[0.0001] 338 | # lambda_list=[0.00001,0.0001,0.001] 339 | for lambda1 in lambda_list: 340 | for lr in lr_list: 341 | print(lambda1,lr) 342 | 343 | ##五折交叉验证进行优化lambda 344 | valoss_list=[] 345 | for rets_index,retv_index in kf.split(data): 346 | print(rets_index.shape,retv_index.shape) 347 | 348 | #训练集 349 | ttc=torch.tensor(np.stack([data[x].iloc[:,2:-1] for x in rets_index])) 350 | ttr=torch.tensor(np.stack([data[x].iloc[:,-1:] for x in rets_index])) 351 | 352 | #验证集 353 | vvc=torch.tensor(np.stack([data[x].iloc[:,2:-1] for x in retv_index])) 354 | vvr=torch.tensor(np.stack([data[x].iloc[:,-1:] for x in retv_index])) 355 | 356 | tt_iter = load_batch([ttc,ttr], batch_size, N, is_train=False) 357 | vv_iter = load_batch([vvc,vvr], batch_size, N, is_train=False) 358 | 359 | optimizer =torch.optim.Adam(net.parameters(),lr=lr,betas=(0.9, 0.999),eps=1e-08,weight_decay=0,amsgrad=False) 360 | netopt, train_loss, valid_loss=train_model(tt_iter,vv_iter,ttc,ttr,vvc,vvr,gamma,N,loss,input_size,hidden_size,num_classes, 361 | num_epochs,batch_size,params=None,lr=lr,lambda1=torch.tensor(lambda1),optimizer=optimizer,allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type,seed=seed) 362 | valoss_list.append(valid_loss[-8]) 363 | util_mean=np.mean(valoss_list) 364 | 365 | # print(util_mean) 366 | 367 | if util_mean=0]=0 84 | wn[wn<0]=1 #用于统计负权重的占比 85 | 86 | #平均绝对权重 87 | wabs=np.sum(np.abs(weight),axis=1) 88 | wabsmean=np.mean(wabs/count)*100 ##先横截面平均、再时间序列上求平均 89 | 90 | #平均最大绝对权重 91 | wmax=np.max(np.abs(weight),axis=1) 92 | wmaxmean=np.mean(wmax)*100 ##权重变动范围还比较小 93 | 94 | #平均最小权重 95 | wmin=np.min(weightraw,axis=1) 96 | wminmean=np.mean(wmin)*100 ## 97 | 98 | #平均负权重之和 99 | wneg=np.sum(weight[weight<0],axis=1) 100 | wnegmean=np.mean(wneg) 101 | 102 | #平均负权重的占比 103 | wnegn=np.sum(wn,axis=1) 104 | wnegmeannum=np.mean(wnegn/count) 105 | 106 | ##平均权重变动之和(类似于换手率的概念) 107 | retn=pd.concat([pd.DataFrame(index=weight.index),ret],axis=1,join='inner') 108 | wplus=weight*(1+retn) 109 | wplus.index=wplus.reset_index().iloc[:,0].shift(-1) 110 | wplus=wplus.iloc[:-1,:] 111 | weightn=weight.iloc[1:,:] 112 | wwabs=np.sum(np.abs(weightn-wplus),axis=1) 113 | wwabsmean=np.mean(wwabs) 114 | 115 | ##定义均值、方差、夏普比、CER 116 | retport=np.sum(weight*retn,axis=1) 117 | mean=np.mean(retport)*12 118 | std=np.std(retport,ddof=1)*np.sqrt(12) 119 | sr=mean/std 120 | cer=mean-std*std/2 121 | 122 | mdd=get_DD(pd.DataFrame(retport))[0] 123 | 124 | 125 | skew=stats.skew(retport)#使用stats计算偏度 126 | kurtosis = stats.kurtosis(retport)#使用stats计算峰度 127 | 128 | ch3=pd.concat([pd.DataFrame(retport,columns=['retp']),ch3],axis=1,join='inner') 129 | ch4=pd.concat([pd.DataFrame(retport,columns=['retp']),ch4],axis=1,join='inner') 130 | 131 | #ch3-α检验 132 | ch3test = smf.ols('retp~mktrf+SMB+VMG',ch3).fit(cov_type = 'HAC',cov_kwds = {'maxlags':5}) 133 | ch3_a = ch3test.params[0] 134 | ch3_t = ch3test.tvalues[0] 135 | res=[cer,wabsmean,wmaxmean,wminmean,wnegmean,wnegmeannum,wwabsmean,mean,mdd,std,skew,kurtosis,sr,ch3_a, 136 | ch3_t] 137 | 138 | result=pd.DataFrame(res,columns=[methodname],index=['CER','w_abs','w_max','w_min','w_neg','w_negnum','ww_abs','Mean','MDD','StdDev', 139 | 'Skew','Kurt','SR','CH3_alpha','CH3_t']) 140 | return result 141 | 142 | 143 | def get_all_result(path,respath,name): ##为多个数据集所用 144 | os.chdir(path) 145 | file = glob.glob(os.path.join("*.csv")) 146 | result_all=pd.DataFrame(index=['CER','w_abs','w_max','w_min','w_neg','w_negnum','ww_abs','Mean','MDD', 147 | 'StdDev','Skew','Kurt','SR','CH3_alpha','CH3_t']) 148 | 149 | for i in range(len(file)): 150 | # i=0 151 | weight=pd.read_csv(file[i],index_col=0) 152 | result=get_single_result(weight,ret,ch3,ch4,file[i][:-4]) 153 | result_all=pd.concat([result_all,result],axis=1) 154 | result_all.to_csv(respath+'/result_'+name+'.csv') ##所有结果 155 | 156 | 157 | #%%#输出结果 158 | respath=r'E:\02实验\98ML-AC-code\result\final' 159 | path=r'E:\02实验\98ML-AC-code\result\middle\weight_all' 160 | name='all_results' 161 | get_all_result(path,respath,name) 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 基于机器学习和资产特征的投资组合选择研究 (Research on portfolio selection based on machine learning and asset characteristics) 2 | 3 | 李斌,屠雪永.基于机器学习和资产特征的投资组合选择研究[J/OL].系统工程理论与实践:1-27[2023-12-22]. http://kns.cnki.net/kcms/detail/11.2267.N.20231212.1114.002.html. 4 | 5 | 摘要: 随着可投资资产与资产信息的爆炸式增长,投资组合选择研究面临资产和特征双重高维挑战.为此,本文提出一个基于机器学习和资产特征的投资组合选择框架,该框架借助机器学习技术的天然优势,运用高维特征直接预测投资组合权重,避开了常规的两步投资组合管理范式中的收益预测过程,并用于中国股票市场的资产配置研究.结果显示: (1)基于此框架提出的投资策略能够捕捉高维特征中的增量信息,并挖掘资产特征与投资权重之间线性与非线性关系,大幅提升了投资绩效;(2)交易摩擦类特征是投资权重预测中最为重要的资产特征;(3)策略在套利限制较为严重的股票上回报更高,而对宏观经济状态变化的敏感性较低;在其他经济约束下,策略表现依然稳健.本文拓展了现代投资组合理论的研究框架,促进了人工智能与量化投资领域的交叉融合发展. 6 | 7 | 关键词:投资组合选择;人工智能;资产特征;大维资产配置;量化投资 8 | 9 | 10 | 1.代码共包含9种策略,简单线性的OLS-AC策略和8种ML-AC策略,分别放在01-09号py脚本中,相互独立;输入数据为:资产特征char和收益ret_clean; 输出数据为:样本外的投资组合权重 11 | 12 | 2.10号py文件为对非线性策略的投资组合权重的整理; 13 | 14 | 3.11号py文件基于所得的投资组合权重,计算所有策略的投资绩效,包括收益、标准差、夏普比率等多种指标。 15 | 16 | 4.数据见百度云链接: https://pan.baidu.com/s/12I_HWhi0hzpr4MeGld1BhQ?pwd=64un 提取码: 64un ; 17 | 将char.csv和ret_clean.csv下载后放入data文件夹 18 | -------------------------------------------------------------------------------- /data/CH_3_update_20211231.csv: -------------------------------------------------------------------------------- 1 | Monthly CH-3 factors,,,, 2 | mnthdt: Month-end date,,,, 3 | rf_mon: 1-year bank deposit rate,,,, 4 | mktrf: the excess return on the market,,,, 5 | SMB (Small-minus-big): Size factor,,,, 6 | VMG(Value-minus-growth) Value factor,,,, 7 | Unit: %,,,, 8 | ,,,, 9 | mnthdt,rf_mon,mktrf,SMB,VMG 10 | 20000131,0.19,14.79,-1.61,-0.84 11 | 20000229,0.18,11.97,1.34,-7.28 12 | 20000331,0.19,4.32,7.72,-0.5 13 | 20000430,0.18,1.68,-0.49,2.43 14 | 20000531,0.19,2.2,1.95,2.06 15 | 20000630,0.18,2.34,1.11,5.55 16 | 20000731,0.19,4.37,1.14,1.93 17 | 20000831,0.19,-1.75,2.1,-1.47 18 | 20000930,0.18,-5.51,1.83,-0.6 19 | 20001031,0.19,1.57,2.83,0.2 20 | 20001130,0.18,5.57,1.38,1.93 21 | 20001231,0.19,-0.89,1.89,1.69 22 | 20010131,0.19,-1.2,0.35,2.24 23 | 20010228,0.17,-5.78,-0.45,3.59 24 | 20010331,0.19,6.7,1.86,-0.82 25 | 20010430,0.18,-1.37,0.96,-1.88 26 | 20010531,0.19,2.55,2.64,-1.8 27 | 20010630,0.18,0.15,1.49,2.01 28 | 20010731,0.19,-13.45,-0.19,-0.26 29 | 20010831,0.19,-4.06,0.75,-0.98 30 | 20010930,0.18,-4.51,-1.17,3.25 31 | 20011031,0.19,-4.77,-1.75,-0.46 32 | 20011130,0.18,3.46,2.35,1.41 33 | 20011231,0.19,-6.08,-0.89,5.34 34 | 20020131,0.19,-9.93,-3.37,4.92 35 | 20020228,0.17,2.14,2.2,0.39 36 | 20020331,0.17,5.53,2.05,-2.39 37 | 20020430,0.16,3.84,1.23,-1.78 38 | 20020531,0.17,-8.64,-1.29,-1.66 39 | 20020630,0.16,14.23,-3.31,1.09 40 | 20020731,0.17,-4.6,1.44,1.44 41 | 20020831,0.17,0.73,-0.39,0.65 42 | 20020930,0.16,-5.44,-0.89,0.65 43 | 20021031,0.17,-5,-0.98,0.39 44 | 20021130,0.16,-5.46,-3.21,2.42 45 | 20021231,0.17,-5.35,-0.19,0.64 46 | 20030131,0.17,9.36,1.38,1.74 47 | 20030228,0.15,0.63,0.99,0.1 48 | 20030331,0.17,-0.4,-2.39,4.15 49 | 20030430,0.16,0.67,-5.53,7.01 50 | 20030531,0.17,4.14,-0.49,0.46 51 | 20030630,0.16,-6.2,-0.43,2.95 52 | 20030731,0.17,-0.61,-2.93,2.6 53 | 20030831,0.17,-3.81,0.81,-0.86 54 | 20030930,0.16,-4.41,0.31,0.49 55 | 20031031,0.17,-1.67,-3.81,9.41 56 | 20031130,0.16,2.78,0.77,2.55 57 | 20031231,0.17,6.16,-5.47,10.36 58 | 20040131,0.17,5.96,3.96,0.28 59 | 20040229,0.16,6.05,3.26,-3.37 60 | 20040331,0.17,3.61,2.31,2.96 61 | 20040430,0.16,-9.36,0.86,3.9 62 | 20040531,0.17,-2.66,1.11,-1.06 63 | 20040630,0.16,-10.2,-2.22,5.07 64 | 20040731,0.17,-0.34,-0.81,3.15 65 | 20040831,0.17,-3.56,0.13,4.86 66 | 20040930,0.16,4.57,1.05,3.83 67 | 20041031,0.17,-5.41,-0.21,4.12 68 | 20041130,0.18,1.18,3.66,-2.9 69 | 20041231,0.19,-6.01,-1.43,5.58 70 | 20050131,0.19,-6.16,-1.16,2.38 71 | 20050228,0.17,9.34,1.82,1.54 72 | 20050331,0.19,-9.55,-3.18,4.71 73 | 20050430,0.18,-2.25,-4.58,7.21 74 | 20050531,0.19,-8.42,5.11,-4.68 75 | 20050630,0.18,2,-1.49,4.93 76 | 20050731,0.19,0.1,-6.62,8.1 77 | 20050831,0.19,8.36,7.39,-4.38 78 | 20050930,0.18,-0.49,3.29,2.92 79 | 20051031,0.19,-5.75,-0.79,1.81 80 | 20051130,0.18,0.23,1.05,-0.62 81 | 20051231,0.19,6.07,-3.73,4.37 82 | 20060131,0.19,8.62,-0.72,-0.42 83 | 20060228,0.17,3.61,-1.69,-2.62 84 | 20060331,0.19,0.65,-1.22,1.72 85 | 20060430,0.18,11.28,-1.04,4.69 86 | 20060531,0.19,16.03,9.16,-8.13 87 | 20060630,0.18,2.59,5.38,3.75 88 | 20060731,0.19,-6.88,4.05,-1.24 89 | 20060831,0.2,3.94,-0.88,-0.16 90 | 20060930,0.2,4.64,-1.09,-0.98 91 | 20061031,0.21,2.65,-2.5,8.28 92 | 20061130,0.2,14.63,-11.81,3.99 93 | 20061231,0.21,13.93,-8.73,6.17 94 | 20070131,0.21,13.68,11.91,1.46 95 | 20070228,0.19,6.51,12.26,-5.9 96 | 20070331,0.22,11.09,6.43,-1.15 97 | 20070430,0.23,24.58,8.2,-9.3 98 | 20070531,0.24,9.41,-1.37,0.08 99 | 20070630,0.25,-9.22,-5.64,13.35 100 | 20070731,0.27,17.61,5.33,-3.15 101 | 20070831,0.29,15.16,-3.68,1.92 102 | 20070930,0.3,6.17,-1.35,3.51 103 | 20071031,0.32,4.83,-12.65,13.28 104 | 20071130,0.31,-15.26,10.79,3.42 105 | 20071231,0.33,11.86,9.18,-0.92 106 | 20080131,0.34,-15.7,9.04,-1.59 107 | 20080229,0.32,2.29,8.68,0.8 108 | 20080331,0.34,-19.92,0.32,-0.04 109 | 20080430,0.33,4.6,-8.91,7.63 110 | 20080531,0.34,-6.93,1.97,-5.64 111 | 20080630,0.33,-20.76,-4.65,4.09 112 | 20080731,0.34,1.65,7.43,-1.66 113 | 20080831,0.34,-15.16,-7.57,4.16 114 | 20080930,0.33,-4.69,-5.87,-4.16 115 | 20081031,0.33,-25.47,-0.05,4.42 116 | 20081130,0.28,9.37,11.22,-3.91 117 | 20081231,0.2,-2.15,9.46,3.37 118 | 20090131,0.19,9.65,7.77,0.79 119 | 20090228,0.17,4.91,2.5,-4.33 120 | 20090331,0.19,14.63,7.53,-0.92 121 | 20090430,0.18,4.17,4.1,3.93 122 | 20090531,0.19,6.09,0.72,-2.62 123 | 20090630,0.18,12.65,-7.56,2.59 124 | 20090731,0.19,15.55,-5.52,-8.08 125 | 20090831,0.19,-21.82,9.4,5.21 126 | 20090930,0.18,4.57,0.11,5.66 127 | 20091031,0.19,8.29,4.42,0.42 128 | 20091130,0.18,7.31,8.89,-0.6 129 | 20091231,0.19,2.25,3.08,3.93 130 | 20100131,0.19,-8.95,6.03,-1.24 131 | 20100228,0.17,2.3,3.37,-2.5 132 | 20100331,0.19,1.49,2.69,1.16 133 | 20100430,0.18,-7.91,0.92,0.09 134 | 20100531,0.19,-8.7,-0.18,0 135 | 20100630,0.18,-7.4,-0.91,2.99 136 | 20100731,0.19,11.27,5.41,1.17 137 | 20100831,0.19,1.78,5.51,-3.9 138 | 20100930,0.18,0.64,-1.57,-1.93 139 | 20101031,0.2,12.39,-3.54,1.57 140 | 20101130,0.2,-4.72,5.37,0.4 141 | 20101231,0.21,-1.14,-1.12,3.99 142 | 20110131,0.23,-2.75,-0.84,4.26 143 | 20110228,0.22,4.84,4.77,-3.36 144 | 20110331,0.25,-0.59,0.78,1.63 145 | 20110430,0.26,-1.51,-1.66,2.1 146 | 20110531,0.27,-6.44,-0.45,1.06 147 | 20110630,0.26,2.27,1.25,0.36 148 | 20110731,0.29,-1.62,2.32,-0.68 149 | 20110831,0.29,-4.88,1.8,0.75 150 | 20110930,0.28,-9.22,-1.96,3.68 151 | 20111031,0.29,4.03,0.14,1.04 152 | 20111130,0.28,-5.47,1.84,1.4 153 | 20111231,0.29,-7.33,-7.44,7.68 154 | 20120131,0.29,2.48,-3.56,2.01 155 | 20120229,0.27,6.97,5.45,-3.09 156 | 20120331,0.29,-7.03,-0.66,1.23 157 | 20120430,0.28,5.46,1.04,-1.68 158 | 20120531,0.29,0.06,0.77,-0.69 159 | 20120630,0.27,-5.21,-0.31,4.75 160 | 20120731,0.25,-5.76,-2.99,4.92 161 | 20120831,0.25,-2.85,6.19,-0.84 162 | 20120930,0.24,2.06,-2.09,0.15 163 | 20121031,0.25,-1.22,1.56,0.59 164 | 20121130,0.24,-6.35,-4.88,3.66 165 | 20121231,0.25,15.02,0.5,-0.9 166 | 20130131,0.25,4.93,2.27,0.73 167 | 20130228,0.23,0,3.31,0.06 168 | 20130331,0.25,-5.49,2.24,0.38 169 | 20130430,0.24,-2.5,-0.61,1.33 170 | 20130531,0.25,7.79,5.53,-3.83 171 | 20130630,0.24,-13.46,-1.68,3.55 172 | 20130731,0.25,2.65,2.75,-2.96 173 | 20130831,0.25,4.99,4.27,0.97 174 | 20130930,0.24,4.25,-0.71,-2.89 175 | 20131031,0.25,-2.65,1.26,5.47 176 | 20131130,0.24,4.18,3.74,-2.79 177 | 20131231,0.25,-4.58,1.8,3.4 178 | 20140131,0.25,-2.3,3.22,-4.83 179 | 20140228,0.23,0.53,4.33,-1.15 180 | 20140331,0.25,-2.69,1.75,4.92 181 | 20140430,0.24,-1.09,-1.39,1.91 182 | 20140531,0.25,1.06,1.26,-3.27 183 | 20140630,0.24,2.16,2.38,-2.19 184 | 20140731,0.25,7.71,0.88,1.88 185 | 20140831,0.25,1.19,2.61,-2.98 186 | 20140930,0.24,6.96,5.38,-3.32 187 | 20141031,0.25,1.54,0.69,2.64 188 | 20141130,0.24,8.83,-1.76,4.76 189 | 20141231,0.23,15.25,-17.35,15.17 190 | 20150131,0.23,0.45,7.52,-2.47 191 | 20150228,0.21,4.31,1.61,-3.06 192 | 20150331,0.21,14.92,8.01,-4.46 193 | 20150430,0.2,16.01,-0.65,-0.91 194 | 20150531,0.2,6.99,17.73,-10.28 195 | 20150630,0.18,-8.12,-2.21,8.76 196 | 20150731,0.17,-14.61,-4.17,6.71 197 | 20150831,0.16,-13.81,0.58,1.98 198 | 20150930,0.14,-4.31,3.68,2.25 199 | 20151031,0.14,14.39,7.65,-5.95 200 | 20151130,0.12,3.24,8.52,-0.04 201 | 20151231,0.13,3.17,3.8,4.21 202 | 20160131,0.13,-25.09,-5.23,5.99 203 | 20160229,0.12,-2.2,-0.2,0.83 204 | 20160331,0.13,14.52,6.77,-0.03 205 | 20160430,0.12,-2.71,1.68,1.02 206 | 20160531,0.13,-0.11,-0.71,1.58 207 | 20160630,0.12,2.21,4.52,-0.83 208 | 20160731,0.13,1.15,-1.43,5.96 209 | 20160831,0.13,3.71,0.56,-0.87 210 | 20160930,0.12,-2.54,1.39,2.08 211 | 20161031,0.13,2.48,1.39,-0.76 212 | 20161130,0.12,3.64,-1.36,1.28 213 | 20161231,0.13,-5.65,0,0.33 214 | 20170131,0.13,0.7,-3.15,2.85 215 | 20170228,0.11,2.82,1.27,0.34 216 | 20170331,0.13,-0.84,-0.55,3.36 217 | 20170430,0.12,-2.27,-3.81,4.69 218 | 20170531,0.13,-1.91,-5.16,5.68 219 | 20170630,0.12,4.14,-0.02,2.66 220 | 20170731,0.13,2.45,-1.25,4.48 221 | 20170831,0.13,2.48,-0.37,-2.28 222 | 20170930,0.12,0.62,0.93,-0.78 223 | 20171031,0.13,1.35,-3.9,3.82 224 | 20171130,0.12,-2.93,-3.13,3.78 225 | 20171231,0.13,-0.12,-1.79,2.29 226 | 20180131,0.13,3.66,-5.84,6.67 227 | 20180228,0.11,-5.19,0.37,0.19 228 | 20180331,0.13,-1.41,4.26,-6.66 229 | 20180430,0.12,-3.28,-1.25,-0.04 230 | 20180531,0.13,0.27,-1.53,4.49 231 | 20180630,0.12,-7.89,-2.86,3.71 232 | 20180731,0.13,0.9,-0.29,3.66 233 | 20180831,0.13,-5.86,-2.6,0.71 234 | 20180930,0.12,2.29,-3.01,3.09 235 | 20181031,0.13,-8.94,-1.26,2.55 236 | 20181130,0.12,0.86,4.64,-3.16 237 | 20181231,0.13,-4.16,-0.32,1.94 238 | 20190131,0.13,2.88,-4.6,6.35 239 | 20190228,0.11,15.74,5.04,-9.55 240 | 20190331,0.13,6.44,4.23,-2.81 241 | 20190430,0.12,-1.33,-2.29,3.06 242 | 20190531,0.13,-6.36,1.4,0.35 243 | 20190630,0.12,3.15,-3.22,3.98 244 | 20190731,0.13,-0.17,-3.11,0.71 245 | 20190831,0.13,-0.77,-1.55,-3.63 246 | 20190930,0.12,0.61,0.69,1.13 247 | 20191031,0.13,1.11,-2.18,1.35 248 | 20191130,0.12,-1.8,-0.81,1.83 249 | 20191231,0.13,6.86,1.11,0.21 250 | 20200131,0.13,-0.7,0.38,-3.06 251 | 20200229,0.12,-0.89,0.7,-3.72 252 | 20200331,0.13,-6.27,2.99,5.08 253 | 20200430,0.12,5.07,-4.06,0.38 254 | 20200531,0.13,0.3,1.64,0.12 255 | 20200630,0.12,8.01,-1.83,1.03 256 | 20200731,0.13,12.6,0.42,0.76 257 | 20200831,0.13,1.99,2.2,1.7 258 | 20200930,0.12,-5.79,-2.19,1.1 259 | 20201031,0.13,0.86,-1.84,1.24 260 | 20201130,0.12,3.65,-1.3,4.63 261 | 20201231,0.13,2.99,-9.23,-4.72 262 | 20210131,0.13,0.34,-9.57,3.36 263 | 20210228,0.13,-0.44,5.29,6.43 264 | 20210331,0.13,-2.93,5.88,4.87 265 | 20210430,0.13,1.77,-2.86,-4.17 266 | 20210531,0.13,5.05,0.75,-3.35 267 | 20210630,0.13,0.79,1.64,-4.6 268 | 20210731,0.13,-4.1,4.54,-3.54 269 | 20210831,0.13,2.85,4.84,6.8 270 | 20210930,0.13,-0.36,-1.35,4.94 271 | 20211031,0.13,-0.27,-3.19,-4.33 272 | 20211130,0.13,1.96,11.52,-3.8 273 | 20211231,0.13,1.01,3.67,5.07 274 | -------------------------------------------------------------------------------- /data/CH_4_fac_update_20211231.csv: -------------------------------------------------------------------------------- 1 | Monthly CH-4 factors,,,,, 2 | mnthdt: Month-end date,,,,, 3 | rf_mon: 1-year bank deposit rate converted to monthly rate,,,,, 4 | mktrf: the excess return on the market,,,,, 5 | SMB (Small-minus-big): Size factor,,,,, 6 | VMG(Value-minus-growth) Value factor,,,,, 7 | PMO (Pessimistic-minus-Optimistic) Turnover factor,,,,, 8 | Unit: %,,,,, 9 | ,,,,, 10 | mnthdt,rf_mon,mktrf,VMG,SMB,PMO 11 | 20000131,0.19,14.79,-0.84,-1.88,-10.9 12 | 20000229,0.18,11.97,-7.28,1.65,-1.27 13 | 20000331,0.19,4.32,-0.5,7.85,7.93 14 | 20000430,0.18,1.68,2.43,-0.56,2.78 15 | 20000531,0.19,2.2,2.06,1.85,-0.93 16 | 20000630,0.18,2.34,5.55,0.89,0.08 17 | 20000731,0.19,4.37,1.93,1.02,-3.93 18 | 20000831,0.19,-1.75,-1.47,2.07,2.59 19 | 20000930,0.18,-5.51,-0.6,1.88,1.45 20 | 20001031,0.19,1.57,0.2,2.73,-0.96 21 | 20001130,0.18,5.57,1.93,1.26,1.18 22 | 20001231,0.19,-0.89,1.69,1.82,0.08 23 | 20010131,0.19,-1.2,2.24,0.31,3.27 24 | 20010228,0.17,-5.78,3.59,-0.9,-1.15 25 | 20010331,0.19,6.7,-0.82,1.96,-0.76 26 | 20010430,0.18,-1.37,-1.88,1.06,1.39 27 | 20010531,0.19,2.55,-1.8,2.78,-0.37 28 | 20010630,0.18,0.15,2.01,1.52,4.14 29 | 20010731,0.19,-13.45,-0.26,-0.15,0.2 30 | 20010831,0.19,-4.06,-0.98,0.75,0.65 31 | 20010930,0.18,-4.51,3.25,-1.53,0.78 32 | 20011031,0.19,-4.77,-0.46,-1.8,-0.38 33 | 20011130,0.18,3.46,1.41,2.1,2.18 34 | 20011231,0.19,-6.08,5.34,-1.45,3.96 35 | 20020131,0.19,-9.93,4.92,-3.78,-0.4 36 | 20020228,0.17,2.14,0.39,2.15,-0.42 37 | 20020331,0.17,5.53,-2.39,2.27,1.49 38 | 20020430,0.16,3.84,-1.78,1.33,0.76 39 | 20020531,0.17,-8.64,-1.66,-1.13,-0.09 40 | 20020630,0.16,14.23,1.09,-3.3,3.11 41 | 20020731,0.17,-4.6,1.44,1.16,2.33 42 | 20020831,0.17,0.73,0.65,-0.33,0.34 43 | 20020930,0.16,-5.44,0.65,-0.93,-0.37 44 | 20021031,0.17,-5,0.39,-0.96,-0.51 45 | 20021130,0.16,-5.46,2.42,-3.51,-0.65 46 | 20021231,0.17,-5.35,0.64,-0.26,0.79 47 | 20030131,0.17,9.36,1.74,1.25,4.48 48 | 20030228,0.15,0.63,0.1,0.94,1.26 49 | 20030331,0.17,-0.4,4.15,-3.03,-0.49 50 | 20030430,0.16,0.67,7.01,-5.89,-2.88 51 | 20030531,0.17,4.14,0.46,-0.56,0.08 52 | 20030630,0.16,-6.2,2.95,-1.22,3.1 53 | 20030731,0.17,-0.61,2.6,-3.41,-0.46 54 | 20030831,0.17,-3.81,-0.86,0.95,1.12 55 | 20030930,0.16,-4.41,0.49,0.23,0.66 56 | 20031031,0.17,-1.67,9.41,-4.87,5.63 57 | 20031130,0.16,2.78,2.55,0.31,-1.98 58 | 20031231,0.17,6.16,10.36,-7.22,4.06 59 | 20040131,0.17,5.96,0.28,3.66,3.93 60 | 20040229,0.16,6.05,-3.37,3.81,1.45 61 | 20040331,0.17,3.61,2.96,2,2.15 62 | 20040430,0.16,-9.36,3.9,0.34,0.84 63 | 20040531,0.17,-2.66,-1.06,1.33,0.95 64 | 20040630,0.16,-10.2,5.07,-2.95,0.5 65 | 20040731,0.17,-0.34,3.15,-1.05,0.35 66 | 20040831,0.17,-3.56,4.86,-1.01,3.2 67 | 20040930,0.16,4.57,3.83,0.5,-1.48 68 | 20041031,0.17,-5.41,4.12,-0.81,1.84 69 | 20041130,0.18,1.18,-2.9,3.98,2.81 70 | 20041231,0.19,-6.01,5.58,-2.03,3.39 71 | 20050131,0.19,-6.16,2.38,-1.42,-1.09 72 | 20050228,0.17,9.34,1.54,1.72,-0.87 73 | 20050331,0.19,-9.55,4.71,-3.9,3 74 | 20050430,0.18,-2.25,7.21,-5.36,-2.42 75 | 20050531,0.19,-8.42,-4.68,5.65,2.91 76 | 20050630,0.18,2,4.93,-2.09,0.26 77 | 20050731,0.19,0.1,8.1,-7.7,-3.56 78 | 20050831,0.19,8.36,-4.38,8.09,1.74 79 | 20050930,0.18,-0.49,2.92,3.05,1.36 80 | 20051031,0.19,-5.75,1.81,-0.21,3.78 81 | 20051130,0.18,0.23,-0.62,1.48,2.63 82 | 20051231,0.19,6.07,4.37,-4.69,0.65 83 | 20060131,0.19,8.62,-0.42,-0.77,2.1 84 | 20060228,0.17,3.61,-2.62,-1.67,2.65 85 | 20060331,0.19,0.65,1.72,-1.02,-2.85 86 | 20060430,0.18,11.28,4.69,-1.07,-3.19 87 | 20060531,0.19,16.03,-8.13,9.7,7.51 88 | 20060630,0.18,2.59,3.75,4.76,-1.08 89 | 20060731,0.19,-6.88,-1.24,4.23,1.69 90 | 20060831,0.2,3.94,-0.16,-0.72,0.04 91 | 20060930,0.2,4.64,-0.98,-0.63,3.44 92 | 20061031,0.21,2.65,8.28,-3.59,1.71 93 | 20061130,0.2,14.63,3.99,-12.64,-1.11 94 | 20061231,0.21,13.93,6.17,-9.55,-1.84 95 | 20070131,0.21,13.68,1.46,12.3,7.74 96 | 20070228,0.19,6.51,-5.9,11.81,-8.69 97 | 20070331,0.22,11.09,-1.15,6.39,-2.22 98 | 20070430,0.23,24.58,-9.3,6.99,-12.46 99 | 20070531,0.24,9.41,0.08,-0.91,3.2 100 | 20070630,0.25,-9.22,13.35,-6.53,11.85 101 | 20070731,0.27,17.61,-3.15,6.26,0.93 102 | 20070831,0.29,15.16,1.92,-4.49,1.11 103 | 20070930,0.3,6.17,3.51,-2.35,1.07 104 | 20071031,0.32,4.83,13.28,-12.72,11.76 105 | 20071130,0.31,-15.26,3.42,10.47,0.56 106 | 20071231,0.33,11.86,-0.92,8.79,2.68 107 | 20080131,0.34,-15.7,-1.59,9.14,0.86 108 | 20080229,0.32,2.29,0.8,8.41,0.41 109 | 20080331,0.34,-19.92,-0.04,0.17,1.55 110 | 20080430,0.33,4.6,7.63,-10.48,-4.45 111 | 20080531,0.34,-6.93,-5.64,3.13,4.11 112 | 20080630,0.33,-20.76,4.09,-5.27,2.27 113 | 20080731,0.34,1.65,-1.66,8.06,0.07 114 | 20080831,0.34,-15.16,4.16,-8,6.63 115 | 20080930,0.33,-4.69,-4.16,-5.08,1.85 116 | 20081031,0.33,-25.47,4.42,-0.6,1.52 117 | 20081130,0.28,9.37,-3.91,10.8,6.31 118 | 20081231,0.2,-2.15,3.37,9.09,2.43 119 | 20090131,0.19,9.65,0.79,6.23,-6.87 120 | 20090228,0.17,4.91,-4.33,3.24,3.2 121 | 20090331,0.19,14.63,-0.92,7.26,-3.88 122 | 20090430,0.18,4.17,3.93,3.3,1.04 123 | 20090531,0.19,6.09,-2.62,1.08,3.15 124 | 20090630,0.18,12.65,2.59,-8.13,1.4 125 | 20090731,0.19,15.55,-8.08,-4.1,4.41 126 | 20090831,0.19,-21.82,5.21,8.61,-1.59 127 | 20090930,0.18,4.57,5.66,-0.69,3.09 128 | 20091031,0.19,8.29,0.42,4.41,0.86 129 | 20091130,0.18,7.31,-0.6,9.4,2.2 130 | 20091231,0.19,2.25,3.93,2.69,3.88 131 | 20100131,0.19,-8.95,-1.24,6.82,1.66 132 | 20100228,0.17,2.3,-2.5,3.82,-0.07 133 | 20100331,0.19,1.49,1.16,2.28,0.21 134 | 20100430,0.18,-7.91,0.09,1.73,5.8 135 | 20100531,0.19,-8.7,0,-0.33,-1.66 136 | 20100630,0.18,-7.4,2.99,-1.73,1.73 137 | 20100731,0.19,11.27,1.17,4.96,-0.83 138 | 20100831,0.19,1.78,-3.9,6.97,-1.73 139 | 20100930,0.18,0.64,-1.93,-1.17,-1.54 140 | 20101031,0.2,12.39,1.57,-4.73,-5.34 141 | 20101130,0.2,-4.72,0.4,3.52,10.41 142 | 20101231,0.21,-1.14,3.99,-1.68,-0.62 143 | 20110131,0.23,-2.75,4.26,-1.85,-1.02 144 | 20110228,0.22,4.84,-3.36,5.25,0.26 145 | 20110331,0.25,-0.59,1.63,-0.31,1.25 146 | 20110430,0.26,-1.51,2.1,-2.48,-0.41 147 | 20110531,0.27,-6.44,1.06,-0.96,0.29 148 | 20110630,0.26,2.27,0.36,1.23,0.59 149 | 20110731,0.29,-1.62,-0.68,2.54,2.9 150 | 20110831,0.29,-4.88,0.75,1.78,0.97 151 | 20110930,0.28,-9.22,3.68,-2.6,4.77 152 | 20111031,0.29,4.03,1.04,0,-2.33 153 | 20111130,0.28,-5.47,1.4,1.89,1.44 154 | 20111231,0.29,-7.33,7.68,-8.56,3.35 155 | 20120131,0.29,2.48,2.01,-4.26,2.17 156 | 20120229,0.27,6.97,-3.09,5.79,0.92 157 | 20120331,0.29,-7.03,1.23,-0.59,2.83 158 | 20120430,0.28,5.46,-1.68,1.58,2.84 159 | 20120531,0.29,0.06,-0.69,1,-1.63 160 | 20120630,0.27,-5.21,4.75,-1.08,0.97 161 | 20120731,0.25,-5.76,4.92,-3.51,-0.94 162 | 20120831,0.25,-2.85,-0.84,6.32,2.34 163 | 20120930,0.24,2.06,0.15,-2.36,-0.35 164 | 20121031,0.25,-1.22,0.59,1.25,2.44 165 | 20121130,0.24,-6.35,3.66,-4.89,-1.58 166 | 20121231,0.25,15.02,-0.9,1,0.6 167 | 20130131,0.25,4.93,0.73,1.65,3 168 | 20130228,0.23,0,0.06,3.19,2.75 169 | 20130331,0.25,-5.49,0.38,1.48,4.02 170 | 20130430,0.24,-2.5,1.33,-0.67,0.5 171 | 20130531,0.25,7.79,-3.83,6.1,-1.3 172 | 20130630,0.24,-13.46,3.55,-2.27,-0.31 173 | 20130731,0.25,2.65,-2.96,3.53,-3.65 174 | 20130831,0.25,4.99,0.97,3.78,2.8 175 | 20130930,0.24,4.25,-2.89,-0.42,-3.41 176 | 20131031,0.25,-2.65,5.47,0.89,2.49 177 | 20131130,0.24,4.18,-2.79,4.28,-0.45 178 | 20131231,0.25,-4.58,3.4,1.31,1.12 179 | 20140131,0.25,-2.3,-4.83,4.22,-1.32 180 | 20140228,0.23,0.53,-1.15,4.3,2.89 181 | 20140331,0.25,-2.69,4.92,0.86,6.31 182 | 20140430,0.24,-1.09,1.91,-1.64,0.29 183 | 20140531,0.25,1.06,-3.27,1.7,2.15 184 | 20140630,0.24,2.16,-2.19,2.89,0.86 185 | 20140731,0.25,7.71,1.88,0.27,2.15 186 | 20140831,0.25,1.19,-2.98,3.05,3.13 187 | 20140930,0.24,6.96,-3.32,5.73,-2.45 188 | 20141031,0.25,1.54,2.64,0.03,-0.89 189 | 20141130,0.24,8.83,4.76,-2.89,-3.7 190 | 20141231,0.23,15.25,15.17,-17.2,-20.19 191 | 20150131,0.23,0.45,-2.47,5.15,11.34 192 | 20150228,0.21,4.31,-3.06,1.76,3.07 193 | 20150331,0.21,14.92,-4.46,8.78,1.06 194 | 20150430,0.2,16.01,-0.91,-0.28,1.48 195 | 20150531,0.2,6.99,-10.28,18.41,12.75 196 | 20150630,0.18,-8.12,8.76,-3.72,0.75 197 | 20150731,0.17,-14.61,6.71,-4.88,4.02 198 | 20150831,0.16,-13.81,1.98,0.15,0.8 199 | 20150930,0.14,-4.31,2.25,3.54,0.25 200 | 20151031,0.14,14.39,-5.95,7.35,-8.57 201 | 20151130,0.12,3.24,-0.04,7.9,-3.5 202 | 20151231,0.13,3.17,4.21,3.82,3.71 203 | 20160131,0.13,-25.09,5.99,-4.81,7.92 204 | 20160229,0.12,-2.2,0.83,-0.04,1.71 205 | 20160331,0.13,14.52,-0.03,5.65,-7.05 206 | 20160430,0.12,-2.71,1.02,1.81,3.3 207 | 20160531,0.13,-0.11,1.58,-1.02,-0.41 208 | 20160630,0.12,2.21,-0.83,4.32,-4.37 209 | 20160731,0.13,1.15,5.96,-1.59,8.61 210 | 20160831,0.13,3.71,-0.87,0.89,2.12 211 | 20160930,0.12,-2.54,2.08,1.06,0.11 212 | 20161031,0.13,2.48,-0.76,1.53,0.21 213 | 20161130,0.12,3.64,1.28,-1.79,-0.52 214 | 20161231,0.13,-5.65,0.33,-0.14,-0.95 215 | 20170131,0.13,0.7,2.85,-3.34,-2.78 216 | 20170228,0.11,2.82,0.34,1,1.85 217 | 20170331,0.13,-0.84,3.36,-1.01,-0.93 218 | 20170430,0.12,-2.27,4.69,-4.12,-1.92 219 | 20170531,0.13,-1.91,5.68,-5.79,-3.06 220 | 20170630,0.12,4.14,2.66,-0.78,2.37 221 | 20170731,0.13,2.45,4.48,-2.03,0.84 222 | 20170831,0.13,2.48,-2.28,-0.15,1.82 223 | 20170930,0.12,0.62,-0.78,1.1,0.06 224 | 20171031,0.13,1.35,3.82,-4.4,4.63 225 | 20171130,0.12,-2.93,3.78,-3.88,-0.48 226 | 20171231,0.13,-0.12,2.29,-1.78,-1.26 227 | 20180131,0.13,3.66,6.67,-6.76,-1.69 228 | 20180228,0.11,-5.19,0.19,0.03,1.27 229 | 20180331,0.13,-1.41,-6.66,4.8,2.69 230 | 20180430,0.12,-3.28,-0.04,-1.21,0.46 231 | 20180531,0.13,0.27,4.49,-1.77,2.03 232 | 20180630,0.12,-7.89,3.71,-3.46,-0.4 233 | 20180731,0.13,0.9,3.66,-0.9,4.08 234 | 20180831,0.13,-5.86,0.71,-2.53,2.43 235 | 20180930,0.12,2.29,3.09,-3.24,-0.82 236 | 20181031,0.13,-8.94,2.55,-1.47,1.8 237 | 20181130,0.12,0.86,-3.16,5.06,2.2 238 | 20181231,0.13,-4.16,1.94,-0.25,2.44 239 | 20190131,0.13,2.88,6.35,-5.11,3.26 240 | 20190228,0.11,15.74,-9.55,5.69,-7.22 241 | 20190331,0.13,6.44,-2.81,4.66,0.51 242 | 20190430,0.12,-1.33,3.06,-2.45,2.6 243 | 20190531,0.13,-6.36,0.35,1.69,3.71 244 | 20190630,0.12,3.15,3.98,-3.8,0.86 245 | 20190731,0.13,-0.17,0.71,-3.08,1.18 246 | 20190831,0.13,-0.77,-3.63,-0.74,-3.34 247 | 20190930,0.12,0.61,1.13,0.53,0.49 248 | 20191031,0.13,1.11,1.35,-2.12,0.69 249 | 20191130,0.12,-1.8,1.83,-1.06,-0.3 250 | 20191231,0.13,6.86,0.21,1,-0.41 251 | 20200131,0.13,-0.7,-3.06,0.81,-5.27 252 | 20200229,0.12,-0.89,-3.72,1.12,-6.38 253 | 20200331,0.13,-6.27,5.08,2.39,7.63 254 | 20200430,0.12,5.07,0.38,-3.77,-0.57 255 | 20200531,0.13,0.3,0.12,1.67,0.06 256 | 20200630,0.12,8.01,1.03,-1.7,-2.03 257 | 20200731,0.13,12.6,0.76,0.1,-2.07 258 | 20200831,0.13,1.99,1.7,2.14,1.28 259 | 20200930,0.12,-5.79,1.1,-2.14,-1.08 260 | 20201031,0.13,0.86,1.24,-1.78,-3.52 261 | 20201130,0.12,3.65,4.63,-1.8,1.99 262 | 20201231,0.13,2.99,-4.72,-7.99,-2.76 263 | 20210131,0.13,0.34,3.36,-9.07,-2.17 264 | 20210228,0.13,-0.44,6.43,4.55,1.67 265 | 20210331,0.13,-2.93,4.87,5.35,1.15 266 | 20210430,0.13,1.77,-4.17,-2.74,2.93 267 | 20210531,0.13,5.05,-3.35,0.73,-1.4 268 | 20210630,0.13,0.79,-4.6,2.24,1.39 269 | 20210731,0.13,-4.1,-3.54,4.96,-3.96 270 | 20210831,0.13,2.85,6.8,3.92,-0.44 271 | 20210930,0.13,-0.36,4.94,-1.7,5.03 272 | 20211031,0.13,-0.27,-4.33,-2.57,6.3 273 | 20211130,0.13,1.96,-3.8,11.49,4.85 274 | 20211231,0.13,1.01,5.07,3.11,2.24 275 | -------------------------------------------------------------------------------- /pytorchtools_change.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | class EarlyStoppings: 5 | """Early stops the training if validation loss doesn't improve after a given patience.""" 6 | def __init__(self,para_list, patience=7, verbose=False, delta=0): 7 | """ 8 | Args: 9 | patience (int): How long to wait after last time validation loss improved. 10 | 上次验证集损失值改善后等待几个epoch 11 | Default: 7 12 | verbose (bool): If True, prints a message for each validation loss improvement. 13 | 如果是True,为每个验证集损失值改善打印一条信息 14 | Default: False 15 | delta (float): Minimum change in the monitored quantity to qualify as an improvement. 16 | 监测数量的最小变化,以符合改进的要求 17 | Default: 0 18 | """ 19 | self.patience = patience 20 | self.verbose = verbose 21 | self.counter = 0 22 | self.best_score = None 23 | self.early_stop = False 24 | self.val_loss_min = np.Inf 25 | self.delta = delta 26 | 27 | def __call__(self, val_loss, model,para_list): 28 | 29 | score = -val_loss 30 | 31 | if self.best_score is None: 32 | self.best_score = score 33 | self.save_checkpoint(val_loss, model,para_list) 34 | elif score < self.best_score + self.delta: 35 | self.counter += 1 36 | # print(f'EarlyStopping counter: {self.counter} out of {self.patience}') 37 | if self.counter >= self.patience: 38 | self.early_stop = True 39 | else: 40 | self.best_score = score 41 | self.save_checkpoint(val_loss, model,para_list) 42 | self.counter = 0 43 | 44 | def save_checkpoint(self, val_loss, model,para_list): 45 | ''' 46 | Saves model when validation loss decrease. 47 | 验证损失减少时保存模型。 48 | ''' 49 | if self.verbose: 50 | print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...') 51 | torch.save(model.state_dict(),para_list+'checkpoint.pt') # 这里会存储迄今最优模型的参数 52 | # torch.save(model, 'finish_model.pkl') # 这里会存储迄今最优的模型 53 | self.val_loss_min = val_loss --------------------------------------------------------------------------------