├── 01OLS-AC.py
├── 02Lasso_AC.py
├── 03Ridge_AC.py
├── 04ENet-AC.py
├── 05PCA_AC.py
├── 06RPPCA_AC.py
├── 07RNN-AC.py
├── 08LSTM-AC.py
├── 09DFN-AC.py
├── 10result_pro.py
├── 11get_weight_result.py
├── README.md
├── data
    ├── CH_3_update_20211231.csv
    └── CH_4_fac_update_20211231.csv
└── pytorchtools_change.py


/01OLS-AC.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Fri Dec  2 09:33:11 2022
  4 | 
  5 | @author: whufi
  6 | """
  7 |  
  8 | import os
  9 | import numpy as np
 10 | import random
 11 | import pandas as pd
 12 | import warnings
 13 | random.seed(135)
 14 | warnings.filterwarnings("ignore")
 15 | 
 16 | # os.chdir(r'E:\02实验\98ML-AC-code')  ##设置文件路径
 17 | 
 18 | #%% 主要代码
 19 | 
 20 |  
 21 | ##获取等权投资组合
 22 | def get_equal_weight(ret):
 23 |     ret_m=pd.melt(ret.reset_index(),id_vars=ret.reset_index().columns[0])
 24 |     ret_m.columns=['date','permno','ret']
 25 |     ret_m['month']=ret_m['date'].apply(lambda x:str(x)[:6])
 26 |     ret_m=ret_m.dropna(axis=0).astype(float)
 27 |     count=ret_m.groupby(['date'])['ret'].count().reset_index()
 28 |     retd=pd.merge(ret_m,count,on=['date'],how='left')
 29 |     
 30 |     retd['weight']=1/retd['ret_y']
 31 |     weight1=pd.pivot(retd,index='date',columns='permno',values='weight')
 32 |     weight2=pd.concat([pd.DataFrame(columns=ret.columns.astype(float)),weight1],axis=0,join='outer')
 33 |     weight2=pd.concat([pd.DataFrame(index=ret.index),weight2],axis=1,join='outer')
 34 |     weight2.columns=weight2.columns.astype(float)    
 35 | 
 36 |     return  weight2
 37 | 
 38 |  
 39 | 
 40 | def get_tc(j,theta0,rets,chs,weights,cr=0.005):
 41 |     '''
 42 |     j:从1开始，假设第一期到第二期无交易费用
 43 |     cr:表示费率，一般取0.005
 44 |     '''
 45 |     
 46 |     r=rets.iloc[j:j+1,:].values
 47 |     c=chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values
 48 |     wbar=weights.iloc[j:j+1,:].fillna(0).values
 49 |     w0bar=weights.iloc[j-1:j,:].fillna(0).values
 50 |     c0=chs[chs.date==rets.index[j-1]].sort_values('permno').iloc[:,2:].fillna(0).values
 51 |     r0=rets.iloc[j-1:j,:].values
 52 |     r00=rets.iloc[j-1:j,:].fillna(0).values
 53 |     
 54 |     wp=np.multiply(w0bar + np.dot(theta0.T,c0.T)/np.sum(~np.isnan(r0),axis=1),(1+r00))
 55 |     w=wbar + np.dot(theta0.T,c.T)/np.sum(~np.isnan(r),axis=1) 
 56 | 
 57 |     ##固定比列的交易成本
 58 |     lc= c/np.sum(~np.isnan(r),axis=1) - np.multiply(c0/np.sum(~np.isnan(r0),axis=1), 1+np.repeat(r00,c0.shape[1],axis=0).T)   
 59 |     tc1=cr*np.dot(np.sign(w-wp),lc)
 60 |     
 61 |     return tc1.T
 62 | 
 63 | 
 64 | 
 65 | def power_utility(r,gamma=5):   ##越大越好
 66 |     return np.power(1+r,1-gamma)/(1-gamma)
 67 | 
 68 |  
 69 | 
 70 | #根据资产的特征和收益数据，获得特征的权重系数theta
 71 | def train(rets,chs,weights,gamma,lambda1,rho,cr, allow_short_selling,utility_function,cost_type):  ##rho=1,L1; rho=0,L2 ; cr:cost rate费率 ; allow_short_selling默认无卖空约束
 72 |     
 73 |     retsv=rets.fillna(0).values
 74 |     weightsv=weights.fillna(0).values 
 75 |     rcs=np.hstack([np.dot(retsv[j:j+1,:],chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values).T/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values  for  j in range(len(rets)) ])     
 76 |     rbs=np.hstack([np.dot(retsv[j:j+1,:],weightsv[j:j+1,:].T) for  j in range(len(rets))])    
 77 |     sigmac=np.cov(rcs)
 78 |     
 79 |     cmean=np.mean(rcs,axis=1)
 80 |     cmeanm=np.vstack([cmean for x in range(rcs.shape[1])]).T
 81 |     sigmabc=np.dot(rbs-np.mean(rbs), (rcs-cmeanm).T)/(rcs.shape[1]-1)
 82 |     
 83 |     if rcs.shape[0]==1:
 84 |         theta=np.dot(1/sigmac,(cmean.reshape(len(cmean),1)/gamma-sigmabc.reshape(len(cmean),1) ))
 85 |     else: ##inv求逆函数要求矩阵是二维的      
 86 |         theta=np.dot(np.linalg.inv(sigmac),(cmean.reshape(len(cmean),1)/gamma-sigmabc.reshape(len(cmean),1) ))
 87 |     
 88 |     return theta
 89 |     
 90 |  
 91 | 
 92 | 
 93 |  
 94 | #根据特征的权重系数theta，进而获得投资组合权重w
 95 | def test(theta,rett,weightt,chs,allow_short_selling):   
 96 |     
 97 |     rets=rett
 98 |     weightsv=weightt.fillna(0).values 
 99 |  
100 |     #是否允许卖空
101 |     if  allow_short_selling==True: #允许卖空
102 |         w=weightsv + np.vstack([np.dot(theta.T,chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values.T)/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values  for  j in range(len(rets)) ])     
103 |         # r= rbs+ np.dot(theta.T,rcs)
104 |     elif  allow_short_selling==False: #不允许卖空
105 |         w=weightsv + np.vstack([np.dot(theta.T,chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values.T)/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values  for  j in range(len(rets)) ])     
106 |         w[w<0]=0             
107 |         wsum=np.sum(w,axis=1).reshape(w.shape[0],1).repeat(w.shape[1],axis=1)
108 |         w=w/wsum
109 |     
110 |     return w
111 |      
112 | 
113 | 
114 | 
def get_weights(i,ret,ch,weight0,gamma,rho,cr,allow_short_selling,utility_function,cost_type):
    """Estimate theta on one rolling window and return the next year's weights.

    The window starting at row ``i`` uses 7 + 3 years (120 monthly rows) for
    estimation and the following 12 rows as the test period.

    Parameters
    ----------
    i : int
        Row position in ``ret`` where the window starts.
    ret : pandas.DataFrame
        Wide returns whose index is named ``date``.
    ch : pandas.DataFrame
        Long characteristics with ``date`` and ``permno`` columns.
    weight0 : pandas.DataFrame
        Benchmark weights aligned with ``ret``.
    gamma, rho, cr, allow_short_selling, utility_function, cost_type
        Passed through to ``train`` / ``test``.

    Returns
    -------
    pandas.DataFrame
        Portfolio weights for the 12 test rows, labelled like ``weight0``.
    """
    trw = 7
    viw = 3
    fit_end = i + 12 * (trw + viw)
    test_end = i + 12 * (trw + viw + 1)

    # Date/asset pairs of the whole window (estimation + test) in long form,
    # used to pick the matching characteristics.
    pairs = pd.melt(ret.iloc[i:test_end, :].reset_index(), id_vars='date')
    pairs.columns = ['date', 'permno', 'ret']
    pairs = pairs.sort_values(by=['date']).astype(float)
    chs = pd.merge(pairs[['date', 'permno']], ch, how='left', on=['date', 'permno'])

    # No regularisation for the closed-form estimator.
    lambdaopt = 0

    theta = train(
        ret.iloc[i:fit_end, :], chs, weight0.iloc[i:fit_end, :],
        gamma, lambdaopt, rho, cr,
        allow_short_selling=allow_short_selling,
        utility_function=utility_function,
        cost_type=cost_type,
    )
    print(theta)

    # Test period
    rett = ret.iloc[fit_end:test_end, :]
    weightt = weight0.iloc[fit_end:test_end, :]
    wp = test(theta, rett, weightt, chs, allow_short_selling)
    return pd.DataFrame(wp, index=weightt.index, columns=weightt.columns)
138 |  
139 | 
def get_result(ret,ch,methodname,gamma,rho,cr, allow_short_selling,utility_function,cost_type):
    """Run the rolling-window estimation and write all test weights to CSV.

    Windows start every 12 rows; each needs 84 + 36 estimation rows followed
    by 12 test rows. The weights of all test periods are stacked and saved to
    ``result/middle/weight_all/weight_<methodname>_<rho>_<gamma>_
    <allow_short_selling>_<utility_function>_<cost_type>.csv``.

    Parameters
    ----------
    ret : pandas.DataFrame
        Wide returns whose index is named ``date``.
    ch : pandas.DataFrame
        Long characteristics with ``date`` and ``permno`` columns.
    methodname : str
        Label used in the output file name.
    gamma, rho, cr, allow_short_selling, utility_function, cost_type
        Passed through to ``get_weights``.

    Returns
    -------
    None
    """
    trww = 7 * 12
    viww = 3 * 12
    teww = 1 * 12
    weight0 = get_equal_weight(ret)

    # Collect the per-window frames and concatenate once, instead of growing a
    # DataFrame (starting from an empty one) on every iteration.
    frames = []
    for i in range(0, len(ret) - trww - viww - teww + 1, 12):
        print(i)
        frames.append(
            get_weights(
                i, ret, ch, weight0, gamma, rho, cr,
                allow_short_selling=allow_short_selling,
                utility_function=utility_function,
                cost_type=cost_type,
            )
        )
    # With too few rows for a single window an empty table is written, as before.
    weight = pd.concat(frames, axis=0, join='outer') if frames else pd.DataFrame()

    out_dir = 'result/middle/weight_all'
    # to_csv does not create missing folders.
    os.makedirs(out_dir, exist_ok=True)
    weight.to_csv(out_dir+'/weight_'+methodname+'_'+str(rho)+'_'+str(gamma)+'_'+str(allow_short_selling)+'_'+str(utility_function)+'_'+str(cost_type)+'.csv')
151 | 
152 | 
153 | 
154 | #%%#所需数据，代码调用
ret = pd.read_csv('data/ret_clean.csv', index_col=0).astype(float) / 100  # returns, divided by 100 (presumably percent -> decimal; confirm)
ret.index.name = 'date'
ch = pd.read_csv('data/char.csv', index_col=0).astype(float)  # characteristics
ch = ch.sort_values(by=['date', 'permno'], ascending=True)

# Each para_list entry is positional:
# [methodname, rho, gamma, cr, allow_short_selling, utility_function, cost_type]
_PARA_KEYS = ('methodname', 'rho', 'gamma', 'cr',
              'allow_short_selling', 'utility_function', 'cost_type')


# %% Weights for single-characteristic checks (disabled)
# for i in range(ch.shape[1]-2):
#     para=[ch.columns[2+i]]+[0,5,0.005,True,'MV',False]
#     chm=pd.concat([ch.iloc[:,:2],ch.iloc[:,i+2]],axis=1)
#     get_result(ret,chm,methodname=para[0],rho=para[1],gamma=para[2],cr=para[3], allow_short_selling=para[4],utility_function=para[5],cost_type=para[6])


# Every para_list below holds three runs:
# 1. main result  2. short-selling constraint  3. risk aversion equal to 10
#%% OLS-AC weights (all characteristics)
para_list = [
    ['OLS', 0, 5, 0.005, True, 'MV', False],
    ['OLS', 0, 5, 0.005, False, 'MV', False],
    ['OLS', 0, 10, 0.005, True, 'MV', False],
]
for para in para_list:
    print(para)
    get_result(ret, ch, **dict(zip(_PARA_KEYS, para)))


#%% OLS-5C weights (five characteristics)
ch = ch[['date', 'permno', '01_size', '19_mom12', '29_BM', '59_ROE', '43_AG']]
para_list = [
    ['ff5', 0, 5, 0.005, True, 'MV', False],
    ['ff5', 0, 5, 0.005, False, 'MV', False],
    ['ff5', 0, 10, 0.005, True, 'MV', False],
]
for para in para_list:
    get_result(ret, ch, **dict(zip(_PARA_KEYS, para)))


#%% OLS-3C weights (three characteristics, taken from the five-column table above)
ch1 = ch[['date', 'permno', '01_size', '19_mom12', '29_BM']]
para_list = [
    ['ff3', 0, 5, 0.005, True, 'MV', False],
    ['ff3', 0, 5, 0.005, False, 'MV', False],
    ['ff3', 0, 10, 0.005, True, 'MV', False],
]
for para in para_list:
    get_result(ret, ch1, **dict(zip(_PARA_KEYS, para)))
188 | 
189 | 
190 |  
191 | 
192 | 
193 | 
194 | 


--------------------------------------------------------------------------------
/02Lasso_AC.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Sun Nov 13 22:01:15 2022
  4 | 
  5 | @author: whufi
  6 | """
  7 | import os
  8 | import numpy as np
  9 | import random
 10 | import pandas as pd
 11 | import warnings
 12 | from sklearn.model_selection import KFold
 13 | random.seed(135)
 14 | warnings.filterwarnings("ignore")
 15 | 
 16 | # os.chdir(r'E:\02实验\98ML-AC-code')  ##设置文件路径
 17 | #%%#等权投资组合
 18 | def get_equal_weight(ret):
 19 |     ret_m=pd.melt(ret.reset_index(),id_vars=ret.reset_index().columns[0])
 20 |     ret_m.columns=['date','permno','ret']
 21 |     ret_m['month']=ret_m['date'].apply(lambda x:str(x)[:6])
 22 |     ret_m=ret_m.dropna(axis=0).astype(float)
 23 |   
 24 |     count=ret_m.groupby(['date'])['ret'].count().reset_index()
 25 |     retd=pd.merge(ret_m,count,on=['date'],how='left')
 26 |     
 27 |     retd['weight']=1/retd['ret_y']
 28 |     weight1=pd.pivot(retd,index='date',columns='permno',values='weight')
 29 |     weight2=pd.concat([pd.DataFrame(columns=ret.columns.astype(float)),weight1],axis=0,join='outer')
 30 |     weight2=pd.concat([pd.DataFrame(index=ret.index),weight2],axis=1,join='outer')
 31 |     
 32 |     weight2.columns=weight2.columns.astype(float)    
 33 | 
 34 |     return  weight2
 35 | 
 36 |  
 37 | 
 38 | 
 39 | def get_tc(j,theta0,rets,chs,weights,cr=0.005):
 40 |     '''
 41 |     j:从1开始，假设第一期到第二期无交易费用
 42 |     cr:表示费率，一般取0.005
 43 |     '''
 44 |     
 45 |     r=rets.iloc[j:j+1,:].values
 46 |     c=chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values
 47 | 
 48 |     wbar=weights.iloc[j:j+1,:].fillna(0).values
 49 |             
 50 |     w0bar=weights.iloc[j-1:j,:].fillna(0).values
 51 |     c0=chs[chs.date==rets.index[j-1]].sort_values('permno').iloc[:,2:].fillna(0).values
 52 |     r0=rets.iloc[j-1:j,:].values
 53 |     r00=rets.iloc[j-1:j,:].fillna(0).values
 54 |  
 55 |     wp=np.multiply(w0bar + np.dot(theta0.T,c0.T)/np.sum(~np.isnan(r0),axis=1),(1+r00))    
 56 |     w=wbar + np.dot(theta0.T,c.T)/np.sum(~np.isnan(r),axis=1) 
 57 |     
 58 |     ##固定比列的交易成本
 59 |     lc= c/np.sum(~np.isnan(r),axis=1) - np.multiply(c0/np.sum(~np.isnan(r0),axis=1), 1+np.repeat(r00,c0.shape[1],axis=0).T)
 60 |     tc1=cr*np.dot(np.sign(w-wp),lc)
 61 |     
 62 |     return tc1.T
 63 | 
 64 | 
 65 | def power_utility(r,gamma=5):   ##越大越好
 66 |     return np.power(1+r,1-gamma)/(1-gamma)
 67 | 
 68 | 
 69 | def loss(rets,chs,weights,theta,gamma,lambda1,rho,cr, allow_short_selling,utility_function,cost_type):
 70 |     
 71 |     retsv=rets.fillna(0).values
 72 |     weightsv=weights.fillna(0).values 
 73 | 
 74 |     #是否允许卖空
 75 |     w=weightsv + np.vstack([np.dot(theta.T,chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values.T)/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values  for  j in range(len(rets)) ])         
 76 |     retw=w*retsv
 77 |     r= np.sum(retw ,axis=1)   #.reshape(1,len(wsum))
 78 |  
 79 |     #是否更换效用函数  
 80 |     if utility_function=='crra':
 81 |         utility=-np.mean(power_utility(r,gamma))+lambda1*rho*np.linalg.norm(theta,1)+lambda1*(1-rho)/2*np.linalg.norm(theta,2)
 82 |     elif utility_function=='MV':
 83 |         sigma=np.std(r,ddof=1)
 84 |         utility= gamma/2*sigma-np.mean(r)+lambda1*rho*np.linalg.norm(theta,1)+lambda1*(1-rho)/2*np.linalg.norm(theta,2)
 85 |  
 86 |     if cost_type==False:
 87 |         utility=utility
 88 |     elif cost_type==True:
 89 |         wp=w*(1+retsv)
 90 |         tc=np.mean(np.sum(np.abs(w[1:,:]-wp[:-1,:]),axis=1))  ##计算交易成本 
 91 |         utility=utility+cr*tc
 92 |     
 93 |     return utility
 94 |        
 95 | 
 96 | def train(rets,chs,weights,gamma,lambda1,rho,cr, allow_short_selling,utility_function,cost_type):  ##rho=1,L1; rho=0,L2 ; cr:cost rate费率 ; allow_short_selling默认无卖空约束
 97 |     
 98 |     retsv=rets.fillna(0).values
 99 |     weightsv=weights.fillna(0).values 
100 |     rcs=np.hstack([np.dot(retsv[j:j+1,:],chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values).T/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values  for  j in range(len(rets)) ])     
101 |     rbs=np.hstack([np.dot(retsv[j:j+1,:],weightsv[j:j+1,:].T) for  j in range(len(rets))])    
102 |     sigmac=np.cov(rcs)
103 |     
104 |     cmean=np.mean(rcs,axis=1)
105 |     cmeanm=np.vstack([cmean for x in range(rcs.shape[1])]).T
106 |     sigmabc=np.dot(rbs-np.mean(rbs), (rcs-cmeanm).T)/(rcs.shape[1]-1)
107 |     uc=cmean
108 |     
109 |     k=len(ch.columns)-2  ##特征的个数
110 |     theta0=np.ones((k,1))*1.5
111 |     eps=10**(-8)
112 |     
113 |     t=1
114 |     beta1=0.9
115 |     beta2=0.999
116 |     alpha=0.2 ##学习率0.1
117 |     
118 |  
119 |     utility0=100
120 |     
121 |     
122 |     for u in range(100):   
123 |         print('batch',u)
124 |         
125 |         batch_size=2  ##如何设置
126 |         batch_starts=[start for start in range(1,len(rets)-batch_size,batch_size)]
127 |         random.shuffle(batch_starts)
128 |         
129 |         m0=0
130 |         v0=0
131 |         
132 |         for p in batch_starts:
133 |             print('第多少轮次梯度',t)
134 |   
135 |             if utility_function=='crra':
136 |                 gra=-np.power(1+rbs+ np.dot(theta0.T,rcs),-gamma).dot(rcs.T).T + lambda1*rho*np.sign(theta0)+lambda1*(1-rho)*theta0   ##梯度
137 |             elif utility_function=='MV': 
138 |                 gra=gamma * np.dot(sigmac,theta0) + gamma * sigmabc.T -uc.reshape(len(uc),1)+ lambda1*rho*np.sign(theta0)+lambda1*(1-rho)*theta0   ##梯度
139 |  
140 |                 
141 |             if cost_type==False:   #不考虑交易成本
142 |                 gra=gra  ##梯度
143 |             elif cost_type==True:  #考虑交易成本
144 |                 tc=0
145 |                 for j in range(p,p+batch_size):
146 |                     # print('成本',j)
147 |                     tc1=get_tc(j,theta0,rets,chs,weights,cr)
148 |                     tc=tc+tc1
149 |                 gra=gra+tc   ##梯度
150 |                 
151 |             m=beta1*m0 +(1-beta1)*gra
152 |             v=beta2*v0+(1-beta2)*np.dot(gra.T,gra)
153 |             
154 |             beta1t=beta1**t
155 |             beta2t=beta2**t
156 |             
157 |             mh=m/(1-beta1t)
158 |             vh=v/(1-beta2t)
159 |             
160 |             theta= theta0 -alpha*mh/(np.sqrt(vh)+eps)
161 |             
162 |             utility=loss(rets,chs,weights,theta,gamma,lambda1,rho,cr, allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type)
163 |         
164 |  
165 |             print('utility',utility)           
166 |             if utility>utility0:
167 |                 break
168 |                          
169 |             if np.linalg.norm(theta-theta0) <= 10**(-5) or np.linalg.norm(utility-utility0)<= 10**(-5)  :
170 |                 print(np.linalg.norm(utility-utility0))
171 |                 break  
172 |             
173 |             theta0=theta
174 |             utility0=utility
175 |             t=t+1
176 |             
177 |     return theta
178 | 
179 | 
180 |  
181 | 
182 | def test(theta,rett,weightt,chs,allow_short_selling):   
183 |     
184 |     rets=rett
185 |     weightsv=weightt.fillna(0).values 
186 |  
187 |     #是否允许卖空
188 |     if  allow_short_selling==True: #允许卖空
189 |         w=weightsv + np.vstack([np.dot(theta.T,chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values.T)/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values  for  j in range(len(rets)) ])     
190 | 
191 |     elif  allow_short_selling==False: #不允许卖空
192 |         w=weightsv + np.vstack([np.dot(theta.T,chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values.T)/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values  for  j in range(len(rets)) ])     
193 |         w[w<0]=0             
194 |         wsum=np.sum(w,axis=1).reshape(w.shape[0],1).repeat(w.shape[1],axis=1)
195 |         w=w/wsum
196 |     
197 |     return w
198 |      
199 | 
200 |  
201 | 
202 | 
def get_weights(i,ret,ch,weight0,gamma,rho,cr,allow_short_selling,utility_function,cost_type):
    """Estimate theta on one rolling window and return the next year's weights.

    The window starting at row ``i`` uses 7 + 3 years (120 monthly rows) for
    estimation and the following 12 rows as the test period. The
    regularisation strength is tuned by 5-fold cross-validation only when
    ``i == 0``; it is stored in the module-level ``lambdaopt`` and reused by
    every later window.

    Parameters
    ----------
    i : int
        Row position in ``ret`` where the window starts.
    ret : pandas.DataFrame
        Wide returns whose index is named ``date``.
    ch : pandas.DataFrame
        Long characteristics with ``date`` and ``permno`` columns.
    weight0 : pandas.DataFrame
        Benchmark weights aligned with ``ret``.
    gamma, rho, cr, allow_short_selling, utility_function, cost_type
        Passed through to ``train`` / ``loss`` / ``test``.

    Returns
    -------
    pandas.DataFrame
        Portfolio weights for the 12 test rows, labelled like ``weight0``.
    """
    global lambdaopt

    trw = 7
    viw = 3
    fit_end = i + 12 * (trw + viw)
    test_end = i + 12 * (trw + viw + 1)
    run_options = dict(
        allow_short_selling=allow_short_selling,
        utility_function=utility_function,
        cost_type=cost_type,
    )

    # Estimation sample: benchmark weights and returns
    weights = weight0.iloc[i:fit_end, :]
    retm = ret.iloc[i:fit_end, :]

    # Date/asset pairs of the whole window (estimation + test) in long form,
    # used to pick the matching characteristics.
    pairs = pd.melt(ret.iloc[i:test_end, :].reset_index(), id_vars='date')
    pairs.columns = ['date', 'permno', 'ret']
    pairs = pairs.sort_values(by=['date']).astype(float)
    chs = pd.merge(pairs[['date', 'permno']], ch, how='left', on=['date', 'permno'])

    if i == 0:
        # Hyper-parameter tuning is done on the first window only.
        kf = KFold(n_splits=5)
        umax = 10000000   # best (smallest) mean validation loss so far
        lambdaopt = 0
        for lambda1 in [0.001, 0.0001]:   # candidate regularisation strengths
            print(lambda1)

            # 5-fold cross-validation for this lambda
            util_list = []
            for rets_index, retv_index in kf.split(retm):
                print(rets_index.shape,retv_index.shape)
                theta = train(retm.iloc[rets_index, :], chs, weights.iloc[rets_index, :],
                              gamma, lambda1, rho, cr, **run_options)
                util_list.append(
                    loss(retm.iloc[retv_index, :], chs, weights.iloc[retv_index, :],
                         theta, gamma, lambda1, rho, cr, **run_options)
                )

            util_mean = np.mean(util_list)
            if util_mean < umax:
                lambdaopt = lambda1
                umax = util_mean

        print('lambda',lambdaopt)
        chosen = pd.DataFrame([lambdaopt], columns=['lambda1'])
        chosen.to_csv('result/parameter/RPP'+'_'+str(rho)+'_'+str(gamma)+'_'+str(allow_short_selling)+'_'+str(utility_function)+'_'+str(cost_type)+'.csv')

    theta = train(retm, chs, weights, gamma, lambdaopt, rho, cr, **run_options)
    print(theta)

    # Test period
    rett = ret.iloc[fit_end:test_end, :]
    weightt = weight0.iloc[fit_end:test_end, :]
    wp = test(theta, rett, weightt, chs, allow_short_selling)
    return pd.DataFrame(wp, index=weightt.index, columns=weightt.columns)
254 |  
255 | 
def get_result(ret,ch,methodname,gamma,rho,cr, allow_short_selling,utility_function,cost_type):
    """Run the rolling-window estimation and write all test weights to CSV.

    Windows start every 12 rows; each needs 84 + 36 estimation rows followed
    by 12 test rows. The weights of all test periods are stacked and saved
    under ``result/middle/weight_all/``; the file name ends with the
    module-level ``lambdaopt`` that ``get_weights`` sets on the first window.

    Parameters
    ----------
    ret : pandas.DataFrame
        Wide returns whose index is named ``date``.
    ch : pandas.DataFrame
        Long characteristics with ``date`` and ``permno`` columns.
    methodname : str
        Label used in the output file name.
    gamma, rho, cr, allow_short_selling, utility_function, cost_type
        Passed through to ``get_weights``.

    Returns
    -------
    None
    """
    trww = 7 * 12
    viww = 3 * 12
    teww = 1 * 12
    weight0 = get_equal_weight(ret)

    # Collect the per-window frames and concatenate once, instead of growing a
    # DataFrame (starting from an empty one) on every iteration.
    frames = []
    for i in range(0, len(ret) - trww - viww - teww + 1, 12):
        print(i)
        frames.append(
            get_weights(
                i, ret, ch, weight0, gamma, rho, cr,
                allow_short_selling=allow_short_selling,
                utility_function=utility_function,
                cost_type=cost_type,
            )
        )
    weight = pd.concat(frames, axis=0, join='outer') if frames else pd.DataFrame()

    out_dir = 'result/middle/weight_all'
    # to_csv does not create missing folders.
    os.makedirs(out_dir, exist_ok=True)
    weight.to_csv(out_dir+'/weight_'+methodname+'_'+str(rho)+'_'+str(gamma)+'_'+str(allow_short_selling)+'_'+str(utility_function)+'_'+str(cost_type)+str(lambdaopt)+'.csv')
267 | 
268 | 
269 | #%%#所需数据，代码调用
ret = pd.read_csv('data/ret_clean.csv', index_col=0).astype(float) / 100  # returns, divided by 100 (presumably percent -> decimal; confirm)
ret.index.name = 'date'
ch = pd.read_csv('data/char.csv', index_col=0).astype(float)  # characteristics
ch = ch.sort_values(by=['date', 'permno'], ascending=True)


# Each para_list entry is positional:
# [methodname, rho, gamma, cr, allow_short_selling, utility_function, cost_type]
_PARA_KEYS = ('methodname', 'rho', 'gamma', 'cr',
              'allow_short_selling', 'utility_function', 'cost_type')

# 1. main result  2. short-selling constraint  3. risk aversion equal to 10
para_list = [
    ['lasso', 1, 5, 0.005, True, 'MV', False],
    ['lasso', 1, 5, 0.005, False, 'MV', False],
    ['lasso', 1, 10, 0.005, True, 'MV', False],
]
for para in para_list:
    get_result(ret, ch, **dict(zip(_PARA_KEYS, para)))
281 | 
282 | 
283 |  
284 |  
285 |  
286 | 
287 | 
288 | 
289 | 
290 | 
291 | 
292 | 
293 | 
294 | 
295 | 
296 | 
297 | 
298 | 
299 | 
300 | 


--------------------------------------------------------------------------------
/03Ridge_AC.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Mon Nov 14 09:28:20 2022
  4 | 
  5 | @author: whufi
  6 | """
  7 | import os
  8 | import numpy as np
  9 | import random
 10 | import pandas as pd
 11 | import warnings
 12 | from sklearn.model_selection import KFold
 13 | random.seed(135)
 14 | warnings.filterwarnings("ignore")
 15 | 
 16 | # os.chdir(r'E:\02实验\98ML-AC-code')  ##设置文件路径 
 17 | #%%#等权投资组合
 18 | def get_equal_weight(ret):
 19 |     ret_m=pd.melt(ret.reset_index(),id_vars=ret.reset_index().columns[0])
 20 |     ret_m.columns=['date','permno','ret']
 21 |     ret_m['month']=ret_m['date'].apply(lambda x:str(x)[:6])
 22 |     ret_m=ret_m.dropna(axis=0).astype(float)
 23 |  
 24 |     
 25 |     count=ret_m.groupby(['date'])['ret'].count().reset_index()
 26 |     retd=pd.merge(ret_m,count,on=['date'],how='left')
 27 |     
 28 |     retd['weight']=1/retd['ret_y']
 29 |     weight1=pd.pivot(retd,index='date',columns='permno',values='weight')
 30 |     weight2=pd.concat([pd.DataFrame(columns=ret.columns.astype(float)),weight1],axis=0,join='outer')
 31 |     weight2=pd.concat([pd.DataFrame(index=ret.index),weight2],axis=1,join='outer')
 32 | 
 33 |     weight2.columns=weight2.columns.astype(float)    
 34 | 
 35 |     return  weight2
 36 | 
 37 |  
 38 | 
 39 | 
 40 | def get_tc(j,theta0,rets,chs,weights,cr=0.005):
 41 |     '''
 42 |     j:从1开始，假设第一期到第二期无交易费用
 43 |     cr:表示费率，一般取0.005
 44 |     '''
 45 |     
 46 |     r=rets.iloc[j:j+1,:].values
 47 |     c=chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values
 48 | 
 49 |     wbar=weights.iloc[j:j+1,:].fillna(0).values
 50 |             
 51 |     w0bar=weights.iloc[j-1:j,:].fillna(0).values
 52 |     c0=chs[chs.date==rets.index[j-1]].sort_values('permno').iloc[:,2:].fillna(0).values
 53 |     r0=rets.iloc[j-1:j,:].values
 54 |     r00=rets.iloc[j-1:j,:].fillna(0).values
 55 |  
 56 |  
 57 |     wp=np.multiply(w0bar + np.dot(theta0.T,c0.T)/np.sum(~np.isnan(r0),axis=1),(1+r00))
 58 |     w=wbar + np.dot(theta0.T,c.T)/np.sum(~np.isnan(r),axis=1) 
 59 |     
 60 |     ##固定比列的交易成本
 61 |     lc= c/np.sum(~np.isnan(r),axis=1) - np.multiply(c0/np.sum(~np.isnan(r0),axis=1), 1+np.repeat(r00,c0.shape[1],axis=0).T)
 62 |     tc1=cr*np.dot(np.sign(w-wp),lc)
 63 |     
 64 |     return tc1.T
 65 | 
 66 | 
 67 | 
 68 | def power_utility(r,gamma=5):    
 69 |     return np.power(1+r,1-gamma)/(1-gamma)
 70 | 
 71 | def loss(rets,chs,weights,theta,gamma,lambda1,rho,cr, allow_short_selling,utility_function,cost_type):
 72 |     
 73 |     retsv=rets.fillna(0).values
 74 |     weightsv=weights.fillna(0).values 
 75 |  
 76 |     #是否允许卖空
 77 |     w=weightsv + np.vstack([np.dot(theta.T,chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values.T)/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values  for  j in range(len(rets)) ])         
 78 |  
 79 |     retw=w*retsv
 80 |     r= np.sum(retw ,axis=1)   #.reshape(1,len(wsum))
 81 |     
 82 |  
 83 |         
 84 |     #是否更换效用函数  
 85 |     if utility_function=='crra':
 86 |         utility=-np.mean(power_utility(r,gamma))+lambda1*rho*np.linalg.norm(theta,1)+lambda1*(1-rho)/2*np.linalg.norm(theta,2)
 87 |     elif utility_function=='MV':
 88 |         sigma=np.std(r,ddof=1)
 89 |         utility= gamma/2*sigma-np.mean(r)+lambda1*rho*np.linalg.norm(theta,1)+lambda1*(1-rho)/2*np.linalg.norm(theta,2)
 90 |  
 91 |     if cost_type==False:
 92 |         utility=utility
 93 |     elif cost_type==True:
 94 |         wp=w*(1+retsv)
 95 |         tc=np.mean(np.sum(np.abs(w[1:,:]-wp[:-1,:]),axis=1))  ##计算交易成本 
 96 |         utility=utility+cr*tc
 97 |     
 98 |     return utility
 99 |        
100 | 
def train(rets,chs,weights,gamma,lambda1,rho,cr, allow_short_selling,utility_function,cost_type):
    """Fit the characteristic coefficients ``theta`` with iterative gradient steps.

    Parameters
    ----------
    rets : pandas.DataFrame
        Wide returns (rows are dates, columns are assets).
    chs : pandas.DataFrame
        Long characteristics; columns 0 and 1 are date and permno, the rest
        are the characteristics.
    weights : pandas.DataFrame
        Benchmark weights aligned with ``rets``.
    gamma : float
        Risk-aversion coefficient.
    lambda1 : float
        Regularisation strength.
    rho : float
        Elastic-net mix: 1 is pure L1, 0 is pure L2.
    cr : float
        Proportional transaction-cost rate.
    allow_short_selling : bool
        Forwarded to ``loss``.
    utility_function : {'crra', 'MV'}
        Objective type.
    cost_type : bool
        Whether the transaction-cost gradient is added.

    Returns
    -------
    numpy.ndarray
        Column vector with one coefficient per characteristic. When the
        sample is too short to form a single mini-batch, the starting point
        (all 1.5) is returned.

    Raises
    ------
    ValueError
        If ``utility_function`` is neither ``'crra'`` nor ``'MV'``.
    """
    if utility_function not in ('crra', 'MV'):
        raise ValueError("utility_function must be 'crra' or 'MV', got %r" % (utility_function,))

    retsv = rets.fillna(0).values
    weightsv = weights.fillna(0).values
    # Per-date return of each characteristic portfolio (characteristics x dates)
    # and of the benchmark portfolio (1 x dates).
    rcs = np.hstack([np.dot(retsv[j:j+1,:],chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values).T/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values  for  j in range(len(rets)) ])
    rbs = np.hstack([np.dot(retsv[j:j+1,:],weightsv[j:j+1,:].T) for  j in range(len(rets))])
    sigmac = np.cov(rcs)

    cmean = np.mean(rcs, axis=1)
    cmeanm = np.vstack([cmean for x in range(rcs.shape[1])]).T
    sigmabc = np.dot(rbs - np.mean(rbs), (rcs - cmeanm).T) / (rcs.shape[1] - 1)
    uc = cmean

    # Number of characteristics, taken from the data passed in rather than
    # from a module-level table.
    k = rcs.shape[0]
    theta0 = np.ones((k, 1)) * 1.5
    # Defined up front so the function still returns when no step is taken.
    theta = theta0
    eps = 10**(-8)

    t = 1
    beta1 = 0.9
    beta2 = 0.999
    alpha = 0.2   # learning rate

    utility0 = 100
    batch_size = 2

    for u in range(100):
        print('batch',u)

        batch_starts = list(range(1, len(rets) - batch_size, batch_size))
        random.shuffle(batch_starts)

        # NOTE(review): m0 and v0 are reset here and never updated below, so
        # the moment estimates are not carried over between steps. Left
        # unchanged to keep the results reproducible.
        m0 = 0
        v0 = 0

        for p in batch_starts:
            print('第多少轮次梯度',t)

            # Gradient of the penalised objective at theta0.
            if utility_function == 'crra':
                gra = -np.power(1+rbs+ np.dot(theta0.T,rcs),-gamma).dot(rcs.T).T + lambda1*rho*np.sign(theta0)+lambda1*(1-rho)*theta0
            else:   # 'MV'
                gra = gamma * np.dot(sigmac,theta0) + gamma * sigmabc.T -uc.reshape(len(uc),1)+ lambda1*rho*np.sign(theta0)+lambda1*(1-rho)*theta0

            if cost_type == True:
                # Add the transaction-cost gradient of the dates in this batch.
                tc = 0
                for j in range(p, p + batch_size):
                    tc = tc + get_tc(j, theta0, rets, chs, weights, cr)
                gra = gra + tc

            m = beta1 * m0 + (1 - beta1) * gra
            v = beta2 * v0 + (1 - beta2) * np.dot(gra.T, gra)

            # Bias-corrected moments
            mh = m / (1 - beta1**t)
            vh = v / (1 - beta2**t)

            theta = theta0 - alpha * mh / (np.sqrt(vh) + eps)
            utility = loss(rets,chs,weights,theta,gamma,lambda1,rho,cr, allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type)
            print('utility',utility)

            # NOTE(review): both breaks leave only the inner loop, and the
            # step that raised the objective stays in `theta`, which is what
            # gets returned.
            if utility > utility0:
                break

            if np.linalg.norm(theta - theta0) <= 10**(-5) or np.linalg.norm(utility - utility0) <= 10**(-5):
                print(np.linalg.norm(utility-utility0))
                break

            theta0 = theta
            utility0 = utility
            t = t + 1

    return theta
186 | 
187 | 
188 |  
189 | 
190 | def test(theta,rett,weightt,chs,allow_short_selling):   
191 |     
192 |     rets=rett
193 |     weightsv=weightt.fillna(0).values 
194 |  
195 |     #是否允许卖空
196 |     if  allow_short_selling==True: #允许卖空
197 |         w=weightsv + np.vstack([np.dot(theta.T,chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values.T)/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values  for  j in range(len(rets)) ])     
198 |     elif  allow_short_selling==False: #不允许卖空
199 |         w=weightsv + np.vstack([np.dot(theta.T,chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values.T)/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values  for  j in range(len(rets)) ])     
200 |         w[w<0]=0             
201 |         wsum=np.sum(w,axis=1).reshape(w.shape[0],1).repeat(w.shape[1],axis=1)
202 |         w=w/wsum
203 |     
204 |     return w
205 |      
206 |  
207 | 
def get_weights(i,ret,ch,weight0,gamma,rho,cr,allow_short_selling,utility_function,cost_type):
    """Estimate theta on one rolling window and return the next year's weights.

    The window starting at row ``i`` uses 7 + 3 years (120 monthly rows) for
    estimation and the following 12 rows as the test period. The
    regularisation strength is tuned by 5-fold cross-validation only when
    ``i == 0``; it is stored in the module-level ``lambdaopt`` and reused by
    every later window.

    Parameters
    ----------
    i : int
        Row position in ``ret`` where the window starts.
    ret : pandas.DataFrame
        Wide returns whose index is named ``date``.
    ch : pandas.DataFrame
        Long characteristics with ``date`` and ``permno`` columns.
    weight0 : pandas.DataFrame
        Benchmark weights aligned with ``ret``.
    gamma, rho, cr, allow_short_selling, utility_function, cost_type
        Passed through to ``train`` / ``loss`` / ``test``.

    Returns
    -------
    pandas.DataFrame
        Portfolio weights for the 12 test rows, labelled like ``weight0``.
    """
    global lambdaopt

    trw = 7
    viw = 3
    fit_end = i + 12 * (trw + viw)
    test_end = i + 12 * (trw + viw + 1)
    run_options = dict(
        allow_short_selling=allow_short_selling,
        utility_function=utility_function,
        cost_type=cost_type,
    )

    # Estimation sample: benchmark weights and returns
    weights = weight0.iloc[i:fit_end, :]
    retm = ret.iloc[i:fit_end, :]

    # Date/asset pairs of the whole window (estimation + test) in long form,
    # used to pick the matching characteristics.
    pairs = pd.melt(ret.iloc[i:test_end, :].reset_index(), id_vars='date')
    pairs.columns = ['date', 'permno', 'ret']
    pairs = pairs.sort_values(by=['date']).astype(float)
    chs = pd.merge(pairs[['date', 'permno']], ch, how='left', on=['date', 'permno'])

    if i == 0:
        # Hyper-parameter tuning is done on the first window only.
        kf = KFold(n_splits=5)
        umax = 10000000   # best (smallest) mean validation loss so far
        lambdaopt = 0
        for lambda1 in [0.001, 0.0001]:   # candidate regularisation strengths
            print(lambda1)

            # 5-fold cross-validation for this lambda
            util_list = []
            for rets_index, retv_index in kf.split(retm):
                print(rets_index.shape,retv_index.shape)
                theta = train(retm.iloc[rets_index, :], chs, weights.iloc[rets_index, :],
                              gamma, lambda1, rho, cr, **run_options)
                util_list.append(
                    loss(retm.iloc[retv_index, :], chs, weights.iloc[retv_index, :],
                         theta, gamma, lambda1, rho, cr, **run_options)
                )

            util_mean = np.mean(util_list)
            if util_mean < umax:
                lambdaopt = lambda1
                umax = util_mean

        print('lambda',lambdaopt)
        chosen = pd.DataFrame([lambdaopt], columns=['lambda1'])
        chosen.to_csv('result/parameter/RPP'+'_'+str(rho)+'_'+str(gamma)+'_'+str(allow_short_selling)+'_'+str(utility_function)+'_'+str(cost_type)+'.csv')

    theta = train(retm, chs, weights, gamma, lambdaopt, rho, cr, **run_options)
    print(theta)

    # Test period
    rett = ret.iloc[fit_end:test_end, :]
    weightt = weight0.iloc[fit_end:test_end, :]
    wp = test(theta, rett, weightt, chs, allow_short_selling)
    return pd.DataFrame(wp, index=weightt.index, columns=weightt.columns)
256 |  
257 | 
def get_result(ret,ch,methodname,gamma,rho,cr, allow_short_selling,utility_function,cost_type):
    """Run the rolling-window procedure and write all held-out weights to one CSV.

    The window start moves in steps of 12 rows; each window uses 84 + 36 rows
    for fitting and the following 12 rows for evaluation (see ``get_weights``).
    The benchmark is the equal-weight portfolio of ``ret``.

    Nothing is returned. The stacked weights are written to
    ``result/middle/weight_all``; the file name ends with the module-level
    ``lambdaopt`` appended directly after ``cost_type`` (no separator), which is
    kept as is so existing result files keep their names.
    """
    import os  # local import: only needed to create the output folder

    trww=7*12
    viww=3*12
    teww=1*12
    weight0=get_equal_weight(ret)

    # Collect the per-window frames and concatenate once instead of re-copying
    # the growing result on every iteration.
    pieces=[]
    for i in range(0,len(ret)-trww-viww-teww+1,12):
        print(i)
        pieces.append(get_weights(i,ret,ch,weight0,gamma,rho,cr, allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type))
    weight=pd.concat(pieces,axis=0,join='outer') if pieces else pd.DataFrame()

    os.makedirs('result/middle/weight_all',exist_ok=True)
    weight.to_csv('result/middle/weight_all/weight_'+methodname+'_'+str(rho)+'_'+str(gamma)+'_'+str(allow_short_selling)+'_'+str(utility_function)+'_'+str(cost_type)+str(lambdaopt)+'.csv')
269 | 
270 | 
#%%# Required data and driver code
# Returns are divided by 100 after loading (presumably stored in percent -- confirm).
ret=pd.read_csv('data/ret_clean.csv',index_col=0).astype(float)/100 # load returns
ret.index.name='date'
ch=pd.read_csv('data/char.csv',index_col=0).astype(float) # load characteristics
ch=ch.sort_values(by=['date','permno'],ascending= True)

# Each entry is [methodname, rho, gamma, cr, allow_short_selling, utility_function, cost_type].
# 1. weights for the main result  2. weights under the short-selling constraint  3. weights with risk aversion equal to 10
para_list=[['ridge',0,5,0.005,True,'MV',False],['ridge',0,5,0.005,False,'MV',False],['ridge',0,10,0.005,True,'MV',False]]
for para in para_list:
    get_result(ret,ch,methodname=para[0],rho=para[1],gamma=para[2],cr=para[3], allow_short_selling=para[4],utility_function=para[5],cost_type=para[6])
281 | 
282 |  
283 | 
284 | 
285 | 
286 | 
287 | 
288 | 
289 | 


--------------------------------------------------------------------------------
/04ENet-AC.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Mon Nov 14 09:28:50 2022
  4 | 
  5 | @author: whufi
  6 | """
  7 |  
  8 | import os
  9 | import numpy as np
 10 | import random
 11 | import pandas as pd
 12 | import warnings
 13 | from sklearn.model_selection import KFold
 14 |  
 15 | random.seed(135)
 16 | warnings.filterwarnings("ignore")
 17 | # os.chdir(r'E:\02实验\98ML-AC-code')  ##设置文件路径
 18 |  
 19 | #%%#等权投资组合
 20 | 
 21 | def get_equal_weight(ret):
 22 |     ret_m=pd.melt(ret.reset_index(),id_vars=ret.reset_index().columns[0])
 23 |     ret_m.columns=['date','permno','ret']
 24 |     ret_m['month']=ret_m['date'].apply(lambda x:str(x)[:6])
 25 |     ret_m=ret_m.dropna(axis=0).astype(float)
 26 |  
 27 |     count=ret_m.groupby(['date'])['ret'].count().reset_index()
 28 |     retd=pd.merge(ret_m,count,on=['date'],how='left')
 29 |     
 30 |     retd['weight']=1/retd['ret_y']
 31 |     weight1=pd.pivot(retd,index='date',columns='permno',values='weight')
 32 |     weight2=pd.concat([pd.DataFrame(columns=ret.columns.astype(float)),weight1],axis=0,join='outer')
 33 |     weight2=pd.concat([pd.DataFrame(index=ret.index),weight2],axis=1,join='outer')
 34 |  
 35 |     weight2.columns=weight2.columns.astype(float)    
 36 | 
 37 |     return  weight2
 38 | 
 39 |  
 40 | 
 41 | 
 42 | def get_tc(j,theta0,rets,chs,weights,cr=0.005):
 43 |     '''
 44 |     j:从1开始，假设第一期到第二期无交易费用
 45 |     cr:表示费率，一般取0.005
 46 |     '''
 47 |     
 48 |     r=rets.iloc[j:j+1,:].values
 49 |     c=chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values
 50 | 
 51 |     wbar=weights.iloc[j:j+1,:].fillna(0).values
 52 |             
 53 |     w0bar=weights.iloc[j-1:j,:].fillna(0).values
 54 |     c0=chs[chs.date==rets.index[j-1]].sort_values('permno').iloc[:,2:].fillna(0).values
 55 |     r0=rets.iloc[j-1:j,:].values
 56 |     r00=rets.iloc[j-1:j,:].fillna(0).values
 57 |  
 58 |     wp=np.multiply(w0bar + np.dot(theta0.T,c0.T)/np.sum(~np.isnan(r0),axis=1),(1+r00))
 59 |     w=wbar + np.dot(theta0.T,c.T)/np.sum(~np.isnan(r),axis=1) 
 60 |  
 61 |     ##固定比列的交易成本
 62 |     lc= c/np.sum(~np.isnan(r),axis=1) - np.multiply(c0/np.sum(~np.isnan(r0),axis=1), 1+np.repeat(r00,c0.shape[1],axis=0).T)
 63 |     tc1=cr*np.dot(np.sign(w-wp),lc)
 64 |     
 65 |     return tc1.T
 66 | 
 67 | 
 68 | 
 69 | def power_utility(r,gamma=5):   ##越大越好
 70 |     return np.power(1+r,1-gamma)/(1-gamma)
 71 | 
 72 | def loss(rets,chs,weights,theta,gamma,lambda1,rho,cr, allow_short_selling,utility_function,cost_type):
 73 |     
 74 |     retsv=rets.fillna(0).values
 75 |     weightsv=weights.fillna(0).values 
 76 |  
 77 |     #是否允许卖空
 78 |     w=weightsv + np.vstack([np.dot(theta.T,chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values.T)/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values  for  j in range(len(rets)) ])     
 79 |     retw=w*retsv
 80 |     r= np.sum(retw ,axis=1)   #.reshape(1,len(wsum))
 81 |     
 82 |  
 83 |         
 84 |     #是否更换效用函数  
 85 |     if utility_function=='crra':
 86 |         utility=-np.mean(power_utility(r,gamma))+lambda1*rho*np.linalg.norm(theta,1)+lambda1*(1-rho)/2*np.linalg.norm(theta,2)
 87 |     elif utility_function=='MV':
 88 |         sigma=np.std(r,ddof=1)
 89 |         utility= gamma/2*sigma-np.mean(r)+lambda1*rho*np.linalg.norm(theta,1)+lambda1*(1-rho)/2*np.linalg.norm(theta,2)
 90 |  
 91 |     if cost_type==False:
 92 |         utility=utility
 93 |     elif cost_type==True:
 94 |         wp=w*(1+retsv)
 95 |         tc=np.mean(np.sum(np.abs(w[1:,:]-wp[:-1,:]),axis=1))  ##计算交易成本 
 96 |         utility=utility+cr*tc
 97 |     
 98 |     return utility
 99 |        
100 | 
def train(rets,chs,weights,gamma,lambda1,rho,cr, allow_short_selling,utility_function,cost_type):
    """Estimate the policy coefficients ``theta`` by repeated gradient steps.

    Parameters
    ----------
    rets : DataFrame, one row per date and one column per stock.
    chs : DataFrame with columns 'date', 'permno' and then the characteristics.
    weights : DataFrame of benchmark weights aligned with ``rets``.
    gamma : risk aversion.
    lambda1, rho : elastic-net strength and mixing (rho=1 is L1, rho=0 is L2).
    cr : cost rate, used only when ``cost_type`` is True.
    allow_short_selling : only forwarded to ``loss``.
    utility_function : 'crra' or 'MV'.
    cost_type : True to add the transaction-cost term from ``get_tc``.

    Returns
    -------
    ndarray with one row per characteristic: the last ``theta`` that was computed.

    Notes
    -----
    The number of characteristics is taken from the data passed in, not from
    the module-level ``ch``, so the function only depends on its arguments.
    """
    retsv=rets.fillna(0).values
    weightsv=weights.fillna(0).values
    # rcs[:, j]: sum over stocks of return * characteristic on date j, divided by the
    # number of stocks with a return -> one row per characteristic, one column per date.
    rcs=np.hstack([np.dot(retsv[j:j+1,:],chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values).T/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values  for  j in range(len(rets)) ])
    # rbs[0, j]: benchmark portfolio return on date j.
    rbs=np.hstack([np.dot(retsv[j:j+1,:],weightsv[j:j+1,:].T) for  j in range(len(rets))])
    sigmac=np.cov(rcs)

    cmean=np.mean(rcs,axis=1)
    cmeanm=np.vstack([cmean for x in range(rcs.shape[1])]).T
    # sample covariance between the benchmark return and every row of rcs
    sigmabc=np.dot(rbs-np.mean(rbs), (rcs-cmeanm).T)/(rcs.shape[1]-1)
    uc=cmean

    k=rcs.shape[0]  # number of characteristics (rows of rcs)
    theta0=np.ones((k,1))*1.5
    eps=10**(-8)

    t=1
    beta1=0.9
    beta2=0.999
    alpha=0.2  # learning rate


    utility0=100


    for u in range(100):
        print('batch',u)

        batch_size=2

        # Start rows of the mini-batches; they begin at 1 because get_tc needs row j-1.
        batch_starts=[start for start in range(1,len(rets)-batch_size,batch_size)]
        random.shuffle(batch_starts)

        # NOTE(review): m0 and v0 are reset here and never updated below, so every
        # step uses only the current gradient and no running Adam moments are kept.
        m0=0
        v0=0

        for p in batch_starts:
            print('第多少轮次梯度',t)

            # The utility part of the gradient is computed from the whole sample;
            # the batch start p only selects the dates of the cost term below.
            if utility_function=='crra':
                gra=-np.power(1+rbs+ np.dot(theta0.T,rcs),-gamma).dot(rcs.T).T + lambda1*rho*np.sign(theta0)+lambda1*(1-rho)*theta0
            elif utility_function=='MV':
                gra=gamma * np.dot(sigmac,theta0) + gamma * sigmabc.T -uc.reshape(len(uc),1)+ lambda1*rho*np.sign(theta0)+lambda1*(1-rho)*theta0

            if cost_type==False:   # transaction costs ignored
                gra=gra
            elif cost_type==True:  # add the cost term of the dates in this batch
                tc=0
                for j in range(p,p+batch_size):
                    tc1=get_tc(j,theta0,rets,chs,weights,cr)
                    tc=tc+tc1
                gra=gra+tc

            m=beta1*m0 +(1-beta1)*gra
            # v is the squared norm of the whole gradient (a 1x1 array), not element-wise squares
            v=beta2*v0+(1-beta2)*np.dot(gra.T,gra)

            beta1t=beta1**t
            beta2t=beta2**t

            mh=m/(1-beta1t)
            vh=v/(1-beta2t)

            theta= theta0 -alpha*mh/(np.sqrt(vh)+eps)
            utility=loss(rets,chs,weights,theta,gamma,lambda1,rho,cr, allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type)

            print('utility',utility)

            # NOTE(review): both breaks leave only the inner loop; the outer loop keeps
            # running, and after the first break the rejected ``theta`` (not theta0)
            # is what ends up being returned.
            if utility>utility0:
                break

            if np.linalg.norm(theta-theta0) <= 10**(-5) or np.linalg.norm(utility-utility0)<= 10**(-5)  :
                print(np.linalg.norm(utility-utility0))
                break

            theta0=theta
            utility0=utility
            t=t+1

    return theta
185 | 
186 | 
187 |  
188 | 
189 | def test(theta,rett,weightt,chs,allow_short_selling):   
190 |     
191 |     rets=rett
192 |     weightsv=weightt.fillna(0).values 
193 |  
194 |     #是否允许卖空
195 |     if  allow_short_selling==True: #允许卖空
196 |         w=weightsv + np.vstack([np.dot(theta.T,chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values.T)/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values  for  j in range(len(rets)) ])     
197 |         # r= rbs+ np.dot(theta.T,rcs)
198 |     elif  allow_short_selling==False: #不允许卖空
199 |         w=weightsv + np.vstack([np.dot(theta.T,chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values.T)/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values  for  j in range(len(rets)) ])     
200 |         w[w<0]=0             
201 |         wsum=np.sum(w,axis=1).reshape(w.shape[0],1).repeat(w.shape[1],axis=1)
202 |         w=w/wsum
203 |     
204 |     return w
205 |      
206 |  
207 | 
def get_weights(i,ret,ch,weight0,gamma,rho,cr,allow_short_selling,utility_function,cost_type):
    """Fit the policy on one rolling window and return weights for the next 12 rows.

    Parameters
    ----------
    i : int
        Row offset of the window inside ``ret`` / ``weight0``.
    ret : DataFrame
        Return panel; the index must be named 'date', columns are stocks.
    ch : DataFrame
        Long characteristics table keyed by the columns 'date' and 'permno'.
    weight0 : DataFrame
        Benchmark weights aligned row by row with ``ret`` (built by
        ``get_equal_weight`` in ``get_result``).
    gamma, rho, cr, allow_short_selling, utility_function, cost_type
        Forwarded unchanged to ``train`` / ``loss`` / ``test``.

    Returns
    -------
    DataFrame
        Weights for rows ``i+120 .. i+131``, indexed and labelled like the
        matching slice of ``weight0``.

    Side effects
    ------------
    When ``i == 0`` the module-level ``lambdaopt`` is chosen by 5-fold
    cross-validation and saved under ``result/parameter``. Later windows reuse
    that global, so the first call has to be made with ``i == 0``.
    """
    import os  # local import: only needed to create the output folder

    global lambdaopt

    trw=7   # presumably years of training data (12 rows per year)
    viw=3   # presumably years of validation data
    n_fit=12*(trw+viw)       # rows used to fit theta
    n_all=12*(trw+viw+1)     # fitting rows plus the 12 held-out rows

    # Fitting sample: benchmark (equal-weight) portfolio and returns.
    weights=weight0.iloc[i:i+n_fit,:]
    retm=ret.iloc[i:i+n_fit,:]

    # Characteristics for every (date, permno) pair of the whole window
    # (fitting rows and held-out rows); pairs missing from ``ch`` become NaN.
    rets1=pd.melt(ret.iloc[i:i+n_all,:].reset_index(),id_vars='date')
    rets1.columns=['date','permno','ret']
    rets1=rets1.sort_values(by=['date']).astype(float)
    chs=pd.merge(rets1[['date','permno']],ch,how='left',on=['date','permno'])

    # Keyword arguments shared by every train/loss call below.
    opts=dict(allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type)

    if i==0:  # the penalty strength is tuned on the first window only
        kf=KFold(n_splits=5)
        best_util=10000000  # smallest mean validation loss seen so far
        lambdaopt=0
        lambda_list=[0.001,0.0001]  # candidate penalty strengths
        for lambda1 in lambda_list:
            print(lambda1)

            # 5-fold cross-validation of this lambda
            util_list=[]
            for rets_index,retv_index in kf.split(retm):
                print(rets_index.shape,retv_index.shape)
                theta=train(retm.iloc[rets_index,:],chs,weights.iloc[rets_index,:],gamma,lambda1,rho,cr,**opts)
                util=loss(retm.iloc[retv_index,:],chs,weights.iloc[retv_index,:],theta,gamma,lambda1,rho,cr,**opts)
                util_list.append(util)

            util_mean=np.mean(util_list)
            if util_mean<best_util:
                lambdaopt=lambda1
                best_util=util_mean

        print('lambda',lambdaopt)
        # Make sure the folder exists so the tuned value is not lost after the CV run.
        os.makedirs('result/parameter',exist_ok=True)
        pd.DataFrame([lambdaopt],columns=['lambda1']).to_csv('result/parameter/RPP'+'_'+str(rho)+'_'+str(gamma)+'_'+str(allow_short_selling)+'_'+str(utility_function)+'_'+str(cost_type)+'.csv')

    # Final fit on the complete fitting sample with the tuned lambda.
    theta=train(retm,chs,weights,gamma,lambdaopt,rho,cr,**opts)
    print(theta)

    # Held-out rows
    rett=ret.iloc[i+n_fit:i+n_all,:]
    weightt=weight0.iloc[i+n_fit:i+n_all,:]
    wp=test(theta,rett,weightt,chs,allow_short_selling)
    return pd.DataFrame(wp,index=weightt.index,columns=weightt.columns)
258 |  
259 | 
def get_result(ret,ch,methodname,gamma,rho,cr, allow_short_selling,utility_function,cost_type):
    """Run the rolling-window procedure and write all held-out weights to one CSV.

    The window start moves in steps of 12 rows; each window uses 84 + 36 rows
    for fitting and the following 12 rows for evaluation (see ``get_weights``).
    The benchmark is the equal-weight portfolio of ``ret``.

    Nothing is returned. The stacked weights are written to
    ``result/middle/weight_all``; the file name ends with the module-level
    ``lambdaopt`` appended directly after ``cost_type`` (no separator), which is
    kept as is so existing result files keep their names.
    """
    import os  # local import: only needed to create the output folder

    trww=7*12
    viww=3*12
    teww=1*12
    weight0=get_equal_weight(ret)

    # Collect the per-window frames and concatenate once instead of re-copying
    # the growing result on every iteration.
    pieces=[]
    for i in range(0,len(ret)-trww-viww-teww+1,12):
        print(i)
        pieces.append(get_weights(i,ret,ch,weight0,gamma,rho,cr, allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type))
    weight=pd.concat(pieces,axis=0,join='outer') if pieces else pd.DataFrame()

    os.makedirs('result/middle/weight_all',exist_ok=True)
    weight.to_csv('result/middle/weight_all/weight_'+methodname+'_'+str(rho)+'_'+str(gamma)+'_'+str(allow_short_selling)+'_'+str(utility_function)+'_'+str(cost_type)+str(lambdaopt)+'.csv')
271 | 
272 | 
#%%# Required data and driver code
# Returns are divided by 100 after loading (presumably stored in percent -- confirm).
ret=pd.read_csv('data/ret_clean.csv',index_col=0).astype(float)/100 # load returns
ret.index.name='date'
ch=pd.read_csv('data/char.csv',index_col=0).astype(float) # load characteristics
ch=ch.sort_values(by=['date','permno'],ascending= True)

# Each entry is [methodname, rho, gamma, cr, allow_short_selling, utility_function, cost_type].
# 1. weights for the main result  2. weights under the short-selling constraint  3. weights with risk aversion equal to 10
para_list=[['elasticnet',0.5,5,0.005,True,'MV',False],['elasticnet',0.5,5,0.005,False,'MV',False],['elasticnet',0.5,10,0.005,True,'MV',False]]
for para in para_list:
    get_result(ret,ch,methodname=para[0],rho=para[1],gamma=para[2],cr=para[3], allow_short_selling=para[4],utility_function=para[5],cost_type=para[6])
283 | 
284 | 
285 |  
286 |  
287 | 
288 | 
289 | 
290 | 
291 | 
292 | 
293 | 
294 | 
295 | 
296 | 
297 | 
298 | 
299 | 
300 | 
301 | 
302 | 


--------------------------------------------------------------------------------
/05PCA_AC.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Sun Nov 27 22:05:40 2022
  4 | 
  5 | @author: whufi
  6 | """
  7 | import os
  8 | import numpy as np
  9 | import random
 10 | import pandas as pd
 11 | import warnings
 12 | from sklearn.preprocessing import StandardScaler
 13 | from sklearn.decomposition import PCA
 14 | random.seed(135)
 15 | warnings.filterwarnings("ignore")
 16 | # os.chdir(r'E:\02实验\98ML-AC-code')  ##设置文件路径
 17 |  
 18 | 
#%% Load the data
# Returns are divided by 100 after loading (presumably stored in percent -- confirm).
ret=pd.read_csv('data/ret_clean.csv',index_col=0).astype(float)/100 # load returns
ret.index.name='date'
ch=pd.read_csv('data/char.csv',index_col=0).astype(float) # load characteristics
ch=ch.sort_values(by=['date','permno'],ascending= True)

#%%# PCA dimension reduction
# The first two columns of ch are skipped; the remaining columns are
# standardised and reduced with PCA fitted on all rows at once.
df = StandardScaler().fit_transform(ch.iloc[:,2:])
pca = PCA(n_components=0.8)  # a float in (0,1): keep enough components for 80% of the variance
pc=pca.fit_transform(df)  # author's note on the resulting shape: (411424, 39)

# Cross-sectionally standardise the PCA factors
# Re-attach the 'permno' and 'date' keys of ch to the component matrix.
pcdf=pd.DataFrame(pc)
pcdf.index=ch['permno']
pcdf=pcdf.reset_index()
pcdf.index=ch['date']
pcdf=pcdf.reset_index()

# Long list of (date, permno) pairs that have a return.
rr=ret.reset_index() 
rm=pd.melt(rr,id_vars=rr.columns[0])
rm.columns=['date','permno','ret']      
rm=rm.dropna()  

ic= rm[['date','permno']].astype(float)
col=pd.DataFrame(columns=ret.columns.astype(float))   # empty frame carrying the stock columns of ret
ind=pd.DataFrame(index=ret.index.astype(float))       # not used in the code below

data=rm[['date','permno']].astype(float)

# One pass per principal component (columns 2.. of pcdf).
for i in range(2,pcdf.shape[1]): #
    print(i)
    fm=pcdf[['date','permno',pcdf.columns[i]]]
    # keep the component only for pairs that have a return, then go to date x permno
    fr=pd.merge(ic,fm,how='left',on=['date','permno'])
    fp=pd.pivot(fr,index='date',columns='permno')
    fp=fp.droplevel(None,axis=1)
    fp.columns=fp.columns.astype(float)
    
    # restrict to, then pad out to, the stock columns of ret
    f_dp=pd.concat([col,fp],axis=0,join='inner')   
    f_dp=pd.concat([col,f_dp],axis=0,join='outer')
 
    # Standardise each date's cross-section to mean 0 and variance 1; when all
    # available values on a date are equal the lambda returns min-max, i.e. 0
    # (original note: a cross-section with a single value is set to 0).
    f_s=f_dp.T.apply(lambda x:  (x-x.mean())/x.std() if (x.min() !=x.max()) else x.min()-x.max() )
    f_f=f_s.T.reset_index()      
    fm=pd.melt(f_f,id_vars='index').astype(float)
    fm.columns=['date','permno',pcdf.columns[i]]
    data=pd.merge(data,fm,how='left',on=['date','permno'])
    
data=data.fillna(0)  # stocks with a return but a missing characteristic get the cross-sectional mean, 0
data.to_csv('data/char_pca.csv') # save the PCA-reduced characteristics
 67 | 
 68 | 
 69 | 
 70 | 
 71 |  
 72 | 
 73 | #%%#等权投资组合
 74 | def get_equal_weight(ret):
 75 |     ret_m=pd.melt(ret.reset_index(),id_vars=ret.reset_index().columns[0])
 76 |     ret_m.columns=['date','permno','ret']
 77 |     ret_m['month']=ret_m['date'].apply(lambda x:str(x)[:6])
 78 |     ret_m=ret_m.dropna(axis=0).astype(float)
 79 |  
 80 |     count=ret_m.groupby(['date'])['ret'].count().reset_index()
 81 |     retd=pd.merge(ret_m,count,on=['date'],how='left')
 82 |     
 83 |     retd['weight']=1/retd['ret_y']
 84 |     weight1=pd.pivot(retd,index='date',columns='permno',values='weight')
 85 |     weight2=pd.concat([pd.DataFrame(columns=ret.columns.astype(float)),weight1],axis=0,join='outer')
 86 |     weight2=pd.concat([pd.DataFrame(index=ret.index),weight2],axis=1,join='outer')
 87 |  
 88 |     weight2.columns=weight2.columns.astype(float)    
 89 | 
 90 |     return  weight2
 91 | 
 92 |  
 93 | 
 94 | 
 95 | def get_tc(j,theta0,rets,chs,weights,cr=0.005):
 96 |     '''
 97 |     j:从1开始，假设第一期到第二期无交易费用
 98 |     cr:表示费率，一般取0.005
 99 |     '''
100 |     
101 |     r=rets.iloc[j:j+1,:].values
102 |     c=chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values
103 |     wbar=weights.iloc[j:j+1,:].fillna(0).values
104 |             
105 |     w0bar=weights.iloc[j-1:j,:].fillna(0).values
106 |     c0=chs[chs.date==rets.index[j-1]].sort_values('permno').iloc[:,2:].fillna(0).values
107 |     r0=rets.iloc[j-1:j,:].values
108 |     r00=rets.iloc[j-1:j,:].fillna(0).values
109 |  
110 |     wp=np.multiply(w0bar + np.dot(theta0.T,c0.T)/np.sum(~np.isnan(r0),axis=1),(1+r00))
111 |     w=wbar + np.dot(theta0.T,c.T)/np.sum(~np.isnan(r),axis=1) 
112 |     
113 |  
114 |     ##固定比列的交易成本
115 |     lc= c/np.sum(~np.isnan(r),axis=1) - np.multiply(c0/np.sum(~np.isnan(r0),axis=1), 1+np.repeat(r00,c0.shape[1],axis=0).T)
116 |     tc1=cr*np.dot(np.sign(w-wp),lc)
117 |     
118 |     return tc1.T
119 | 
120 | 
121 | 
def power_utility(r,gamma=5):
    """CRRA (power) utility of a return ``r``; larger is better.

    Returns ``(1+r)**(1-gamma)/(1-gamma)``. For ``gamma == 1`` that expression
    divides by zero, so the log-utility case ``log(1+r)`` is used instead.
    ``r`` may be a scalar or an array.
    """
    if np.isscalar(gamma) and gamma==1:
        return np.log(1+r)
    return np.power(1+r,1-gamma)/(1-gamma)
124 | 
def loss(rets,chs,weights,theta,gamma,lambda1,rho,cr, allow_short_selling,utility_function,cost_type):
    """Penalised objective of the policy ``theta`` on a sample; lower is better.

    Parameters
    ----------
    rets : DataFrame, one row per date and one column per stock.
    chs : DataFrame with columns 'date', 'permno' and then the characteristics.
    weights : DataFrame of benchmark weights aligned with ``rets``.
    theta : array with one row per characteristic.
    gamma : risk aversion.
    lambda1, rho : elastic-net strength and mixing (rho=1 pure L1, rho=0 pure L2).
    cr : proportional cost rate, used only when ``cost_type`` is True.
    allow_short_selling : if false, negative weights are set to zero and every
        row is rescaled to sum to one before the return is computed.
    utility_function : 'crra' or 'MV'.
    cost_type : True to add the average turnover cost.

    Returns
    -------
    float
        'MV'  : gamma/2 * sample variance - mean of the portfolio return
        'crra': minus the mean power utility of the portfolio return
        plus lambda1*rho*||theta||_1 + lambda1*(1-rho)/2*||theta||_2^2.
        The variance and the squared L2 norm are the quantities whose gradient
        ``train`` uses, so this value is the objective that is being descended.

    Raises
    ------
    ValueError
        If ``utility_function`` is neither 'crra' nor 'MV'.
    """
    retsv=rets.fillna(0).values
    weightsv=weights.fillna(0).values
    counts=rets.notna().sum(axis=1).values  # stocks with a return on each date

    # Policy weights: benchmark + theta' * characteristics / N_t, one row per date.
    tilt=np.vstack([
        np.dot(theta.T,chs[chs.date==date].sort_values('permno').iloc[:,2:].fillna(0).values.T)/counts[j]
        for j,date in enumerate(rets.index)
    ])
    w=weightsv+tilt

    if not allow_short_selling:
        w[w<0]=0
        w=w/np.sum(w,axis=1,keepdims=True)

    r=np.sum(w*retsv,axis=1)  # portfolio return per date

    penalty=lambda1*rho*np.sum(np.abs(theta))+lambda1*(1-rho)/2*np.sum(np.square(theta))

    if utility_function=='crra':
        utility=-np.mean(power_utility(r,gamma))+penalty
    elif utility_function=='MV':
        utility=gamma/2*np.var(r,ddof=1)-np.mean(r)+penalty
    else:
        raise ValueError("utility_function must be 'crra' or 'MV', got %r" % (utility_function,))

    if cost_type==True:
        # turnover between the drifted weights of one date and the weights of the next
        wp=w*(1+retsv)
        tc=np.mean(np.sum(np.abs(w[1:,:]-wp[:-1,:]),axis=1))
        utility=utility+cr*tc

    return utility
160 |        
161 | 
def train(rets,chs,weights,gamma,lambda1,rho,cr, allow_short_selling,utility_function,cost_type):
    """Estimate the policy coefficients ``theta`` by repeated gradient steps.

    Parameters
    ----------
    rets : DataFrame, one row per date and one column per stock.
    chs : DataFrame with columns 'date', 'permno' and then the characteristics.
    weights : DataFrame of benchmark weights aligned with ``rets``.
    gamma : risk aversion.
    lambda1, rho : elastic-net strength and mixing (rho=1 is L1, rho=0 is L2).
    cr : cost rate, used only when ``cost_type`` is True.
    allow_short_selling : only forwarded to ``loss``.
    utility_function : 'crra' or 'MV'.
    cost_type : True to add the transaction-cost term from ``get_tc``.

    Returns
    -------
    ndarray with one row per characteristic: the last ``theta`` that was computed.

    Notes
    -----
    The number of characteristics is taken from the data passed in, not from
    the module-level ``ch``, so the function only depends on its arguments.
    """
    retsv=rets.fillna(0).values
    weightsv=weights.fillna(0).values
    # rcs[:, j]: sum over stocks of return * characteristic on date j, divided by the
    # number of stocks with a return -> one row per characteristic, one column per date.
    rcs=np.hstack([np.dot(retsv[j:j+1,:],chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values).T/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values  for  j in range(len(rets)) ])
    # rbs[0, j]: benchmark portfolio return on date j.
    rbs=np.hstack([np.dot(retsv[j:j+1,:],weightsv[j:j+1,:].T) for  j in range(len(rets))])
    sigmac=np.cov(rcs)

    cmean=np.mean(rcs,axis=1)
    cmeanm=np.vstack([cmean for x in range(rcs.shape[1])]).T
    # sample covariance between the benchmark return and every row of rcs
    sigmabc=np.dot(rbs-np.mean(rbs), (rcs-cmeanm).T)/(rcs.shape[1]-1)
    uc=cmean

    k=rcs.shape[0]  # number of characteristics (rows of rcs)
    theta0=np.ones((k,1))*1.5
    eps=10**(-8)

    t=1
    beta1=0.9
    beta2=0.999
    alpha=0.2  # learning rate

    utility0=100


    for u in range(100):
        print('batch',u)
        batch_size=2
        # Start rows of the mini-batches; they begin at 1 because get_tc needs row j-1.
        batch_starts=[start for start in range(1,len(rets)-batch_size,batch_size)]
        random.shuffle(batch_starts)

        # NOTE(review): m0 and v0 are reset here and never updated below, so every
        # step uses only the current gradient and no running Adam moments are kept.
        m0=0
        v0=0

        for p in batch_starts:
            print('第多少轮次梯度',t)
            # The utility part of the gradient is computed from the whole sample;
            # the batch start p only selects the dates of the cost term below.
            if utility_function=='crra':
                gra=-np.power(1+rbs+ np.dot(theta0.T,rcs),-gamma).dot(rcs.T).T + lambda1*rho*np.sign(theta0)+lambda1*(1-rho)*theta0
            elif utility_function=='MV':
                gra=gamma * np.dot(sigmac,theta0) + gamma * sigmabc.T -uc.reshape(len(uc),1)+ lambda1*rho*np.sign(theta0)+lambda1*(1-rho)*theta0

            if cost_type==False:   # transaction costs ignored
                gra=gra
            elif cost_type==True:  # add the cost term of the dates in this batch
                tc=0
                for j in range(p,p+batch_size):
                    tc1=get_tc(j,theta0,rets,chs,weights,cr)
                    tc=tc+tc1
                gra=gra+tc

            m=beta1*m0 +(1-beta1)*gra
            # v is the squared norm of the whole gradient (a 1x1 array), not element-wise squares
            v=beta2*v0+(1-beta2)*np.dot(gra.T,gra)

            beta1t=beta1**t
            beta2t=beta2**t

            mh=m/(1-beta1t)
            vh=v/(1-beta2t)

            theta= theta0 -alpha*mh/(np.sqrt(vh)+eps)

            utility=loss(rets,chs,weights,theta,gamma,lambda1,rho,cr, allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type)
            print('utility',utility)

            # NOTE(review): both breaks leave only the inner loop; the outer loop keeps
            # running, and after the first break the rejected ``theta`` (not theta0)
            # is what ends up being returned.
            if utility>utility0:
                break

            if np.linalg.norm(theta-theta0) <= 10**(-5) or np.linalg.norm(utility-utility0)<= 10**(-5)  :
                print(np.linalg.norm(utility-utility0))
                break

            theta0=theta
            utility0=utility
            t=t+1

    return theta
239 | 
240 | 
241 |  
242 | 
243 | def test(theta,rett,weightt,chs,allow_short_selling):   
244 |     
245 |     rets=rett
246 |     weightsv=weightt.fillna(0).values 
247 |  
248 |     #是否允许卖空
249 |     if  allow_short_selling==True: #允许卖空
250 |         w=weightsv + np.vstack([np.dot(theta.T,chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values.T)/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values  for  j in range(len(rets)) ])     
251 |  
252 |     elif  allow_short_selling==False: #不允许卖空
253 |         w=weightsv + np.vstack([np.dot(theta.T,chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values.T)/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values  for  j in range(len(rets)) ])     
254 |         w[w<0]=0             
255 |         wsum=np.sum(w,axis=1).reshape(w.shape[0],1).repeat(w.shape[1],axis=1)
256 |         w=w/wsum
257 |     
258 |     return w
259 |      
260 | 
261 |  
def get_weights(i,ret,ch,weight0,gamma,rho,cr,allow_short_selling,utility_function,cost_type):
    """Fit the policy on one rolling window and return weights for the next 12 rows.

    Rows ``i .. i+119`` of ``ret`` / ``weight0`` are used to fit ``theta`` with
    ``train`` (penalty strength fixed at 0); rows ``i+120 .. i+131`` are passed
    to ``test``. The result is a DataFrame indexed and labelled like the
    matching slice of ``weight0``. ``ret`` must have its index named 'date' and
    ``ch`` must carry the key columns 'date' and 'permno'.
    """
    trw=7
    viw=3
    fit_rows=12*(trw+viw)        # rows used to fit theta
    window_rows=12*(trw+viw+1)   # fitting rows plus the 12 held-out rows

    # Fitting sample: benchmark weights and returns
    fit_weights=weight0.iloc[i:i+fit_rows,:]
    fit_returns=ret.iloc[i:i+fit_rows,:]

    # Characteristics for every (date, permno) pair of the whole window
    long_ret=pd.melt(ret.iloc[i:i+window_rows,:].reset_index(),id_vars='date')
    long_ret.columns=['date','permno','ret']
    long_ret=long_ret.sort_values(by=['date']).astype(float)
    chs=pd.merge(long_ret[['date','permno']],ch,how='left',on=['date','permno'])

    no_penalty=0
    theta=train(fit_returns,chs,fit_weights,gamma,no_penalty,rho,cr, allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type)
    print(theta)

    # Held-out rows
    rett=ret.iloc[i+fit_rows:i+window_rows,:]
    weightt=weight0.iloc[i+fit_rows:i+window_rows,:]
    held_out=test(theta,rett,weightt,chs,allow_short_selling)
    return pd.DataFrame(held_out,index=weightt.index,columns=weightt.columns)
285 |  
286 | 
def get_result(ret,ch,methodname,gamma,rho,cr, allow_short_selling,utility_function,cost_type):
    """Run the rolling-window procedure and write all held-out weights to one CSV.

    The window start moves in steps of 12 rows; each window uses 84 + 36 rows
    for fitting and the following 12 rows for evaluation (see ``get_weights``).
    The benchmark is the equal-weight portfolio of ``ret``.

    Nothing is returned. The stacked weights are written to
    ``result/middle/weight_all``.
    """
    import os  # local import: only needed to create the output folder

    trww=7*12
    viww=3*12
    teww=1*12
    weight0=get_equal_weight(ret)

    # Collect the per-window frames and concatenate once instead of re-copying
    # the growing result on every iteration.
    pieces=[]
    for i in range(0,len(ret)-trww-viww-teww+1,12):
        print(i)
        pieces.append(get_weights(i,ret,ch,weight0,gamma,rho,cr, allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type))
    weight=pd.concat(pieces,axis=0,join='outer') if pieces else pd.DataFrame()

    os.makedirs('result/middle/weight_all',exist_ok=True)
    weight.to_csv('result/middle/weight_all/weight_'+methodname+'_'+str(rho)+'_'+str(gamma)+'_'+str(allow_short_selling)+'_'+str(utility_function)+'_'+str(cost_type)+'.csv')
298 | 
299 | 
#%%# Required data and driver code
# Returns are divided by 100 after loading (presumably stored in percent -- confirm).
ret=pd.read_csv('data/ret_clean.csv',index_col=0).astype(float)/100 # load returns
ret.index.name='date'
# The characteristics are the PCA factors written to data/char_pca.csv earlier in this file.
ch=pd.read_csv('data/char_pca.csv',index_col=0).astype(float) # load characteristics
ch=ch.sort_values(by=['date','permno'],ascending= True)
 
# Each entry is [methodname, rho, gamma, cr, allow_short_selling, utility_function, cost_type].
# 1. weights for the main result  2. weights under the short-selling constraint  3. weights with risk aversion equal to 10
para_list=[['PCA',0,5,0.005,True,'MV',False],['PCA',0,5,0.005,False,'MV',False],['PCA',0,10,0.005,True,'MV',False] ]
for para in para_list:
    get_result(ret,ch,methodname=para[0],rho=para[1],gamma=para[2],cr=para[3], allow_short_selling=para[4],utility_function=para[5],cost_type=para[6])
310 | 
311 |  
312 | 
313 |  


--------------------------------------------------------------------------------
/06RPPCA_AC.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Sun Nov 13 15:56:43 2022
  4 | 
  5 | @author: whufi
  6 | """
  7 |  
  8 | import os 
  9 | import numpy as np
 10 | import random
 11 | import pandas as pd
 12 | import warnings
 13 | random.seed(135)
 14 | warnings.filterwarnings("ignore")
 15 | # os.chdir(r'E:\02实验\98ML-AC-code')  ##设置文件路径
 16 | 
#%% Load the data
# Returns are divided by 100 after loading (presumably stored in percent -- confirm).
ret=pd.read_csv('data/ret_clean.csv',index_col=0).astype(float)/100 # load returns
ret.index.name='date'
ch=pd.read_csv('data/char.csv',index_col=0).astype(float) # load characteristics
ch=ch.sort_values(by=['date','permno'],ascending= True)

#%%# RP-PCA dimension reduction
gamma=20  # weight of the mean term in RP_PCA below
num=39  # number of components; author's note: 39 is the count at which plain PCA explains more than 0.8 of the variance
 26 | def RP_PCA(chara,gamma,num):
 27 |     '''
 28 |     计算方式参考：Factors That Fit the Time Series and Cross-Section of Stock Returns
 29 |     gamma:用来控制RP_PCA中对于一阶的均值考虑的比重
 30 |     num:用来控制主成分的个数 
 31 |     chara:T*N,N为特征的个数，T为样本数
 32 |     返回降维后的特征
 33 |     '''
 34 |     T,N=chara.shape
 35 |     chmean=chara.mean()
 36 |     sigma=np.dot(chara.T,chara)/T+gamma* np.dot(np.array(chmean).reshape(N,1),np.array(chmean).reshape(1,N))
 37 | 
 38 |     def eigen(A):
 39 |         '''
 40 |         用来计算矩阵的特征分解、并根绝特征值大小对特征向量进行排序
 41 |         '''
 42 |         eigenValues, eigenVectors = np.linalg.eig(A)
 43 |         idx = eigenValues.argsort()[::-1]  
 44 |         eigenValues = eigenValues[idx]
 45 |         eigenVectors = eigenVectors[:,idx]
 46 |         return (eigenValues, eigenVectors)
 47 |     
 48 |     vals,vecs =eigen(sigma)
 49 |     gam=vecs[:,:num]
 50 |     factor=np.dot(chara,gam).dot(np.linalg.inv(np.dot(gam.T,gam)))
 51 |     return factor
 52 |  
 53 | 
# The first two columns of ch are skipped; the remaining columns are reduced with RP_PCA.
chara=ch.iloc[:,2:]
pc=RP_PCA(chara,gamma,num)

# Cross-sectionally standardise the reduced factors
# Re-attach the 'permno' and 'date' keys of ch to the component matrix.
pcdf=pd.DataFrame(pc,index=ch['permno'])
pcdf=pcdf.reset_index()
pcdf.index=ch['date']
pcdf=pcdf.reset_index()

# Long list of (date, permno) pairs that have a return.
rr=ret.reset_index() 
rm=pd.melt(rr,id_vars=rr.columns[0])
rm.columns=['date','permno','ret']      
rm=rm.dropna()  

ic= rm[['date','permno']].astype(float)
col=pd.DataFrame(columns=ret.columns.astype(float))   # empty frame carrying the stock columns of ret
ind=pd.DataFrame(index=ret.index.astype(float))       # not used in the code shown here

data=rm[['date','permno']].astype(float)

# One pass per component (columns 2.. of pcdf).
for i in range(2,pcdf.shape[1]): #
    print(i)
    fm=pcdf[['date','permno',pcdf.columns[i]]]
    # keep the component only for pairs that have a return, then go to date x permno
    fr=pd.merge(ic,fm,how='left',on=['date','permno'])
    fp=pd.pivot(fr,index='date',columns='permno')
    fp=fp.droplevel(None,axis=1)
    fp.columns=fp.columns.astype(float)
    
    # restrict to, then pad out to, the stock columns of ret
    f_dp=pd.concat([col,fp],axis=0,join='inner')   
    f_dp=pd.concat([col,f_dp],axis=0,join='outer')
 

    # Standardise each date's cross-section to mean 0 and variance 1; when all
    # available values on a date are equal the lambda returns min-max, i.e. 0
    # (original note: a cross-section with a single value is set to 0).
    f_s=f_dp.T.apply(lambda x:  (x-x.mean())/x.std() if (x.min() !=x.max()) else x.min()-x.max() )
    f_f=f_s.T.reset_index()   # missing values are not filled here; the author notes that filling would lower the weights
    fm=pd.melt(f_f,id_vars='index').astype(float)
    fm.columns=['date','permno',pcdf.columns[i]]
 
    data=pd.merge(data,fm,how='left',on=['date','permno'])
    
data=data.fillna(0)  # stocks with a return but a missing characteristic get the cross-sectional mean, 0
data.to_csv('data/char_rppca.csv')
 95 | 
 96 | 
 97 | 
 98 |  
 99 | ##等权投资组合
def get_equal_weight(ret):
    '''
    Build the equal-weight benchmark portfolio.

    ret: wide return table (rows = dates, columns = permno); NaN marks a stock
         without a return on that date.

    Returns a date x permno DataFrame. A stock that has a return on a date gets
    1/(number of stocks with a return on that date); every other cell is NaN.
    The columns are the float-cast columns of ``ret`` and the rows cover the
    dates of ``ret``.
    '''
    wide=ret.reset_index()
    long_ret=pd.melt(wide,id_vars=wide.columns[0])
    long_ret.columns=['date','permno','ret']
    # Only observed returns take part in the portfolio.
    long_ret=long_ret.dropna(axis=0).astype(float)

    # Number of stocks with a return on each date, broadcast back onto every row
    # (replaces a separate count table merged back in).
    n_valid=long_ret.groupby('date')['ret'].transform('count')
    long_ret['weight']=1/n_valid

    weight1=pd.pivot(long_ret,index='date',columns='permno',values='weight')
    # Re-introduce permnos that never had a return (all-NaN columns) ...
    weight2=pd.concat([pd.DataFrame(columns=ret.columns.astype(float)),weight1],axis=0,join='outer')
    # ... and dates on which no stock had a return (all-NaN rows).
    weight2=pd.concat([pd.DataFrame(index=ret.index),weight2],axis=1,join='outer')
    weight2.columns=weight2.columns.astype(float)

    return  weight2
116 | 
117 |  
def get_tc(j,theta0,rets,chs,weights,cr=0.005):
    '''
    Transaction-cost term added to the gradient for period j.

    j:       row position in ``rets``; starts at 1 because period j-1 is also
             read (the first period is assumed to trade at no cost)
    theta0:  policy coefficients (created as a (K,1) array in ``train``)
    rets:    date x stock return table
    chs:     long table; columns from the third onwards are the characteristics
    weights: date x stock benchmark weights
    cr:      cost rate, usually 0.005

    Returns the transposed product of the trade signs and the characteristic
    differences, scaled by ``cr``.
    '''
    def chars_on(pos):
        # characteristics of the date at row `pos`, ordered by permno, NaN -> 0
        same_day=chs[chs.date==rets.index[pos]]
        return same_day.sort_values('permno').iloc[:,2:].fillna(0).values

    ret_now=rets.iloc[j:j+1,:].values
    ret_prev=rets.iloc[j-1:j,:].values
    ret_prev_filled=rets.iloc[j-1:j,:].fillna(0).values

    x_now=chars_on(j)
    x_prev=chars_on(j-1)

    bench_now=weights.iloc[j:j+1,:].fillna(0).values
    bench_prev=weights.iloc[j-1:j,:].fillna(0).values

    # number of stocks with a return in each of the two periods
    n_now=np.sum(~np.isnan(ret_now),axis=1)
    n_prev=np.sum(~np.isnan(ret_prev),axis=1)

    # previous weights after they drifted with last period's returns
    drifted=(bench_prev + theta0.T @ x_prev.T / n_prev) * (1+ret_prev_filled)
    # weights the policy asks for now
    target=bench_now + theta0.T @ x_now.T / n_now

    ## proportional transaction cost
    # ret_prev_filled.T is a column, so it scales every characteristic of a stock alike
    char_gap=x_now/n_now - (x_prev/n_prev) * (1+ret_prev_filled.T)
    cost_grad=cr*(np.sign(target-drifted) @ char_gap)

    return cost_grad.T
142 | 
143 | 
144 | 
def power_utility(r,gamma=5):   ## larger is better
    '''CRRA (power) utility of the gross return 1+r with risk aversion ``gamma``.'''
    curvature=1-gamma
    return np.power(1+r,curvature)/curvature
147 | 
def loss(rets,chs,weights,theta,gamma,lambda1,rho,cr, allow_short_selling,utility_function,cost_type):
    '''
    Objective value (smaller is better) of the parametric portfolio policy.

    rets:    date x stock return table (NaN = no return)
    chs:     long table; columns from the third onwards are the characteristics,
             one row per stock and date, matched to ``rets`` columns by sorting on permno
    weights: date x stock benchmark weights
    theta:   policy coefficients, one per characteristic
    gamma:   risk aversion
    lambda1: penalty strength; rho=1 gives a pure L1 penalty, rho=0 a pure L2 penalty
    cr:      cost rate applied to turnover when ``cost_type`` is true
    allow_short_selling: when false, negative weights are set to 0 and each
             date is rescaled to sum to one
    utility_function: 'crra' or 'MV'
    cost_type: add the turnover cost when true

    The 'MV' branch penalises the variance and the L2 term is the squared norm,
    so that the value matches the gradient used in ``train``
    (gamma*sigmac*theta and lambda1*(1-rho)*theta).

    Raises ValueError for an unknown ``utility_function``.
    '''
    retsv=rets.fillna(0).values
    weightsv=weights.fillna(0).values

    # Per-date tilt theta'x_t / N_t, with N_t the number of stocks that have a return.
    tilt=np.vstack([
        np.dot(theta.T,chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values.T)
        /np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values
        for j in range(len(rets))
    ])
    w=weightsv+tilt

    if not allow_short_selling:
        # short-selling constraint: floor at zero, then renormalise every date
        w[w<0]=0
        w=w/np.sum(w,axis=1,keepdims=True)

    r=np.sum(w*retsv,axis=1)   # portfolio return per date

    # elastic-net penalty: lambda1*(rho*|theta|_1 + (1-rho)/2*|theta|_2^2)
    penalty=lambda1*rho*np.sum(np.abs(theta))+lambda1*(1-rho)/2*np.sum(np.square(theta))

    if utility_function=='crra':
        utility=-np.mean(power_utility(r,gamma))+penalty
    elif utility_function=='MV':
        utility=gamma/2*np.var(r,ddof=1)-np.mean(r)+penalty
    else:
        raise ValueError("utility_function must be 'crra' or 'MV', got %r" % (utility_function,))

    if cost_type:
        # turnover between the weights after they drifted with returns and the next target
        wp=w*(1+retsv)
        tc=np.mean(np.sum(np.abs(w[1:,:]-wp[:-1,:]),axis=1))
        utility=utility+cr*tc

    return utility
183 |        
184 | 
def train(rets,chs,weights,gamma,lambda1,rho,cr, allow_short_selling,utility_function,cost_type):  ## rho=1: L1; rho=0: L2; cr: cost rate
    '''
    Fit the policy coefficients theta by gradient steps on the objective in ``loss``.

    rets:    date x stock return table of the fitting window
    chs:     long table; columns from the third onwards are the characteristics
    weights: date x stock benchmark weights of the fitting window
    gamma:   risk aversion
    lambda1, rho: elastic-net penalty strength and L1 share
    cr:      cost rate, used only when ``cost_type`` is true
    allow_short_selling, utility_function, cost_type: forwarded to ``loss``

    Returns the last theta that was evaluated, shape (K,1); the starting value
    when the window is too short to form a single batch.

    The number of characteristics is taken from the inputs (rows of ``rcs``),
    not from a module-level table.

    NOTE(review): m0 and v0 are reset to 0 at the start of every epoch and never
    reassigned, so the moment estimates carry no history between steps.
    NOTE(review): a ``break`` only leaves the inner loop; the remaining epochs
    still run, and the theta returned after an "objective got worse" break is
    the rejected candidate, not theta0. Confirm both are intended.
    '''
    retsv=rets.fillna(0).values
    weightsv=weights.fillna(0).values
    # column t of rcs: characteristic-weighted return r_t'x_t / N_t   -> (K,T)
    rcs=np.hstack([np.dot(retsv[j:j+1,:],chs[chs.date==rets.index[j]].sort_values('permno').iloc[:,2:].fillna(0).values).T/np.sum(~np.isnan(rets.iloc[j:j+1,:]),axis=1).values  for  j in range(len(rets)) ])
    # entry t of rbs: benchmark portfolio return                      -> (1,T)
    rbs=np.hstack([np.dot(retsv[j:j+1,:],weightsv[j:j+1,:].T) for  j in range(len(rets))])
    sigmac=np.cov(rcs)

    cmean=np.mean(rcs,axis=1)
    cmeanm=np.vstack([cmean for x in range(rcs.shape[1])]).T
    # covariance between the benchmark return and each characteristic return
    sigmabc=np.dot(rbs-np.mean(rbs), (rcs-cmeanm).T)/(rcs.shape[1]-1)
    uc=cmean

    k=rcs.shape[0]  ## number of characteristics
    theta0=np.ones((k,1))*1.5
    theta=theta0    # returned unchanged if no batch is ever processed
    eps=10**(-8)

    t=1
    beta1=0.9
    beta2=0.999
    alpha=0.2 ## learning rate


    utility0=100


    for u in range(100):
        print('batch',u)

        batch_size=2

        batch_starts=[start for start in range(1,len(rets)-batch_size,batch_size)]
        random.shuffle(batch_starts)

        m0=0
        v0=0

        for p in batch_starts:
            print('第多少轮次梯度',t)


            if utility_function=='crra':
                gra=-np.power(1+rbs+ np.dot(theta0.T,rcs),-gamma).dot(rcs.T).T + lambda1*rho*np.sign(theta0)+lambda1*(1-rho)*theta0   ## gradient
            elif utility_function=='MV':
                gra=gamma * np.dot(sigmac,theta0) + gamma * sigmabc.T -uc.reshape(len(uc),1)+ lambda1*rho*np.sign(theta0)+lambda1*(1-rho)*theta0   ## gradient

            if cost_type==True:  # add the transaction-cost term of the periods in this batch
                tc=0
                for j in range(p,p+batch_size):
                    tc1=get_tc(j,theta0,rets,chs,weights,cr)
                    tc=tc+tc1
                gra=gra+tc

            m=beta1*m0 +(1-beta1)*gra
            # second moment is the scalar squared gradient norm, not an element-wise square
            v=beta2*v0+(1-beta2)*np.dot(gra.T,gra)

            beta1t=beta1**t
            beta2t=beta2**t

            mh=m/(1-beta1t)
            vh=v/(1-beta2t)

            theta= theta0 -alpha*mh/(np.sqrt(vh)+eps)

            utility=loss(rets,chs,weights,theta,gamma,lambda1,rho,cr, allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type)
            print('utility',utility)

            # stop this epoch as soon as the objective gets worse
            if utility>utility0:
                break

            # ... or when the step or the improvement becomes negligible
            if np.linalg.norm(theta-theta0) <= 10**(-5) or np.linalg.norm(utility-utility0)<= 10**(-5)  :
                print(np.linalg.norm(utility-utility0))
                break

            theta0=theta
            utility0=utility
            t=t+1

    return theta
269 | 
270 | 
271 |  
272 | 
def test(theta,rett,weightt,chs,allow_short_selling):
    '''
    Apply fitted coefficients to a window and return its portfolio weights.

    theta:   policy coefficients, one per characteristic
    rett:    date x stock return table of the window (only its dates and NaN pattern are read)
    weightt: date x stock benchmark weights of the window
    chs:     long table; columns from the third onwards are the characteristics
    allow_short_selling: True keeps the tilted weights as they are; False sets
             negative weights to 0 and rescales each date to sum to one

    Returns a (dates x stocks) numpy array.
    '''
    bench=weightt.fillna(0).values

    def tilt_on(pos):
        # theta'x_t / N_t for the date at row `pos`
        x_t=chs[chs.date==rett.index[pos]].sort_values('permno').iloc[:,2:].fillna(0).values
        n_t=np.sum(~np.isnan(rett.iloc[pos:pos+1,:]),axis=1).values
        return np.dot(theta.T,x_t.T)/n_t

    tilted=bench + np.vstack([tilt_on(pos) for pos in range(len(rett))])

    if  allow_short_selling==True:
        w=tilted
    elif  allow_short_selling==False:
        floored=np.where(tilted<0,0,tilted)
        w=floored/floored.sum(axis=1,keepdims=True)

    return w
289 |      
290 |  
291 | 
def get_weights(i,ret,ch,weight0,gamma,rho,cr,allow_short_selling,utility_function,cost_type):
    '''
    Fit the policy on one rolling window and return the weights of the following year.

    i:       first row of the window in ``ret``
    ret:     date x stock return table whose index is named 'date'
    ch:      long characteristic table keyed by date and permno
    weight0: date x stock benchmark weights aligned with ``ret``
    remaining arguments are forwarded to ``train`` / ``test``

    The 12*(7+3) rows starting at ``i`` are used for fitting (the penalty is
    fixed at 0, so the two sub-windows are not treated differently) and the
    next 12 rows are the test year.

    Returns a DataFrame of test-year weights indexed like ``weight0``.
    '''
    trw=7
    viw=3
    fit_stop=i+12*(trw+viw)       # end of the fitting rows
    test_stop=i+12*(trw+viw+1)    # end of the test year

    # characteristics for every (date, permno) pair of the fitting and test rows
    pairs=pd.melt(ret.iloc[i:test_stop,:].reset_index(),id_vars='date')
    pairs.columns=['date','permno','ret']
    pairs=pairs.sort_values(by=['date']).astype(float)
    chs=pd.merge(pairs[['date','permno']],ch,how='left',on=['date','permno'])

    lambdaopt=0

    theta=train(ret.iloc[i:fit_stop,:],chs,weight0.iloc[i:fit_stop,:],gamma,lambdaopt,rho,cr, allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type)
    print(theta)

    ## test window
    rett=ret.iloc[fit_stop:test_stop,:]
    weightt=weight0.iloc[fit_stop:test_stop,:]
    test_weights=test(theta,rett,weightt,chs,allow_short_selling)
    return pd.DataFrame(test_weights,index=weightt.index,columns=weightt.columns)
315 |  
316 | 
def get_result(ret,ch,methodname,gamma,rho,cr, allow_short_selling,utility_function,cost_type):
    '''
    Run the rolling estimation over the whole sample and save the test-year weights.

    ret: date x stock return table
    ch:  long characteristic table
    methodname: label used in the output file name
    remaining arguments are forwarded to ``get_weights`` and also appear in the file name

    Windows start every 12 rows; each needs 7*12 + 3*12 + 12 rows. The stacked
    weights are written to result/middle/weight_all/, which is created when missing
    so that a finished run is not lost to a missing folder.
    '''
    import os  # local import: only needed here to make sure the output folder exists

    trww=7*12
    viww=3*12
    teww=1*12
    weight0= get_equal_weight(ret)

    yearly=[]
    for i in range(0,len(ret)-trww-viww-teww+1,12):
        print(i)
        yearly.append(get_weights(i,ret,ch,weight0,gamma,rho,cr, allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type))

    # one concat at the end; an empty frame is written when no window fits
    weight=pd.concat(yearly,axis=0,join='outer') if yearly else pd.DataFrame()

    out_dir='result/middle/weight_all'
    os.makedirs(out_dir,exist_ok=True)
    weight.to_csv(out_dir+'/weight_'+methodname+'_'+str(rho)+'_'+str(gamma)+'_'+str(allow_short_selling)+'_'+str(utility_function)+'_'+str(cost_type)+'.csv')
328 | 
329 | 
#%%# Input data and driver calls
ret=pd.read_csv('data/ret_clean.csv',index_col=0).astype(float)/100 # returns, divided by 100
ret.index.name='date'
ch=pd.read_csv('data/char_rppca.csv',index_col=0).astype(float) # characteristics written by the standardisation step above
ch=ch.sort_values(by=['date','permno'],ascending= True)
 
# Each entry: [methodname, rho, gamma, cr, allow_short_selling, utility_function, cost_type]
# 1. main result  2. weights under the short-selling constraint  3. weights with risk aversion 10
para_list=[['RPPCA',0,5,0.005,True,'MV',False],['RPPCA',0,5,0.005,False,'MV',False],['RPPCA',0,10,0.005,True,'MV',False] ]  #
for para in para_list:
    get_result(ret,ch,methodname=para[0],rho=para[1],gamma=para[2],cr=para[3], allow_short_selling=para[4],utility_function=para[5],cost_type=para[6])
340 | 
341 |  
342 | 
343 | 
344 | 
345 | # para=['PCA',0,5,0.005,True,'crra',True]
346 | # get_result(ret,ch,methodname=para[0],rho=para[1],gamma=para[2],cr=para[3], allow_short_selling=para[4],utility_function=para[5],cost_type=para[6])
347 | 
348 | # methodname=para[0]
349 | # rho=para[1]
350 | # gamma=para[2]
351 | # cr=para[3]
352 | # allow_short_selling=para[4]
353 | # utility_function=para[5]
354 | # cost_type=para[6]
355 | 
356 | 
357 | 
358 | # para=['PCA',0,0.005,False,'crra',False]  
359 | # get_result(rho=para[0],cr=para[1], allow_short_selling=para[2],utility_function=para[3],cost_type=para[4])
360 | 
361 | # para=['PCA',0,0.005,True,'MV',False]  
362 | # get_result(rho=para[0],cr=para[1], allow_short_selling=para[2],utility_function=para[3],cost_type=para[4])
363 | 
364 | # para=['PCA',0,0.005,True,'crra',True]  
365 | # get_result(rho=para[0],cr=para[1], allow_short_selling=para[2],utility_function=para[3],cost_type=para[4])
366 | 
367 |  
368 | 
369 | 
370 | 
371 | 
372 | 
373 | 
374 | 
375 | 
376 | 
377 | 
378 | 
379 | 
380 | 
381 | 
382 | 
383 | 


--------------------------------------------------------------------------------
/07RNN-AC.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Mon Nov  7 15:26:45 2022
  4 | 
  5 | @author: whufi
  6 | """
  7 | ##RNN预测神经网络
  8 | 
  9 | # from TCNmodel import TCN
 10 | import torch
 11 | from torch import nn
 12 | from torch.nn import init
 13 | import numpy as np
 14 | import torch.utils.data as Data
 15 | import random
 16 | import pandas as pd
 17 | import os
 18 | import warnings
 19 | from pytorchtools_change import EarlyStoppings  ##点开pytorchtools，复制里面的代码，即可新建pytorchtools
warnings.filterwarnings("ignore")
# One seed for every random number generator used below.
seed = 100
random.seed(seed)  # random
np.random.seed(seed)  # numpy
torch.manual_seed(seed)  # torch+CPU
torch.cuda.manual_seed(seed)  # torch+GPU
# os.chdir(r'E:\02实验\98ML-AC-code')  ## set the working directory


#%% Load the data
ret=pd.read_csv('data/ret_clean.csv',index_col=0).astype(float)/100 # returns, divided by 100
ch=pd.read_csv('data/char.csv',index_col=0).astype(float) # characteristics
 32 | 
 33 | 
 34 | def get_data(ret,ch):
 35 |     '''
 36 |     对收益、特征数据进行调整
 37 |     '''
 38 |     ret.index.name='date'
 39 |     
 40 |     ##为了保证每一期的股票数量的位置相同，生成全部的date+permno
 41 |     new=pd.melt(ret.fillna(0).reset_index(),id_vars='date').astype(float)
 42 |     new.columns=['date','permno','ret']
 43 |     new=new[['date','permno']]
 44 |     new=new.sort_values(by=['date','permno'])    
 45 |    
 46 |     ret0=pd.melt(ret.reset_index(),id_vars='date').astype(float)
 47 |     ret0.columns=['date','permno','ret']
 48 |     
 49 |         
 50 |     ch=ch.fillna(0)    
 51 |         
 52 |     retch=pd.merge(new,ch,how='left',on=['date','permno']).fillna(0)
 53 |     retch=pd.merge(retch,ret0,how='left',on=['date','permno'])
 54 | 
 55 |     data_list=[]
 56 |     for i in ret.index:
 57 |         data_list.append(retch[retch.date==i])    
 58 |         
 59 |     return data_list
 60 | 
# Built once at import time; get_weights and the driver loop below slice this list by row position.
data_list=get_data(ret,ch)
 62 |  
 63 | 
 64 |  
 65 | 
 66 | 
 67 | # #获得数据迭代器   
 68 | def load_batch(data_arrays, batch_size, N, is_train=True):
 69 |     '''
 70 |     自定义生成训练集中的batch数据集，为了保持横截面的样本顺序
 71 |     '''
 72 |     batch_starts=[start for start in range(0, int(data_arrays[0].shape[0]+1-batch_size),batch_size)]
 73 |     
 74 |     if is_train:
 75 |         random.shuffle(batch_starts)
 76 |     
 77 |     cr_list=[]
 78 | 
 79 |     for j in batch_starts :
 80 |         cr_list.append([data_arrays[0][j:(j+batch_size)],data_arrays[1][j:(j+batch_size)]])
 81 |         
 82 |     if np.max(batch_starts)+batch_size < data_arrays[0].shape[0]:
 83 |         cr_list.append([data_arrays[0][(np.max(batch_starts)+batch_size):],data_arrays[1][(np.max(batch_starts)+batch_size):]])
 84 |     
 85 |     return cr_list
 86 | 
 87 | 
 88 | 
 89 | class RNN(nn.Module):
 90 |     def __init__(self, input_size, hidden_size,output_size,num_layers,dropout):
 91 |         super(RNN, self).__init__()
 92 |         
 93 |         # 这里构建LSTM 还可以构建RNN、GRU等方法类似
 94 |         self.rnn1 = nn.RNN(
 95 |             input_size=input_size, 
 96 |             hidden_size=32,
 97 |             num_layers=1,
 98 |             # dropout=dropout,
 99 |             batch_first=True  # 如果为True，输入输出数据格式是(batch, seq_len, feature)
100 |             # 为False，输入输出数据格式是(seq_len, batch, feature)，
101 |         )
102 |         
103 |         self.rnn2 = nn.RNN(
104 |             input_size=32, 
105 |             hidden_size=16,
106 |             num_layers=1,
107 |             # dropout=dropout,
108 |             batch_first=True  # 如果为True，输入输出数据格式是(batch, seq_len, feature)
109 |             # 为False，输入输出数据格式是(seq_len, batch, feature)，
110 |         )
111 |         self.fc = nn.Linear(16,output_size,bias=False) #拼接隐藏层
112 |  
113 |     def forward(self, x):
114 |         r_out, _  = self.rnn1(x)
115 |         r_out, _  = self.rnn2(r_out)
116 |         out = self.fc(r_out) # 全连接层进行分类
117 |         return out
118 | 
119 | 
def loss(net,w_hat,r,gamma,N,allow_short_selling=True,utility_function='crra',cost_type=False,cr=0.005):
    '''
    Training objective (smaller is better) built from the network output.

    net:   not used
    w_hat: network output, shape (periods, stocks, 1); the part of the weights that is learned
    r:     returns with the same shape; NaN marks a stock without a return
    gamma: risk aversion
    N:     not used
    allow_short_selling: when false, weights are clamped to [0,1] before rescaling
    utility_function: 'crra' or 'MV'
    cost_type: add the turnover cost when true
    cr:    cost rate applied to the turnover

    Weights are the equal-weight portfolio over the stocks with a return, plus
    the cross-sectionally demeaned output divided by the number of such stocks,
    rescaled to sum to one per period. Turnover compares the new weights with
    the previous weights after they drifted with returns, w*(1+r).

    Raises ValueError for an unknown ``utility_function``.
    '''
    valid=~torch.isnan(r)
    num=valid.sum(dim=1,keepdim=True)        # stocks with a return, per period
    zeros=torch.zeros_like(r)

    # equal-weight benchmark over the available stocks
    w0=torch.where(valid,(1/num).to(r.dtype),zeros)

    # demean the output over the available stocks and scale by their number
    mean_hat=torch.where(valid,w_hat,zeros).sum(dim=1,keepdim=True)/num
    wh=(w_hat-mean_hat)/num
    wh=torch.where(valid & ~torch.isnan(wh),wh,zeros)

    w=w0+wh
    if not allow_short_selling:  ## short-selling constraint
        w=w.clamp(0,1)
    w=w/w.sum(dim=1,keepdim=True)   ## weights sum to one again

    r0=torch.where(valid,r,zeros)
    mret=(r0*w).sum(dim=1)          # portfolio return per period

    if utility_function=='crra':   # power utility
        utilitymean=torch.mean(-torch.pow(1+mret,1-gamma)/(1-gamma))
    elif utility_function=='MV':  # mean-variance utility
        utilitymean=torch.var(mret)*(gamma/2)-torch.mean(mret)
    else:
        raise ValueError("utility_function must be 'crra' or 'MV', got %r" % (utility_function,))

    if not cost_type:
        return utilitymean

    drifted=w*(1+r0)
    tc=torch.mean(torch.sum(torch.abs(w[1:,:,:]-drifted[:-1,:,:]),dim=1),dim=0)
    return utilitymean+cr*tc
170 | 
171 | 
172 | #记录列表（list），存储训练集和测试集上经过每一轮次，loss的变化
# Keeps the per-epoch loss on the training and validation sets
def train_model(net,train_iter,valid_iter,tc,tr,vc,vr,gamma,N,loss,input_size, output_size,dropout,
                                               num_epochs,batch_size,params=None,lr=None,optimizer=None,allow_short_selling=True,utility_function='crra',cost_type=False):
    '''
    Train ``net`` with mini-batches and early stopping on the validation loss.

    train_iter: iterable of (features, returns) batches; valid_iter is not used
    tc, tr / vc, vr: full training / validation features and returns, used for the per-epoch losses
    loss:       callable with the signature of this file's ``loss``
    params, lr, optimizer: when ``optimizer`` is None a plain SGD optimizer with
                learning rate ``lr`` is created once over ``params`` (or all
                network parameters) and stepped after every batch
    input_size, output_size, dropout, batch_size: not used by the loop

    Returns (net, train_loss, valid_loss). Before returning, the weights are
    reloaded from '<p_list>checkpoint.pt', presumably written by EarlyStoppings
    whenever the validation loss improves - confirm in pytorchtools_change.

    Raises ValueError when neither ``optimizer`` nor ``lr`` is given.
    '''
    train_loss=[]
    valid_loss=[]

    p_list='RNNPP'+str(allow_short_selling)+utility_function+str(cost_type)
    early_stopping = EarlyStoppings(para_list=p_list,patience=7, verbose=True)

    if optimizer is None:
        if lr is None:
            raise ValueError("either optimizer or lr must be provided")
        optimizer=torch.optim.SGD(params if params is not None else net.parameters(),lr=lr)

    for epoch in range(num_epochs):  # outer loop: epochs
        for c,r in train_iter:  # inner loop: batches

            w_hat = net(c.to(torch.float32))
            l = loss(net,w_hat,r.to(torch.float32),gamma,N,allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type)

            optimizer.zero_grad()
            l.backward()
            optimizer.step()

        # the monitoring losses need no gradient graph; loss already averages over periods
        with torch.no_grad():
            train_loss.append((loss(net,net(tc.to(torch.float32)),tr.to(torch.float32),gamma,N,allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type)).item())
            valid_loss.append((loss(net,net(vc.to(torch.float32)),vr.to(torch.float32),gamma,N,allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type)).item())

        print("epoch %d,train_loss %.6f,valid_loss %.6f"%(epoch+1,train_loss[epoch],valid_loss[epoch]))

        early_stopping(valid_loss[epoch], net,para_list=p_list)

        if early_stopping.early_stop:
            print("Early stopping")
            break

    # load the last checkpoint with the best model
    net.load_state_dict(torch.load(p_list+'checkpoint.pt'))

    return net, train_loss, valid_loss
221 | 
222 | 
223 | 
224 | 
def test_model(netopt,ec,er,N,allow_short_selling=True):
    '''
    Turn the output of the trained network into portfolio weights for the test window.

    netopt: trained network
    ec:     test features
    er:     test returns, shape (periods, stocks, 1); NaN marks a stock without a return
    N:      not used
    allow_short_selling: False clamps the weights to [0,1] before rescaling

    Returns weights shaped like the network output that sum to one per period.
    '''
    scores=netopt(ec.to(torch.float32))
    missing=torch.isnan(er)
    num=torch.sum(~missing,axis=1)            # stocks with a return, per period
    equal_w=1/num                             ## equal-weight portfolio

    scores_nan=torch.where(missing,torch.full_like(er, np.nan), scores)
    scores_zero=torch.where(missing,torch.full_like(er, 0), scores)
    mean_score=torch.sum(scores_zero,axis=1)/num

    # broadcast the per-period quantities over the stock axis
    w0=torch.where(missing,torch.full_like(er, 0), equal_w.unsqueeze(1))
    wh=(scores_nan-mean_score.unsqueeze(1))/num.unsqueeze(1)
    wh=torch.where(torch.isnan(wh),torch.full_like(wh, 0), wh)

    w=w0+wh
    if allow_short_selling==False:  ## short-selling constraint
        w=w.clamp(0,1)

    ## weights sum to one again
    return w/torch.sum(w,axis=1,keepdim=True)
254 |   
255 |  
256 |     
def get_weights(i,data_list,ret,gamma,dropout,allow_short_selling,utility_function,cost_type):

    '''
    Train the RNN on one rolling window and save the weights of the following year.

    i:         first row of the window; a multiple of 12
    data_list: one frame per date (date, permno, characteristics..., ret)
    ret:       date x stock return table; supplies the index and columns of the output
    gamma, allow_short_selling, utility_function, cost_type: forwarded to training

    Rows i..i+84 are the training set, the next 36 the validation set and the
    next 12 the test year. The test-year weights are written to
    result/weightRNN_<settings>/<i>.csv; the folder and any missing parent are created.
    '''

    lr=0.01
    batch_size = 10  # periods per mini-batch
    num_epochs = 100

    trw=7*12
    viw=3*12
    tew=1*12
    N=data_list[0].shape[0]          ## number of stocks

    weight_index=ret.index[i+trw+viw:i+trw+viw+tew]

    input_size=data_list[0].shape[1]-3 ## number of characteristics (all columns but date, permno, ret)
    hidden_size=32
    output_size=1
    num_layers=2


    ## training set: characteristics are columns 2..-1, the return is the last column
    tc=torch.tensor(np.stack([x.iloc[:,2:-1] for x in data_list[i:i+trw]]))
    tr=torch.tensor(np.stack([x.iloc[:,-1:] for x in data_list[i:i+trw]]))

    # validation set
    vc=torch.tensor(np.stack([x.iloc[:,2:-1] for x in data_list[i+trw:i+trw+viw]]))
    vr=torch.tensor(np.stack([x.iloc[:,-1:] for x in data_list[i+trw:i+trw+viw]]))


    # test set
    ec=torch.tensor(np.stack([x.iloc[:,2:-1] for x in data_list[i+trw+viw:i+trw+viw+tew]]))
    er=torch.tensor(np.stack([x.iloc[:,-1:] for x in data_list[i+trw+viw:i+trw+viw+tew]]))

    ## batches (is_train=False: the batches stay in time order)
    train_iter = load_batch([tc,tr], batch_size, N, is_train=False)
    valid_iter = load_batch([vc,vr], batch_size, N, is_train=False)

    net = RNN(input_size, hidden_size,output_size,num_layers,dropout)
    optimizer =torch.optim.Adam(net.parameters(),lr=lr,betas=(0.9, 0.999),eps=1e-08,weight_decay=0,amsgrad=False)

    netopt, train_loss, valid_loss=train_model(net,train_iter,valid_iter,tc,tr,vc,vr,gamma,N,loss,input_size, output_size, dropout,
                                               num_epochs,batch_size,params=None,lr=lr,optimizer=optimizer,allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type)

    w=test_model(netopt,ec,er.to(torch.float32),N,allow_short_selling)
    path = 'result/weightRNN'+'_'+str(gamma)+'_'+str(allow_short_selling)+'_'+str(utility_function)+'_'+str(cost_type)
    # makedirs also creates a missing 'result' parent and tolerates an existing folder
    os.makedirs(path,exist_ok=True)
    pd.DataFrame(w.squeeze().detach().numpy(),index=weight_index,columns=ret.columns).to_csv(path+'/'+str(i)+'.csv')
308 | 
309 |  
310 |  
#%%# 1. baseline model  2. short-selling constraint  3. risk aversion 10 (the utility function stays 'MV')
# window lengths in months: training, validation, test
trw=7*12
viw=3*12
tew=1*12

# each entry: [gamma, allow_short_selling, utility_function, cost_type]
para_list=[[5,True,'MV',False],[5,False,'MV',False],[10,True,'MV',False]]
for para in para_list:
    # one window per year, as long as a full train+validation+test span fits
    for i in range(0,len(data_list)-trw-viw-tew+1,12):
        print(i)
        get_weights(i,data_list,ret,gamma=para[0],dropout=0,allow_short_selling=para[1],utility_function=para[2],cost_type=para[3] )
320 |         
321 |         
322 |  
323 |  
324 | 
325 | 
326 | 
327 | 
328 | 
329 | 
330 | 
331 | 
332 | 
333 | 
334 | 
335 | 
336 | 
337 | 
338 | 
339 | 
340 | 
341 | 
342 | 
343 |     
344 |     
345 |     
346 |     
347 |     
348 |     
349 |     
350 |     
351 |     
352 |     
353 |     
354 |     
355 |     
356 |     
357 |     


--------------------------------------------------------------------------------
/08LSTM-AC.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Mon Nov  7 15:26:45 2022
  4 | 
  5 | @author: whufi
  6 | """
  7 | import torch
  8 | from torch import nn
  9 | from torch.nn import init
 10 | import numpy as np
 11 | import torch.utils.data as Data
 12 | import random
 13 | import pandas as pd
 14 | import os
 15 | import warnings
 16 | from pytorchtools_change import EarlyStoppings  ##点开pytorchtools，复制里面的代码，即可新建pytorchtools
warnings.filterwarnings("ignore")
 
# One seed for every random number generator used below.
seed = 100
random.seed(seed)  # random
np.random.seed(seed)  # numpy
torch.manual_seed(seed)  # torch+CPU
torch.cuda.manual_seed(seed)  # torch+GPU
# os.chdir(r'E:\02实验\98ML-AC-code')  ## set the working directory


#%% Load the data
ret=pd.read_csv('data/ret_clean.csv',index_col=0).astype(float)/100 # returns, divided by 100
ch=pd.read_csv('data/char.csv',index_col=0).astype(float) # characteristics
 30 | 
 31 | 
 32 | def get_data(ret,ch):
 33 |     '''
 34 |     对收益、特征数据进行调整
 35 |     '''
 36 |     ret.index.name='date'
 37 |     
 38 |     ##为了保证每一期的股票数量的位置相同，生成全部的date+permno
 39 |     new=pd.melt(ret.fillna(0).reset_index(),id_vars='date').astype(float)
 40 |     new.columns=['date','permno','ret']
 41 |     new=new[['date','permno']]
 42 |     new=new.sort_values(by=['date','permno'])    
 43 |    
 44 |     ret0=pd.melt(ret.reset_index(),id_vars='date').astype(float)
 45 |     ret0.columns=['date','permno','ret']        
 46 |     ch=ch.fillna(0)    
 47 |         
 48 |     retch=pd.merge(new,ch,how='left',on=['date','permno']).fillna(0)
 49 |     retch=pd.merge(retch,ret0,how='left',on=['date','permno'])
 50 | 
 51 |     data_list=[]
 52 |     for i in ret.index:
 53 |         data_list.append(retch[retch.date==i])   ##看看此处的数据有没有乱,没乱
 54 |         
 55 |     return data_list
 56 | 
# Built once at import time from the tables loaded above.
data_list=get_data(ret,ch)
 58 |  
 59 | 
 60 |  
 61 | 
 62 | 
 63 | # #获得数据迭代器   
 64 | def load_batch(data_arrays, batch_size, N, is_train=True):
 65 |     '''
 66 |     自定义生成训练集中的batch数据集，为了保持横截面的样本顺序
 67 |     '''
 68 |     batch_starts=[start for start in range(0, int(data_arrays[0].shape[0]+1-batch_size),batch_size)]
 69 |     
 70 |     if is_train:
 71 |         random.shuffle(batch_starts)
 72 |     
 73 |     cr_list=[]
 74 | 
 75 |     for j in batch_starts :
 76 |         cr_list.append([data_arrays[0][j:(j+batch_size)],data_arrays[1][j:(j+batch_size)]])
 77 |         
 78 |     if np.max(batch_starts)+batch_size < data_arrays[0].shape[0]:
 79 |         cr_list.append([data_arrays[0][(np.max(batch_starts)+batch_size):],data_arrays[1][(np.max(batch_starts)+batch_size):]])
 80 |     
 81 |     return cr_list
 82 | 
 83 | 
 84 | 
 85 | class LSTM(nn.Module):
 86 |     def __init__(self, input_size, hidden_size,output_size,num_layers,dropout):
 87 |         super(LSTM, self).__init__()
 88 |         
 89 |         # 这里构建LSTM 还可以构建RNN、GRU等方法类似
 90 |         self.rnn1 = nn.LSTM(
 91 |             input_size=input_size, 
 92 |             hidden_size=32,
 93 |             num_layers=1,
 94 |             batch_first=True  # 如果为True，输入输出数据格式是(batch, seq_len, feature)
 95 |             # 为False，输入输出数据格式是(seq_len, batch, feature)，
 96 |         )
 97 |         
 98 |         self.rnn2 = nn.LSTM(
 99 |             input_size=32, 
100 |             hidden_size=16,
101 |             num_layers=1,
102 |             batch_first=True  # 如果为True，输入输出数据格式是(batch, seq_len, feature)
103 |             # 为False，输入输出数据格式是(seq_len, batch, feature)，
104 |         )
105 |         self.fc = nn.Linear(16,output_size,bias=False) #拼接隐藏层
106 |  
107 |     def forward(self, x):
108 |         r_out, (h_s, h_c)  = self.rnn1(x)
109 |         r_out, (h_s, h_c)  = self.rnn2(r_out)
110 |         out = self.fc(r_out) # 全连接层进行分类
111 |         return out
112 | 
113 | 
def loss(net,w_hat,r,gamma,N,allow_short_selling=True,utility_function='crra',cost_type=False,cr=0.005):
    '''
    Training objective (smaller is better) built from the network output.

    net:   not used
    w_hat: network output, shape (periods, stocks, 1); the part of the weights that is learned
    r:     returns with the same shape; NaN marks a stock without a return
    gamma: risk aversion
    N:     not used
    allow_short_selling: when false, weights are clamped to [0,1] before rescaling
    utility_function: 'crra' or 'MV'
    cost_type: add the turnover cost when true
    cr:    cost rate applied to the turnover

    Weights are the equal-weight portfolio over the stocks with a return, plus
    the cross-sectionally demeaned output divided by the number of such stocks,
    rescaled to sum to one per period. Turnover compares the new weights with
    the previous weights after they drifted with returns, w*(1+r).

    Raises ValueError for an unknown ``utility_function``.
    '''
    valid=~torch.isnan(r)
    num=valid.sum(dim=1,keepdim=True)        # stocks with a return, per period
    zeros=torch.zeros_like(r)

    # equal-weight benchmark over the available stocks
    w0=torch.where(valid,(1/num).to(r.dtype),zeros)

    # demean the output over the available stocks and scale by their number
    mean_hat=torch.where(valid,w_hat,zeros).sum(dim=1,keepdim=True)/num
    wh=(w_hat-mean_hat)/num
    wh=torch.where(valid & ~torch.isnan(wh),wh,zeros)

    w=w0+wh
    if not allow_short_selling:  ## short-selling constraint
        w=w.clamp(0,1)
    w=w/w.sum(dim=1,keepdim=True)   ## weights sum to one again

    r0=torch.where(valid,r,zeros)
    mret=(r0*w).sum(dim=1)          # portfolio return per period

    if utility_function=='crra':   # power utility
        utilitymean=torch.mean(-torch.pow(1+mret,1-gamma)/(1-gamma))
    elif utility_function=='MV':  # mean-variance utility
        utilitymean=torch.var(mret)*(gamma/2)-torch.mean(mret)
    else:
        raise ValueError("utility_function must be 'crra' or 'MV', got %r" % (utility_function,))

    if not cost_type:
        return utilitymean

    drifted=w*(1+r0)
    tc=torch.mean(torch.sum(torch.abs(w[1:,:,:]-drifted[:-1,:,:]),dim=1),dim=0)
    return utilitymean+cr*tc
163 | 
164 | 
165 | #记录列表（list），存储训练集和测试集上经过每一轮次，loss的变化
def train_model(net,train_iter,valid_iter,tc,tr,vc,vr,gamma,N,loss,input_size, output_size,dropout,
                                               num_epochs,batch_size,params=None,lr=None,optimizer=None,allow_short_selling=True,utility_function='crra',cost_type=False):
    '''
    Train `net` with mini-batches and early stopping on the validation loss.

    net: model mapping a characteristics tensor to raw weight scores
    train_iter: iterable of (characteristics, returns) mini-batches
    tc, tr / vc, vr: full training / validation tensors, used once per epoch
        to record the epoch-level losses
    gamma, N: forwarded unchanged to `loss`
    loss: callable invoked as loss(net, w_hat, r, gamma, N, allow_short_selling=...,
        utility_function=..., cost_type=...)
    num_epochs: maximum number of passes over `train_iter`
    lr: learning rate, only used when `optimizer` is None
    optimizer: torch optimizer; when None a plain SGD optimizer is created
    valid_iter, input_size, output_size, dropout, batch_size, params: accepted
        for interface compatibility, not used in the body

    Returns (net, train_loss, valid_loss); the two lists hold one value per
    completed epoch.
    '''
    if optimizer is None:
        # The previous fallback only constructed an SGD object (with
        # momentum=batch_size) and never stepped it, so nothing was learned.
        if lr is None:
            raise ValueError("train_model needs either an optimizer or a learning rate (lr)")
        optimizer = torch.optim.SGD(net.parameters(), lr=lr)

    loss_options = dict(allow_short_selling=allow_short_selling,
                        utility_function=utility_function,
                        cost_type=cost_type)

    train_loss=[]
    valid_loss=[]

    p_list='LSTMPP'+str(allow_short_selling)+utility_function+str(cost_type)
    early_stopping = EarlyStoppings(para_list=p_list,patience=7, verbose=True)

    for epoch in range(num_epochs):
        # Step 1: mini-batch gradient updates on the training set.
        for c,r in train_iter:
            w_hat = net(c.to(torch.float32))
            l = loss(net,w_hat,r.to(torch.float32),gamma,N,**loss_options)

            optimizer.zero_grad()
            l.backward()
            optimizer.step()

        # Step 2: record epoch-level losses; no autograd graph is needed here.
        with torch.no_grad():
            train_loss.append(loss(net,net(tc.to(torch.float32)),tr.to(torch.float32),gamma,N,**loss_options).item())
            valid_loss.append(loss(net,net(vc.to(torch.float32)),vr.to(torch.float32),gamma,N,**loss_options).item())

        print("epoch %d,train_loss %.6f,valid_loss %.6f"%(epoch+1,train_loss[epoch],valid_loss[epoch])) 

        early_stopping(valid_loss[epoch], net,para_list=p_list)
        if early_stopping.early_stop:
            print("Early stopping")
            break

    # Load the checkpoint with the best model (presumably written by
    # EarlyStoppings under this name - confirm in pytorchtools_change).
    net.load_state_dict(torch.load(p_list+'checkpoint.pt'))

    return net, train_loss, valid_loss
217 | 
218 | 
219 | 
220 | 
221 | def test_model(netopt,ec,er,N,allow_short_selling=True):
222 |     w_hat=netopt(ec.to(torch.float32))
223 |  
224 |     weigh0=1/(~torch.isnan(er)).sum(axis=1)   ##计算等权投资组合的权重  
225 |     w_hat_mul=torch.where(torch.isnan(er),torch.full_like(er, np.nan), w_hat)
226 |     w_hatm=torch.where(torch.isnan(er),torch.full_like(er, 0), w_hat)
227 |     
228 |     ws=torch.sum(w_hatm,axis=1)
229 |     num=torch.sum(~torch.isnan(er),axis=1)
230 |     wn=ws/num
231 |     
232 |     
233 |     w0=torch.empty(w_hat.shape) 
234 |     wh=torch.empty(w_hat.shape) 
235 |     
236 |     for j in range(w_hat.shape[0]):
237 |         w0[j]=torch.where(torch.isnan(er[j]),torch.full_like(er[j], 0), weigh0[j])
238 |         wh[j]= (w_hat_mul[j]-wn[j])/num[j]   #times
239 |     
240 | 
241 |     wh=torch.where(torch.isnan(wh),torch.full_like(wh, 0), wh)    
242 |     w=w0+wh
243 |     
244 |     if allow_short_selling==True:  ##没有权重约束
245 |         w=w
246 |     elif allow_short_selling==False: ##卖空约束
247 |         w=w.clamp(0,1)
248 |     w=w/torch.sum(w,axis=1).repeat(1,w.shape[1]).reshape(w.shape)   ##重新要求权重之和等于1    
249 |     return w
250 |  
251 |     
def get_weights(i,data_list,ret,gamma,dropout,allow_short_selling,utility_function,cost_type):
    '''
    Fit the LSTM on one rolling window and write the test-window weights to CSV.

    i: index of the first period of the window (the driver uses multiples of 12)
    data_list: one DataFrame per period; columns 2..-2 are used as
        characteristics and the last column as the return
    ret: returns frame; its index and columns label the output CSV
    gamma, allow_short_selling, utility_function, cost_type: forwarded to
        training and also used to build the output directory name
    dropout: forwarded to the LSTM constructor
    '''
    lr=0.01
    batch_size = 10   # mini-batch size
    num_epochs = 100

    trw=7*12          # training window
    viw=3*12          # validation window
    tew=1*12          # test window
    N=data_list[0].shape[0]          # rows per period

    weight_index=ret.index[i+trw+viw:i+trw+viw+tew]

    input_size=data_list[0].shape[1]-3   # number of characteristic columns
    hidden_size=32
    output_size=1
    num_layers=2                         # passed to the LSTM constructor

    def _stack(frames):
        # Stack per-period frames into (characteristics, returns) tensors.
        chars=torch.tensor(np.stack([x.iloc[:,2:-1] for x in frames]))
        rets=torch.tensor(np.stack([x.iloc[:,-1:] for x in frames]))
        return chars,rets

    tc,tr=_stack(data_list[i:i+trw])                       # training set
    vc,vr=_stack(data_list[i+trw:i+trw+viw])               # validation set
    ec,er=_stack(data_list[i+trw+viw:i+trw+viw+tew])       # test set

    # Batches are built with is_train=False, i.e. without shuffling.
    train_iter = load_batch([tc,tr], batch_size, N, is_train=False)
    valid_iter = load_batch([vc,vr], batch_size, N, is_train=False)

    net = LSTM(input_size, hidden_size,output_size,num_layers,dropout)

    optimizer =torch.optim.Adam(net.parameters(),lr=lr,betas=(0.9, 0.999),eps=1e-08,weight_decay=0,amsgrad=False)

    netopt, train_loss, valid_loss=train_model(net,train_iter,valid_iter,tc,tr,vc,vr,gamma,N,loss,input_size, output_size, dropout,
                                               num_epochs,batch_size,params=None,lr=lr,optimizer=optimizer,allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type)

    w=test_model(netopt,ec,er.to(torch.float32),N,allow_short_selling)
    path = 'result/weightLSTM'+'_'+str(gamma)+'_'+str(allow_short_selling)+'_'+str(utility_function)+'_'+str(cost_type)
    # makedirs also creates the parent 'result' folder and tolerates reruns.
    os.makedirs(path, exist_ok=True)
    pd.DataFrame(w.squeeze().detach().numpy(),index=weight_index,columns=ret.columns).to_csv(path+'/'+str(i)+'.csv')
304 | 
305 |  
306 | 
#%% Rolling-window estimation for every configuration
# Each entry is [gamma, allow_short_selling, utility_function, cost_type].
para_list=[[5,True,'MV',False],[5,False,'MV',False],[10,True,'MV',False]]
for para in para_list:
    trw=7*12
    viw=3*12
    tew=1*12
    # Window start moves forward one year (12 periods) at a time.
    for i in range(0,len(data_list)-(trw+viw+tew)+1,12):
        print(i)
        get_weights(i,data_list,ret,
                    gamma=para[0],
                    allow_short_selling=para[1],
                    utility_function=para[2],
                    cost_type=para[3],
                    dropout=0)
316 |         
317 |         
318 |  
319 | 
320 | 
321 | 
322 | 
323 | 
324 |     
325 |     
326 |     
327 |     
328 |     
329 |     
330 |     
331 |     
332 |     
333 |     
334 |     
335 |     
336 |     
337 |     
338 |     


--------------------------------------------------------------------------------
/09DFN-AC.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Sat Nov  5 16:51:01 2022
  4 | @author: whufi
  5 | """
  6 | 
  7 | 
  8 | import torch
  9 | from torch import nn
 10 | from torch.nn import init
 11 | import numpy as np
 12 | import torch.utils.data as Data
 13 |  
 14 | import random
 15 | import pandas as pd
 16 | import os
 17 | import warnings
 18 | from pytorchtools_change import EarlyStoppings  ##点开pytorchtools，复制里面的代码，即可新建pytorchtools
 19 | import joblib
 20 | from sklearn.model_selection import KFold
 21 | warnings.filterwarnings("ignore")
 22 | # os.chdir(r'E:\02实验\98ML-AC-code')  ##设置文件路径
 23 | 
#%% Load the data
ret=pd.read_csv('data/ret_clean.csv',index_col=0).astype(float)/100 # returns, divided by 100 (presumably percent -> decimal; TODO confirm)
ch=pd.read_csv('data/char.csv',index_col=0).astype(float) # characteristics
 27 | 
 28 | 
 29 | def get_data(ret,ch):
 30 |     '''
 31 |     对收益、特征数据进行调整
 32 |     '''
 33 |     ret.index.name='date'
 34 |     
 35 |     ##为了保证每一期的股票数量的位置相同，生成全部的date+permno
 36 |     new=pd.melt(ret.fillna(0).reset_index(),id_vars='date').astype(float)
 37 |     new.columns=['date','permno','ret']
 38 |     new=new[['date','permno']]
 39 |     new=new.sort_values(by=['date','permno'])    
 40 |    
 41 |     ret0=pd.melt(ret.reset_index(),id_vars='date').astype(float)
 42 |     ret0.columns=['date','permno','ret']
 43 |     ch=ch.fillna(0)    
 44 |         
 45 |     retch=pd.merge(new,ch,how='left',on=['date','permno']).fillna(0)
 46 |     retch=pd.merge(retch,ret0,how='left',on=['date','permno'])
 47 | 
 48 |     data_list=[]
 49 |     for i in ret.index:
 50 |         data_list.append(retch[retch.date==i])   ##看看此处的数据有没有乱,没乱
 51 |         
 52 |     return data_list
 53 | 
# Module-level list of per-date frames; get_res reads it as a global.
data_list=get_data(ret,ch)
 55 |  
 56 | 
 57 |  
 58 | 
 59 | 
 60 | # #获得数据迭代器   
 61 | def load_batch(data_arrays, batch_size, N, is_train=True):
 62 |     '''
 63 |     自定义生成训练集中的batch数据集，为了保持横截面的样本顺序
 64 |     '''
 65 |     batch_starts=[start for start in range(0, int(data_arrays[0].shape[0]+1-batch_size),batch_size)]
 66 |     
 67 |     if is_train:
 68 |         random.shuffle(batch_starts)
 69 |     
 70 |     cr_list=[]
 71 | 
 72 |     for j in batch_starts :
 73 |         cr_list.append([data_arrays[0][j:(j+batch_size)],data_arrays[1][j:(j+batch_size)]])
 74 |         
 75 |     if np.max(batch_starts)+batch_size < data_arrays[0].shape[0]:
 76 |         cr_list.append([data_arrays[0][(np.max(batch_starts)+batch_size):],data_arrays[1][(np.max(batch_starts)+batch_size):]])
 77 |     
 78 |     return cr_list
 79 | 
 80 | 
 81 | ###简化标准化过程
 82 | 
 83 | class Normalization(nn.Module):
 84 |     '''
 85 |     标准化每一个截面
 86 |     '''
 87 |     def __init__(self):
 88 |         super(Normalization, self).__init__()
 89 |         # self.r=r
 90 |         # self.N=N
 91 |         
 92 |     def forward(self,x):
 93 |         xs=torch.empty(x.shape) 
 94 |     
 95 |         for i in range(x.shape[0]):
 96 |             mean=torch.mean(x[i,:,:],axis=0) 
 97 |             std=torch.std(x[i,:,:],axis=0) 
 98 |             xs[i,:,:]=(x[i,:,:]-mean)/std
 99 |         
100 |         return xs
101 | 
102 | 
103 | 
104 | # Neural Network Model  
class Net(nn.Module):
    '''
    Feed-forward network with two hidden blocks and a linear output.

    Each hidden block is Linear -> LeakyReLU(0.01) -> Normalization ->
    Dropout(0.1); the widths are `hidden_size` and 16. The output layer is a
    bias-free Linear with `num_classes` units.
    '''
    def __init__(self, input_size=95, hidden_size=32, num_classes=1):
        super().__init__()
        # First hidden block.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu1 = nn.LeakyReLU(negative_slope=0.01)
        self.norm1 = Normalization()
        self.dropout1 = nn.Dropout(p=0.1)

        # Second hidden block.
        self.fc2 = nn.Linear(hidden_size, 16)
        self.relu2 = nn.LeakyReLU(negative_slope=0.01)
        self.norm2 = Normalization()
        self.dropout2 = nn.Dropout(p=0.1)

        # Output layer, no bias term.
        self.fc4 = nn.Linear(16, num_classes, bias=False)

    def forward(self, x):
        # Apply the layers strictly in the order they were defined.
        pipeline = (
            self.fc1, self.relu1, self.norm1, self.dropout1,
            self.fc2, self.relu2, self.norm2, self.dropout2,
            self.fc4,
        )
        out = x
        for layer in pipeline:
            out = layer(out)
        return out
136 | 
# Module-level model instance with the default sizes. train_model and
# get_weights in this file use this global directly; nothing visible here
# re-creates it between cross-validation folds or rolling windows.
net=Net()
138 |     
139 |     
def loss(net,w_hat,r,gamma,N,lambda1,allow_short_selling=True,utility_function='crra',cost_type=False):
    '''
    Negative average utility of the implied portfolio plus an L1 penalty.

    net: model whose parameters enter the L1 penalty
    w_hat: network output (raw weight scores), same shape as `r`
    r: returns; NaN marks an asset that is unavailable in that period
    gamma: risk-aversion coefficient
    N: unused, kept for interface compatibility
    lambda1: multiplier of the L1 penalty
    allow_short_selling: when false, weights are clamped to [0, 1] and
        renormalised
    utility_function: 'crra' (power utility of 1 + portfolio return) or
        'MV' (gamma/2 * variance - mean)
    cost_type: when true, add 0.005 times the transaction-cost term

    Raises ValueError for an unknown `utility_function`.
    '''
    if utility_function not in ('crra','MV'):
        raise ValueError("utility_function must be 'crra' or 'MV', got %r"%(utility_function,))

    valid=~torch.isnan(r)
    num=valid.sum(dim=1,keepdim=True)          # available assets per period
    zeros=torch.zeros_like(r)

    # Equal-weight benchmark over the available assets.
    w0=torch.where(valid,(1/num).to(r.dtype),zeros)

    # Cross-sectionally demeaned score, scaled by the number of assets.
    w_hat_mean=torch.where(valid,w_hat,zeros).sum(dim=1,keepdim=True)/num
    wh=(torch.where(valid,w_hat,torch.full_like(r,float('nan')))-w_hat_mean)/num
    wh=torch.where(torch.isnan(wh),zeros,wh)

    w=w0+wh
    if not allow_short_selling:
        w=w.clamp(0,1)                         # short-selling constraint
    w=w/w.sum(dim=1,keepdim=True)              # weights sum to one per period

    r0=torch.where(valid,r,zeros)
    retw=r0.mul(w)
    mret=retw.sum(axis=1)                      # portfolio return per period

    if utility_function=='crra':
        utility=-torch.pow(1+mret,1-gamma)/(1-gamma)
        objective=torch.mean(utility)
    else:
        sigma=torch.var(mret)
        objective=sigma.mul(gamma/2)-torch.mean(mret)

    if cost_type:
        # NOTE(review): this compares next-period weights with r*w of the
        # previous period; a drifted weight would be w*(1+r). Formula kept
        # unchanged - confirm the intended definition.
        tc=torch.mean(torch.sum(torch.abs(w[1:]-retw[:-1]),axis=1),axis=0)
        objective=objective+0.005*tc

    # L1 regularisation over all parameters of the network.
    l1_reg=torch.tensor(0.)
    for param in net.parameters():
        l1_reg=l1_reg+torch.sum(torch.abs(param))

    return objective+lambda1*l1_reg
196 | 
197 |  
198 | 
199 | #记录列表（list），存储训练集和测试集上经过每一轮次，loss的变化
def train_model(train_iter,valid_iter,tc,tr,vc,vr,gamma,N,loss,input_size,hidden_size,num_classes,
                                               num_epochs,batch_size,params=None,lr=None,lambda1=None,optimizer=None,allow_short_selling=True,utility_function='crra',cost_type=False,seed=100):
    '''
    Train the module-level `net` with mini-batches and early stopping.

    The model is the global `net` of this file, not an argument.

    train_iter: iterable of (characteristics, returns) mini-batches
    tc, tr / vc, vr: full training / validation tensors, used once per epoch
        to record the epoch-level losses
    gamma, N, lambda1: forwarded unchanged to `loss`
    loss: callable invoked as loss(net, w_hat, r, gamma, N, lambda1, ...)
    num_epochs: maximum number of passes over `train_iter`
    lr: learning rate, only used when `optimizer` is None
    optimizer: torch optimizer; when None a plain SGD optimizer is created
    seed: only used in the checkpoint name
    valid_iter, input_size, hidden_size, num_classes, batch_size, params:
        accepted for interface compatibility, not used in the body

    Returns (net, train_loss, valid_loss); the two lists hold one value per
    completed epoch.
    '''
    if optimizer is None:
        # The previous fallback only constructed an SGD object (with
        # momentum=batch_size) and never stepped it, so nothing was learned.
        if lr is None:
            raise ValueError("train_model needs either an optimizer or a learning rate (lr)")
        optimizer = torch.optim.SGD(net.parameters(), lr=lr)

    loss_options = dict(allow_short_selling=allow_short_selling,
                        utility_function=utility_function,
                        cost_type=cost_type)

    train_loss=[]
    valid_loss=[]

    p_list='FDNPP'+str(allow_short_selling)+utility_function+str(cost_type)+str(seed)

    early_stopping = EarlyStoppings(para_list=p_list,patience=7, verbose=True)

    for epoch in range(num_epochs):
        # Step 1: mini-batch gradient updates on the training set.
        for c,r in train_iter:
            w_hat = net(c.to(torch.float32))
            l = loss(net,w_hat,r.to(torch.float32),gamma,N,lambda1,**loss_options)

            optimizer.zero_grad()
            l.backward()
            optimizer.step()

        # Step 2: record epoch-level losses; no autograd graph is needed here.
        with torch.no_grad():
            train_loss.append(loss(net,net(tc.to(torch.float32)),tr.to(torch.float32),gamma,N,lambda1,**loss_options).item())
            valid_loss.append(loss(net,net(vc.to(torch.float32)),vr.to(torch.float32),gamma,N,lambda1,**loss_options).item())

        print("epoch %d,train_loss %.6f,valid_loss %.6f"%(epoch+1,train_loss[epoch],valid_loss[epoch])) 

        early_stopping(valid_loss[epoch], net,para_list=p_list)

        if early_stopping.early_stop:
            print("Early stopping")
            break

    # Load the checkpoint with the best model (presumably written by
    # EarlyStoppings under this name - confirm in pytorchtools_change).
    net.load_state_dict(torch.load(p_list+'checkpoint.pt'))

    return net, train_loss, valid_loss
248 | 
249 | 
250 | 
251 | 
252 | def test_model(netopt,ec,er,N,allow_short_selling=True):
253 |     w_hat=netopt(ec.to(torch.float32))
254 |     weigh0=1/(~torch.isnan(er)).sum(axis=1)   ##计算等权投资组合的权重
255 |     w_hat_mul=torch.where(torch.isnan(er),torch.full_like(er, np.nan), w_hat)
256 |     w_hatm=torch.where(torch.isnan(er),torch.full_like(er, 0), w_hat)
257 |     
258 |     ws=torch.sum(w_hatm,axis=1)
259 |     num=torch.sum(~torch.isnan(er),axis=1)
260 |     wn=ws/num
261 |     
262 |     
263 |     w0=torch.empty(w_hat.shape) 
264 |     wh=torch.empty(w_hat.shape) 
265 |         
266 |     for j in range(w_hat.shape[0]):
267 |         w0[j]=torch.where(torch.isnan(er[j]),torch.full_like(er[j], 0), weigh0[j])
268 |         wh[j]=(w_hat_mul[j]-wn[j])/num[j]   #times
269 |     
270 | 
271 |     wh=torch.where(torch.isnan(wh),torch.full_like(wh, 0), wh)    
272 |     w=w0+wh
273 |     
274 |     if allow_short_selling==True:  ##没有权重约束
275 |         w=w
276 |     elif allow_short_selling==False: ##卖空约束
277 |         w=w.clamp(0,1)
278 |     w=w/torch.sum(w,axis=1).repeat(1,w.shape[1]).reshape(w.shape)   ##重新要求权重之和等于1   
279 |     return w
280 |  
281 |     
def get_weights(i,data_list,ret,gamma,allow_short_selling,utility_function,cost_type,seed):
    '''
    Fit the network on one rolling window and write the test-window weights to CSV.

    i: index of the first period of the window (the driver uses multiples of 12).
       When i == 0 the learning rate and L1 multiplier are first selected by
       5-fold cross-validation and stored in the globals `lropt` / `lambdaopt`,
       which later windows reuse.
    data_list: one DataFrame per period; columns 2..-2 are used as
        characteristics and the last column as the return
    ret: returns frame; its index and columns label the output CSV
    gamma, allow_short_selling, utility_function, cost_type: forwarded to
        training and also used in the output names
    seed: forwarded to train_model and appended to the output directory name
    '''
    global lambdaopt
    global lropt

    batch_size =10    # mini-batch size
    num_epochs = 100

    trw=7*12          # training window
    viw=3*12          # validation window
    tew=1*12          # test window
    N=data_list[0].shape[0]          # rows per period

    weight_index=ret.index[i+trw+viw:i+trw+viw+tew]

    input_size=data_list[0].shape[1]-3   # number of characteristic columns

    hidden_size=32
    num_classes=1

    def _stack(frames):
        # Stack per-period frames into (characteristics, returns) tensors.
        chars=torch.tensor(np.stack([x.iloc[:,2:-1] for x in frames]))
        rets=torch.tensor(np.stack([x.iloc[:,-1:] for x in frames]))
        return chars,rets

    tc,tr=_stack(data_list[i:i+trw])                       # training set
    vc,vr=_stack(data_list[i+trw:i+trw+viw])               # validation set
    ec,er=_stack(data_list[i+trw+viw:i+trw+viw+tew])       # test set

    # Batches are built with is_train=False, i.e. without shuffling.
    train_iter = load_batch([tc,tr], batch_size, N, is_train=False)
    valid_iter = load_batch([vc,vr], batch_size, N, is_train=False)

    if i==0:   # hyper-parameters are tuned on the first window only
        data=data_list[i:i+trw+viw]   # periods used for training and validation

        kf=KFold(n_splits=5)
        umax=10000000   # best (lowest) mean validation loss found so far
        lambdaopt=0
        lropt=0.01

        lr_list=[0.01,0.001]
        lambda_list=[0.0001]
        for lambda1 in lambda_list:
            for lr in lr_list:
                print(lambda1,lr)

                # 5-fold cross-validation for this (lambda1, lr) pair.
                valoss_list=[]
                for rets_index,retv_index in kf.split(data):
                    print(rets_index.shape,retv_index.shape)

                    ttc,ttr=_stack([data[x] for x in rets_index])   # fold training set
                    vvc,vvr=_stack([data[x] for x in retv_index])   # fold validation set

                    tt_iter = load_batch([ttc,ttr], batch_size, N, is_train=False)
                    vv_iter = load_batch([vvc,vvr], batch_size, N, is_train=False)

                    # NOTE(review): the global `net` is not re-initialised
                    # between folds, so each fold starts from the previous
                    # fold's weights - confirm this is intended.
                    optimizer =torch.optim.Adam(net.parameters(),lr=lr,betas=(0.9, 0.999),eps=1e-08,weight_decay=0,amsgrad=False)
                    netopt, train_loss, valid_loss=train_model(tt_iter,vv_iter,ttc,ttr,vvc,vvr,gamma,N,loss,input_size,hidden_size,num_classes,
                                                               num_epochs,batch_size,params=None,lr=lr,lambda1=torch.tensor(lambda1),optimizer=optimizer,allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type,seed=seed)
                    # Score the fold by its best epoch. valid_loss[-8] only
                    # pointed at that epoch when early stopping (patience 7)
                    # had fired; otherwise it picked an arbitrary epoch.
                    valoss_list.append(np.nanmin(valid_loss))
                util_mean=np.mean(valoss_list)

                if util_mean<umax:
                    lambdaopt=lambda1
                    lropt=lr
                    umax=util_mean

        print('lambda',lambdaopt,'lr',lropt)
        os.makedirs('result/parameter', exist_ok=True)
        # NOTE(review): the file name has no gamma or seed, so runs that
        # differ only in those overwrite each other.
        pd.DataFrame([lambdaopt,lropt],index=['lambda1','lr']).to_csv('result/parameter/DFNPP'+'_'+str(allow_short_selling)+'_'+str(utility_function)+'_'+str(cost_type)+'.csv')

    # Final fit with the selected hyper-parameters. Note weight_decay is
    # 0.0001 here but 0 during cross-validation.
    optimizer =torch.optim.Adam(net.parameters(),lr=lropt,betas=(0.9, 0.999),eps=1e-08,weight_decay=0.0001,amsgrad=False)

    netopt, train_loss, valid_loss=train_model(train_iter,valid_iter,tc,tr,vc,vr,gamma,N,loss,input_size,hidden_size,num_classes,
                                               num_epochs,batch_size,params=None,lr=lropt,lambda1=torch.tensor(lambdaopt),optimizer=optimizer,allow_short_selling=allow_short_selling,utility_function=utility_function,cost_type=cost_type,seed=seed)

    w=test_model(netopt,ec,er.to(torch.float32),N,allow_short_selling)

    path = 'result/weight_PPNN'+'_'+str(gamma)+'_'+str(allow_short_selling)+'_'+str(utility_function)+'_'+str(cost_type)+'_'+str(seed)
    # makedirs also creates the parent 'result' folder and tolerates the
    # parallel workers racing to create it.
    os.makedirs(path, exist_ok=True)
    pd.DataFrame(w.squeeze().detach().numpy(),index=weight_index,columns=ret.columns).to_csv(path+'/'+str(i)+'.csv')
392 | 
393 | 
394 |  
395 | 
def get_res(seed):
    '''
    Run every configuration over all rolling windows for one random seed.

    Seeds Python's random, NumPy and torch (CPU and CUDA) with `seed`, then
    calls get_weights for each window start (step 12) of each configuration.
    '''
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(seed)

    trw, viw, tew = 7*12, 3*12, 1*12
    window_starts = range(0, len(data_list)-trw-viw-tew+1, 12)

    # Each entry is (gamma, allow_short_selling, utility_function, cost_type).
    configs = [(5,True,'MV',False),(5,False,'MV',False),(10,True,'MV',False)]

    for risk_aversion, short_ok, utility, with_cost in configs:
        for i in window_starts:
            print(i)
            get_weights(i,data_list,ret,
                        gamma=risk_aversion,
                        allow_short_selling=short_ok,
                        utility_function=utility,
                        cost_type=with_cost,
                        seed=seed)
412 |             
413 |             
#%% Launch one run per seed in parallel (joblib); the comment in the original
#   states that the final weights are the average of the five runs.
if __name__ == '__main__':

    value_list=list(range(100,105))   # seeds 100..104
    n_jobs=len(value_list)            # one worker per seed
    joblib.Parallel(n_jobs=n_jobs)(joblib.delayed(get_res)(d) for d in value_list)
422 |     
423 |     
424 |  
425 |     
426 |     
427 |     
428 |     
429 |     
430 |  
431 | 
432 | 
433 | 
434 | 


--------------------------------------------------------------------------------
/10result_pro.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Wed Sep 14 21:34:31 2022
  4 | 
  5 | @author: tuxueyong
  6 | """
  7 | 
  8 | import numpy as np
  9 | import random
 10 | import pandas as pd
 11 | from scipy.linalg import sqrtm  # for finding the squared root of Sigma
 12 | import os
 13 | import glob
 14 | import warnings
 15 | warnings.filterwarnings("ignore")
 16 | 
 17 | 
# Hard-coded working directory; every relative path below resolves against it.
os.chdir(r'E:\02实验\98ML-AC-code')

ret=pd.read_csv('data/ret_clean.csv',index_col=0).astype(float)/100 # returns, divided by 100 (presumably percent -> decimal; TODO confirm)
ret.index.name='date'
 22 | 
 23 | def get_weight(path,method):
 24 |     '''
 25 |     path:权重路径
 26 |     method:'MV'、'SPO'
 27 |     '''
 28 |     file = glob.glob(os.path.join(path, "*.csv"))
 29 |     f = []  ##存放因子+超额收益
 30 |     weight=pd.DataFrame() #columns=ret.columns.astype(float)
 31 |     for i in range(len(file)):
 32 |         f_data=pd.read_csv(file[i], header=0, index_col=0)
 33 |         f_data.columns=f_data.columns.astype(float)
 34 |         weight=pd.concat([weight,f_data],axis=0,join='outer')
 35 |         
 36 |     wr=weight.reset_index()      
 37 |     weight=wr.sort_values(wr.columns[0],ascending=[True]).set_index(wr.columns[0])
 38 |     weight.to_csv('result/middle/weight_all/weight_'+method+'.csv')
 39 |     
 40 |     return weight
 41 |     
 42 | #%%整理LSTM-AC的投资权重
 43 | method_list=['weightLSTM_5_False_MV_False','weightLSTM_5_True_MV_False','weightLSTM_10_True_MV_False']
 44 | for i in range(len(method_list)):
 45 |     path = 'result/'+method_list[i]+'/'
 46 |     method=method_list[i]
 47 |     get_weight(path,method) 
 48 |     
 49 | #%%整理LSTM-AC的投资权重
 50 | method_list=['weightRNN_5_False_MV_False','weightRNN_5_True_MV_False','weightRNN_10_True_MV_False']
 51 | for i in range(len(method_list)):
 52 |     path = 'result/'+method_list[i]+'/'
 53 |     method=method_list[i]
 54 |     get_weight(path,method) 
 55 |     
 56 |  
 57 | 
 58 | #%%整理DFN-AC的投资权重
 59 | def get_weight_sum(path,method):
 60 |     '''
 61 |     path:权重路径
 62 |     method:'MV'、'SPO'
 63 |     '''
 64 |     file = glob.glob(os.path.join(path, "*.csv"))
 65 |     weight=pd.DataFrame() #columns=ret.columns.astype(float)
 66 |     for i in range(len(file)):
 67 |         f_data=pd.read_csv(file[i], header=0, index_col=0)
 68 |         f_data.columns=f_data.columns.astype(float)
 69 |         weight=pd.concat([weight,f_data],axis=0,join='outer')
 70 |         
 71 |     wr=weight.reset_index()      
 72 |     weight=wr.sort_values(wr.columns[0],ascending=[True]).set_index(wr.columns[0])
 73 |     return weight
 74 | 
 75 | 
 76 | weig0=0
 77 | for i in [100,101,102,103,104]:
 78 |     method='weight_PPNN_5_True_MV_False_'+str(i)  
 79 |     path = 'result/'+method+'/'
 80 |     print(path)
 81 |     weig=get_weight_sum(path,method) 
 82 |     weig0=weig0+weig
 83 | weig0=weig0/5  
 84 | weig0.to_csv('result/middle/weight_all/weight_'+method+'.csv')    
 85 |     
 86 | 
 87 | weig0=0
 88 | for i in [100,101,102,103,104]:
 89 |     method='weight_PPNN_5_False_MV_False_'+str(i)  
 90 |     path = 'result/'+method+'/'
 91 |     print(path)
 92 |     weig=get_weight_sum(path,method) 
 93 |     weig0=weig0+weig
 94 | weig0=weig0/5    
 95 | weig0.to_csv('result/middle/weight_all/weight_'+method+'.csv')
 96 | 
 97 |  
 98 | weig0=0
 99 | for i in [100,101,102,103,104]:
100 |     method='weight_PPNN_10_True_MV_False_'+str(i)  
101 |     path = 'result/'+method+'/'
102 |     print(path)
103 |     weig=get_weight_sum(path,method)   
104 |     weig0=weig0+weig
105 | weig0=weig0/5 
106 | weig0.to_csv('result/middle/weight_all/weight_'+method+'.csv')  
107 | 
108 | 
109 |  


--------------------------------------------------------------------------------
/11get_weight_result.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Wed Oct 12 14:35:37 2022
  4 | 
  5 | @author: whufi
  6 | """
  7 | 
  8 | ##计算基本的权重指标
  9 |  
 10 | import pandas as pd 
 11 | import matplotlib.pyplot as plt
 12 | import numpy as np
 13 | import pandas as pd 
 14 | import glob, os
 15 | from scipy import stats
 16 | import statsmodels.formula.api as smf
 17 | import statsmodels.api as sm
 18 | from sklearn.preprocessing import scale
 19 | 
# Hard-coded working directory; every relative path below resolves against it.
os.chdir(r'E:\02实验\98ML-AC-code')

ret=pd.read_csv('data/ret_clean.csv',index_col=0).astype(float)/100 # returns, divided by 100 (presumably percent -> decimal; TODO confirm)
ret.index.name='date'
ret=ret.astype(float)

## Three-factor and four-factor data for the Chinese market.
# The leading rows are dropped by .iloc (presumably a text preamble above the
# real header row; process_ch takes the first remaining row as the header).
# NOTE(review): the backslash paths only work on Windows.
ch3=pd.read_csv(r'data\CH_3_update_20211231.csv').iloc[7:,:]
ch4=pd.read_csv(r'data\CH_4_fac_update_20211231.csv').iloc[8:,:]
 29 | 
 30 | def process_ch(ch):
 31 |     ch.columns=ch.iloc[0,:]
 32 |     ch=ch.iloc[1:,:]
 33 |     ch.iloc[:,0]= ch.iloc[:,0].apply(lambda x:str(x)[:6])
 34 |     ch=ch.astype(float)
 35 |     ch=ch.set_index('mnthdt')/100
 36 |     return ch
 37 | 
# Replace the raw factor tables with their cleaned versions.
ch3=process_ch(ch3)
ch4=process_ch(ch4)
 40 | 
 41 | #用于计算回撤
 42 | def get_cumret(ret):
 43 |     '''
 44 |     输入每一期的收益:列为因子收益，行为日期
 45 |     输出从起始日期的每日累计收益：列为因子累计收益，行为日期
 46 |     '''
 47 |     cumpd=pd.DataFrame(columns=ret.columns,index=ret.index)
 48 |     ret=ret.fillna(0)
 49 |     for j in range(ret.shape[1]):
 50 |         for i in range(ret.shape[0]):
 51 |             if i==0:
 52 |                 cumpd.iloc[i,j]=ret.iloc[i,j]+1
 53 |             else:
 54 |                 cumpd.iloc[i,j]=(ret.iloc[i,j]+1)*cumpd.iloc[i-1,j]
 55 |     return cumpd
 56 | 
 57 | 
 58 | def get_DD(ret):
 59 |     '''
 60 |     输入每一期的收益:列为因子收益，行为日期
 61 |     输出:每一列因子的最大回撤
 62 |     '''
 63 |     ret=get_cumret(ret)
 64 |     dd=pd.DataFrame(columns=ret.columns,index=ret.index)
 65 |     for j in range(ret.shape[1]):
 66 |         for i in range(ret.shape[0]):
 67 |             dd.iloc[i,j] = (np.max(ret.iloc[:i+1,j])-ret.iloc[i,j])/np.max(ret.iloc[:i+1,j])
 68 |         
 69 |     MDD=np.max(dd,axis=0)
 70 |     return MDD
 71 | 
 72 | 
 73 | #%%计算每个月的权重的最大、最小
 74 | 
 75 | def get_single_result(weight,ret,ch3,ch4,methodname):
 76 |     weightraw=weight.copy()
 77 |     weight=weight.fillna(0)
 78 |  
 79 |     retn=pd.concat([pd.DataFrame(index=weight.index),ret],axis=1,join='inner')
 80 |     count=retn.count(axis=1)
 81 |  
 82 |     wn=weight.copy()
 83 |     wn[wn>=0]=0 
 84 |     wn[wn<0]=1 #用于统计负权重的占比
 85 |     
 86 |     #平均绝对权重
 87 |     wabs=np.sum(np.abs(weight),axis=1)
 88 |     wabsmean=np.mean(wabs/count)*100  ##先横截面平均、再时间序列上求平均
 89 |      
 90 |     #平均最大绝对权重
 91 |     wmax=np.max(np.abs(weight),axis=1)  
 92 |     wmaxmean=np.mean(wmax)*100    ##权重变动范围还比较小
 93 |     
 94 |     #平均最小权重
 95 |     wmin=np.min(weightraw,axis=1)  
 96 |     wminmean=np.mean(wmin)*100    ## 
 97 |     
 98 |     #平均负权重之和
 99 |     wneg=np.sum(weight[weight<0],axis=1)
100 |     wnegmean=np.mean(wneg)
101 |     
102 |     #平均负权重的占比
103 |     wnegn=np.sum(wn,axis=1)
104 |     wnegmeannum=np.mean(wnegn/count)
105 |     
106 |     ##平均权重变动之和（类似于换手率的概念）
107 |     retn=pd.concat([pd.DataFrame(index=weight.index),ret],axis=1,join='inner')
108 |     wplus=weight*(1+retn)
109 |     wplus.index=wplus.reset_index().iloc[:,0].shift(-1)
110 |     wplus=wplus.iloc[:-1,:]
111 |     weightn=weight.iloc[1:,:]
112 |     wwabs=np.sum(np.abs(weightn-wplus),axis=1)
113 |     wwabsmean=np.mean(wwabs)
114 |     
115 |     ##定义均值、方差、夏普比、CER
116 |     retport=np.sum(weight*retn,axis=1)
117 |     mean=np.mean(retport)*12
118 |     std=np.std(retport,ddof=1)*np.sqrt(12)
119 |     sr=mean/std
120 |     cer=mean-std*std/2 
121 |     
122 |     mdd=get_DD(pd.DataFrame(retport))[0]
123 |     
124 |     
125 |     skew=stats.skew(retport)#使用stats计算偏度
126 |     kurtosis = stats.kurtosis(retport)#使用stats计算峰度
127 |     
128 |     ch3=pd.concat([pd.DataFrame(retport,columns=['retp']),ch3],axis=1,join='inner')
129 |     ch4=pd.concat([pd.DataFrame(retport,columns=['retp']),ch4],axis=1,join='inner')
130 |     
131 |     #ch3-α检验
132 |     ch3test = smf.ols('retp~mktrf+SMB+VMG',ch3).fit(cov_type = 'HAC',cov_kwds = {'maxlags':5})
133 |     ch3_a = ch3test.params[0]
134 |     ch3_t = ch3test.tvalues[0]         
135 |     res=[cer,wabsmean,wmaxmean,wminmean,wnegmean,wnegmeannum,wwabsmean,mean,mdd,std,skew,kurtosis,sr,ch3_a,
136 |          ch3_t]
137 |     
138 |     result=pd.DataFrame(res,columns=[methodname],index=['CER','w_abs','w_max','w_min','w_neg','w_negnum','ww_abs','Mean','MDD','StdDev',
139 |                                'Skew','Kurt','SR','CH3_alpha','CH3_t'])
140 |     return result
141 | 
142 | 
def get_all_result(path,respath,name):
    '''
    Evaluate every weight file in a directory and write one combined table.

    Parameters
    ----------
    path : directory containing the weight CSV files; each ``*.csv`` in it
        is read with its first column as the index.
    respath : directory that receives the output file.
    name : suffix of the output file, written as ``result_<name>.csv``.

    Each file is passed to ``get_single_result`` together with the
    module-level ``ret``, ``ch3`` and ``ch4`` tables; the file name without
    its 4-character extension becomes the column label. Nothing is returned.

    Side effect: the process working directory is changed to ``path``.
    '''
    os.chdir(path)  # kept from the original: callers may rely on the new cwd
    frames = [pd.DataFrame(index=['CER','w_abs','w_max','w_min','w_neg','w_negnum','ww_abs','Mean','MDD',
                                  'StdDev','Skew','Kurt','SR','CH3_alpha','CH3_t'])]

    # glob returns files in arbitrary order; sort so that the column order
    # of the output is reproducible across platforms.
    for csv_name in sorted(glob.glob("*.csv")):
        weight = pd.read_csv(csv_name, index_col=0)
        frames.append(get_single_result(weight, ret, ch3, ch4, csv_name[:-4]))

    # A single concat instead of growing the frame inside the loop.
    result_all = pd.concat(frames, axis=1)
    result_all.to_csv(respath+'/result_'+name+'.csv')  # all results
155 | 
156 |  
#%% Output results
# Directory that receives the combined result file (absolute, machine-specific path).
respath=r'E:\02实验\98ML-AC-code\result\final'
# Directory whose *.csv weight files are evaluated (absolute, machine-specific path).
path=r'E:\02实验\98ML-AC-code\result\middle\weight_all'
# Suffix of the output file: get_all_result writes result_<name>.csv into respath.
name='all_results'
# Runs at script execution time: evaluates every weight file and writes the table.
get_all_result(path,respath,name)
162 | 
163 |  
164 | 
165 | 
166 | 
167 | 
168 | 
169 | 
170 | 
171 | 
172 | 
173 | 
174 | 
175 | 
176 | 
177 | 
178 | 
179 | 
180 | 
181 | 
182 | 
183 |  


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # 基于机器学习和资产特征的投资组合选择研究 (Research on portfolio selection based on machine learning and asset characteristics)
 2 | 
 3 | 李斌,屠雪永.基于机器学习和资产特征的投资组合选择研究[J/OL].系统工程理论与实践:1-27[2023-12-22]. http://kns.cnki.net/kcms/detail/11.2267.N.20231212.1114.002.html.
 4 | 
 5 | 摘要: 随着可投资资产与资产信息的爆炸式增长,投资组合选择研究面临资产和特征双重高维挑战.为此,本文提出一个基于机器学习和资产特征的投资组合选择框架,该框架借助机器学习技术的天然优势,运用高维特征直接预测投资组合权重,避开了常规的两步投资组合管理范式中的收益预测过程,并用于中国股票市场的资产配置研究.结果显示: (1)基于此框架提出的投资策略能够捕捉高维特征中的增量信息,并挖掘资产特征与投资权重之间线性与非线性关系,大幅提升了投资绩效;(2)交易摩擦类特征是投资权重预测中最为重要的资产特征;(3)策略在套利限制较为严重的股票上回报更高,而对宏观经济状态变化的敏感性较低;在其他经济约束下,策略表现依然稳健.本文拓展了现代投资组合理论的研究框架,促进了人工智能与量化投资领域的交叉融合发展. 
 6 | 
 7 | 关键词:投资组合选择;人工智能;资产特征;大维资产配置;量化投资
 8 | 
 9 | 
1. 代码共包含9种策略：简单线性的OLS-AC策略和8种ML-AC策略，分别放在01-09号py脚本中，相互独立；输入数据为：资产特征char和收益ret_clean；输出数据为：样本外的投资组合权重。

2. 10号py文件用于整理非线性策略的投资组合权重。

3. 11号py文件基于所得的投资组合权重，计算所有策略的投资绩效，包括收益、标准差、夏普比率等多种指标。

4. 数据见百度云链接: https://pan.baidu.com/s/12I_HWhi0hzpr4MeGld1BhQ?pwd=64un 提取码: 64un；
   将char.csv和ret_clean.csv下载后放入data文件夹。
18 | 


--------------------------------------------------------------------------------
/data/CH_3_update_20211231.csv:
--------------------------------------------------------------------------------
  1 | Monthly CH-3 factors,,,,
  2 | mnthdt: Month-end date,,,,
  3 | rf_mon: 1-year bank deposit rate,,,,
  4 | mktrf: the excess return on the market,,,,
  5 | SMB (Small-minus-big): Size factor,,,,
  6 | VMG(Value-minus-growth) Value factor,,,,
  7 | Unit: %,,,,
  8 | ,,,,
  9 | mnthdt,rf_mon,mktrf,SMB,VMG
 10 | 20000131,0.19,14.79,-1.61,-0.84
 11 | 20000229,0.18,11.97,1.34,-7.28
 12 | 20000331,0.19,4.32,7.72,-0.5
 13 | 20000430,0.18,1.68,-0.49,2.43
 14 | 20000531,0.19,2.2,1.95,2.06
 15 | 20000630,0.18,2.34,1.11,5.55
 16 | 20000731,0.19,4.37,1.14,1.93
 17 | 20000831,0.19,-1.75,2.1,-1.47
 18 | 20000930,0.18,-5.51,1.83,-0.6
 19 | 20001031,0.19,1.57,2.83,0.2
 20 | 20001130,0.18,5.57,1.38,1.93
 21 | 20001231,0.19,-0.89,1.89,1.69
 22 | 20010131,0.19,-1.2,0.35,2.24
 23 | 20010228,0.17,-5.78,-0.45,3.59
 24 | 20010331,0.19,6.7,1.86,-0.82
 25 | 20010430,0.18,-1.37,0.96,-1.88
 26 | 20010531,0.19,2.55,2.64,-1.8
 27 | 20010630,0.18,0.15,1.49,2.01
 28 | 20010731,0.19,-13.45,-0.19,-0.26
 29 | 20010831,0.19,-4.06,0.75,-0.98
 30 | 20010930,0.18,-4.51,-1.17,3.25
 31 | 20011031,0.19,-4.77,-1.75,-0.46
 32 | 20011130,0.18,3.46,2.35,1.41
 33 | 20011231,0.19,-6.08,-0.89,5.34
 34 | 20020131,0.19,-9.93,-3.37,4.92
 35 | 20020228,0.17,2.14,2.2,0.39
 36 | 20020331,0.17,5.53,2.05,-2.39
 37 | 20020430,0.16,3.84,1.23,-1.78
 38 | 20020531,0.17,-8.64,-1.29,-1.66
 39 | 20020630,0.16,14.23,-3.31,1.09
 40 | 20020731,0.17,-4.6,1.44,1.44
 41 | 20020831,0.17,0.73,-0.39,0.65
 42 | 20020930,0.16,-5.44,-0.89,0.65
 43 | 20021031,0.17,-5,-0.98,0.39
 44 | 20021130,0.16,-5.46,-3.21,2.42
 45 | 20021231,0.17,-5.35,-0.19,0.64
 46 | 20030131,0.17,9.36,1.38,1.74
 47 | 20030228,0.15,0.63,0.99,0.1
 48 | 20030331,0.17,-0.4,-2.39,4.15
 49 | 20030430,0.16,0.67,-5.53,7.01
 50 | 20030531,0.17,4.14,-0.49,0.46
 51 | 20030630,0.16,-6.2,-0.43,2.95
 52 | 20030731,0.17,-0.61,-2.93,2.6
 53 | 20030831,0.17,-3.81,0.81,-0.86
 54 | 20030930,0.16,-4.41,0.31,0.49
 55 | 20031031,0.17,-1.67,-3.81,9.41
 56 | 20031130,0.16,2.78,0.77,2.55
 57 | 20031231,0.17,6.16,-5.47,10.36
 58 | 20040131,0.17,5.96,3.96,0.28
 59 | 20040229,0.16,6.05,3.26,-3.37
 60 | 20040331,0.17,3.61,2.31,2.96
 61 | 20040430,0.16,-9.36,0.86,3.9
 62 | 20040531,0.17,-2.66,1.11,-1.06
 63 | 20040630,0.16,-10.2,-2.22,5.07
 64 | 20040731,0.17,-0.34,-0.81,3.15
 65 | 20040831,0.17,-3.56,0.13,4.86
 66 | 20040930,0.16,4.57,1.05,3.83
 67 | 20041031,0.17,-5.41,-0.21,4.12
 68 | 20041130,0.18,1.18,3.66,-2.9
 69 | 20041231,0.19,-6.01,-1.43,5.58
 70 | 20050131,0.19,-6.16,-1.16,2.38
 71 | 20050228,0.17,9.34,1.82,1.54
 72 | 20050331,0.19,-9.55,-3.18,4.71
 73 | 20050430,0.18,-2.25,-4.58,7.21
 74 | 20050531,0.19,-8.42,5.11,-4.68
 75 | 20050630,0.18,2,-1.49,4.93
 76 | 20050731,0.19,0.1,-6.62,8.1
 77 | 20050831,0.19,8.36,7.39,-4.38
 78 | 20050930,0.18,-0.49,3.29,2.92
 79 | 20051031,0.19,-5.75,-0.79,1.81
 80 | 20051130,0.18,0.23,1.05,-0.62
 81 | 20051231,0.19,6.07,-3.73,4.37
 82 | 20060131,0.19,8.62,-0.72,-0.42
 83 | 20060228,0.17,3.61,-1.69,-2.62
 84 | 20060331,0.19,0.65,-1.22,1.72
 85 | 20060430,0.18,11.28,-1.04,4.69
 86 | 20060531,0.19,16.03,9.16,-8.13
 87 | 20060630,0.18,2.59,5.38,3.75
 88 | 20060731,0.19,-6.88,4.05,-1.24
 89 | 20060831,0.2,3.94,-0.88,-0.16
 90 | 20060930,0.2,4.64,-1.09,-0.98
 91 | 20061031,0.21,2.65,-2.5,8.28
 92 | 20061130,0.2,14.63,-11.81,3.99
 93 | 20061231,0.21,13.93,-8.73,6.17
 94 | 20070131,0.21,13.68,11.91,1.46
 95 | 20070228,0.19,6.51,12.26,-5.9
 96 | 20070331,0.22,11.09,6.43,-1.15
 97 | 20070430,0.23,24.58,8.2,-9.3
 98 | 20070531,0.24,9.41,-1.37,0.08
 99 | 20070630,0.25,-9.22,-5.64,13.35
100 | 20070731,0.27,17.61,5.33,-3.15
101 | 20070831,0.29,15.16,-3.68,1.92
102 | 20070930,0.3,6.17,-1.35,3.51
103 | 20071031,0.32,4.83,-12.65,13.28
104 | 20071130,0.31,-15.26,10.79,3.42
105 | 20071231,0.33,11.86,9.18,-0.92
106 | 20080131,0.34,-15.7,9.04,-1.59
107 | 20080229,0.32,2.29,8.68,0.8
108 | 20080331,0.34,-19.92,0.32,-0.04
109 | 20080430,0.33,4.6,-8.91,7.63
110 | 20080531,0.34,-6.93,1.97,-5.64
111 | 20080630,0.33,-20.76,-4.65,4.09
112 | 20080731,0.34,1.65,7.43,-1.66
113 | 20080831,0.34,-15.16,-7.57,4.16
114 | 20080930,0.33,-4.69,-5.87,-4.16
115 | 20081031,0.33,-25.47,-0.05,4.42
116 | 20081130,0.28,9.37,11.22,-3.91
117 | 20081231,0.2,-2.15,9.46,3.37
118 | 20090131,0.19,9.65,7.77,0.79
119 | 20090228,0.17,4.91,2.5,-4.33
120 | 20090331,0.19,14.63,7.53,-0.92
121 | 20090430,0.18,4.17,4.1,3.93
122 | 20090531,0.19,6.09,0.72,-2.62
123 | 20090630,0.18,12.65,-7.56,2.59
124 | 20090731,0.19,15.55,-5.52,-8.08
125 | 20090831,0.19,-21.82,9.4,5.21
126 | 20090930,0.18,4.57,0.11,5.66
127 | 20091031,0.19,8.29,4.42,0.42
128 | 20091130,0.18,7.31,8.89,-0.6
129 | 20091231,0.19,2.25,3.08,3.93
130 | 20100131,0.19,-8.95,6.03,-1.24
131 | 20100228,0.17,2.3,3.37,-2.5
132 | 20100331,0.19,1.49,2.69,1.16
133 | 20100430,0.18,-7.91,0.92,0.09
134 | 20100531,0.19,-8.7,-0.18,0
135 | 20100630,0.18,-7.4,-0.91,2.99
136 | 20100731,0.19,11.27,5.41,1.17
137 | 20100831,0.19,1.78,5.51,-3.9
138 | 20100930,0.18,0.64,-1.57,-1.93
139 | 20101031,0.2,12.39,-3.54,1.57
140 | 20101130,0.2,-4.72,5.37,0.4
141 | 20101231,0.21,-1.14,-1.12,3.99
142 | 20110131,0.23,-2.75,-0.84,4.26
143 | 20110228,0.22,4.84,4.77,-3.36
144 | 20110331,0.25,-0.59,0.78,1.63
145 | 20110430,0.26,-1.51,-1.66,2.1
146 | 20110531,0.27,-6.44,-0.45,1.06
147 | 20110630,0.26,2.27,1.25,0.36
148 | 20110731,0.29,-1.62,2.32,-0.68
149 | 20110831,0.29,-4.88,1.8,0.75
150 | 20110930,0.28,-9.22,-1.96,3.68
151 | 20111031,0.29,4.03,0.14,1.04
152 | 20111130,0.28,-5.47,1.84,1.4
153 | 20111231,0.29,-7.33,-7.44,7.68
154 | 20120131,0.29,2.48,-3.56,2.01
155 | 20120229,0.27,6.97,5.45,-3.09
156 | 20120331,0.29,-7.03,-0.66,1.23
157 | 20120430,0.28,5.46,1.04,-1.68
158 | 20120531,0.29,0.06,0.77,-0.69
159 | 20120630,0.27,-5.21,-0.31,4.75
160 | 20120731,0.25,-5.76,-2.99,4.92
161 | 20120831,0.25,-2.85,6.19,-0.84
162 | 20120930,0.24,2.06,-2.09,0.15
163 | 20121031,0.25,-1.22,1.56,0.59
164 | 20121130,0.24,-6.35,-4.88,3.66
165 | 20121231,0.25,15.02,0.5,-0.9
166 | 20130131,0.25,4.93,2.27,0.73
167 | 20130228,0.23,0,3.31,0.06
168 | 20130331,0.25,-5.49,2.24,0.38
169 | 20130430,0.24,-2.5,-0.61,1.33
170 | 20130531,0.25,7.79,5.53,-3.83
171 | 20130630,0.24,-13.46,-1.68,3.55
172 | 20130731,0.25,2.65,2.75,-2.96
173 | 20130831,0.25,4.99,4.27,0.97
174 | 20130930,0.24,4.25,-0.71,-2.89
175 | 20131031,0.25,-2.65,1.26,5.47
176 | 20131130,0.24,4.18,3.74,-2.79
177 | 20131231,0.25,-4.58,1.8,3.4
178 | 20140131,0.25,-2.3,3.22,-4.83
179 | 20140228,0.23,0.53,4.33,-1.15
180 | 20140331,0.25,-2.69,1.75,4.92
181 | 20140430,0.24,-1.09,-1.39,1.91
182 | 20140531,0.25,1.06,1.26,-3.27
183 | 20140630,0.24,2.16,2.38,-2.19
184 | 20140731,0.25,7.71,0.88,1.88
185 | 20140831,0.25,1.19,2.61,-2.98
186 | 20140930,0.24,6.96,5.38,-3.32
187 | 20141031,0.25,1.54,0.69,2.64
188 | 20141130,0.24,8.83,-1.76,4.76
189 | 20141231,0.23,15.25,-17.35,15.17
190 | 20150131,0.23,0.45,7.52,-2.47
191 | 20150228,0.21,4.31,1.61,-3.06
192 | 20150331,0.21,14.92,8.01,-4.46
193 | 20150430,0.2,16.01,-0.65,-0.91
194 | 20150531,0.2,6.99,17.73,-10.28
195 | 20150630,0.18,-8.12,-2.21,8.76
196 | 20150731,0.17,-14.61,-4.17,6.71
197 | 20150831,0.16,-13.81,0.58,1.98
198 | 20150930,0.14,-4.31,3.68,2.25
199 | 20151031,0.14,14.39,7.65,-5.95
200 | 20151130,0.12,3.24,8.52,-0.04
201 | 20151231,0.13,3.17,3.8,4.21
202 | 20160131,0.13,-25.09,-5.23,5.99
203 | 20160229,0.12,-2.2,-0.2,0.83
204 | 20160331,0.13,14.52,6.77,-0.03
205 | 20160430,0.12,-2.71,1.68,1.02
206 | 20160531,0.13,-0.11,-0.71,1.58
207 | 20160630,0.12,2.21,4.52,-0.83
208 | 20160731,0.13,1.15,-1.43,5.96
209 | 20160831,0.13,3.71,0.56,-0.87
210 | 20160930,0.12,-2.54,1.39,2.08
211 | 20161031,0.13,2.48,1.39,-0.76
212 | 20161130,0.12,3.64,-1.36,1.28
213 | 20161231,0.13,-5.65,0,0.33
214 | 20170131,0.13,0.7,-3.15,2.85
215 | 20170228,0.11,2.82,1.27,0.34
216 | 20170331,0.13,-0.84,-0.55,3.36
217 | 20170430,0.12,-2.27,-3.81,4.69
218 | 20170531,0.13,-1.91,-5.16,5.68
219 | 20170630,0.12,4.14,-0.02,2.66
220 | 20170731,0.13,2.45,-1.25,4.48
221 | 20170831,0.13,2.48,-0.37,-2.28
222 | 20170930,0.12,0.62,0.93,-0.78
223 | 20171031,0.13,1.35,-3.9,3.82
224 | 20171130,0.12,-2.93,-3.13,3.78
225 | 20171231,0.13,-0.12,-1.79,2.29
226 | 20180131,0.13,3.66,-5.84,6.67
227 | 20180228,0.11,-5.19,0.37,0.19
228 | 20180331,0.13,-1.41,4.26,-6.66
229 | 20180430,0.12,-3.28,-1.25,-0.04
230 | 20180531,0.13,0.27,-1.53,4.49
231 | 20180630,0.12,-7.89,-2.86,3.71
232 | 20180731,0.13,0.9,-0.29,3.66
233 | 20180831,0.13,-5.86,-2.6,0.71
234 | 20180930,0.12,2.29,-3.01,3.09
235 | 20181031,0.13,-8.94,-1.26,2.55
236 | 20181130,0.12,0.86,4.64,-3.16
237 | 20181231,0.13,-4.16,-0.32,1.94
238 | 20190131,0.13,2.88,-4.6,6.35
239 | 20190228,0.11,15.74,5.04,-9.55
240 | 20190331,0.13,6.44,4.23,-2.81
241 | 20190430,0.12,-1.33,-2.29,3.06
242 | 20190531,0.13,-6.36,1.4,0.35
243 | 20190630,0.12,3.15,-3.22,3.98
244 | 20190731,0.13,-0.17,-3.11,0.71
245 | 20190831,0.13,-0.77,-1.55,-3.63
246 | 20190930,0.12,0.61,0.69,1.13
247 | 20191031,0.13,1.11,-2.18,1.35
248 | 20191130,0.12,-1.8,-0.81,1.83
249 | 20191231,0.13,6.86,1.11,0.21
250 | 20200131,0.13,-0.7,0.38,-3.06
251 | 20200229,0.12,-0.89,0.7,-3.72
252 | 20200331,0.13,-6.27,2.99,5.08
253 | 20200430,0.12,5.07,-4.06,0.38
254 | 20200531,0.13,0.3,1.64,0.12
255 | 20200630,0.12,8.01,-1.83,1.03
256 | 20200731,0.13,12.6,0.42,0.76
257 | 20200831,0.13,1.99,2.2,1.7
258 | 20200930,0.12,-5.79,-2.19,1.1
259 | 20201031,0.13,0.86,-1.84,1.24
260 | 20201130,0.12,3.65,-1.3,4.63
261 | 20201231,0.13,2.99,-9.23,-4.72
262 | 20210131,0.13,0.34,-9.57,3.36
263 | 20210228,0.13,-0.44,5.29,6.43
264 | 20210331,0.13,-2.93,5.88,4.87
265 | 20210430,0.13,1.77,-2.86,-4.17
266 | 20210531,0.13,5.05,0.75,-3.35
267 | 20210630,0.13,0.79,1.64,-4.6
268 | 20210731,0.13,-4.1,4.54,-3.54
269 | 20210831,0.13,2.85,4.84,6.8
270 | 20210930,0.13,-0.36,-1.35,4.94
271 | 20211031,0.13,-0.27,-3.19,-4.33
272 | 20211130,0.13,1.96,11.52,-3.8
273 | 20211231,0.13,1.01,3.67,5.07
274 | 


--------------------------------------------------------------------------------
/data/CH_4_fac_update_20211231.csv:
--------------------------------------------------------------------------------
  1 | Monthly CH-4 factors,,,,,
  2 | mnthdt: Month-end date,,,,,
  3 | rf_mon: 1-year bank deposit rate converted to monthly rate,,,,,
  4 | mktrf: the excess return on the market,,,,,
  5 | SMB (Small-minus-big): Size factor,,,,,
  6 | VMG(Value-minus-growth) Value factor,,,,,
  7 | PMO (Pessimistic-minus-Optimistic) Turnover factor,,,,,
  8 | Unit: %,,,,,
  9 | ,,,,,
 10 | mnthdt,rf_mon,mktrf,VMG,SMB,PMO
 11 | 20000131,0.19,14.79,-0.84,-1.88,-10.9
 12 | 20000229,0.18,11.97,-7.28,1.65,-1.27
 13 | 20000331,0.19,4.32,-0.5,7.85,7.93
 14 | 20000430,0.18,1.68,2.43,-0.56,2.78
 15 | 20000531,0.19,2.2,2.06,1.85,-0.93
 16 | 20000630,0.18,2.34,5.55,0.89,0.08
 17 | 20000731,0.19,4.37,1.93,1.02,-3.93
 18 | 20000831,0.19,-1.75,-1.47,2.07,2.59
 19 | 20000930,0.18,-5.51,-0.6,1.88,1.45
 20 | 20001031,0.19,1.57,0.2,2.73,-0.96
 21 | 20001130,0.18,5.57,1.93,1.26,1.18
 22 | 20001231,0.19,-0.89,1.69,1.82,0.08
 23 | 20010131,0.19,-1.2,2.24,0.31,3.27
 24 | 20010228,0.17,-5.78,3.59,-0.9,-1.15
 25 | 20010331,0.19,6.7,-0.82,1.96,-0.76
 26 | 20010430,0.18,-1.37,-1.88,1.06,1.39
 27 | 20010531,0.19,2.55,-1.8,2.78,-0.37
 28 | 20010630,0.18,0.15,2.01,1.52,4.14
 29 | 20010731,0.19,-13.45,-0.26,-0.15,0.2
 30 | 20010831,0.19,-4.06,-0.98,0.75,0.65
 31 | 20010930,0.18,-4.51,3.25,-1.53,0.78
 32 | 20011031,0.19,-4.77,-0.46,-1.8,-0.38
 33 | 20011130,0.18,3.46,1.41,2.1,2.18
 34 | 20011231,0.19,-6.08,5.34,-1.45,3.96
 35 | 20020131,0.19,-9.93,4.92,-3.78,-0.4
 36 | 20020228,0.17,2.14,0.39,2.15,-0.42
 37 | 20020331,0.17,5.53,-2.39,2.27,1.49
 38 | 20020430,0.16,3.84,-1.78,1.33,0.76
 39 | 20020531,0.17,-8.64,-1.66,-1.13,-0.09
 40 | 20020630,0.16,14.23,1.09,-3.3,3.11
 41 | 20020731,0.17,-4.6,1.44,1.16,2.33
 42 | 20020831,0.17,0.73,0.65,-0.33,0.34
 43 | 20020930,0.16,-5.44,0.65,-0.93,-0.37
 44 | 20021031,0.17,-5,0.39,-0.96,-0.51
 45 | 20021130,0.16,-5.46,2.42,-3.51,-0.65
 46 | 20021231,0.17,-5.35,0.64,-0.26,0.79
 47 | 20030131,0.17,9.36,1.74,1.25,4.48
 48 | 20030228,0.15,0.63,0.1,0.94,1.26
 49 | 20030331,0.17,-0.4,4.15,-3.03,-0.49
 50 | 20030430,0.16,0.67,7.01,-5.89,-2.88
 51 | 20030531,0.17,4.14,0.46,-0.56,0.08
 52 | 20030630,0.16,-6.2,2.95,-1.22,3.1
 53 | 20030731,0.17,-0.61,2.6,-3.41,-0.46
 54 | 20030831,0.17,-3.81,-0.86,0.95,1.12
 55 | 20030930,0.16,-4.41,0.49,0.23,0.66
 56 | 20031031,0.17,-1.67,9.41,-4.87,5.63
 57 | 20031130,0.16,2.78,2.55,0.31,-1.98
 58 | 20031231,0.17,6.16,10.36,-7.22,4.06
 59 | 20040131,0.17,5.96,0.28,3.66,3.93
 60 | 20040229,0.16,6.05,-3.37,3.81,1.45
 61 | 20040331,0.17,3.61,2.96,2,2.15
 62 | 20040430,0.16,-9.36,3.9,0.34,0.84
 63 | 20040531,0.17,-2.66,-1.06,1.33,0.95
 64 | 20040630,0.16,-10.2,5.07,-2.95,0.5
 65 | 20040731,0.17,-0.34,3.15,-1.05,0.35
 66 | 20040831,0.17,-3.56,4.86,-1.01,3.2
 67 | 20040930,0.16,4.57,3.83,0.5,-1.48
 68 | 20041031,0.17,-5.41,4.12,-0.81,1.84
 69 | 20041130,0.18,1.18,-2.9,3.98,2.81
 70 | 20041231,0.19,-6.01,5.58,-2.03,3.39
 71 | 20050131,0.19,-6.16,2.38,-1.42,-1.09
 72 | 20050228,0.17,9.34,1.54,1.72,-0.87
 73 | 20050331,0.19,-9.55,4.71,-3.9,3
 74 | 20050430,0.18,-2.25,7.21,-5.36,-2.42
 75 | 20050531,0.19,-8.42,-4.68,5.65,2.91
 76 | 20050630,0.18,2,4.93,-2.09,0.26
 77 | 20050731,0.19,0.1,8.1,-7.7,-3.56
 78 | 20050831,0.19,8.36,-4.38,8.09,1.74
 79 | 20050930,0.18,-0.49,2.92,3.05,1.36
 80 | 20051031,0.19,-5.75,1.81,-0.21,3.78
 81 | 20051130,0.18,0.23,-0.62,1.48,2.63
 82 | 20051231,0.19,6.07,4.37,-4.69,0.65
 83 | 20060131,0.19,8.62,-0.42,-0.77,2.1
 84 | 20060228,0.17,3.61,-2.62,-1.67,2.65
 85 | 20060331,0.19,0.65,1.72,-1.02,-2.85
 86 | 20060430,0.18,11.28,4.69,-1.07,-3.19
 87 | 20060531,0.19,16.03,-8.13,9.7,7.51
 88 | 20060630,0.18,2.59,3.75,4.76,-1.08
 89 | 20060731,0.19,-6.88,-1.24,4.23,1.69
 90 | 20060831,0.2,3.94,-0.16,-0.72,0.04
 91 | 20060930,0.2,4.64,-0.98,-0.63,3.44
 92 | 20061031,0.21,2.65,8.28,-3.59,1.71
 93 | 20061130,0.2,14.63,3.99,-12.64,-1.11
 94 | 20061231,0.21,13.93,6.17,-9.55,-1.84
 95 | 20070131,0.21,13.68,1.46,12.3,7.74
 96 | 20070228,0.19,6.51,-5.9,11.81,-8.69
 97 | 20070331,0.22,11.09,-1.15,6.39,-2.22
 98 | 20070430,0.23,24.58,-9.3,6.99,-12.46
 99 | 20070531,0.24,9.41,0.08,-0.91,3.2
100 | 20070630,0.25,-9.22,13.35,-6.53,11.85
101 | 20070731,0.27,17.61,-3.15,6.26,0.93
102 | 20070831,0.29,15.16,1.92,-4.49,1.11
103 | 20070930,0.3,6.17,3.51,-2.35,1.07
104 | 20071031,0.32,4.83,13.28,-12.72,11.76
105 | 20071130,0.31,-15.26,3.42,10.47,0.56
106 | 20071231,0.33,11.86,-0.92,8.79,2.68
107 | 20080131,0.34,-15.7,-1.59,9.14,0.86
108 | 20080229,0.32,2.29,0.8,8.41,0.41
109 | 20080331,0.34,-19.92,-0.04,0.17,1.55
110 | 20080430,0.33,4.6,7.63,-10.48,-4.45
111 | 20080531,0.34,-6.93,-5.64,3.13,4.11
112 | 20080630,0.33,-20.76,4.09,-5.27,2.27
113 | 20080731,0.34,1.65,-1.66,8.06,0.07
114 | 20080831,0.34,-15.16,4.16,-8,6.63
115 | 20080930,0.33,-4.69,-4.16,-5.08,1.85
116 | 20081031,0.33,-25.47,4.42,-0.6,1.52
117 | 20081130,0.28,9.37,-3.91,10.8,6.31
118 | 20081231,0.2,-2.15,3.37,9.09,2.43
119 | 20090131,0.19,9.65,0.79,6.23,-6.87
120 | 20090228,0.17,4.91,-4.33,3.24,3.2
121 | 20090331,0.19,14.63,-0.92,7.26,-3.88
122 | 20090430,0.18,4.17,3.93,3.3,1.04
123 | 20090531,0.19,6.09,-2.62,1.08,3.15
124 | 20090630,0.18,12.65,2.59,-8.13,1.4
125 | 20090731,0.19,15.55,-8.08,-4.1,4.41
126 | 20090831,0.19,-21.82,5.21,8.61,-1.59
127 | 20090930,0.18,4.57,5.66,-0.69,3.09
128 | 20091031,0.19,8.29,0.42,4.41,0.86
129 | 20091130,0.18,7.31,-0.6,9.4,2.2
130 | 20091231,0.19,2.25,3.93,2.69,3.88
131 | 20100131,0.19,-8.95,-1.24,6.82,1.66
132 | 20100228,0.17,2.3,-2.5,3.82,-0.07
133 | 20100331,0.19,1.49,1.16,2.28,0.21
134 | 20100430,0.18,-7.91,0.09,1.73,5.8
135 | 20100531,0.19,-8.7,0,-0.33,-1.66
136 | 20100630,0.18,-7.4,2.99,-1.73,1.73
137 | 20100731,0.19,11.27,1.17,4.96,-0.83
138 | 20100831,0.19,1.78,-3.9,6.97,-1.73
139 | 20100930,0.18,0.64,-1.93,-1.17,-1.54
140 | 20101031,0.2,12.39,1.57,-4.73,-5.34
141 | 20101130,0.2,-4.72,0.4,3.52,10.41
142 | 20101231,0.21,-1.14,3.99,-1.68,-0.62
143 | 20110131,0.23,-2.75,4.26,-1.85,-1.02
144 | 20110228,0.22,4.84,-3.36,5.25,0.26
145 | 20110331,0.25,-0.59,1.63,-0.31,1.25
146 | 20110430,0.26,-1.51,2.1,-2.48,-0.41
147 | 20110531,0.27,-6.44,1.06,-0.96,0.29
148 | 20110630,0.26,2.27,0.36,1.23,0.59
149 | 20110731,0.29,-1.62,-0.68,2.54,2.9
150 | 20110831,0.29,-4.88,0.75,1.78,0.97
151 | 20110930,0.28,-9.22,3.68,-2.6,4.77
152 | 20111031,0.29,4.03,1.04,0,-2.33
153 | 20111130,0.28,-5.47,1.4,1.89,1.44
154 | 20111231,0.29,-7.33,7.68,-8.56,3.35
155 | 20120131,0.29,2.48,2.01,-4.26,2.17
156 | 20120229,0.27,6.97,-3.09,5.79,0.92
157 | 20120331,0.29,-7.03,1.23,-0.59,2.83
158 | 20120430,0.28,5.46,-1.68,1.58,2.84
159 | 20120531,0.29,0.06,-0.69,1,-1.63
160 | 20120630,0.27,-5.21,4.75,-1.08,0.97
161 | 20120731,0.25,-5.76,4.92,-3.51,-0.94
162 | 20120831,0.25,-2.85,-0.84,6.32,2.34
163 | 20120930,0.24,2.06,0.15,-2.36,-0.35
164 | 20121031,0.25,-1.22,0.59,1.25,2.44
165 | 20121130,0.24,-6.35,3.66,-4.89,-1.58
166 | 20121231,0.25,15.02,-0.9,1,0.6
167 | 20130131,0.25,4.93,0.73,1.65,3
168 | 20130228,0.23,0,0.06,3.19,2.75
169 | 20130331,0.25,-5.49,0.38,1.48,4.02
170 | 20130430,0.24,-2.5,1.33,-0.67,0.5
171 | 20130531,0.25,7.79,-3.83,6.1,-1.3
172 | 20130630,0.24,-13.46,3.55,-2.27,-0.31
173 | 20130731,0.25,2.65,-2.96,3.53,-3.65
174 | 20130831,0.25,4.99,0.97,3.78,2.8
175 | 20130930,0.24,4.25,-2.89,-0.42,-3.41
176 | 20131031,0.25,-2.65,5.47,0.89,2.49
177 | 20131130,0.24,4.18,-2.79,4.28,-0.45
178 | 20131231,0.25,-4.58,3.4,1.31,1.12
179 | 20140131,0.25,-2.3,-4.83,4.22,-1.32
180 | 20140228,0.23,0.53,-1.15,4.3,2.89
181 | 20140331,0.25,-2.69,4.92,0.86,6.31
182 | 20140430,0.24,-1.09,1.91,-1.64,0.29
183 | 20140531,0.25,1.06,-3.27,1.7,2.15
184 | 20140630,0.24,2.16,-2.19,2.89,0.86
185 | 20140731,0.25,7.71,1.88,0.27,2.15
186 | 20140831,0.25,1.19,-2.98,3.05,3.13
187 | 20140930,0.24,6.96,-3.32,5.73,-2.45
188 | 20141031,0.25,1.54,2.64,0.03,-0.89
189 | 20141130,0.24,8.83,4.76,-2.89,-3.7
190 | 20141231,0.23,15.25,15.17,-17.2,-20.19
191 | 20150131,0.23,0.45,-2.47,5.15,11.34
192 | 20150228,0.21,4.31,-3.06,1.76,3.07
193 | 20150331,0.21,14.92,-4.46,8.78,1.06
194 | 20150430,0.2,16.01,-0.91,-0.28,1.48
195 | 20150531,0.2,6.99,-10.28,18.41,12.75
196 | 20150630,0.18,-8.12,8.76,-3.72,0.75
197 | 20150731,0.17,-14.61,6.71,-4.88,4.02
198 | 20150831,0.16,-13.81,1.98,0.15,0.8
199 | 20150930,0.14,-4.31,2.25,3.54,0.25
200 | 20151031,0.14,14.39,-5.95,7.35,-8.57
201 | 20151130,0.12,3.24,-0.04,7.9,-3.5
202 | 20151231,0.13,3.17,4.21,3.82,3.71
203 | 20160131,0.13,-25.09,5.99,-4.81,7.92
204 | 20160229,0.12,-2.2,0.83,-0.04,1.71
205 | 20160331,0.13,14.52,-0.03,5.65,-7.05
206 | 20160430,0.12,-2.71,1.02,1.81,3.3
207 | 20160531,0.13,-0.11,1.58,-1.02,-0.41
208 | 20160630,0.12,2.21,-0.83,4.32,-4.37
209 | 20160731,0.13,1.15,5.96,-1.59,8.61
210 | 20160831,0.13,3.71,-0.87,0.89,2.12
211 | 20160930,0.12,-2.54,2.08,1.06,0.11
212 | 20161031,0.13,2.48,-0.76,1.53,0.21
213 | 20161130,0.12,3.64,1.28,-1.79,-0.52
214 | 20161231,0.13,-5.65,0.33,-0.14,-0.95
215 | 20170131,0.13,0.7,2.85,-3.34,-2.78
216 | 20170228,0.11,2.82,0.34,1,1.85
217 | 20170331,0.13,-0.84,3.36,-1.01,-0.93
218 | 20170430,0.12,-2.27,4.69,-4.12,-1.92
219 | 20170531,0.13,-1.91,5.68,-5.79,-3.06
220 | 20170630,0.12,4.14,2.66,-0.78,2.37
221 | 20170731,0.13,2.45,4.48,-2.03,0.84
222 | 20170831,0.13,2.48,-2.28,-0.15,1.82
223 | 20170930,0.12,0.62,-0.78,1.1,0.06
224 | 20171031,0.13,1.35,3.82,-4.4,4.63
225 | 20171130,0.12,-2.93,3.78,-3.88,-0.48
226 | 20171231,0.13,-0.12,2.29,-1.78,-1.26
227 | 20180131,0.13,3.66,6.67,-6.76,-1.69
228 | 20180228,0.11,-5.19,0.19,0.03,1.27
229 | 20180331,0.13,-1.41,-6.66,4.8,2.69
230 | 20180430,0.12,-3.28,-0.04,-1.21,0.46
231 | 20180531,0.13,0.27,4.49,-1.77,2.03
232 | 20180630,0.12,-7.89,3.71,-3.46,-0.4
233 | 20180731,0.13,0.9,3.66,-0.9,4.08
234 | 20180831,0.13,-5.86,0.71,-2.53,2.43
235 | 20180930,0.12,2.29,3.09,-3.24,-0.82
236 | 20181031,0.13,-8.94,2.55,-1.47,1.8
237 | 20181130,0.12,0.86,-3.16,5.06,2.2
238 | 20181231,0.13,-4.16,1.94,-0.25,2.44
239 | 20190131,0.13,2.88,6.35,-5.11,3.26
240 | 20190228,0.11,15.74,-9.55,5.69,-7.22
241 | 20190331,0.13,6.44,-2.81,4.66,0.51
242 | 20190430,0.12,-1.33,3.06,-2.45,2.6
243 | 20190531,0.13,-6.36,0.35,1.69,3.71
244 | 20190630,0.12,3.15,3.98,-3.8,0.86
245 | 20190731,0.13,-0.17,0.71,-3.08,1.18
246 | 20190831,0.13,-0.77,-3.63,-0.74,-3.34
247 | 20190930,0.12,0.61,1.13,0.53,0.49
248 | 20191031,0.13,1.11,1.35,-2.12,0.69
249 | 20191130,0.12,-1.8,1.83,-1.06,-0.3
250 | 20191231,0.13,6.86,0.21,1,-0.41
251 | 20200131,0.13,-0.7,-3.06,0.81,-5.27
252 | 20200229,0.12,-0.89,-3.72,1.12,-6.38
253 | 20200331,0.13,-6.27,5.08,2.39,7.63
254 | 20200430,0.12,5.07,0.38,-3.77,-0.57
255 | 20200531,0.13,0.3,0.12,1.67,0.06
256 | 20200630,0.12,8.01,1.03,-1.7,-2.03
257 | 20200731,0.13,12.6,0.76,0.1,-2.07
258 | 20200831,0.13,1.99,1.7,2.14,1.28
259 | 20200930,0.12,-5.79,1.1,-2.14,-1.08
260 | 20201031,0.13,0.86,1.24,-1.78,-3.52
261 | 20201130,0.12,3.65,4.63,-1.8,1.99
262 | 20201231,0.13,2.99,-4.72,-7.99,-2.76
263 | 20210131,0.13,0.34,3.36,-9.07,-2.17
264 | 20210228,0.13,-0.44,6.43,4.55,1.67
265 | 20210331,0.13,-2.93,4.87,5.35,1.15
266 | 20210430,0.13,1.77,-4.17,-2.74,2.93
267 | 20210531,0.13,5.05,-3.35,0.73,-1.4
268 | 20210630,0.13,0.79,-4.6,2.24,1.39
269 | 20210731,0.13,-4.1,-3.54,4.96,-3.96
270 | 20210831,0.13,2.85,6.8,3.92,-0.44
271 | 20210930,0.13,-0.36,4.94,-1.7,5.03
272 | 20211031,0.13,-0.27,-4.33,-2.57,6.3
273 | 20211130,0.13,1.96,-3.8,11.49,4.85
274 | 20211231,0.13,1.01,5.07,3.11,2.24
275 | 


--------------------------------------------------------------------------------
/pytorchtools_change.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import torch
 3 |  
 4 | class EarlyStoppings:
 5 |     """Early stops the training if validation loss doesn't improve after a given patience."""
 6 |     def __init__(self,para_list, patience=7, verbose=False, delta=0):
 7 |         """
 8 |         Args:
 9 |             patience (int): How long to wait after last time validation loss improved.
10 |                             上次验证集损失值改善后等待几个epoch
11 |                             Default: 7
12 |             verbose (bool): If True, prints a message for each validation loss improvement.
13 |                             如果是True，为每个验证集损失值改善打印一条信息
14 |                             Default: False
15 |             delta (float): Minimum change in the monitored quantity to qualify as an improvement.
16 |                             监测数量的最小变化，以符合改进的要求
17 |                             Default: 0
18 |         """
19 |         self.patience = patience
20 |         self.verbose = verbose
21 |         self.counter = 0
22 |         self.best_score = None
23 |         self.early_stop = False
24 |         self.val_loss_min = np.Inf
25 |         self.delta = delta
26 |  
27 |     def __call__(self, val_loss, model,para_list):
28 |  
29 |         score = -val_loss
30 |  
31 |         if self.best_score is None:
32 |             self.best_score = score
33 |             self.save_checkpoint(val_loss, model,para_list)
34 |         elif score < self.best_score + self.delta:
35 |             self.counter += 1
36 |             # print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
37 |             if self.counter >= self.patience:
38 |                 self.early_stop = True
39 |         else:
40 |             self.best_score = score
41 |             self.save_checkpoint(val_loss, model,para_list)
42 |             self.counter = 0
43 |  
44 |     def save_checkpoint(self, val_loss, model,para_list):
45 |         '''
46 |         Saves model when validation loss decrease.
47 |         验证损失减少时保存模型。
48 |         '''
49 |         if self.verbose:
50 |             print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
51 |         torch.save(model.state_dict(),para_list+'checkpoint.pt') # 这里会存储迄今最优模型的参数
52 |         # torch.save(model, 'finish_model.pkl') # 这里会存储迄今最优的模型
53 |         self.val_loss_min = val_loss


--------------------------------------------------------------------------------