├── Simu_Matlab
    ├── loss.m
    ├── f_grad.m
    ├── soft_threshodr.m
    ├── soft_threshodl.m
    ├── soft_threshoda.m
    ├── fw2.m
    ├── soft_threshode.m
    ├── fw1.m
    ├── lossh.m
    ├── sq.m
    ├── f_gradh.m
    ├── soft_threshodg.m
    ├── proximal.m
    ├── cut_knots_degree2.m
    ├── proximalH.m
    ├── pls.m
    ├── MoreRets.m
    ├── DGP.m
    ├── MC_Tree.m
    └── MC_Reg.m
└── README.md


/Simu_Matlab/loss.m:
--------------------------------------------------------------------------------
function m = loss(y,yhat)
%LOSS Mean squared error.
%   m = loss(y,yhat) returns the mean of the squared element-wise
%   differences between the prediction yhat and the target y.
sqerr=(yhat-y).^2;
m=mean(sqerr);
end


--------------------------------------------------------------------------------
/Simu_Matlab/f_grad.m:
--------------------------------------------------------------------------------
function grad = f_grad(XX,XY,w)
%F_GRAD Gradient of a quadratic objective evaluated at w.
%   grad = f_grad(XX,XY,w) returns XX*w-XY.
%   With XX = X.'*X and XY = X.'*Y (which is how MC_Reg.m builds them) this
%   is the gradient of 0.5*||X*w-Y||^2 with respect to w.
fitted = XX*w;
grad = fitted-XY;
end


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# ML_Codes
Simulation Codes
%%% This Version: April 2019.
@copyright Shihao Gu, Bryan Kelly and Dacheng Xiu


--------------------------------------------------------------------------------
/Simu_Matlab/soft_threshodr.m:
--------------------------------------------------------------------------------
function val=soft_threshodr(groups,nc,w,mu)
%SOFT_THRESHODR Ridge shrinkage step: scales w by 1/(1+mu).
%   groups and nc are not used; they are accepted so that every
%   soft_threshod* operator passed as 'func' to proximal/proximalH can be
%   called as func(groups,nc,w,mu).
val=w/(1+mu);
end


--------------------------------------------------------------------------------
/Simu_Matlab/soft_threshodl.m:
--------------------------------------------------------------------------------
function val = soft_threshodl(groups,nc,w,mu)
%SOFT_THRESHODL Lasso soft-thresholding: shrinks each |w(i)| by mu, floored at 0.
%   groups and nc are not used (kept for the common func(groups,nc,w,mu)
%   call signature).
val=sign(w).*max(abs(w)-mu,0);
end


--------------------------------------------------------------------------------
/Simu_Matlab/soft_threshoda.m:
--------------------------------------------------------------------------------
function val=soft_threshoda(w,alpha,mu)
%SOFT_THRESHODA Soft-threshold w at alpha*mu, then divide by (1+alpha*mu).
%   NOTE(review): the usual elastic-net proximal step divides by
%   1+(1-alpha)*mu; the two coincide only at alpha=0.5 (the case hard-coded
%   in soft_threshode). Confirm the intended parameterisation before using
%   this with another alpha.
val=sign(w).*max(abs(w)-alpha*mu,0)/(1+alpha*mu);
end


--------------------------------------------------------------------------------
/Simu_Matlab/fw2.m:
--------------------------------------------------------------------------------
function p = fw2(x)
%FW2 Position [row,col] of the maximum of a matrix.
%   p = fw2(x) returns a 1-by-2 vector. When the maximum occurs more than
%   once, the first occurrence in column-major order is returned, so callers
%   can always read p(1) as the row and p(2) as the column. (Returning every
%   tied position, as before, gave a k-by-2 matrix in which p(2) was the row
%   of the second tie rather than a column.)
%   p is empty when x is empty or contains only NaN.
maximum=max(x(:));
[row,col]=find(x==maximum,1);
p=[row,col];
end


--------------------------------------------------------------------------------
/Simu_Matlab/soft_threshode.m:
--------------------------------------------------------------------------------
function val=soft_threshode(groups,nc,w,mu)
%SOFT_THRESHODE Elastic-net step with the mixing weight fixed at 0.5:
%   soft-threshold w at 0.5*mu, then divide by (1+0.5*mu).
%   groups and nc are not used (kept for the common func(groups,nc,w,mu)
%   call signature).
val=sign(w).*max(abs(w)-0.5*mu,0)/(1+0.5*mu);
end


--------------------------------------------------------------------------------
/Simu_Matlab/fw1.m:
--------------------------------------------------------------------------------
function p = fw1(x)
%FW1 Index of the (first) maximum of a vector.
%   p is empty when x is empty or contains only NaN.
maximum=max(x);
p=find(x==maximum,1);
end


--------------------------------------------------------------------------------
/Simu_Matlab/lossh.m:
--------------------------------------------------------------------------------
function m=lossh(y,yhat,mu)
%LOSSH Mean Huber-type loss of the residual yhat-y with threshold mu.
%   Per element, with r = |yhat-y|:
%       r <= mu :  r^2
%       r >  mu :  2*mu*r - mu^2
%   Elements whose residual is NaN satisfy neither test and contribute 0.
r=abs(yhat-y);
l=zeros(length(r),1);
tail=(r>mu);
l(tail)=2*mu.*r(tail)-mu.*mu;
core=(r<=mu);
l(core)=r(core).*r(core);
m=mean(l);
end


--------------------------------------------------------------------------------
/Simu_Matlab/sq.m:
--------------------------------------------------------------------------------
function r = sq(a,b,step)
%SQ Row vector a, a+step, a+2*step, ... containing every term that is <= b.
%   The first element a is always included, even when a > b. At most 10000
%   further terms are generated, which also bounds the loop when step <= 0.
%
%   Fix: the terms used to be combined with r=r+[new]. In MATLAB that adds
%   'new' to every element of r, so the function returned the scalar sum of
%   the terms instead of the sequence. Terms are now appended.
r=a;
new=a;
for i=1:10000
    new=new+step;
    if new>b
        break
    end
    r(end+1)=new; %#ok<AGROW>
end
end


--------------------------------------------------------------------------------
/Simu_Matlab/f_gradh.m:
--------------------------------------------------------------------------------
function grad = f_gradh(w,X,y,mu)
%F_GRADH Gradient-type direction for the Huber-loss fit of y on X at w.
%   With residual r = X*w-y the rows are split into three sets:
%       |r| <= mu : contribute X_i.'*r_i      (quadratic region)
%       r  >  mu  : contribute  mu*X_i.'      (upper linear region)
%       r  < -mu  : contribute -mu*X_i.'      (lower linear region)
%   This equals one half of the derivative of the per-element loss used in
%   lossh (r^2 and 2*mu*|r|-mu^2), summed over rows.
r = X*w-y;
inner = abs(r)<=mu;
upper = r>mu;
lower = r<-mu;
grad = X(inner,:).'*(X(inner,:)*w-y(inner)) ...
    +mu*X(upper,:).'*ones(nnz(upper),1) ...
    -mu*X(lower,:).'*ones(nnz(lower),1);
end

--------------------------------------------------------------------------------
/Simu_Matlab/soft_threshodg.m:
--------------------------------------------------------------------------------
function w1=soft_threshodg(groups,nc,w,mu)
%SOFT_THRESHODG Group-lasso (block) soft-thresholding of a column vector w.
%   groups(k) is the group label (1..nc) of coefficient k. For each group the
%   Euclidean norm n2 of its coefficients is computed; the whole group is set
%   to zero when n2 <= mu and is otherwise shrunk by the factor (1-mu/n2).
w1=w;
for i=1:nc
    ind=(groups==i);
    wg=w1(ind,:);
    n2=sqrt(sum(power(wg,2)));
    if n2<=mu
        w1(ind,:)=0;
    else
        w1(ind,:)=wg-mu*wg/n2;
    end
end
end


--------------------------------------------------------------------------------
/Simu_Matlab/proximal.m:
--------------------------------------------------------------------------------
function v = proximal(groups,nc,XX,XY,tol,L,l2,func)
%PROXIMAL Accelerated proximal-gradient iterations on a quadratic loss.
%   v = proximal(groups,nc,XX,XY,tol,L,l2,func) starts from the zero vector
%   (one entry per row of XX) and repeats, for at most 30000 iterations:
%     1. a gradient step of size 1/L from v, using f_grad(XX,XY,v);
%     2. the shrinkage operator func(groups,nc,.,l2/L);
%     3. the extrapolation v = w + t/(t+3)*(w-w_prev), t = 0,1,2,...
%   Iteration stops once the squared change in v falls below tol times the
%   squared norm of the previous v, or v does not change at all.
%   The returned value is the extrapolated point v.
max_iter=30000;
gamma=1/L;                  % step size
w=zeros(size(XX,1),1);
v=w;
for t=0:max_iter-1
    vold=v;
    w_prev=w;
    w=func(groups,nc,v-gamma*f_grad(XX,XY,v),l2*gamma);
    v=w+t/(t+3)*(w-w_prev);
    change=v-vold;
    if sum(power(change,2)) < sum(power(vold,2))*tol || sum(abs(change))==0
        break
    end
end
end


--------------------------------------------------------------------------------
/Simu_Matlab/cut_knots_degree2.m:
--------------------------------------------------------------------------------
function resultfinal = cut_knots_degree2(x,n,th)
%CUT_KNOTS_DEGREE2 Expand each column of x into n+1 quadratic-spline terms.
%   x  : a-by-b data matrix; NaN entries are replaced by 0 before expanding.
%   n  : number of thresholds used per column.
%   th : thresholds, one column per column of x; rows 1..n are used.
%   resultfinal is a-by-b*(n+1). For column i of x (NaNs zeroed, called xc)
%   the n+1 output columns are, each with its own column mean subtracted:
%       xc
%       (xc-th(1,i)).^2
%       (xc-th(j+1,i)).^2 .* (xc>=th(j+1,i))      for j = 1..n-1
[a,b] = size(x);
resultfinal = zeros(a,b*(n+1));
for i=1:b
    xc = x(:,i);
    xc(isnan(xc)) = 0;
    t = th(:,i);
    base = (n+1)*(i-1);     % output columns base+1..base+n+1 belong to x(:,i)

    resultfinal(:,base+1) = xc-mean(xc);
    term = power(xc-t(1),2);
    resultfinal(:,base+2) = term-mean(term);

    for j=1:(n-1)
        term = power(xc-t(j+1),2).*(xc>=t(j+1));
        resultfinal(:,base+2+j) = term-mean(term);
    end
end
end


--------------------------------------------------------------------------------
/Simu_Matlab/proximalH.m:
--------------------------------------------------------------------------------

function a=proximalH(groups,nc,xtest,mtrain,ytest,w,X,y,mu,tol,L,l2,func)
%proximalH
% Same iteration scheme as proximal.m, but the gradient step uses
% f_gradh(v,X,y,mu), the start value is the supplied w, and at most 3000
% iterations are run.
dim=size(X,1);
max_iter=3000;
gamma=1/L;
l1=l2;
v=w;
yhatbig1=xtest*w+mtrain;
r20=lossh(yhatbig1,ytest,mu);
for t=0:max_iter-1
    vold=v;
    w_perv=w;
    w=v-gamma*f_gradh(v,X,y,mu);
    mu1=l1*gamma;
    w=func(groups,nc,w,mu1);
    v=w+t/(t+3)*(w-w_perv);
    if (sum(power(v-vold,2)) < (sum(power(vold,2))*tol) || ...
sum(abs(v-vold))==0)
        break
    end
    %yhatbig1=xtest*v+mtrain;
    %r2=lossh(yhatbig1,ytest,mu);
    %if r20
% NOTE(review): this listing is damaged here. The text between the
% commented-out "%if r2..." line of proximalH.m above and listing line 26 of
% pls.m below is missing: the end of proximalH.m, the pls.m file header and
% pls.m lines 1-25. (The stray "0" after "%if r2" looks like the tail of a
% lost "if i>0" line - unconfirmed.) The surviving fragments are kept as they
% are; restore both files from the repository before running them.
        v=v-V(:,1:(i))*(V(:,1:(i)).'*p);
        u=u-TT(:,1:(i))*(TT(:,1:(i)).'*u);
    end
    v=v/sqrt(v.'*v);
    s=s-v*(v.'*s);

    R(:,i+1)=r;
    TT(:,i+1)=t;
    P(:,i+1)=p;
    U(:,i+1)=u;
    V(:,i+1)=v;
    Q(:,i+1)=q;
end

for i=0:(A-2)
    C=R(:,1:(i+1))*Q(:,1:(i+1)).';
    B(:,i+2)=C(:,1);
end
end


--------------------------------------------------------------------------------
/Simu_Matlab/MoreRets.m:
--------------------------------------------------------------------------------
%%% This Version: July 30, 2018. @copyright Shihao Gu, Bryan Kelly and Dacheng Xiu
%%% If you use these codes, please cite the paper "Empirical Asset Pricing via Machine Learning" (2018) and "Autoencoder Asset Pricing Models." (2019)

%%% Generate quarterly halfyear and annually returns %%%
%
% For every simulated return file <path><name>/r<mo>_<M>.csv this script
% writes four files r<mo>_<M>_<h>.csv, h = 1, 3, 6, 12. Each file has two
% columns: the row number and, for that row, the sum of the stock's returns
% over the h consecutive periods starting at that row's period. The last h-1
% periods of each stock have no complete window and are left at 0.
%
% Fix: the output used to be built as [a;r], a single column of length 2*K
% (row numbers stacked on top of the returns). The commented-out
% mat2dataset(df,'VarNames',{'a','r'}) calls show that a K-by-2 table was
% intended, so the two columns are now concatenated horizontally.

path='./Simu'; % set your own folder path
name1='/SimuData_p50'; % Case Pc=50
name2='/SimuData_p100'; % Case Pc=100

nstock=200;            % stocks per period   (N in DGP.m)
nperiod=180;           % number of periods   (T in DGP.m)
horizons=[1,3,6,12];   % holding periods, in periods

for name=string({name1,name2})
    for mo=[1,2]
        for M=1:1
            disp(M)

            dirstock=sprintf('%s%s/',path,name);
            r=csvread(sprintf('%sr%d_%d.csv',dirstock,mo,M),0,0);
            r=r(:);
            K=nstock*nperiod;
            if numel(r)~=K
                error('MoreRets:size','Expected %d returns in r%d_%d.csv, found %d.',K,mo,M,numel(r));
            end

            % Rows are ordered period by period, stocks 1..nstock within
            % each period, so per(k) is the stock that row k belongs to.
            per=repmat(1:nstock,1,nperiod);
            a=(1:K)';

            for h=horizons
                rh=zeros(K,1);
                for i=1:nstock
                    ind=(per==i);
                    ret=r(ind);
                    reth=zeros(length(ret),1);
                    for j=1:(length(ret)-h+1)
                        reth(j)=sum(ret(j:(j+h-1)));
                    end
                    rh(ind)=reth;
                end
                df=[a,rh];
                csvwrite(sprintf('%sr%d_%d_%d.csv',dirstock,mo,M,h),df);
            end
        end
    end
end
--------------------------------------------------------------------------------
/Simu_Matlab/DGP.m:
--------------------------------------------------------------------------------
%%% This Version: July 30, 2018. @copyright Shihao Gu, Bryan Kelly and Dacheng Xiu
%%% If you use these codes, please cite the paper "Empirical Asset Pricing via Machine Learning" (2018) and "Autoencoder Asset Pricing Models."
%%% (2019)

%%% Generate Simulation Datasets %%%
%
% For each replication M this script simulates N stocks over T periods and
% writes, into <path>/SimuData_p100 and <path>/SimuData_p50:
%   c<M>.csv   the predictors [c, c*y(t)]  (Pc characteristics and their
%              interactions with the time series y)
%   r1_<M>.csv returns generated by Model 1 (linear in the predictors)
%   r2_<M>.csv returns generated by Model 2 (non-linear terms)
% The Pc=50 case reuses the first 50 characteristics and the same shocks as
% the Pc=100 case.
% NOTE(review): per the MathWorks documentation csvwrite keeps at most five
% significant digits, so the stored data are rounded accordingly.

for M=1:1
    path='./Simu'; % set your own folder path

    name1='/SimuData_p50'; % Case Pc=50
    name2='/SimuData_p100'; % Case Pc=100

    % Create the output folders only when they are missing (as the MC_*
    % scripts do), so re-running does not raise "already exists" warnings.
    outdirs={path,sprintf('%s',path,name1),sprintf('%s',path,name2)};
    for d=1:numel(outdirs)
        if ~(exist(outdirs{d},'dir')==7)
            mkdir(outdirs{d});
        end
    end

    %%% Common draws (generated once, for Pc=100) %%%
    N=200;
    m=100;
    T=180;
    stdv=0.05;
    theta_w=0.02;
    stde=0.05;

    % Characteristics: each one is an AR(1) panel with persistence rho(i),
    % replaced period by period by its cross-sectional rank scaled to (-1,1).
    rho=unifrnd(0.9,1,[m,1]);
    c=zeros(N*T,m);
    for i=1:m
        x=zeros(N,T);
        x(:,1)=normrnd(0,1,[N,1]);
        for t=2:T
            x(:,t)=rho(i).*x(:,t-1)+normrnd(0,1,[N,1]).*sqrt(1-rho(i)^2);
        end
        [~,r]=sort(x);
        x1=zeros(N,T);
        ridx=1:N;
        for k=1:T
            x1(r(:,k),k)=ridx*2/(N+1)-1;
        end
        c(:,i)=x1(:);   % stacked period by period
    end

    % Factor component: the first three characteristics act as loadings on
    % three factors vt drawn independently each period.
    time=repelem(1:T,N);
    vt=normrnd(0,1,[3,T])*stdv;
    beta=c(:,[1,2,3]);
    betav=zeros(N*T,1);
    for t=1:T
        ind=(time==t);
        betav(ind)=beta(ind,:)*vt(:,t);
    end

    % Time series y: AR(1) with coefficient q and unit unconditional variance.
    y=zeros(T,1);
    y(1)=normrnd(0,1);
    q=0.95;
    for t=2:T
        y(t)=q*y(t-1)+normrnd(0,1)*sqrt(1-q^2);
    end

    % Interactions of every characteristic with y(t).
    cy=c;
    for t=1:T
        ind=(time==t);
        cy(ind,:)=c(ind,:)*y(t);
    end

    % Idiosyncratic errors: Student t with 5 degrees of freedom.
    ep=trnd(5,[N*T,1])*stde;

    %%% Case Pc=100 first, then Case Pc=50 %%%
    casenames={name2,name1};
    casesizes=[100,50];
    for k=1:numel(casesizes)
        m=casesizes(k);
        outdir=sprintf('%s',path,casenames{k});

        % Non-zero coefficients: predictors 1, 2 and m+3.
        theta=[1,1,repelem(0,m-2),0,0,1,repelem(0,m-3)]*theta_w;
        z=horzcat(c(:,1:m),cy(:,1:m));

        %%% Model 1
        r1=z*theta'+betav+ep;
        %disp(1-sum((r1-z*theta').^2)/sum((r1-mean(r1)).^2));
        csvwrite(sprintf('%s/c%d.csv',outdir,M),z);
        csvwrite(sprintf('%s/r1_%d.csv',outdir,M),r1);

        %%% Model 2
        z(:,1)=c(:,1).^2*2;
        z(:,2)=c(:,1).*c(:,2)*1.5;
        z(:,m+3)=sign(cy(:,3))*0.6;
        r1=z*theta'+betav+ep;
        %disp(1-sum((r1-z*theta').^2)/sum((r1-mean(r1)).^2));
        csvwrite(sprintf('%s/r2_%d.csv',outdir,M),r1);
    end

    disp(M)

end
--------------------------------------------------------------------------------
/Simu_Matlab/MC_Tree.m:
--------------------------------------------------------------------------------
%%% This Version: July 30, 2018.
%%% @copyright Shihao Gu, Bryan Kelly and Dacheng Xiu
%%% If you use these codes, please cite the paper "Empirical Asset Pricing via Machine Learning" (2018) and "Autoencoder Asset Pricing Models." (2019)

%%% All tree models %%%
%
% Fits a random forest and gradient-boosted regression trees on the first
% third of the simulated sample, tunes them on the second third and reports
% R2 on the last third. Results go to <path>/Simu_p<datanum>/Tree<hh>/.
%
% Changes with respect to the previous version:
%  * the validation window now ends at floor(N*(T*2/3-hh+1)), as in MC_Reg.m;
%    it used to end at floor(N*(T*2/3+1)) and therefore contained the first N
%    rows of the out-of-sample period;
%  * roos/ris are written to '<name>.csv'; the '.csv' name used to be stored
%    in an unused variable and the files were written without extension;
%  * unused variables were removed.

MC=1; % setup MC number
datanum=50; %Or datanum=100; seperately run two cases
path='./Simu'; % set your own folder path
dirstock=strcat(path,'/SimuData_p',int2str(datanum),'/');

% R2 of the forecast yhat for y, measured against the constant forecast m.
r2score=@(yhat,y,m) 1-sum(power(yhat-y,2))/sum(power(y-m,2));

for hh=[1]
%for hh = [1 3 6 12] % correspond to monthly quarterly halfyear and annually returns

    title=strcat(path,'/Simu_p',int2str(datanum),'/Tree',int2str(hh));

    if (~(exist(title,'dir')==7) && MC==1)
        mkdir(title);
    end
    titleB = sprintf('%s',title,'/B');
    if (~(exist(titleB,'dir')==7) && MC==1)
        mkdir(titleB);
    end
    if datanum ==50
        nump=50;
    end
    if datanum ==100
        nump=100;
    end


    for M=[MC]
        for mo=[1,2]
            disp(strcat('### MCMC :',int2str(M),', Model :',int2str(mo),' ###'))
            N=200; % Number of CS tickers
            T=180; % Number of Time Periods

            %%% Read Files
            path1=strcat(dirstock,'c',int2str(M),'.csv');
            path2=strcat(dirstock,'r',int2str(mo),'_',int2str(M),'.csv');
            c=csvread(path1,0,0);
            r1=csvread(path2,0,0);

            %%% Training / validation / out-of-sample split %%%
            ind=1:floor(N*T/3);
            xtrain=c(ind,:);
            ytrain=r1(ind);
            ind=floor(N*T/3)+1:floor(N*(T*2/3-hh+1));
            xtest=c(ind,:);
            ytest=r1(ind);

            l1=size(c,1);
            l2=length(r1);
            l3=l2-sum(isnan(r1));

            ind=floor(N*T*2/3)+1:min([l1 l2 l3]);
            xoos=c(ind,:);
            yoos=r1(ind);
            clearvars c r1

            % All R2 values are measured against the training-sample mean.
            mtrain=mean(ytrain);


            %%% Start to train %%%

            r2_oos=zeros(3,1); %%% OOS R2
            r2_is=zeros(3,1); %%% IS R2


            %%% Random Forest %%%
            % Grid: predictors sampled per split (lamv) x maximum number of
            % splits per tree (lamc).
            if nump == 50
                lamv = 10:10:100;
            end
            if nump == 100
                lamv = 10:20:200;
            end
            ne=100;
            lamc = [2,4,8,16,32];
            r=zeros(length(lamv),length(lamc),3);   % pages: validation, OOS, IS

            for n1 = 1:length(lamv)
                nf=lamv(n1);
                for n2 = 1:length(lamc)
                    nn=lamc(n2);
                    clf=TreeBagger(ne,xtrain,ytrain,'Method','regression','NumPredictorsToSample',nf,'MaxNumSplits',nn);
                    r(n1,n2,1)=r2score(predict(clf,xtest),ytest,mtrain);
                    r(n1,n2,2)=r2score(predict(clf,xoos),yoos,mtrain);
                    r(n1,n2,3)=r2score(predict(clf,xtrain),ytrain,mtrain);
                end
            end

            % Keep the grid point with the highest validation R2.
            fw_2 = fw2(r(:,:,1));
            r2_oos(1)=r(fw_2(1),fw_2(2),2);
            r2_is(1)=r(fw_2(1),fw_2(2),3);
            disp(strcat('RF R2 : ',num2str(r2_oos(1),3)));


            %%% GBRT %%%
            % Grid: learning rate 10^lamv x number of boosting rounds 1..ne.
            % r holds cumulative losses of the ensemble (not R2).
            lamv=-1:0.2:0;
            ne=50;
            r=zeros(length(lamv),ne,3);             % pages: validation, OOS, IS

            for n1 = 1: length(lamv)
                lr=10^lamv(n1);
                t=templateTree('MaxNumSplits',2,'Surrogate','on');
                clf=fitensemble(xtrain,ytrain,'LSBoost',ne,t,'Type','regression','LearnRate',lr);

                e=loss(clf,xtest,ytest,'mode','cumulative');
                r(n1,1:length(e),1)=e;

                e=loss(clf,xoos,yoos,'mode','cumulative');
                r(n1,1:length(e),2)=e;

                e=loss(clf,xtrain,ytrain,'mode','cumulative');
                r(n1,1:length(e),3)=e;
            end

            % Keep the grid point with the smallest validation loss and turn
            % its losses into R2 relative to the training-mean forecast.
            fw_2 = fw2(-r(:,:,1));
            err1=mean((ytrain-mtrain).^2);
            err2=mean((yoos-mtrain).^2);
            r2_oos(2)=1-r(fw_2(1),fw_2(2),2)/err2;
            r2_is(2)=1-r(fw_2(1),fw_2(2),3)/err1;
            disp(strcat('GBRT R2 : ',num2str(r2_oos(2),3)));

            %disp(r2_oos)
            csvwrite(sprintf('%s/roos_%d_%d.csv',title,mo,M),r2_oos);

            %disp(r2_is)
            csvwrite(sprintf('%s/ris_%d_%d.csv',title,mo,M),r2_is);
        end
    end
end
--------------------------------------------------------------------------------
/Simu_Matlab/MC_Reg.m:
--------------------------------------------------------------------------------
%%% This Version: July 30, 2018. @copyright Shihao Gu, Bryan Kelly and Dacheng Xiu
%%% If you use these codes, please cite the paper "Empirical Asset Pricing via Machine Learning" (2018) and "Autoencoder Asset Pricing Models."
%%% (2019)

%%% All regression models %%%
%
% Fits 13 models on the first third of the simulated sample, tunes them on
% the second third and reports R2 on the last third:
%    1 OLS            2 OLS+H          3 PCR            4 PLS
%    5 Lasso          6 Lasso+H        7 Ridge          8 Ridge+H
%    9 Enet          10 Enet+H        11 Oracle
%   12 Group Lasso   13 Group Lasso+H
% "+H" re-fits the preceding model with the Huber-loss solver proximalH,
% started from that model's coefficients and using its tuned penalty.
% Coefficients go to <title>/B/b<mo>_<M>_<model>.csv, R2 values to
% <title>/roos_<mo>_<M>.csv and <title>/ris_<mo>_<M>.csv.
%
% Changes with respect to the previous version:
%  * roos/ris are written to '<name>.csv'; the '.csv' name used to be stored
%    in an unused variable and the files were written without extension;
%  * the repeated R2 formula, path construction and the three identical
%    Lasso/Ridge/Enet sections were factored out; unused variables removed.

MC=1; % setup MC number

datanum=50; %Or datanum=100; seperately run two cases
path='./Simu'; % set your own folder path
dirstock=strcat(path,'/SimuData_p',int2str(datanum),'/');

% R2 of the forecast yhat for y, measured against the constant forecast m.
r2score=@(yhat,y,m) 1-sum(power(yhat-y,2))/sum(power(y-m,2));


for hh=[1]
%for hh = [1 3 6 12] % correspond to monthly quarterly halfyear and annually returns

    title=strcat(path,'/Simu_p',int2str(datanum),'/Reg',int2str(hh));

    if (~(exist(title,'dir')==7) && MC==1)
        mkdir(title);
    end
    titleB = sprintf('%s',title,'/B');
    if (~(exist(titleB,'dir')==7) && MC==1)
        mkdir(titleB);
    end
    if datanum ==50
        nump=50;
    end
    if datanum ==100
        nump=100;
    end

    mu=0.2*sqrt(hh);   % Huber threshold
    tol=1e-10;         % convergence tolerance of proximal / proximalH


    % Start to MCMC
    for M=[MC]
        for mo=[1,2]

            disp(strcat('### MCMC :',int2str(M),', Model :',int2str(mo),' ###'))
            N=200; % Number of CS tickers
            T=180; % Number of Time Periods

            %%% Read Files
            path1=strcat(dirstock,'c',int2str(M),'.csv');
            path2=strcat(dirstock,'r',int2str(mo),'_',int2str(M),'.csv');
            c=csvread(path1,0,0);
            r1=csvread(path2,0,0);

            %%% Training / validation / out-of-sample split %%%
            ind=1:floor(N*T/3);
            xtrain=c(ind,:);
            ytrain=r1(ind);
            ind=floor(N*T/3)+1:floor(N*(T*2/3-hh+1));
            xtest=c(ind,:);
            ytest=r1(ind);

            l1=size(c,1);
            l2=length(r1);
            l3=l2-sum(isnan(r1));

            ind=floor(N*T*2/3)+1:min([l1 l2 l3]);
            xoos=c(ind,:);
            yoos=r1(ind);
            clearvars c r1

            %%% Demean %%%
            mtrain=mean(ytrain);
            ytrain_demean=ytrain-mean(ytrain);

            % [validation; out-of-sample; in-sample] R2 of the coefficient
            % vector b; the design matrices are passed in because they are
            % replaced by their spline expansion before the group lasso.
            evalr2=@(b,xv,xo,xi) [r2score(xv*b+mtrain,ytest,mtrain); ...
                                  r2score(xo*b+mtrain,yoos,mtrain); ...
                                  r2score(xi*b+mtrain,ytrain,mtrain)];
            % Output file for the coefficients of model number k.
            coefpath=@(k) sprintf('%s/B/b%d_%d_%d.csv',title,mo,M,k);

            %%% Calculate Sufficient Stats %%%
            % Scale every predictor by its training-sample standard deviation.
            for i=1:size(xtrain,2)
                s=std(xtrain(:,i));
                if s>0
                    xtrain(:,i)=xtrain(:,i)/s;
                    xtest(:,i)=xtest(:,i)/s;
                    xoos(:,i)=xoos(:,i)/s;
                end
            end

            XX=xtrain.'*xtrain;
            [~,S,~]=svd(XX);
            L=S(1);            % largest singular value of XX; step size is 1/L
            Y=ytrain_demean;
            XY=xtrain.'*Y;

            %%% Start to Train %%%

            r2_oos=zeros(13,1); %%% OOS R2
            r2_is=zeros(13,1); %%% IS R2

            %%% OLS %%%
            modeln=1;
            groups=0;nc=0;     % placeholders; only the group lasso uses them
            clf=fitlm(xtrain,ytrain_demean,'Intercept',false);
            r2_oos(modeln)=r2score(predict(clf,xoos)+mtrain,yoos,mtrain);
            r2_is(modeln)=r2score(predict(clf,xtrain)+mtrain,ytrain,mtrain);
            b=clf.Coefficients.Estimate;
            csvwrite(coefpath(modeln),b);
            disp(strcat('Simple OLS R2 : ',num2str(r2_oos(modeln),3)));

            %%% OLS + Huber (no penalty) %%%
            modeln=modeln+1;
            func=@soft_threshodl;
            b=proximalH(groups,nc,xtest,mtrain,ytest,b,xtrain,ytrain_demean,mu,tol,L,0,func);
            r2_oos(modeln)=r2score(xoos*b+mtrain,yoos,mtrain);
            r2_is(modeln)=r2score(xtrain*b+mtrain,ytrain,mtrain);
            csvwrite(coefpath(modeln),b);
            disp(strcat('Simple OLS+H R2 : ',num2str(r2_oos(modeln),3)));


            %%% PCR %%%
            % Candidates 1..ne-1 regress on the first j principal components
            % (eigenvectors of XX taken from the last column backwards);
            % candidate ne is the all-zero coefficient vector.
            modeln=modeln+1;
            ne=30;
            [pca_vec,~]=eig(XX);
            p1=pca_vec(:,size(pca_vec,2):-1:(size(pca_vec,2)-ne+1));
            Z=xtrain*p1;

            r=zeros(3,ne);
            B=zeros(size(xtrain,2),ne);

            for j=1:ne-1
                xx=Z(:,1:j);
                b=(inv(xx.'*xx)*xx.')*Y;
                b=p1(:,1:j)*b;
                r(:,j)=evalr2(b,xtest,xoos,xtrain);
                B(:,j)=b;
            end
            b=zeros(size(xtest,2),1);
            r(:,ne)=evalr2(b,xtest,xoos,xtrain);
            B(:,ne)=b;

            jbest=fw1(r(1,:));          % highest validation R2
            r2_oos(modeln)=r(2,jbest);
            r2_is(modeln)=r(3,jbest);
            b=B(:,jbest);
            csvwrite(coefpath(modeln),b);
            disp(strcat('PCR R2 : ',num2str(r2_oos(modeln),3)));


            %%% PLS %%%
            modeln=modeln+1;
            ne=30;
            B=pls(xtrain,ytrain_demean,ne);
            r=zeros(3,ne);

            for j=1:ne
                r(:,j)=evalr2(B(:,j),xtest,xoos,xtrain);
            end

            jbest=fw1(r(1,:));
            r2_oos(modeln)=r(2,jbest);
            r2_is(modeln)=r(3,jbest);
            b=B(:,jbest);
            csvwrite(coefpath(modeln),b);
            disp(strcat('PLS R2 : ',num2str(r2_oos(modeln),3)));


            %%% Lasso, Ridge, Elastic Net (each followed by its +H refit) %%%
            % Columns: label, shrinkage operator, log10 grid of the penalty.
            penalized={'Lasso',@soft_threshodl,-2:0.1:4; ...
                       'Ridge',@soft_threshodr,0:0.1:6; ...
                       'Enet', @soft_threshode,-2:0.1:4};

            for k=1:size(penalized,1)
                label=penalized{k,1};
                func=penalized{k,2};
                lamv=penalized{k,3};

                modeln=modeln+1;
                r=zeros(3,length(lamv));
                for j=1:length(lamv)
                    l2=10^lamv(j);
                    b=proximal(groups,nc,XX,XY,tol,L,l2,func);
                    r(:,j)=evalr2(b,xtest,xoos,xtrain);
                end

                jbest=fw1(r(1,:));
                r2_oos(modeln)=r(2,jbest);
                r2_is(modeln)=r(3,jbest);
                l2=10^lamv(jbest);

                b=proximal(groups,nc,XX,XY,tol,L,l2,func);
                csvwrite(coefpath(modeln),b);
                disp(strcat(label,' R2 : ',num2str(r2_oos(modeln),3)));

                modeln=modeln+1;
                b=proximalH(groups,nc,xtest,mtrain,ytest,b,xtrain,ytrain_demean,mu,tol,L,l2,func);
                r2_oos(modeln)=r2score(xoos*b+mtrain,yoos,mtrain);
                r2_is(modeln)=r2score(xtrain*b+mtrain,ytrain,mtrain);
                csvwrite(coefpath(modeln),b);
                disp(strcat(label,'+H R2 : ',num2str(r2_oos(modeln),3)));
            end


            %%% Oracle Models %%%
            % OLS without intercept on the three (standardized) terms that
            % enter the simulated return equation of model mo.
            modeln=modeln+1;
            if mo==1
                x=[xtrain(:,1),xtrain(:,2),xtrain(:,nump+3)];
                x1=[xoos(:,1),xoos(:,2),xoos(:,nump+3)];
            else
                x=[power(xtrain(:,1),2),xtrain(:,2).*xtrain(:,1),sign(xtrain(:,nump+3))];
                x1=[power(xoos(:,1),2),xoos(:,2).*xoos(:,1),sign(xoos(:,nump+3))];
            end
            clf=fitlm(x,ytrain,'Intercept',false);
            r2_oos(modeln)=r2score(predict(clf,x1),yoos,mtrain);
            r2_is(modeln)=r2score(predict(clf,x),ytrain,mtrain);
            disp(strcat('Oracle R2 : ',num2str(r2_oos(modeln),3)));


            %%% Group Lasso %%%
            % Replace every predictor by kn+1 quadratic-spline terms with
            % knots at its training-sample quantiles; each predictor's terms
            % form one group.
            kn=4;
            th=zeros(kn,size(xtrain,2));
            for i=1:size(xtrain,2)
                th(:,i)=quantile(xtrain(:,i),(0:kn-1)/kn);
            end
            xtrain=cut_knots_degree2(xtrain,kn,th);
            xtest=cut_knots_degree2(xtest,kn,th);
            xoos=cut_knots_degree2(xoos,kn,th);

            for i=1:size(xtrain,2)
                s=std(xtrain(:,i));
                if s>0
                    xtrain(:,i)=xtrain(:,i)/s;
                    xtest(:,i)=xtest(:,i)/s;
                    xoos(:,i)=xoos(:,i)/s;
                end
            end

            Y=ytrain_demean;
            XX=xtrain.'*xtrain;
            [~,S,~]=svd(XX);
            L=S(1);
            XY=xtrain.'*Y;

            modeln=modeln+1;
            lamv=0.5:0.1:3;
            nc=(size(XX,2))/(kn+1);
            groups=repelem(1:nc,kn+1);
            func=@soft_threshodg;
            r=zeros(3,length(lamv));

            for j=1:length(lamv)
                l2=10^lamv(j);
                b=proximal(groups,nc,XX,XY,tol,L,l2,func);
                r(:,j)=evalr2(b,xtest,xoos,xtrain);
            end

            jbest=fw1(r(1,:));
            r2_oos(modeln)=r(2,jbest);
            r2_is(modeln)=r(3,jbest);
            l2=10^lamv(jbest);

            b=proximal(groups,nc,XX,XY,tol,L,l2,func);
            csvwrite(coefpath(modeln),b);
            disp(strcat('Group Lasso R2 : ',num2str(r2_oos(modeln),3)));


            modeln=modeln+1;
            b=proximalH(groups,nc,xtest,mtrain,ytest,b,xtrain,ytrain_demean,mu,tol,L,l2,func);
            r2_oos(modeln)=r2score(xoos*b+mtrain,yoos,mtrain);
            r2_is(modeln)=r2score(xtrain*b+mtrain,ytrain,mtrain);
            csvwrite(coefpath(modeln),b);
            disp(strcat('Group Lasso+H R2 : ',num2str(r2_oos(modeln),3)));


            %disp(r2_oos)
            csvwrite(sprintf('%s/roos_%d_%d.csv',title,mo,M),r2_oos);

            %disp(r2_is)
            csvwrite(sprintf('%s/ris_%d_%d.csv',title,mo,M),r2_is);
        end
    end


end



--------------------------------------------------------------------------------