├── 聚类分析.R ├── LM_Optim.R ├── PCA_Optim.R ├── SVM_Optim.R ├── wufenwei.R ├── Cluster_Optim.R ├── EWeight_OPtim.R ├── PCA_Selection.R ├── Pics ├── MWeighted.png ├── EqualWeighted.png └── EqualWeighted2.png ├── factor_performance.R ├── factor_return_plot.R ├── .gitattributes ├── README.md ├── impact.R ├── .gitignore ├── redo_factor1.R ├── diy_factor.R ├── factor_return.R ├── 等权重打分.R └── PartII_ChoosingTheStocks.R /聚类分析.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyhong/Machine-learning-on-Stocks-Selection/HEAD/聚类分析.R -------------------------------------------------------------------------------- /LM_Optim.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyhong/Machine-learning-on-Stocks-Selection/HEAD/LM_Optim.R -------------------------------------------------------------------------------- /PCA_Optim.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyhong/Machine-learning-on-Stocks-Selection/HEAD/PCA_Optim.R -------------------------------------------------------------------------------- /SVM_Optim.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyhong/Machine-learning-on-Stocks-Selection/HEAD/SVM_Optim.R -------------------------------------------------------------------------------- /wufenwei.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyhong/Machine-learning-on-Stocks-Selection/HEAD/wufenwei.R -------------------------------------------------------------------------------- /Cluster_Optim.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyhong/Machine-learning-on-Stocks-Selection/HEAD/Cluster_Optim.R 
-------------------------------------------------------------------------------- /EWeight_OPtim.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyhong/Machine-learning-on-Stocks-Selection/HEAD/EWeight_OPtim.R -------------------------------------------------------------------------------- /PCA_Selection.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyhong/Machine-learning-on-Stocks-Selection/HEAD/PCA_Selection.R -------------------------------------------------------------------------------- /Pics/MWeighted.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyhong/Machine-learning-on-Stocks-Selection/HEAD/Pics/MWeighted.png -------------------------------------------------------------------------------- /Pics/EqualWeighted.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyhong/Machine-learning-on-Stocks-Selection/HEAD/Pics/EqualWeighted.png -------------------------------------------------------------------------------- /factor_performance.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyhong/Machine-learning-on-Stocks-Selection/HEAD/factor_performance.R -------------------------------------------------------------------------------- /factor_return_plot.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyhong/Machine-learning-on-Stocks-Selection/HEAD/factor_return_plot.R -------------------------------------------------------------------------------- /Pics/EqualWeighted2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pyhong/Machine-learning-on-Stocks-Selection/HEAD/Pics/EqualWeighted2.png -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | 7 | # Standard to msysgit 8 | *.doc diff=astextplain 9 | *.DOC diff=astextplain 10 | *.docx diff=astextplain 11 | *.DOCX diff=astextplain 12 | *.dot diff=astextplain 13 | *.DOT diff=astextplain 14 | *.pdf diff=astextplain 15 | *.PDF diff=astextplain 16 | *.rtf diff=astextplain 17 | *.RTF diff=astextplain 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Machine-learning-on-Stocks-Selection 2 | 机器学习选股模型 3 | 利用多种数据挖掘方法,建立Alpha多因子选股中性策略。效果其实还好嘛…… 4 | 5 | ![Alt text](https://raw.githubusercontent.com/pyhong/Machine-learning-on-Stocks-Selection/master/Pics/EqualWeighted.png) 6 | 7 | ![Alt text](https://raw.githubusercontent.com/pyhong/Machine-learning-on-Stocks-Selection/master/Pics/EqualWeighted2.png) 8 | 9 | ![Alt text](https://raw.githubusercontent.com/pyhong/Machine-learning-on-Stocks-Selection/master/Pics/MWeighted.png) 10 | 11 | 线性回归结果这么好也是吓了一跳,重新最小化风险组合下,支持向量回归结果也还可以嘛。 12 | 但是选股结果无法用传统经济金融意义去理解,确实很难去说服别人。 13 | PS:BP神经网络效果永远都是差的不得了,干脆把代码删掉好了(手动再见)。 14 | 个人感觉除非去到深度学习里面的各种神经网络,不然BPNN这种坑爹货在哪里都比不上SVM、randomforest,甚至连线性regularization都比不上。 15 | -------------------------------------------------------------------------------- /impact.R: -------------------------------------------------------------------------------- 1 | library(WindR) 2 | w.start() 3 | code<-w.wset('IndexConstituent','date=20151105;windcode=000300.SH')$Data$wind_code 4 | 5 | #估计冲击成本函数 6 | lm.sol<-lapply(code,function(x) 7 | { 8 | 
tp<-w_wsd_data<-w.wsd(x,"pct_chg,volume","2014-01-01","2014-12-31")$Data 9 | chg<-w_wsd_data$PCT_CHG[-1] 10 | vch<-w_wsd_data$VOLUME[-1]-w_wsd_data$VOLUME[-nrow(w_wsd_data)] 11 | r2<-0 12 | i<-0 13 | fit<-NULL 14 | try({for(k in seq(0.2,2,by=0.2)) 15 | { 16 | r2.old<-r2 17 | i.old<-i 18 | i<-k 19 | lmsol<-lm(chg~I(vch^k)+0) 20 | r2<-summary(lmsol)$adj.r.squared 21 | if(r220100930)&as.numeric(stock_log$date)<=20150630),] 7 | what<<-1 8 | lapply(factor_list,function(x) 9 | { 10 | tmp_LReturn <- stock_log 11 | x$date <- format(as.Date(x$date),"%Y%m%d") 12 | index <- match(dtime,x$date) 13 | x <- x[index,] 14 | x <- x[as.numeric(x$date)<=20140930] 15 | x[is.na(x)]<--Inf 16 | for(i in 2:ncol(x)) 17 | { 18 | j <- 1 19 | while(j<=(nrow(x)-1)) 20 | { 21 | count <- 1 22 | while(x[j,i]==x[j+count,i]&x[j,i]!=-Inf&(j+count<=nrow(x))) 23 | { 24 | x[j+count,i] <- -Inf 25 | count <- count+1 26 | } 27 | j <- count+j 28 | } 29 | } 30 | for(i in 1:(nrow(x)-1)) 31 | { 32 | index<-stock_log$date>=x$date[i]&stock_log$date=day.begin&tt$date<=day.end),] 17 | stock_date <<- union(stock_date,tt[,1]) 18 | return(tt) 19 | }) 20 | stock_date <- sort(stock_date) 21 | stock_name <- substr(stock_dir,nchar(stock_dir)-9,nchar(stock_dir)-4) 22 | stock_return <- data.frame(matrix(NA,nrow=length(stock_date),ncol=length(stock_name)+1)) 23 | colnames(stock_return) <- c("date",stock_name) 24 | stock_return[,1] <- stock_date 25 | stock_date_index <- rep(1,length(stock_name)) 26 | list_index <<-2 27 | lapply(stock_list,function(x) 28 | { 29 | index <- match(x$date,stock_date) 30 | stock_return[index,list_index] <<- x[,2] 31 | list_index <<- list_index + 1 32 | }) 33 | stock_log <- stock_return[-1,] 34 | stock_log[,-1] <-log(stock_return[-1,-1]/stock_return[-nrow(stock_return),-1]) 35 | date_1 <- stock_date[-length(stock_date)] 36 | date_2 <- stock_date[-1] 37 | popup_date <- c(stock_date[1],date_1[which(substr(date_1,5,6)!=substr(date_2,5,6))-1]) 38 | popup_date <- 
c(day.begin.factor,popup_date[c(1:length(popup_date)-1)%%3==0],day.end.factor)
# One row per rebalancing period: d1 is the date whose factor values are used,
# d2/d3 bound the window of log returns that gets reordered (the last window
# is open-ended, hence Inf).
popup_date <- data.frame(d1=popup_date[-length(popup_date)],d2=popup_date[-1],d3=c(popup_date[-(1:2)],Inf))
# Compute factor returns.
factor_return <<- data.frame(matrix(NA,nrow=length(stock_date),ncol=length(stock_name)+1))
factor_stockrank <<- data.frame(matrix(NA,nrow=length(stock_date),ncol=length(stock_name)+1))
# Position of every price-file stock inside the first factor table; characters
# 2-7 of a factor column name are taken as the stock code.
rank.of.stock <- match(stock_name,substr(colnames(factor_list[[1]]),2,7)[-1])
# Reorder the stock columns of every factor table to the price-file order and
# turn the date column into a numeric yyyymmdd value.
factor_list<-lapply(factor_list,function(x)
{
  x<-x[,c(1,rank.of.stock+1)]
  x[,1]<-as.numeric(format(as.Date(x$date),"%Y%m%d"))
  return(x)
})
name_i <<- 1
# For every factor: within each rebalancing window, permute the stock columns
# of the log-return table according to the factor values on d1, then write the
# permuted table to disk.
lapply(factor_list,function(x)
{
  stock_log_tmp <<- stock_log
  apply(popup_date,1,function(y)
  {
    x <- x[x$date==y[1],]
    x <- x[-1]
    # NOTE(review): indexing columns by rank(x) applies the inverse of the
    # sorting permutation; if the intent is "column k holds the k-th ranked
    # stock", order(x) is probably what is wanted. Confirm before changing.
    x_rank <- rank(x)
    index_tmp <- (stock_log_tmp$date>=y[2])&(stock_log_tmp$date<=y[3])
    # NOTE(review): index_tmp is a logical mask as long as the table, so this
    # length test is TRUE whenever the table has rows; any(index_tmp) looks
    # like the intended check.
    if(length(index_tmp)>0)
    {
      tmp_log <- stock_log_tmp[index_tmp,]
      tmp_log <- tmp_log[,c(1,x_rank+1)]
      stock_log_tmp[index_tmp,] <<- tmp_log
    }
  })
  write.csv(stock_log_tmp,paste("output/1/","未分五档",factor_dir_name[name_i],sep=""),row.names=F)
  name_i <<- name_i+1
})


--------------------------------------------------------------------------------
/等权重打分.R:
--------------------------------------------------------------------------------
load("E:/金融建模/data/data_for_trainging_and_prediction.RData")

# Extract the market-data factors used for scoring.
S1 <- FactorList2[[1]]  # PB (price-to-book)
S2 <- FactorList2[[2]]  # PCF (price-to-cash-flow)
S3 <- FactorList2[[4]]  # PS (price-to-sales)
S4 <- FactorList2[[6]]  # turnover rate
S5 <- FactorList2[[10]] # PE (TTM)
S6 <- FactorList2[[14]] # total market capitalisation

# S6 stores numbers with thousands separators ("1,234,567"); strip the commas
# so the values can be converted with as.numeric() later on. Every column is
# processed, whatever their count, and missing values stay NA instead of
# becoming the string "NA".
S6list <- lapply(seq_len(ncol(S6)), function(i) {
  gsub(",", "", S6[, i], fixed = TRUE)
})
temp <- as.data.frame(do.call(cbind, S6list))
colnames(temp) <- colnames(S6)
S6 <- temp

F1
<- FactorList1[[6]]   # net operating cash flow per share
F2 <- FactorList1[[9]]  # earnings per share (EPS)
F3 <- FactorList1[[12]] # equity multiplier
F4 <- FactorList1[[16]] # debt-to-asset ratio
F5 <- FactorList1[[17]] # total liabilities (year-on-year growth)

# The market-data tables carry bare stock codes as column names; prefix them
# with "X" so they can be matched against the F* tables.
colnames(S1) <- paste0("X", colnames(S1))
colnames(S2) <- paste0("X", colnames(S2))
colnames(S3) <- paste0("X", colnames(S3))
colnames(S4) <- paste0("X", colnames(S4))
colnames(S5) <- paste0("X", colnames(S5))
colnames(S6) <- paste0("X", colnames(S6))

# Keep only the rows needed for the first half of 2015 (rows 53 to 58 of each
# factor table).
FacName <- c(paste0("S", 1:6), paste0("F", 1:5))
for (fac in FacName) {
  assign(fac, get(fac)[53:58, ])
}

# Standardise one factor table.
#
# dt: data frame whose first column is the date and whose remaining columns
#     are stocks (one row per month).
# Returns a data frame with one row per stock: the stock name in the first
# column followed by one z-scored column per month (each month is centred and
# scaled across stocks).
DataScale <- function(dt) {
  dtt <- t(dt[, -1])
  scaled <- apply(dtt, 2, function(x) scale(as.numeric(x)))
  cbind(row.names(dtt), as.data.frame(scaled))
}

# Apply the standardisation to all factors.
for (fac in FacName) {
  assign(fac, DataScale(get(fac)))
}

# Merge all factors into one wide table, aligning the F* tables to the stock
# order of S1.
MaIndex <- match(S1[, 1], F1[, 1])
temp <- cbind(S1, S2[, -1], S3[, -1], S4[, -1], S5[, -1], S6[, -1],
              F1[MaIndex, -1], F2[MaIndex, -1], F3[MaIndex, -1],
              F4[MaIndex, -1], F5[MaIndex, -1])

# Each factor contributes six consecutive month columns; Index points at the
# first month of every factor.
Index <- seq(from = 2, to = 67, by = 6)
fac1 <- temp[, c(1, Index)]     # factor exposures for the January 2015 portfolio
fac2 <- temp[, c(1, Index + 1)] # factor exposures for the February 2015 portfolio
fac3 <- temp[, c(1, Index + 2)]
fac4 <- temp[, c(1, Index + 3)]
fac5 <- temp[, c(1, Index + 4)]
fac6 <- temp[, c(1, Index + 5)]

# Equal-weight scoring: pick the best-scoring stocks of one month.
#
# Fdata:    data frame with the stock name in column 1 and one standardised
#           factor per remaining column.
# top_n:    number of stocks to return (default 50).
# positive: columns that add to the score (default 8 and 9); every other
#           factor column is subtracted.
# Returns the names of the `top_n` highest-scoring stocks, best first. When
# fewer than `top_n` stocks are available all of them are returned (no NA
# padding).
Rank <- function(Fdata, top_n = 50, positive = c(8, 9)) {
  Score <- rowSums(Fdata[, positive]) - rowSums(Fdata[, -c(1, positive)])
  Stock <- cbind(as.data.frame(Fdata[, 1]), Score)
  best <- head(order(Stock[, 2], decreasing = TRUE), top_n)
  as.character(Stock[best, 1])
}

TELReturn <- t(ELReturn)
ELReturn1 <- TELReturn[match(Rank(fac1), row.names(TELReturn)), 53]
ELReturn2 <- TELReturn[match(Rank(fac2),
row.names(TELReturn)), 54]
ELReturn3 <- TELReturn[match(Rank(fac3), row.names(TELReturn)), 55]
ELReturn4 <- TELReturn[match(Rank(fac4), row.names(TELReturn)), 56]
ELReturn5 <- TELReturn[match(Rank(fac5), row.names(TELReturn)), 57]
ELReturn6 <- TELReturn[match(Rank(fac6), row.names(TELReturn)), 58]

# Mean excess log return of each monthly portfolio. Missing returns are
# ignored in every month; previously only two of the six months did so, so a
# single NA in any other month would have turned the whole cumulative curve
# into NA.
ELR <- vapply(
  list(ELReturn1, ELReturn2, ELReturn3, ELReturn4, ELReturn5, ELReturn6),
  function(r) mean(as.numeric(r), na.rm = TRUE),
  numeric(1)
)
plot(cumsum(ELR) * 100, type = "l")

# Export the selected stocks, one column per month.
set <- cbind(Rank(fac1), Rank(fac2), Rank(fac3), Rank(fac4), Rank(fac5), Rank(fac6))
set <- as.data.frame(set)
write.csv(set, "等权重方法选股结果.csv")
--------------------------------------------------------------------------------
/PartII_ChoosingTheStocks.R:
--------------------------------------------------------------------------------
# Stock selection by regression
# SVR and LM

#Get the list of components of HS300
# library(WindR)
# w.start()
# w_wset_data <- w.wset('SectorConstituent','date=20151108;windcode=000300.SH')$Data
# write.csv(w_wset_data,"HS300成份股.csv",row.names=F)

#Get the relative log return to HS300
MonthLogReturn <- read.csv("hs300月对数收益率.csv", header = TRUE)
# A log return of exactly 0 is treated as "no data" and replaced by NA.
# Done in one vectorised step instead of a per-cell apply()/ifelse().
MonthValues <- as.matrix(MonthLogReturn[, -1])
MonthValues[!is.na(MonthValues) & MonthValues == 0] <- NA
MonthLogReturn[, -1] <- MonthValues
StockName <- substr(colnames(MonthLogReturn)[-1], 2, 7)
# HS300Return <- w.wsd("000300.SH","pct_chg","2010-05-31","2015-06-30","Period=M")$Data
# HS300LogReturn <- HS300Return
# HS300LogReturn[,-1] <- log(1+HS300Return[,-1]/100)
# write.csv(HS300LogReturn,"HS300指数对数收益率.csv",row.names=F)
HS300LogReturn <- read.csv("HS300指数对数收益率.csv", header = TRUE, stringsAsFactors = FALSE)
# Excess log return = stock log return minus index log return, month by month.
ELReturn <- MonthLogReturn
ELReturn[, -1] <- apply(MonthLogReturn[, -1], 2, function(x) {
  x - HS300LogReturn[, -1]
})
ELReturn <-
ELReturn[as.numeric(format(as.Date(ELReturn$DATETIME), "%Y%m%d")) >= 20100930, ]

# ---- Factor tables ---------------------------------------------------------
# Month-end dates: two extra leading months followed by every date of the
# index return file. stockdate1 / stockdate2 select the rows kept from the
# two factor sources.
stockdate <- c("20100331", "20100430",
               format(as.Date(HS300LogReturn$DATETIME), "%Y%m%d"))
stockdate1 <- stockdate[as.numeric(stockdate) <= 20141231]
stockdate2 <- stockdate[as.numeric(stockdate) >= 20100831 &
                          as.numeric(stockdate) <= 20150531]

FactorDir1 <- dir("试题/附录2:300支股票对应的财务指标/", full.names = TRUE, pattern = ".csv")
FactorName1 <- dir("试题/附录2:300支股票对应的财务指标/", full.names = FALSE, pattern = ".csv")
FactorList1 <- lapply(FactorDir1, function(x) {
  tplist <- read.csv(x, header = TRUE)
  tplist$date <- format(as.Date(tplist$date), "%Y%m%d")
  tplist[match(stockdate1, tplist$date), ]
})

FactorDir2 <- dir("试题/自找数据/factor/", full.names = TRUE)
FactorName2 <- dir("试题/自找数据/factor/", full.names = FALSE)
FactorList2 <- lapply(FactorDir2, function(x) {
  # The first line holds the stock codes; the data starts after three lines.
  tplist <- read.csv(x, header = FALSE, stringsAsFactors = FALSE, skip = 3)
  header_row <- read.csv(x, header = FALSE, stringsAsFactors = FALSE, nrows = 1)
  header_row[1] <- "date"
  colnames(tplist) <- header_row
  tplist$date <- format(as.Date(tplist$date), "%Y%m%d")
  tplist[match(stockdate2, tplist$date), ]
})

# Select `rows` of a factor table and reorder its stock columns so that column
# i refers to the same stock as column i of ELReturn. `prefix` is prepended to
# the factor table's stock names before matching.
AlignFactor <- function(tp, rows, stocks, prefix = "") {
  tp[rows, c(1, match(stocks, paste0(prefix, colnames(tp[, -1]))) + 1)]
}

ReturnDate <- as.numeric(format(as.Date(ELReturn$DATETIME), "%Y%m%d"))
StockColumns <- colnames(ELReturn[, -1])

# Row / column labels of the assembled tables: the response followed by one
# entry per factor file (file name without its 4-character extension).
ColumnLabels <- c("超额对数收益率",
                  substr(FactorName1, 1, nchar(FactorName1) - 4),
                  substr(FactorName2, 1, nchar(FactorName2) - 4))

# ---- Training table --------------------------------------------------------
# One row per (stock, month) up to 2014-12-31: excess return followed by every
# factor. The factor tables are aligned once instead of once per stock, and
# the per-stock pieces are bound in a single call rather than grown by rbind.
TrainRows <- ReturnDate <= 20141231
TrainFactors <- c(
  lapply(FactorList1, AlignFactor, rows = TrainRows, stocks = StockColumns),
  lapply(FactorList2, AlignFactor, rows = TrainRows, stocks = StockColumns,
         prefix = "X")
)
FinalTable <- do.call(rbind, lapply(seq_len(ncol(ELReturn))[-1], function(i) {
  pieces <- c(list(matrix(ELReturn[TrainRows, i], ncol = 1)),
              lapply(TrainFactors, function(tp) tp[, i]))
  do.call(cbind, pieces)
}))

# Columns kept for modelling (picked by hand from the accompanying Excel
# sheet); entry 1 is the response itself.
EffectiveIndex <- c(1, 22, 23, 25, 27, 7, 10, 13, 31, 17, 18, 35)

# ---- Prediction tables -----------------------------------------------------
# One CSV per month of 2015: stocks in rows, response + selected factors in
# columns.
PredictDate <- stockdate[stockdate > 20141231]
for (i in PredictDate) {
  index <- ReturnDate == i
  TmpTable <- ELReturn[index, ]
  header <- colnames(TmpTable)
  for (tp in FactorList1) {
    tp <- AlignFactor(tp, index, StockColumns)
    colnames(tp) <- header
    TmpTable <- rbind(TmpTable, tp, deparse.level = 0)
  }
  for (tp in FactorList2) {
    tp <- AlignFactor(tp, index, StockColumns, prefix = "X")
    colnames(tp) <- header
    TmpTable <- rbind(TmpTable, tp, deparse.level = 0)
  }
  rownames(TmpTable) <- ColumnLabels
  TmpTable <- TmpTable[EffectiveIndex, ]
  write.csv(t(TmpTable), paste0("DataForRegression_Predict/", i, ".csv"))
}

colnames(FinalTable) <- ColumnLabels

#Choose the effective factors
# EffectiveIndex already starts with the response column, so it is used as-is.
# Prepending another 1 duplicated the response: the copy then acted as a
# predictor and left the training table one column wider than the prediction
# tables.
EffectiveTable <- FinalTable[, EffectiveIndex]
# EffectiveTable <- FinalTable
EffectiveTable <- EffectiveTable[complete.cases(EffectiveTable), ]
write.csv(EffectiveTable, "DataForRegression_Train.csv", row.names = FALSE)

EffectiveTable <- read.csv("DataForRegression_Train.csv",
                           stringsAsFactors = FALSE, header = TRUE)
# Keep the training moments so the prediction features can be standardised
# with exactly the same centre and scale.
colmeans <- colMeans(EffectiveTable)
colsd <- apply(EffectiveTable, 2, sd)

EffectiveTable <- data.frame(scale(EffectiveTable, center = TRUE, scale = TRUE))

PredictDir <- dir("DataForRegression_Predict/", full.names = TRUE)
PredictTable <- lapply(PredictDir, function(x) {
  # Line 1 is the header, line 2 the date row; stock rows start at line 3.
  tp <- read.csv(x, header = FALSE, skip = 2, stringsAsFactors = FALSE)
  header_row <- read.csv(x, header = FALSE, nrows = 1, stringsAsFactors = FALSE)
  header_row[1, 1] <- "code"
  colnames(tp) <- header_row[1, ]
  tp
})

# Score every prediction month with a fitted model.
#
# model:         fitted model accepted by predict(model, newdata = ...).
# tables:        list of monthly tables (column 1 stock code, column 2
#                realised excess return, remaining columns raw factors).
# feature_names: predictor names expected by the model.
# center, spread: training mean / standard deviation of every predictor.
# top_n:         number of stocks selected per month.
#
# Returns a list with `stock` (data frame, one column of selected codes per
# month) and `monthly` (mean realised excess return of each selection).
#
# The features are standardised column by column with scale(). Writing
# `data_frame - vector` instead recycles the vector down the rows, so each
# cell would be centred with the wrong column's mean.
EvaluateModel <- function(model, tables, feature_names, center, spread,
                          top_n = 50) {
  stock <- data.frame(matrix(0, ncol = length(tables), nrow = top_n))
  monthly <- numeric(length(tables))
  for (k in seq_along(tables)) {
    x <- tables[[k]]
    x <- x[complete.cases(x), ]
    y <- as.data.frame(scale(as.matrix(x[, -(1:2)]),
                             center = center, scale = spread))
    colnames(y) <- feature_names
    pre <- predict(model, newdata = y)
    index <- order(pre, decreasing = TRUE)[seq_len(top_n)]
    stock[, k] <- substr(x[index, 1], 2, 10)
    monthly[k] <- mean(x[index, 2], na.rm = TRUE)
  }
  list(stock = stock, monthly = monthly)
}

FeatureNames <- colnames(EffectiveTable)[-1]

#Linear Regression
lm.sol <- lm(超额对数收益率 ~ ., data = EffectiveTable)
RegressionResult.lm <- predict(lm.sol, newdata = EffectiveTable[, -1])
plot(RegressionResult.lm, EffectiveTable$超额对数收益率)
# The linear model is now fed standardised features like the other two models;
# it used to receive the raw factors although it was fitted on scaled data.
lm.eval <- EvaluateModel(lm.sol, PredictTable, FeatureNames,
                         colmeans[-1], colsd[-1])
lmStock <- lm.eval$stock
PReturn <- lm.eval$monthly
write.csv(lmStock, "回归结果/lm_stock.csv", row.names = FALSE)
result.lm <- cbind(PReturn, cumsum(PReturn))
colnames(result.lm) <- c("每月超额收益率", "累积超额收益率")
write.csv(result.lm, "回归结果/lm_return.csv")
plot(cumsum(PReturn), type = "l", col = "red",
     xlab = "2015年前六月", ylab = "累计收益率", main = "线性回归")

#SVM
if (!requireNamespace("e1071", quietly = TRUE)) {
  install.packages("e1071")
}
library(e1071)
svm.sol <- svm(超额对数收益率 ~ ., data = EffectiveTable, cost = 1.4)
RegressionResult.svm <- predict(svm.sol, EffectiveTable[, 2:ncol(EffectiveTable)])
plot(RegressionResult.svm, EffectiveTable$超额对数收益率)
svm.eval <- EvaluateModel(svm.sol, PredictTable, FeatureNames,
                          colmeans[-1], colsd[-1])
svmStock <- svm.eval$stock
PReturn <- svm.eval$monthly
write.csv(svmStock, "回归结果/svm_stock.csv", row.names = FALSE)
result.svm <- cbind(PReturn, cumsum(PReturn))
colnames(result.svm) <- c("每月超额收益率", "累积超额收益率")
write.csv(result.svm, "回归结果/svm_return.csv")
plot(cumsum(PReturn), type = "l", col = "red",
     xlab = "2015年前六月", ylab = "累计收益率", main = "SVR回归")

#randomForest
if (!requireNamespace("randomForest", quietly = TRUE)) {
  install.packages("randomForest")
}
library(randomForest)
rdForest.sol <- randomForest(超额对数收益率 ~ ., data = EffectiveTable)
RegressionResult.rdForest <- predict(rdForest.sol,
                                     newdata = EffectiveTable[, 2:ncol(EffectiveTable)])
plot(EffectiveTable$超额对数收益率, RegressionResult.rdForest)
rdF.eval <- EvaluateModel(rdForest.sol, PredictTable, FeatureNames,
                          colmeans[-1], colsd[-1])
rdFStock <- rdF.eval$stock
PReturn <- rdF.eval$monthly
write.csv(rdFStock, "回归结果/rdF_stock.csv", row.names = FALSE)
result.rdF <- cbind(PReturn, cumsum(PReturn))
colnames(result.rdF) <- c("每月超额收益率", "累积超额收益率")
write.csv(result.rdF, "回归结果/rdF_return.csv")
plot(cumsum(PReturn), type = "l", col = "red",
     xlab = "2015年前六月", ylab = "累计收益率", main = "RandomForest回归")
--------------------------------------------------------------------------------