# -- Concatenated repo dump, chunk 1: README.md + read_by2.py (reconstructed) --
#
# README.md: "Quantitative-transaction — SVM stock-price prediction weighted
# by AdaBoost" (translated from the original Chinese).
#
# read_by2.py: turn one stock's intraday CSV (48 bars per trading day) into a
# single flattened feature vector per day plus a next-open direction label.

import csv

import numpy as np  # kept from the original file, although unused here


def process_execl(path):
    """Parse an intraday CSV into per-day feature vectors and binary labels.

    Expected row layout after a header row: row[1] is the intraday bar counter
    (1..48), row[2]=open, row[3]=close, row[4]=high, row[5]=low, and row[6:]
    are extra columns carried through verbatim.
    NOTE(review): column meanings inferred from the ratio formulas below —
    confirm against the actual CSV schema.

    For every bar, four relative features are computed:
        (open - prev_close)/prev_close, (close-open)/open,
        (high-open)/open, (low-open)/open
    followed by the raw extra columns.  When bar 48 of a day is reached and
    the day's counter sequence stayed consistent, the day's per-bar feature
    lists are flattened into one vector and a label is emitted: 1 when the
    next row's open is above the current close, else 0.

    Returns:
        (features, labels): list of flattened per-day feature lists, and the
        matching list of 0/1 labels.
    """
    with open(path, encoding='utf-8') as fh:
        data = list(csv.reader(fh))

    features = []
    labels = []
    day_rows = []       # per-bar feature lists for the day in progress
    expected_bar = 1    # where the intraday counter should be (1..48)
    day_ok = True       # False once the counter sequence breaks for this day
    i = 1               # index of the current row inside `data`

    for raw in data[1:]:
        row = list(map(float, raw))

        # Change of this bar's open versus the previous row's close.  The
        # previous row is still a list of strings (only `row` was converted),
        # so it must be parsed; the header row simply fails the conversion.
        # The original guarded with `type(data[i-1][3]) == float`, which is
        # always False for csv strings, so this feature was always 0 — fixed.
        try:
            prev_close = float(data[i - 1][3])
            open_change = (row[2] - prev_close) / prev_close
        except (ValueError, ZeroDivisionError):
            open_change = 0.0

        if expected_bar != 48:
            if expected_bar != row[1]:
                day_ok = False
                expected_bar = row[1]
            day_rows.append([open_change,
                             (row[3] - row[2]) / row[2],
                             (row[4] - row[2]) / row[2],
                             (row[5] - row[2]) / row[2]])
            day_rows.append(row[6:])
            expected_bar += 1
        else:
            if expected_bar != row[1]:
                day_ok = False
                expected_bar = row[1]
            if day_ok:
                day_rows.append([open_change,
                                 (row[3] - row[2]) / row[2],
                                 (row[4] - row[2]) / row[2],
                                 (row[5] - row[2]) / row[2]])
                day_rows.append(row[6:])
                features.append(sum(day_rows, []))
                # Label from the next row's open.  The original bounds check
                # was truncated in the dump ("if i= 0.002" fragment); this
                # reconstructs the obvious guard, falling back to the current
                # close (label 0) on the final row — TODO confirm intent.
                nxt = float(data[i + 1][2]) if i + 1 < len(data) else row[3]
                if (nxt - row[3]) / row[3] > 0:
                    labels.append(1)
                else:
                    labels.append(0)
            day_rows = []
            expected_bar = 1
            day_ok = True
        i += 1
    return features, labels
# -- SVM.py, chunk 1: AdaBoost-style weighting of several SVM classifiers --

from math import log
import glob

import matplotlib.pyplot as plt  # kept from the original file, although unused
import sklearn.preprocessing
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

from read_by2 import *


def error_percentage(train_result, y_train, weight):
    """Weighted misclassification rate of `train_result` against `y_train`.

    `weight` is indexable by class label (e.g. `clf.class_weight_`): an error
    on a class-1 sample counts `weight[1]`, on a class-0 sample `weight[0]`,
    and the total is normalised by the weighted sample count.

    Returns 0.0 for an empty result list instead of dividing by zero.
    """
    weighted_error = 0.0
    n_pos = 0
    n_neg = 0
    for predicted, actual in zip(train_result, y_train):
        if actual == 1:
            n_pos += 1
            if predicted != actual:
                weighted_error += weight[1]
        else:
            n_neg += 1
            if predicted != actual:
                weighted_error += weight[0]
    denom = n_pos * weight[1] + n_neg * weight[0]
    return weighted_error / denom if denom else 0.0


def am_coefficient(clf, x_train, y_train):
    """AdaBoost weight a_m = 0.5 * ln((1 - e) / e) for a fitted classifier.

    The training error `e` is clamped away from 0 and 1 so that a perfect
    (or hopeless) classifier no longer raises ZeroDivisionError/ValueError
    inside `log`; negative weights are floored at 0 as in the original code.
    """
    weight = clf.class_weight_
    train_result = clf.predict(x_train)
    e = error_percentage(train_result, y_train, weight)
    e = min(max(e, 1e-10), 1 - 1e-10)  # keep log() well defined
    a = 0.5 * log((1 - e) / e)
    return max(a, 0.0)


if __name__ == "__main__":
    feature = []
    label = []
    # NOTE(review): hard-coded absolute path from the original author's
    # machine — parameterize before reuse.
    for file in glob.glob("F:\\study in school\\量化交易\\1\\000005.csv"):
        feature1, label1 = process_execl(file)
        feature += feature1[5:-1]
        label += label1[5:-1]

    feature = sklearn.preprocessing.MinMaxScaler().fit_transform(feature)
    pca = PCA(n_components=40)
    feature = pca.fit_transform(feature)
    print(pca.explained_variance_)

    x_train, x_test, y_train, y_test = train_test_split(feature, label, test_size=0.2)

    # Four weak learners: rbf/sigmoid kernels, with and without class balancing.
    clf1_rbf = SVC(kernel='rbf', probability=True, class_weight='balanced')
    clf1_rbf.fit(x_train, y_train)
    a1 = am_coefficient(clf1_rbf, x_train, y_train)

    clf2_rbf = SVC(kernel='rbf', probability=True)
    clf2_rbf.fit(x_train, y_train)
    a2 = am_coefficient(clf2_rbf, x_train, y_train)

    clf3_sigmoid = SVC(kernel='sigmoid', probability=True, class_weight='balanced')
    clf3_sigmoid.fit(x_train, y_train)
a3=am_coefficient(clf3_sigmoid,x_train,y_train) 62 | 63 | clf4_sigmoid = SVC(kernel='sigmoid', probability=True) 64 | clf4_sigmoid.fit(x_train, y_train) 65 | a4=am_coefficient(clf4_sigmoid,x_train,y_train) 66 | 67 | a_total=a1+a2+a3+a4 68 | result=clf1_rbf.predict_proba(x_test)*a1+clf2_rbf.predict_proba(x_test)*a2\ 69 | +clf3_sigmoid.predict_proba(x_test)*a3+clf4_sigmoid.predict_proba(x_test)*a4 70 | # print(result) 71 | result=result[:,1] 72 | real_reult=[] 73 | error=0 74 | error_1=0 75 | len_1=0 76 | for i in range(len(result)): 77 | if result[i]>a_total*0.5: 78 | real_reult.append(1) 79 | len_1+=1 80 | if y_test[i]!=1: 81 | error+=1 82 | error_1+=1 83 | else: 84 | real_reult.append(0) 85 | if y_test[i]!=0: 86 | error+=1 87 | 88 | print("rise recall:%f"%(error_1/len_1)) 89 | print(error/len(result)) 90 | print(y_test) 91 | print(real_reult) 92 | 93 | 94 | 95 | # x_train, x_test, y_train, y_test = train_test_split(feature, label, test_size=0.2) 96 | # clf_rbf = SVC(kernel='rbf',probability=True,class_weight='balanced') 97 | # clf_rbf.fit(x_train, y_train) 98 | # weight=clf_rbf.class_weight_ 99 | # print(clf_rbf.class_weight_) 100 | # print(len(x_train)) 101 | # print(len(clf_rbf.support_vectors_)) 102 | # train_result=clf_rbf.predict(x_train) 103 | # result=clf_rbf.predict(x_test) 104 | # print(y_test) 105 | # print(result) 106 | # 107 | # error= 0 108 | # percentage_1 = 0 109 | # for i in range(len(train_result)): 110 | # if train_result[i]!=y_train[i]: 111 | # error+=1 112 | # if y_train[i]==1: 113 | # percentage_1+=1 114 | # trainerror_percentage=error/len(train_result) 115 | # percentage_1=percentage_1/len(train_result) 116 | # print("training中1的比例为%f"%percentage_1) 117 | # print(trainerror_percentage) 118 | # 119 | # error = 0 120 | # percentage_1 = 0 121 | # for i in range(len(result)): 122 | # if result[i]!=y_test[i]: 123 | # if y_test[i]==0: 124 | # error+=weight[0]*1 125 | # else: 126 | # error+=weight[1]*1 127 | # if y_test[i]==1: 128 | # 
percentage_1+=1 129 | # testerror_percentage=error/(percentage_1*weight[1]+(len(result)-percentage_1)*weight[0]) 130 | # percentage_1 = percentage_1 / len(result) 131 | # print("test中1的比例为%f" % percentage_1) 132 | # print(testerror_percentage) 133 | # 134 | # clf_sigmoid = SVC(kernel='sigmoid',probability=True,class_weight=dict(zip([0,1],[1,1]))) 135 | # clf_sigmoid.fit(x_train, y_train) 136 | # print(clf_rbf.class_weight_) 137 | # print(len(x_train)) 138 | # print(len(clf_sigmoid.support_vectors_)) 139 | # train_result = clf_sigmoid.predict(x_train) 140 | # result = clf_sigmoid.predict(x_test) 141 | # print(result) 142 | # print(y_test) 143 | # error = 0 144 | # 145 | # for i in range(len(train_result)): 146 | # if train_result[i] != y_train[i]: 147 | # error += 1 148 | # trainerror_percentage = error / len(train_result) 149 | # print(trainerror_percentage) 150 | # 151 | # error = 0 152 | # percentage_1 = 0 153 | # for i in range(len(result)): 154 | # if result[i]!=y_test[i]: 155 | # if y_test[i]==0: 156 | # error+=weight[0]*1 157 | # else: 158 | # error+=weight[1]*1 159 | # if y_test[i]==1: 160 | # percentage_1+=1 161 | # testerror_percentage=error/(percentage_1*weight[1]+(len(result)-percentage_1)*weight[0]) 162 | # percentage_1 = percentage_1 / len(result) 163 | # print("test中1的比例为%f" % percentage_1) 164 | # print(testerror_percentage) 165 | 166 | --------------------------------------------------------------------------------