├── .gitignore ├── README.md ├── arimaPredicter.py ├── arima_knn ├── KNN_interface.py ├── KNN_interface.pyc ├── Version_5.py ├── arima_knn.py └── modify_submit.py ├── arima_xgboost ├── arima_xgboost.py ├── arima_xgboost_imporve.py ├── arima_xgboost_multi.py └── statanalysis.py ├── data ├── datam.csv ├── example.csv ├── lcdatam.csv ├── submit0.csv ├── submit1.csv └── train.csv ├── dataLoader.py ├── doc ├── compare.docx ├── manual.py ├── parareport.txt ├── report.pdf ├── report.txt ├── ~$ompare.docx └── 特征选择.xlsx ├── plot_pic ├── 1001_customers.jpg ├── 1002_customers.jpg ├── 1004_customers.jpg ├── 1005_customers.jpg ├── 1006_customers.jpg ├── 1007_customers.jpg ├── 1099_customers.jpg ├── 10_customers.jpg ├── 1101_customers.jpg ├── 1102_customers.jpg ├── 1103_customers.jpg ├── 1104_customers.jpg ├── 11_customers.jpg ├── 1201_customers.jpg ├── 1202_customers.jpg ├── 1203_customers.jpg ├── 1205_customers.jpg ├── 12_customers.jpg ├── 1301_customers.jpg ├── 1302_customers.jpg ├── 1306_customers.jpg ├── 1308_customers.jpg ├── 1399_customers.jpg ├── 13_customers.jpg ├── 1401_customers.jpg ├── 1402_customers.jpg ├── 1403_customers.jpg ├── 1404_customers.jpg ├── 14_customers.jpg ├── 1501_customers.jpg ├── 1502_customers.jpg ├── 1503_customers.jpg ├── 1504_customers.jpg ├── 1505_customers.jpg ├── 1507_customers.jpg ├── 1508_customers.jpg ├── 1509_customers.jpg ├── 1510_customers.jpg ├── 1511_customers.jpg ├── 1512_customers.jpg ├── 1513_customers.jpg ├── 1514_customers.jpg ├── 1515_customers.jpg ├── 1516_customers.jpg ├── 1517_customers.jpg ├── 1518_customers.jpg ├── 1519_customers.jpg ├── 1520_customers.jpg ├── 1521_customers.jpg ├── 15_customers.jpg ├── 2001_customers.jpg ├── 2002_customers.jpg ├── 2003_customers.jpg ├── 2004_customers.jpg ├── 2005_customers.jpg ├── 2006_customers.jpg ├── 2007_customers.jpg ├── 2008_customers.jpg ├── 2009_customers.jpg ├── 2010_customers.jpg ├── 2011_customers.jpg ├── 2012_customers.jpg ├── 2013_customers.jpg ├── 2014_customers.jpg ├── 2015_customers.jpg ├── 20_customers.jpg ├── 2101_customers.jpg ├── 2102_customers.jpg ├── 2103_customers.jpg ├── 2104_customers.jpg ├── 2105_customers.jpg ├── 2106_customers.jpg ├── 2107_customers.jpg ├── 2108_customers.jpg ├── 21_customers.jpg ├── 2201_customers.jpg ├── 2202_customers.jpg ├── 2203_customers.jpg ├── 2204_customers.jpg ├── 2205_customers.jpg ├── 2206_customers.jpg ├── 2207_customers.jpg ├── 2208_customers.jpg ├── 2209_customers.jpg ├── 2210_customers.jpg ├── 2211_customers.jpg ├── 2212_customers.jpg ├── 22_customers.jpg ├── 2301_customers.jpg ├── 2302_customers.jpg ├── 2303_customers.jpg ├── 2304_customers.jpg ├── 2305_customers.jpg ├── 2306_customers.jpg ├── 2307_customers.jpg ├── 2308_customers.jpg ├── 2309_customers.jpg ├── 2310_customers.jpg ├── 2311_customers.jpg ├── 2312_customers.jpg ├── 2313_customers.jpg ├── 2314_customers.jpg ├── 2315_customers.jpg ├── 2316_customers.jpg ├── 2317_customers.jpg ├── 23_customers.jpg ├── 3001_customers.jpg ├── 3002_customers.jpg ├── 3003_customers.jpg ├── 3004_customers.jpg ├── 3005_customers.jpg ├── 3006_customers.jpg ├── 3007_customers.jpg ├── 3008_customers.jpg ├── 3009_customers.jpg ├── 3010_customers.jpg ├── 3011_customers.jpg ├── 3012_customers.jpg ├── 3013_customers.jpg ├── 3014_customers.jpg ├── 3015_customers.jpg ├── 3016_customers.jpg ├── 3017_customers.jpg ├── 3018_customers.jpg ├── 30_customers.jpg ├── 3101_customers.jpg ├── 3102_customers.jpg ├── 3104_customers.jpg ├── 3105_customers.jpg ├── 3106_customers.jpg ├── 3107_customers.jpg ├── 3108_customers.jpg ├── 
3109_customers.jpg ├── 3110_customers.jpg ├── 3111_customers.jpg ├── 3112_customers.jpg ├── 3113_customers.jpg ├── 3114_customers.jpg ├── 3115_customers.jpg ├── 3116_customers.jpg ├── 3117_customers.jpg ├── 3118_customers.jpg ├── 3119_customers.jpg ├── 3120_customers.jpg ├── 3121_customers.jpg ├── 3122_customers.jpg ├── 3125_customers.jpg ├── 3126_customers.jpg ├── 3128_customers.jpg ├── 31_customers.jpg ├── 3208_customers.jpg ├── 3212_customers.jpg ├── 3213_customers.jpg ├── 3215_customers.jpg ├── 3216_customers.jpg ├── 3217_customers.jpg ├── 3218_customers.jpg ├── 3227_customers.jpg ├── 32_customers.jpg ├── 3301_customers.jpg ├── 3303_customers.jpg ├── 3311_customers.jpg ├── 3313_customers.jpg ├── 3314_customers.jpg ├── 3315_customers.jpg ├── 3316_customers.jpg ├── 3317_customers.jpg ├── 3319_customers.jpg ├── 3320_customers.jpg ├── 3321_customers.jpg ├── 3322_customers.jpg ├── 3323_customers.jpg ├── 3325_customers.jpg ├── 3326_customers.jpg ├── 3328_customers.jpg ├── 3330_customers.jpg ├── 33_customers.jpg ├── 3401_customers.jpg ├── 3402_customers.jpg ├── 3403_customers.jpg ├── 3404_customers.jpg ├── 3405_customers.jpg ├── 3406_customers.jpg ├── 3407_customers.jpg ├── 3408_customers.jpg ├── 3409_customers.jpg ├── 3410_customers.jpg ├── 3412_customers.jpg ├── 3413_customers.jpg ├── 3414_customers.jpg ├── 3415_customers.jpg ├── 3416_customers.jpg ├── 3417_customers.jpg ├── 3419_customers.jpg ├── 3421_customers.jpg ├── 3423_customers.jpg ├── 3424_customers.jpg ├── 3426_customers.jpg ├── 3427_customers.jpg ├── 3428_customers.jpg ├── 3429_customers.jpg ├── 3431_customers.jpg ├── 3432_customers.jpg ├── 3436_customers.jpg ├── 34_customers.jpg └── 异常日期.txt ├── rnn ├── test.py ├── test2.py └── test3.py ├── tools ├── backup.py ├── csvloader.py ├── csvloader_largeClass.py ├── dataModify.py └── fileChecker.py ├── v3 ├── May_input.csv ├── Readme.txt ├── Version_3.py ├── commit_empty.csv ├── features.csv ├── five_fold.py ├── five_fold_feature_v3.csv ├── preparedata.py ├── submit.csv ├── test.csv ├── timeseries_customers.csv ├── timeseries_discounts.csv ├── train.csv └── 调参.txt ├── v5 ├── KNN_interface.py ├── Readme.txt ├── Version_5.py ├── commit_empty.csv ├── modify_submit.py ├── submit.csv ├── timeseries_customers_processed.csv └── 调参.txt ├── v6_stacking ├── .idea │ ├── misc.xml │ ├── modules.xml │ ├── v6_stacking.iml │ └── workspace.xml ├── Version6_stacking.py ├── commit_empty.csv ├── cv │ ├── arima01.csv │ ├── arima11.csv │ ├── arima12.csv │ ├── arima_cv.py │ ├── result01.csv │ ├── result11.csv │ ├── result12.csv │ ├── xgboost_cv.csv │ └── xgboost_cv.py ├── five_fold_feature_v3.csv ├── five_fold_feature_xgboost.csv ├── merged_feature.csv ├── prepare_data.py ├── test.csv ├── timeseries_customers.csv └── train.csv └── xgboostPredicter.py
/.gitignore:
--------------------------------------------------------------------------------
1 | 
2 | *.pyc
3 | report
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # BDCI2017
2 | 
3 | 2017 CCF Big Data & Computing Intelligence Contest (BDCI) - supply, sales and inventory management optimization for a small supermarket
4 | 
5 | ## Team members
6 | 
7 | Wang Jun cnwj@mail.ustc.edu.cn
8 | Wang Fei wf314159@mail.ustc.edu.cn
9 | 
10 | ## Algorithm description and results report
11 | 
12 | See report.pdf in the doc folder
13 | 
14 | ## File descriptions
15 | 
16 | ### arimaPredicter.py
17 | 
18 | The encapsulated SARIMA predictor
19 | 
20 | ### xgboostPredicter.py
21 | 
22 | The encapsulated xgboost predictor
23 | 
24 | ### dataLoader.py
25 | 
26 | The encapsulated data-loading class
27 | 
28 | ### data folder
29 | 
30 | Competition data
31 | 
32 | train.csv: the training data provided by the competition
33 | 
34 | example.csv: the sample submission provided by the competition
35 | 
36 | datam.csv: the preprocessed mid-class samples
37 | 
38 | lcdatam.csv: the preprocessed large-class samples
39 | 
40 | submit0.csv, submit1.csv: the two files submitted during the competition
41 | 
42 | ### tools folder
43 | 
44 | Preprocessing utilities
45 | 
46 | ### doc folder
47 | 
48 | Related documents
49 | 
50 | report.pdf: the experiment report
51 | 
52 | manual.py: a usage guide for the SARIMA and xgboost predictors
53 | 
54 | ### arima_knn folder
55 | 
56 | Ensemble learning based on ARIMA and kNN
57 | 
58 | ### arima_xgboost folder
59 | 
60 | Ensemble learning based on ARIMA and xgboost
61 | 
62 | arima_xgboost_multi.py is the ensemble predictor that produced the final predictions
63 | 
64 | ### plot_pic folder
65 | 
66 | Sales-over-time plots
67 | 
68 | ### rnn folder
69 | 
70 | An LSTM-based learner (not encapsulated, not used in the end)
71 | 
72 | ### v3 folder
73 | 
74 | A random-forest-based learner (not encapsulated, not used in the end)
75 | 
76 | ### v5 folder
77 | 
78 | A kNN-based learner (not encapsulated, not used in the end)
79 | 
80 | ### v6_stacking folder
81 | 
82 | A stacking-based ensemble predictor
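## Quick start

A minimal usage sketch, condensed from doc/manual.py; it assumes datam.csv sits in the working directory and uses only the loader and predictor APIs documented there:

```python
import datetime as dt
import dataLoader
import arimaPredicter

# Read the first mid-class series: 200 days of training data, 43 of test data
loader = dataLoader.loader("datam.csv")
loader.setSize(200, 43, 0)
midclass, trainData, trainLabel, testData, testLabel = loader.getNextMidClass()

# Fit a SARIMA model on a daily index starting 2015-01-01, then forecast the test window
ap = arimaPredicter.predicter()
ap.createIndex(dt.datetime(2015, 1, 1), 243)
model = ap.sarimaTrain(trainLabel)
predicted = ap.sarimaPredict(model, 43)
loader.closeFiles()
```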
83 | 84 | 85 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /arimaPredicter.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Wed Nov 15 12:25:37 2017 4 | 5 | @author: wangjun 6 | """ 7 | 8 | from numpy import array 9 | from numpy import log 10 | from numpy import exp 11 | import math 12 | 13 | import datetime as dt 14 | import pandas as pd 15 | from statsmodels.tsa.statespace.sarimax import SARIMAX 16 | import statsmodels.api as sm 17 | 18 | import matplotlib.pylab as plt 19 | from statsmodels.tsa.stattools import adfuller 20 | 21 | class predicter(): 22 | def __init__(self): 23 | self.ParaChoose = {} 24 | self.dtIndex = [] 25 | 26 | def setIndex(self, index): 27 | self.dtIndex = index[:] 28 | 29 | def getIndex(self): 30 | return self.dtIndex 31 | 32 | def createIndex(self, date_from, length): 33 | delta = dt.timedelta(days=1) 34 | now = date_from 35 | self.dtIndex = [] 36 | for i in range(0, length): 37 | self.dtIndex.append(now) 38 | now = now + delta 39 | return self.dtIndex 40 | 41 | def setPara(self, clas, para): 42 | if (type(para)!=tuple or len(para)!=2): 43 | raise TypeError("para should be a tuple (ar, ma)") 44 | self.ParaChoose[clas] = para 45 | 46 | def getPara(self): 47 | return self.ParaChoose 48 | 49 | def test_stationarity(self, timeseries): 50 | #Determining rolling statistics 51 | if (type(timeseries) == list): 52 | length = len(timeseries) 53 | timeseries = pd.Series(timeseries) 54 | timeseries.index = pd.Index(self.dtIndex[0:length]) 55 | elif (type(timeseries) != pd.core.series.Series): 56 | raise TypeError("timeseries should be a list or Series") 57 | rolmean = timeseries.rolling(window=12,center=False).mean() 58 | rolstd = timeseries.rolling(window=12,center=False).std() 59 | 60 | #Plot rolling statistics: 61 | plt.plot(timeseries, color='blue',label='Original') 62 | plt.plot(rolmean, color='red', label='Rolling Mean') 63 | plt.plot(rolstd, color='black', label = 'Rolling Std') 64 | plt.legend(loc='best') 65 | plt.title('Rolling Mean & Standard Deviation') 66 | plt.show(block=False) 67 | 68 | #Perform Dickey-Fuller test: 69 | print('Results of Dickey-Fuller Test:') 70 | dftest = adfuller(timeseries, autolag='AIC') 71 | dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used']) 72 | for key,value in dftest[4].items(): 73 | dfoutput['Critical Value (%s)'%key] = value 74 | print(dfoutput) 75 | 76 | #Get AR and MA parameter 77 | fig = plt.figure(figsize=(12,8)) 78 | ax1=fig.add_subplot(211) 79 | fig = sm.graphics.tsa.plot_acf(timeseries, lags=20, ax=ax1) 80 | ax2 = fig.add_subplot(212) 81 | fig = sm.graphics.tsa.plot_pacf(timeseries, lags=20, ax=ax2) 82 | plt.show(block=False) 83 | 84 | def sarimaTrain(self, trainLabel, 
classNo=0, para=()): 85 | dataLength = len(trainLabel) 86 | data = pd.Series(trainLabel) 87 | for i in range(0, dataLength): 88 | data[i] = log(data[i] + 1) 89 | index = self.dtIndex[0:dataLength] 90 | data.index = pd.Index(index) 91 | 92 | if (len(para) != 2): 93 | try: 94 | (ar, ma) = self.ParaChoose[classNo] 95 | except KeyError: 96 | print("%d: parameter not set, use (1,1) default" % classNo) 97 | (ar, ma) = (1, 1) 98 | return SARIMAX(data, order=(ar,1,ma), seasonal_order=(0,1,1,7)).fit() 99 | else: 100 | return SARIMAX(data, order=(para[0], 1, para[1]), seasonal_order=(0,1,1,7)).fit() 101 | 102 | def sarimaParaSelect(self, classNo, trainLabel, testLabel, useAic=False): 103 | dataLength = len(trainLabel) 104 | data = pd.Series(trainLabel) 105 | for i in range(0, dataLength): 106 | data[i] = log(data[i] + 1) 107 | index = self.dtIndex[0:dataLength] 108 | data.index = pd.Index(index) 109 | 110 | minBias = 99999.0 111 | minAic = 99999.0 112 | (ar, ma) = (0, 0) 113 | label = array(testLabel) 114 | for p, q in [(1, 1), (0, 1), (1, 2), (2, 0), (2, 1), (2, 2)]: 115 | try: 116 | model = SARIMAX(data, order=(p,1,q), seasonal_order=(0,1,1,7)).fit() 117 | output = array(model.forecast(len(testLabel))) 118 | for i in range(0, len(testLabel)): 119 | output[i] = exp(output[i]) - 1 120 | bias = math.sqrt(sum((output-label)*(output-label))/len(testLabel)) 121 | if (bias < minBias and (useAic == False or model.aic < minAic)): 122 | (ar, ma) = (p, q) 123 | minBias = bias 124 | minAic = model.aic 125 | bestOutput = output 126 | except: 127 | pass 128 | 129 | if (minBias < 90000.0): 130 | self.ParaChoose[classNo] = (ar, ma) 131 | return ((ar, ma), bestOutput) 132 | else: 133 | raise ValueError 134 | 135 | def checkBias(self, model, trainLabel): 136 | dataLength = len(trainLabel) 137 | data = pd.Series(trainLabel) 138 | index = self.dtIndex[0:dataLength] 139 | data.index = pd.Index(index) 140 | 141 | pred = model.predict() 142 | plt.plot(data, color='blue',label='Original') 143 | plt.plot(pred, color='red', label='Predicted') 144 | plt.show(block=False) 145 | return list(data - pred) 146 | 147 | @staticmethod 148 | def sarimaPredict(model, predictLength): 149 | output = model.forecast(predictLength) 150 | for i in range(0, predictLength): 151 | output[i] = exp(output[i]) - 1 152 | return array(output) 153 | -------------------------------------------------------------------------------- /arima_knn/KNN_interface.py: -------------------------------------------------------------------------------- 1 | # IDE not support Chinese 2 | 3 | from sklearn.neighbors import KNeighborsRegressor 4 | import numpy as np 5 | import csv 6 | 7 | def knn(data, pred_length, D_window=14, max_k=7): 8 | if pred_length + D_window >= len(data): 9 | print('ERROR: pred_length or D_window too long') 10 | return None 11 | 12 | ret_ypred = [] 13 | for h in range(4): 14 | train_feature, train_label = get_train_set(data, h, D_window, pred_length) 15 | 16 | e_LOO_arr = np.zeros(max_k) 17 | for k in range(2, max_k + 1): 18 | model = KNeighborsRegressor(n_neighbors=k, weights='uniform', algorithm='auto') 19 | model.fit(train_feature, train_label) 20 | 21 | dist_list, index_list = model.kneighbors([data[0 - D_window:]]) 22 | k_neighbor_label = [] 23 | for i in index_list[0]: 24 | k_neighbor_label.append(train_label[i]) 25 | 26 | ypred = model.predict([data[0-D_window:]]) 27 | ypred = np.asarray(list(map(round, ypred[0]))) 28 | 29 | e_LOO_arr[k-1] = LOO(k_neighbor_label, ypred, k) 30 | 31 | k_min = np.argmin(e_LOO_arr[1:]) + 2 32 | model = 
KNeighborsRegressor(n_neighbors=k_min, weights='uniform', algorithm='auto') 33 | model.fit(train_feature, train_label) 34 | ypred = model.predict([data[0 - D_window:]]) 35 | ret_ypred += list(map(round, ypred[0])) 36 | 37 | return np.asarray(ret_ypred) 38 | 39 | 40 | def get_train_set(train_data, h, D, pred_length): 41 | feature, label = [], [] 42 | block_len = int(pred_length / 4) 43 | if h != 3: 44 | for i in range(len(train_data) - D - block_len * (h + 1) + 1): 45 | feature.append(train_data[i:i + D]) 46 | label.append(train_data[i + D + block_len * h:i + D + block_len * h + block_len]) 47 | else: 48 | for i in range(len(train_data) - D - pred_length + 1): 49 | feature.append(train_data[i:i + D]) 50 | label.append(train_data[i + D + 3 * block_len:i + D + pred_length]) 51 | return np.array(feature), np.array(label) 52 | 53 | def LOO(k_neighbor_label, ypred, k): 54 | ret = 0 55 | for neighbor in k_neighbor_label: 56 | ret = ret + ((neighbor - ypred) ** 2).sum() 57 | ret = ret * k / (k - 1)**2 58 | # ret = ret / (k)**2 59 | return ret 60 | 61 | 62 | def test(): 63 | with open('timeseries_customers_processed.csv') as input_file: 64 | input_csv = csv.reader(input_file) 65 | next(input_csv) 66 | row = next(input_csv) 67 | data = list(map(float, row[1:])) 68 | print(knn(data, 30)) 69 | 70 | 71 | if __name__ == '__main__': 72 | test() 73 | -------------------------------------------------------------------------------- /arima_knn/KNN_interface.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/arima_knn/KNN_interface.pyc --------------------------------------------------------------------------------
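KNN_interface.knn is the reusable entry point of this approach: it uses the last D_window days of the series as the query vector and trains four MIMO kNN regressors, one per horizon block (7/7/7/9 days for a 30-day forecast), choosing each block's k by the leave-one-out estimate above. A minimal sketch of calling it on synthetic data (the synthetic series is only an illustration, not repo data):

    import numpy as np
    import KNN_interface

    # 120 days of fake sales with a weekly cycle, just to exercise the API
    rng = np.random.default_rng(0)
    data = list(10 + 5 * np.sin(np.arange(120) * 2 * np.pi / 7) + rng.random(120))

    # Forecast the next 30 days; requires pred_length + D_window < len(data)
    pred = KNN_interface.knn(data, 30, D_window=14, max_k=7)
    print(pred)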
/arima_knn/Version_5.py:
--------------------------------------------------------------------------------
1 | import csv
2 | from sklearn.neighbors import KNeighborsRegressor
3 | import numpy as np
4 | 
5 | from modify_submit import change_pred
6 | 
7 | 
8 | def main_fun():
9 |     class_codes = ['1201', '2011', '12', '15', '20', '22', '23', '30']
10 |     with open('timeseries_customers_processed.csv') as input_file:
11 |         input_csv = csv.reader(input_file)
12 |         next(input_csv)
13 |         for row in input_csv:
14 |             if row[0] in class_codes:
15 |                 # MIMO_KNN_test(row)
16 |                 # MIMO_KNN_LOO_test(row)
17 |                 MIMO_KNN_LOO_May(row)
18 | 
19 | 
20 | # Split off a test set to try different parameters (D_window, k); does not predict May sales
21 | def MIMO_KNN_test(data):
22 |     code = data[0]
23 |     data = list(map(float, data[1:]))
24 |     train_data = data[:90]
25 |     test_data = data[90:]
26 | 
27 |     # Train one model per horizon block; the blocks are 7, 7, 7 and 9 days long
28 |     D_window = 14
29 |     for h in range(4):
30 |         train_feature, train_label = get_train_set(train_data, h, D_window)
31 |         y_label = get_test_label(test_data, h)
32 | 
33 |         for k in range(1, 8):
34 |             model = KNeighborsRegressor(n_neighbors=k, weights='uniform', algorithm='auto')
35 |             model.fit(train_feature, train_label)
36 | 
37 |             ypred = model.predict([train_data[0-D_window:]])
38 |             ypred = np.array(list(map(round, ypred[0])))
39 | 
40 |             rmse = np.sqrt(((ypred - y_label) ** 2).mean())
41 |             print(code, ' h=', h, ' k=', k, ' rmse=', rmse)
42 | 
43 | 
44 | # Split off a test set and implement the method from the paper; does not predict May sales
45 | def MIMO_KNN_LOO_test(data):
46 |     code = data[0]
47 |     data = list(map(float, data[1:]))
48 |     train_data = data[:90]
49 |     test_data = data[90:]
50 | 
51 |     # Train one model per horizon block; the blocks are 7, 7, 7 and 9 days long
52 |     D_window = 14
53 |     max_k = 7
54 |     for h in range(4):
55 |         train_feature, train_label = get_train_set(train_data, h, D_window)
56 |         y_label = get_test_label(test_data, h)
57 | 
58 |         e_LOO_arr = np.zeros(max_k)
59 |         for k in range(2, max_k + 1):
60 |             model = KNeighborsRegressor(n_neighbors=k, weights='uniform', algorithm='auto')
61 |             model.fit(train_feature, train_label)
62 | 
63 |             # Get the k nearest neighbors
64 |             dist_list, index_list = model.kneighbors([train_data[0 - D_window:]])
65 |             k_neighbor_label = []
66 |             for i in index_list[0]:
67 |                 k_neighbor_label.append(train_label[i])
68 | 
69 |             # Prediction based on the k nearest neighbors
70 |             ypred = model.predict([train_data[0-D_window:]])
71 |             ypred = np.asarray(list(map(round, ypred[0])))
72 |             rmse = np.sqrt(((ypred - y_label) ** 2).mean())
73 |             print(code, ' h=', h, ' k=', k, ' rmse=', rmse)
74 | 
75 |             # Compute e_LOO
76 |             e_LOO_arr[k-1] = LOO(k_neighbor_label, ypred, k)
77 | 
78 |         # Take the k with the smallest e_LOO
79 |         k_min = np.argmin(e_LOO_arr[1:]) + 2
80 |         print('k_min=', k_min)
81 | 
82 | 
83 | # Use the whole data set, implement the method from the paper, and predict May sales
84 | def MIMO_KNN_LOO_May(data):
85 |     code = data[0]
86 |     data = list(map(float, data[1:]))
87 | 
88 |     D_window = 14
89 |     max_k = 7
90 |     pred_May = []
91 |     for h in range(4):
92 |         train_feature, train_label = get_train_set(data, h, D_window)
93 |         e_LOO_arr = np.zeros(max_k)
94 |         for k in range(2, max_k + 1):
95 |             model = KNeighborsRegressor(n_neighbors=k, weights='uniform', algorithm='auto')
96 |             model.fit(train_feature, train_label)
97 | 
98 |             # Get the k nearest neighbors
99 |             dist_list, index_list = model.kneighbors([data[0 - D_window:]])
100 |             k_neighbor_label = []
101 |             for i in index_list[0]:
102 |                 k_neighbor_label.append(train_label[i])
103 | 
104 |             # Prediction based on the k nearest neighbors
105 |             ypred = model.predict([data[0 - D_window:]])
106 |             ypred = np.asarray(list(map(round, ypred[0])))
107 | 
108 |             # Compute e_LOO
109 |             e_LOO_arr[k - 1] = LOO(k_neighbor_label, ypred, k)
110 | 
111 |         # Take the k with the smallest e_LOO
112 |         k_min = np.argmin(e_LOO_arr[1:]) + 2
113 | 
114 |         # Set k = k_min and predict
115 |         model = KNeighborsRegressor(n_neighbors=k_min, weights='uniform', algorithm='auto')
116 |         model.fit(train_feature, train_label)
117 |         ypred = model.predict([data[0 - D_window:]])
118 |         ypred = list(map(round, ypred[0]))
119 |         pred_May = pred_May + ypred
120 | 
121 |     print(pred_May)
122 |     # Replace the predicted values for class code in the file
123 |     change_pred(code, pred_May)
124 | 
125 | 
126 | # Compute LOO, used to choose k (the number of neighbors)
127 | def LOO(k_neighbor_label, ypred, k):
128 |     ret = 0
129 |     for neighbor in k_neighbor_label:
130 |         ret = ret + ((neighbor - ypred) ** 2).sum()
131 |     ret = ret * k / (k - 1)**2
132 |     # ret = ret / (k)**2
133 |     return ret
134 | 
135 | 
136 | def get_train_set(train_data, h, D):
137 |     feature, label = [], []
138 |     if h != 3:
139 |         for i in range(len(train_data) - D - 7 * (h+1) + 1):
140 |             feature.append(train_data[i:i+D])
141 |             label.append(train_data[i+D+7*h:i+D+7*h+7])
142 |     else:
143 |         for i in range(len(train_data) - D - 30 + 1):
144 |             feature.append(train_data[i:i+D])
145 |             label.append(train_data[i+D+21:i+D+30])
146 |     return np.array(feature), np.array(label)
147 | 
148 | 
149 | def get_test_label(test_data, h):
150 |     if h != 3:
151 |         return test_data[7*h:7*h+7]
152 |     else:
153 |         return test_data[21:]
154 | 
155 | 
156 | if __name__ == '__main__':
157 |     main_fun()
-------------------------------------------------------------------------------- /arima_knn/arima_knn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Spyder Editor 4 | 5 | This is a temporary script file.
6 | """ 7 | 8 | from numpy import array 9 | from numpy import zeros 10 | import csv 11 | import math 12 | import datetime as dt 13 | 14 | import arimaPredicter 15 | import dataLoader 16 | import KNN_interface 17 | 18 | larclasPred = {} 19 | larclasLabl = {} 20 | totalBias = 0 21 | totalCount = 0 22 | 23 | dtIndex = [dt.datetime(2015,1,x) for x in range(1, 32)] 24 | dtIndex = dtIndex + [dt.datetime(2015,2,x) for x in (range(1, 29))] 25 | dtIndex = dtIndex + [dt.datetime(2015,3,x) for x in range(1, 32)] 26 | dtIndex = dtIndex + [dt.datetime(2015,4,x) for x in (range(1, 31))] 27 | 28 | modelChoose = [] 29 | lcModelChoose = [] 30 | 31 | ap = arimaPredicter.predicter() 32 | ap.setIndex(dtIndex) 33 | 34 | def dataLog(midclass, accuracy, trainLabl, testPred, testLabl): 35 | with open('compare.csv', 'ab') as f: 36 | writer = csv.writer(f) 37 | count = 1 38 | writer.writerow([midclass, accuracy]) 39 | for x in trainLabl: 40 | writer.writerow([count, x]) 41 | count += 1 42 | for x in range(0, len(testPred)): 43 | writer.writerow([count, testLabl[x], testPred[x]]) 44 | count += 1 45 | 46 | def modelselect(trainSize, testSize, skipSize = 0): 47 | global larclasPred, totalBias, totalCount, modelChoose, lcModelChoose, ap 48 | larclasPred = {} 49 | totalBias = 0 50 | totalCount = 0 51 | modelChoose = [] 52 | lcModelChoose = [] 53 | 54 | loader = dataLoader.loader("datam.csv", "lcdatam.csv") 55 | loader.setSize(trainSize, testSize, skipSize) 56 | 57 | # middle class 58 | while (True): 59 | midclass, trD, trL, _, teL = loader.getNextMidClass() 60 | if (midclass == 0): 61 | break 62 | else: 63 | # sarima model 64 | try: 65 | model = ap.sarimaTrain(midclass, trL, teL) 66 | teP1 = ap.sarimaPredict(model, testSize) 67 | except: 68 | teP1 = zeros(testSize) 69 | 70 | # kNN model 71 | try: 72 | teP2 = KNN_interface.knn(trL, testSize) 73 | except: 74 | print("Warning: kNN train fail") 75 | teP2 = zeros(testSize) 76 | 77 | # just zero 78 | teP3 = zeros(testSize) 79 | 80 | # count bias of midclass and update larclass 81 | label = array(teL) 82 | larclass = int(midclass/100) 83 | totalCount += testSize 84 | 85 | bias1 = sum((teP1-label)*(teP1-label)) 86 | bias2 = sum((teP2-label)*(teP2-label)) 87 | bias3 = sum((teP3-label)*(teP3-label)) 88 | if (bias3 <= bias1 and bias3 <= bias2): 89 | totalBias += bias3 90 | bias3 = math.sqrt(bias3/testSize) 91 | print "(Midclass %d select ZERO, accuracy: %f)" % (midclass, bias3) 92 | modelChoose.append(3) 93 | if (larclass in larclasPred): 94 | larclasPred[larclass] += teP3 95 | else: 96 | larclasPred[larclass] = teP3 97 | elif (bias1 <= bias2): 98 | totalBias += bias1 99 | bias1 = math.sqrt(bias1/testSize) 100 | print "(Midclass %d select SARIMA, accuracy: %f)" % (midclass, bias1) 101 | modelChoose.append(1) 102 | if (larclass in larclasPred): 103 | larclasPred[larclass] += teP1 104 | else: 105 | larclasPred[larclass] = teP1 106 | else: 107 | totalBias += bias2 108 | bias2 = math.sqrt(bias2/testSize) 109 | print "(Midclass %d select kNN, accuracy: %f)" % (midclass, bias2) 110 | modelChoose.append(2) 111 | if (larclass in larclasPred): 112 | larclasPred[larclass] += teP2 113 | else: 114 | larclasPred[larclass] = teP2 115 | 116 | # large class 117 | while (True): 118 | larclass, trD, trL, _, teL = loader.getNextLarClass() 119 | if (larclass == 0): 120 | break 121 | else: 122 | # sarima model 123 | try: 124 | model = ap.sarimaTrain(larclass, trL, teL) 125 | teP1 = ap.sarimaPredict(model, testSize) 126 | except: 127 | teP1 = zeros(testSize) 128 | 129 | # knn model 130 | try: 131 | 
teP2 = KNN_interface.knn(trL, testSize) 132 | except: 133 | print("Warning: kNN train fail") 134 | teP2 = zeros(testSize) 135 | 136 | # sum of midclasses 137 | teP3 = larclasPred[larclass] 138 | 139 | # count bias of midclass and update larclass 140 | label = array(teL) 141 | totalCount += testSize 142 | 143 | bias1 = sum((teP1-label)*(teP1-label)) 144 | bias2 = sum((teP2-label)*(teP2-label)) 145 | bias3 = sum((teP3-label)*(teP3-label)) 146 | if (bias3 <= bias1 and bias3 <= bias2): 147 | totalBias += bias3 148 | bias3 = math.sqrt(bias3/testSize) 149 | print "(Larclass %d select SUM, accuracy: %f)" % (larclass, bias3) 150 | lcModelChoose.append(3) 151 | elif (bias1 <= bias2): 152 | totalBias += bias1 153 | bias1 = math.sqrt(bias1/testSize) 154 | print "(Larclass %d select SARIMA, accuracy: %f)" % (larclass, bias1) 155 | lcModelChoose.append(1) 156 | else: 157 | totalBias += bias2 158 | bias2 = math.sqrt(bias2/testSize) 159 | print "(Larclass %d select kNN, accuracy: %f)" % (larclass, bias2) 160 | lcModelChoose.append(2) 161 | 162 | totalBias = math.sqrt(totalBias/totalCount) 163 | print "(Predict finished, accuracy: %f)" % (totalBias) 164 | loader.closeFiles() 165 | 166 | def submit(trainSize): 167 | global larclasPred, ap 168 | larclasPred = {} 169 | 170 | f1 = open("submit.csv", "r") 171 | submit_csv = csv.reader(f1) 172 | submit_csv.next() 173 | f2 = open('submit1.csv', 'wb') 174 | writer = csv.writer(f2) 175 | 176 | loader = dataLoader.loader("datam.csv", "lcdatam.csv") 177 | loader.setSize(trainSize) 178 | 179 | # middle class 180 | current = 0 181 | while (True): 182 | midclass, trD, trL, teD, teL = loader.getNextMidClass() 183 | if (midclass == 0): 184 | break 185 | else: 186 | if (modelChoose[current] == 1): 187 | try: 188 | model = ap.sarimaTrain(midclass, trL) 189 | teP = ap.sarimaPredict(model, 30) 190 | except: 191 | print("%d: failed to use arima, use kNN instead" % midclass) 192 | teP = KNN_interface.knn(trL, 30) 193 | elif (modelChoose[current] == 2): 194 | teP = KNN_interface.knn(trL, 30) 195 | else: 196 | teP = zeros(30) 197 | current += 1 198 | 199 | for x in teP: 200 | x_int = round(x) 201 | row = submit_csv.next() 202 | if (int(row[0]) != midclass): 203 | raise KeyError 204 | writer.writerow([row[0], row[1], x_int]) 205 | 206 | # count larclass 207 | larclass = int(midclass/100) 208 | if (larclass in larclasPred): 209 | larclasPred[larclass] += teP 210 | else: 211 | larclasPred[larclass] = teP 212 | 213 | # large class 214 | current = 0 215 | while (True): 216 | larclass, trD, trL, teD, teL = loader.getNextLarClass() 217 | if (larclass == 0): 218 | break 219 | else: 220 | if (lcModelChoose[current] == 1): 221 | try: 222 | model = ap.sarimaTrain(larclass, trL) 223 | teP = ap.sarimaPredict(model, 30) 224 | except: 225 | print("%d: failed to use arima, use kNN instead" % larclass) 226 | teP = KNN_interface.knn(trL, 30) 227 | elif (lcModelChoose[current] == 2): 228 | teP = KNN_interface.knn(trL, 30) 229 | else: 230 | teP = larclasPred[larclass] 231 | current += 1 232 | 233 | # write file - larclass 234 | for x in teP: 235 | x_int = round(x) 236 | row = submit_csv.next() 237 | if (int(row[0]) != larclass): 238 | raise KeyError 239 | writer.writerow([row[0], row[1], x_int]) 240 | 241 | f1.close() 242 | f2.close() 243 | loader.closeFiles() 244 | 245 | modelselect(75, 30, 15) 246 | """ 247 | with open("report.txt", "w") as f: 248 | for clas in arimaParaChoose: 249 | f.writelines("class %d: (%d,%d)\n" % (clas, arimaParaChoose[clas][0], arimaParaChoose[clas][1])) 250 | """ 251 | 
submit(120)
--------------------------------------------------------------------------------
/arima_knn/modify_submit.py:
--------------------------------------------------------------------------------
1 | import csv
2 | 
3 | 
4 | # Read the original prediction file, round the predictions, and write them back
5 | def get_round():
6 |     rows = []
7 |     with open('submit.csv') as input_file:
8 |         input_csv = csv.reader(input_file)
9 |         rows.append(next(input_csv))
10 |         for row in input_csv:
11 |             row[2] = str(int(round(float(row[2]))))
12 |             rows.append(row)
13 |     with open('submit.csv', 'w', newline='') as output_file:
14 |         output_csv = csv.writer(output_file)
15 |         for row in rows:
16 |             output_csv.writerow(row)
17 | 
18 | 
19 | # Replace the predictions of the class whose code equals code with pred in the prediction file
20 | def change_pred(code, pred):
21 |     rows = []
22 |     file_name = 'submit_WJ_2.csv'
23 |     with open(file_name) as input_file:
24 |         input_csv = csv.reader(input_file)
25 |         rows.append(next(input_csv))
26 |         i = 0
27 |         for row in input_csv:
28 |             if row[0] == code:
29 |                 rows.append([code, row[1], str(pred[i])])
30 |                 i += 1
31 |             else:
32 |                 rows.append(row)
33 |     with open(file_name, 'w', newline='') as output_file:
34 |         output_csv = csv.writer(output_file)
35 |         for row in rows:
36 |             output_csv.writerow(row)
37 | 
38 | 
39 | if __name__ == '__main__':
40 |     get_round()
41 | 
-------------------------------------------------------------------------------- /arima_xgboost/arima_xgboost.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Spyder Editor 4 | 5 | This is a temporary script file. 6 | """ 7 | 8 | import xgboost as xgb 9 | import arimaPredicter 10 | import dataLoader 11 | 12 | from numpy import array 13 | from numpy import zeros 14 | import csv 15 | import math 16 | import datetime as dt 17 | 18 | ap = arimaPredicter.predicter() 19 | ap.createIndex(dt.datetime(2015,1,1), 243) 20 | 21 | modelChoose = {} 22 | 23 | def dataLog(midclass, accuracy, trainLabl, testPred, testLabl): 24 | with open('compare.csv', 'ab') as f: 25 | writer = csv.writer(f) 26 | count = 1 27 | writer.writerow([midclass, accuracy]) 28 | for x in trainLabl: 29 | writer.writerow([count, x]) 30 | count += 1 31 | for x in range(0, len(testPred)): 32 | writer.writerow([count, testLabl[x], testPred[x]]) 33 | count += 1 34 | 35 | def xgboostPredict(trainData, trainLabel, dataToPredict, 36 | params = {"objective":"reg:linear", "max_depth":1, "gamma":2}): 37 | dtrain = xgb.DMatrix(trainData, trainLabel) 38 | gbm = xgb.train(dtrain=dtrain, params=params) 39 | return gbm.predict(xgb.DMatrix(dataToPredict)) 40 | 41 | def simulateFeature(trainData, musk): 42 | for feature in trainData: 43 | for i in musk: 44 | feature[i] = 0 45 | 46 | def createFeature(date_from, length, zeros, DictHoilday, DictBeforeHoilday, 47 | DictWorkday): 48 | delta = dt.timedelta(days=1) 49 | now = date_from 50 | index = [] 51 | for i in range(0, length): 52 | index.append(now) 53 | now = now + delta 54 | feature = [] 55 | empty = [0 for x in range(0, zeros+4)] 56 | for i in range(0, length): 57 | x = empty[:] 58 | x[0] = index[i].day 59 | x[1] = (index[i].weekday() + 1) % 7 60 | dayCount = i + 1 61 | if (dayCount in DictHoilday): 62 | x[3] = 1 63 | elif (dayCount in DictBeforeHoilday): 64 | x[2] = 1 65 | elif (dayCount in DictWorkday): 66 | if (x[1]==6 or ((dayCount+1) in DictHoilday)): 67 | x[2] = 1 68 | elif (x[1]==0 or x[1]==6): 69 | x[3] = 1 70 | elif (x[1]==5): 71 | x[2] = 1 72 | feature.append(x) 73 | return feature 74 | 75 | def setModel(clas, model): 76 | global modelChoose 77 | if (clas not in 
modelChoose): 78 | modelChoose[clas] = model 79 | elif (model < modelChoose[clas]): 80 | modelChoose[clas] = model 81 | 82 | def modelselect(trainSize, testSize, skipSize = 0): 83 | larclasPred = {} 84 | totalBias = 0 85 | totalCount = 0 86 | 87 | loader = dataLoader.loader("datam.csv", "lcdatam.csv") 88 | loader.setSize(trainSize, testSize, skipSize) 89 | 90 | # middle class 91 | while (True): 92 | midclass, trD, trL, teD, teL = loader.getNextMidClass() 93 | if (midclass == 0): 94 | break 95 | else: 96 | 97 | # sarima model 98 | try: 99 | model = ap.sarimaTrain(midclass, trL, teL) 100 | teP1 = ap.sarimaPredict(model, testSize) 101 | except: 102 | teP1 = zeros(testSize) 103 | 104 | # xgboost model 105 | simulateFeature(teD, [-2, -1]) 106 | try: 107 | teP2 = xgboostPredict(array(trD), array(trL), array(teD)) 108 | except: 109 | teP2 = zeros(testSize) 110 | 111 | # just zero 112 | teP3 = zeros(testSize) 113 | 114 | # count bias of midclass and update larclass 115 | label = array(teL) 116 | larclass = int(midclass/100) 117 | totalCount += testSize 118 | 119 | bias1 = sum((teP1-label)*(teP1-label)) 120 | bias2 = sum((teP2-label)*(teP2-label)) 121 | bias3 = sum((teP3-label)*(teP3-label)) 122 | if (bias3 <= bias1 and bias3 <= bias2): 123 | totalBias += bias3 124 | bias3 = math.sqrt(bias3/testSize) 125 | print "(Midclass %d select ZERO, accuracy: %f)" % (midclass, bias3) 126 | setModel(midclass, 3) 127 | if (larclass in larclasPred): 128 | larclasPred[larclass] += teP3 129 | else: 130 | larclasPred[larclass] = teP3 131 | elif (bias1 <= bias2): 132 | totalBias += bias1 133 | bias1 = math.sqrt(bias1/testSize) 134 | print "(Midclass %d select SARIMA, accuracy: %f)" % (midclass, bias1) 135 | setModel(midclass, 1) 136 | if (larclass in larclasPred): 137 | larclasPred[larclass] += teP1 138 | else: 139 | larclasPred[larclass] = teP1 140 | else: 141 | totalBias += bias2 142 | bias2 = math.sqrt(bias2/testSize) 143 | print "(Midclass %d select XGBOOST, accuracy: %f)" % (midclass, bias2) 144 | setModel(midclass, 2) 145 | if (larclass in larclasPred): 146 | larclasPred[larclass] += teP2 147 | else: 148 | larclasPred[larclass] = teP2 149 | 150 | # large class 151 | while (True): 152 | larclass, trD, trL, teD, teL = loader.getNextLarClass() 153 | if (larclass == 0): 154 | break 155 | else: 156 | 157 | # sarima model 158 | try: 159 | model = ap.sarimaTrain(larclass, trL, teL) 160 | teP1 = ap.sarimaPredict(model, testSize) 161 | except: 162 | teP1 = zeros(testSize) 163 | 164 | # xgboost model 165 | simulateFeature(teD, [-2, -1]) 166 | try: 167 | teP2 = xgboostPredict(array(trD), array(trL), array(teD)) 168 | except: 169 | teP2 = zeros(testSize) 170 | 171 | # sum of midclasses 172 | try: 173 | teP3 = larclasPred[larclass] 174 | except: 175 | teP3 = zeros(testSize) 176 | 177 | # count bias of midclass and update larclass 178 | label = array(teL) 179 | totalCount += testSize 180 | 181 | bias1 = sum((teP1-label)*(teP1-label)) 182 | bias2 = sum((teP2-label)*(teP2-label)) 183 | bias3 = sum((teP3-label)*(teP3-label)) 184 | if (bias3 <= bias1 and bias3 <= bias2): 185 | totalBias += bias3 186 | bias3 = math.sqrt(bias3/testSize) 187 | print "(Larclass %d select SUM, accuracy: %f)" % (larclass, bias3) 188 | setModel(larclass, 3) 189 | elif (bias1 <= bias2): 190 | totalBias += bias1 191 | bias1 = math.sqrt(bias1/testSize) 192 | print "(Larclass %d select SARIMA, accuracy: %f)" % (larclass, bias1) 193 | setModel(larclass, 1) 194 | else: 195 | totalBias += bias2 196 | bias2 = math.sqrt(bias2/testSize) 197 | print "(Larclass 
%d select XGBOOST, accuracy: %f)" % (larclass, bias2) 198 | setModel(larclass, 2) 199 | 200 | totalBias = math.sqrt(totalBias/totalCount) 201 | print "(Predict finished, accuracy: %f)" % (totalBias) 202 | loader.closeFiles() 203 | 204 | def submit(trainSize): 205 | global larclasPred 206 | larclasPred = {} 207 | f1 = open("example.csv", "r") 208 | submit_csv = csv.reader(f1) 209 | row = submit_csv.next() 210 | f2 = open('submit.csv', 'wb') 211 | writer = csv.writer(f2) 212 | writer.writerow(row) 213 | 214 | loader = dataLoader.loader("datam.csv", "lcdatam.csv") 215 | loader.setSize(trainSize) 216 | 217 | preDate = range(0, 9) + range(10, 59) 218 | 219 | # middle class 220 | goal = createFeature(dt.datetime(2015,9,1), 59, 2, 221 | range(31, 38), [30], [39, 40]) 222 | 223 | while (True): 224 | midclass, trD, trL, teD, teL = loader.getNextMidClass() 225 | if (midclass == 0): 226 | break 227 | else: 228 | if (modelChoose[midclass] == 1): 229 | try: 230 | model = ap.sarimaTrain(midclass, trL) 231 | teP = ap.sarimaPredict(model, 59) 232 | except: 233 | print("%d: failed to use arima, use xgboost instead" % midclass) 234 | teP = xgboostPredict(array(trD), array(trL), array(goal)) 235 | elif (modelChoose[midclass] == 2): 236 | teP = xgboostPredict(array(trD), array(trL), array(goal)) 237 | else: 238 | teP = zeros(59) 239 | 240 | for i in preDate: 241 | x_int = round(teP[i]) 242 | if (x_int < 0): 243 | x_int = 0 244 | row = submit_csv.next() 245 | if (int(row[0]) != midclass): 246 | raise KeyError 247 | writer.writerow([row[0], row[1], x_int]) 248 | 249 | # count larclass 250 | larclass = int(midclass/100) 251 | if (larclass in larclasPred): 252 | larclasPred[larclass] += teP 253 | else: 254 | larclasPred[larclass] = teP 255 | 256 | # large class 257 | goal = createFeature(dt.datetime(2015,9,1), 59, 1, 258 | range(31, 38), [30], [39, 40]) 259 | 260 | while (True): 261 | larclass, trD, trL, teD, teL = loader.getNextLarClass() 262 | if (larclass == 0): 263 | break 264 | else: 265 | if (modelChoose[larclass] == 1): 266 | try: 267 | model = ap.sarimaTrain(larclass, trL) 268 | teP = ap.sarimaPredict(model, 59) 269 | except: 270 | print("%d: failed to use arima, use xgboost instead" % larclass) 271 | teP = xgboostPredict(array(trD), array(trL), array(goal)) 272 | elif (modelChoose[larclass] == 2): 273 | teP = xgboostPredict(array(trD), array(trL), array(goal)) 274 | else: 275 | try: 276 | teP = larclasPred[larclass] 277 | except: 278 | teP = zeros(59) 279 | 280 | # write file - midclass 281 | for i in preDate: 282 | x_int = round(teP[i]) 283 | if (x_int < 0): 284 | x_int = 0 285 | row = submit_csv.next() 286 | if (int(row[0]) != larclass): 287 | raise KeyError 288 | writer.writerow([row[0], row[1], x_int]) 289 | 290 | f1.close() 291 | f2.close() 292 | loader.closeFiles() 293 | 294 | modelselect(200, 43, 0) 295 | para = ap.getPara() 296 | submit(243) -------------------------------------------------------------------------------- /arima_xgboost/arima_xgboost_multi.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Spyder Editor 4 | 5 | This is a temporary script file. 
6 | """ 7 | 8 | import arimaPredicter 9 | import dataLoader 10 | import xgboostPredicter 11 | 12 | from numpy import array 13 | from numpy import zeros 14 | import csv 15 | import math 16 | import datetime as dt 17 | 18 | aps = [] 19 | for i in range(0, 3): 20 | ap = arimaPredicter.predicter() 21 | ap.createIndex(dt.datetime(2015,1,1), 243) 22 | aps.append(ap) 23 | 24 | xgp = xgboostPredicter.predicter() 25 | 26 | modelChoose = {} 27 | 28 | def dataLog(midclass, accuracy, trainLabl, testPred, testLabl): 29 | with open('compare.csv', 'ab') as f: 30 | writer = csv.writer(f) 31 | count = 1 32 | writer.writerow([midclass, accuracy]) 33 | for x in trainLabl: 34 | writer.writerow([count, x]) 35 | count += 1 36 | for x in range(0, len(testPred)): 37 | writer.writerow([count, testLabl[x], testPred[x]]) 38 | count += 1 39 | 40 | def setModel(clas, model): 41 | global modelChoose 42 | if (clas not in modelChoose): 43 | modelChoose[clas] = [model] 44 | elif (model < modelChoose[clas]): 45 | modelChoose[clas].append(model) 46 | 47 | def trainAndCompare(ap, clas, trD, trL, teD, teL, teP3): 48 | testSize = len(teL) 49 | # sarima model 50 | try: 51 | (_, teP1) = ap.sarimaParaSelect(clas, trL, teL) 52 | except: 53 | teP1 = zeros(testSize) 54 | 55 | # xgboost model 56 | xgp.simulateFeature(teD, [-2, -1]) 57 | try: 58 | model = xgp.xgboostTrain(trD, trL) 59 | teP2 = xgp.xgboostPredict(model, teD) 60 | except: 61 | teP2 = zeros(testSize) 62 | 63 | label = array(teL) 64 | bias1 = sum((teP1-label)*(teP1-label)) 65 | bias2 = sum((teP2-label)*(teP2-label)) 66 | bias3 = sum((teP3-label)*(teP3-label)) 67 | if (bias3 <= bias1 and bias3 <= bias2): 68 | return (3, bias3, teP3) 69 | elif (bias1 <= bias2): 70 | return (1, bias1, teP1) 71 | else: 72 | return (2, bias2, teP2) 73 | 74 | def modelselect(ap, trainSize, testSize, skipSize = 0): 75 | larclasPred = {} 76 | totalBias = 0 77 | totalCount = 0 78 | 79 | loader = dataLoader.loader("datam.csv", "lcdatam.csv") 80 | loader.setSize(trainSize, testSize, skipSize) 81 | 82 | # middle class 83 | while (True): 84 | midclass, trD, trL, teD, teL = loader.getNextMidClass() 85 | if (midclass == 0): 86 | break 87 | else: 88 | (model, bias, teP) = trainAndCompare(ap, midclass, trD, trL, teD, teL, zeros(testSize)) 89 | 90 | larclass = int(midclass/100) 91 | totalCount += testSize 92 | totalBias += bias 93 | bias = math.sqrt(bias/testSize) 94 | print("(Midclass %d select model %d, accuracy: %f)" % (midclass, model, bias)) 95 | setModel(midclass, model) 96 | if (larclass in larclasPred): 97 | larclasPred[larclass] += teP 98 | else: 99 | larclasPred[larclass] = teP 100 | 101 | # large class 102 | while (True): 103 | larclass, trD, trL, teD, teL = loader.getNextLarClass() 104 | if (larclass == 0): 105 | break 106 | else: 107 | if (larclass in larclasPred): 108 | (model, bias, teP) = trainAndCompare(ap, larclass, trD, trL, teD, teL, larclasPred[larclass]) 109 | else: 110 | (model, bias, teP) = trainAndCompare(ap, larclass, trD, trL, teD, teL, zeros(testSize)) 111 | 112 | totalCount += testSize 113 | totalBias += bias 114 | bias = math.sqrt(bias/testSize) 115 | print("(Larclass %d select model %d, accuracy: %f)" % (larclass, model, bias)) 116 | setModel(larclass, model) 117 | 118 | totalBias = math.sqrt(totalBias/totalCount) 119 | print("(Predict finished, accuracy: %f)" % (totalBias)) 120 | loader.closeFiles() 121 | 122 | def writeClass(clas, result, dates, checker, writer): 123 | for i in dates: 124 | x_int = round(result[i]) 125 | if (x_int < 0): 126 | x_int = 0 127 | row = 
/arima_xgboost/statanalysis.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Spyder Editor 4 | 5 | This is a temporary script file.
6 | """ 7 | 8 | import datetime as dt 9 | import pandas as pd 10 | import numpy as np 11 | 12 | import csv 13 | import math 14 | import arimaPredicter 15 | 16 | temp = [] 17 | 18 | index = [dt.datetime(2015,1,x) for x in range(1, 32)] 19 | index = index + [dt.datetime(2015,2,x) for x in (range(1 ,29))] 20 | index = index + [dt.datetime(2015,3,x) for x in range(1, 32)] 21 | index = index + [dt.datetime(2015,4,x) for x in range(1, 31)] 22 | 23 | def getData(csvReader, trainCount, testCount): 24 | trainData = [] 25 | testData = [] 26 | trainLabel = [] 27 | testLabel = [] 28 | try: 29 | for x in range(0, trainCount): 30 | row = next(csvReader) 31 | """ 32 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]), 33 | float(row[7]), float(row[8]), float(row[9]), float(row[10]), 34 | float(row[11]), float(row[12])] 35 | """ 36 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]), 37 | float(row[7]), float(row[8])] 38 | trainData.append(data) 39 | trainLabel.append(float(row[15])) 40 | for x in range(0, testCount): 41 | row = next(csvReader) 42 | """ 43 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]), 44 | float(row[7]), float(row[8]), float(row[9]), float(row[10]), 45 | float(row[11]), float(row[12])] 46 | """ 47 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]), 48 | float(row[7]), float(row[8])] 49 | testData.append(data) 50 | testLabel.append(float(row[15])) 51 | return int(row[0]), trainData, trainLabel, testData, testLabel 52 | except StopIteration: 53 | return 0, [], [], [], [] 54 | 55 | def getLCData(csvReader, trainCount, testCount): 56 | trainData = [] 57 | testData = [] 58 | trainLabel = [] 59 | testLabel = [] 60 | try: 61 | for x in range(0, trainCount): 62 | row = next(csvReader) 63 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]), 64 | float(row[7])] 65 | trainData.append(data) 66 | trainLabel.append(float(row[14])) 67 | for x in range(0, testCount): 68 | row = next(csvReader) 69 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]), 70 | float(row[7])] 71 | testData.append(data) 72 | testLabel.append(float(row[14])) 73 | return int(row[0]), trainData, trainLabel, testData, testLabel 74 | except StopIteration: 75 | return 0, [], [], [], [] 76 | 77 | def getBias(label, pred): 78 | a1 = np.array(label) 79 | a2 = np.array(pred) 80 | if (a1.__len__() != a2.__len__()): 81 | raise ValueError("length not equel") 82 | m = a1 - a2 83 | return math.sqrt(sum(m*m)/a1.__len__()) 84 | 85 | def sariamTest(): 86 | f = open("datam.csv", "r") 87 | f_csv = csv.reader(f) 88 | 89 | # writer = open("report.txt", "w") 90 | 91 | ap = arimaPredicter.predicter(); 92 | ap.setIndex(index) 93 | 94 | for i in range(0, 10): 95 | midclass, trD, trL, teD, teL = getData(f_csv, 120, 0) 96 | if (midclass == 0): 97 | break 98 | 99 | trainData = trL[:99] 100 | testData = trL[99:] 101 | 102 | ap.test_stationarity(trL) 103 | 104 | greatfit = (0, 0, 0) 105 | minaic = 99999 106 | 107 | for p in range(0, 3): 108 | for q in range(0, 3): 109 | try: 110 | ap.setPara(midclass, (p, q)) 111 | model = ap.sarimaTrain(midclass, trainData) 112 | if (model.aic < minaic): 113 | minaic = model.aic 114 | greatfit = (p, 1, q) 115 | result = ap.sarimaPredict(model, len(testData)) 116 | print("(%d,%d) %f %f\n" % (p, q, model.aic, getBias(testData, result))) 117 | 118 | except: 119 | pass 120 | 121 | print("midclass %d: %d %d\n" % (midclass, greatfit[0], greatfit[2])) 122 | 123 | f.close() 124 | #writer.close() 125 | """ 126 | def 
test_Ljung_Box(timeseries, l): 127 | acf, q, p = sm.tsa.acf(timeseries, nlags=l, qstat=True) 128 | out = np.c_[range(1, l+1), acf[1:], q, p] 129 | output=pd.DataFrame(out, columns=['lag', "AC", "Q", "P-value"]) 130 | output = output.set_index('lag') 131 | print output 132 | 133 | import arch 134 | 135 | def sariamGarchTest(): 136 | global larclasPred, larclasLabl, totalBias, totalCount, temp 137 | f = open("datam.csv", "r") 138 | f_csv = csv.reader(f) 139 | 140 | for i in range(0, 1): 141 | midclass, trD, trL, teD, teL = getData(f_csv, 120, 0) 142 | if (midclass == 0): 143 | break 144 | # print trL 145 | data0 = pd.Series(trL) 146 | data0.index = pd.Index(index) 147 | 148 | trainData = data0[:dt.datetime(2015,4,9)] 149 | testData = data0[dt.datetime(2015,4,10):] 150 | 151 | model = SARIMAX(trainData, order=(1,1,1), seasonal_order=(0,1,1,7)) 152 | result = model.fit() 153 | 154 | at = trainData - result.fittedvalues 155 | #plt.plot(at, color='red') 156 | #plt.show(block=False) 157 | 158 | at2 = np.square(at) 159 | plt.plot(at2, color='red') 160 | plt.show(block=False) 161 | #test_Ljung_Box(at2, 10) 162 | 163 | amodel = arch.arch_model(at2) 164 | aresult = amodel.fit(disp='off') 165 | aresult.summary() 166 | temp.append(aresult) 167 | output1 = result.forecast(trL.__len__()-trainData.__len__()) 168 | forecasts = aresult.forecast(horizon=5, start=dt.datetime(2015,4,9)) 169 | print forecasts.mean[dt.datetime(2015,4,9):] 170 | print forecasts.variance[dt.datetime(2015,4,9):] 171 | f.close() 172 | """ 173 | sariamTest() -------------------------------------------------------------------------------- /data/train.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/data/train.csv -------------------------------------------------------------------------------- /dataLoader.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Spyder Editor 4 | 5 | This is a temporary script file. 
6 | """ 7 | 8 | import csv 9 | 10 | class loader(): 11 | def __init__(self, midClassFile = "", LarClassFile = ""): 12 | if (midClassFile != ""): 13 | self.mid_f = open(midClassFile, "r") 14 | self.mid_f_csv = csv.reader(self.mid_f) 15 | else: 16 | self.mid_f = None 17 | self.mid_f_csv = None 18 | if (LarClassFile != ""): 19 | self.lar_f = open(LarClassFile, "r") 20 | self.lar_f_csv = csv.reader(self.lar_f) 21 | else: 22 | self.lar_f = None 23 | self.lar_f_csv = None 24 | self.trainCount = 120 25 | self.testCount = 0 26 | self.skipCount = 0 27 | self.midClassFeature = range(3, 9) 28 | self.midSuffix = [] 29 | self.larClassFeature = range(3, 8) 30 | self.larSuffix = [] 31 | 32 | 33 | def setFile(self, midClassFile = "", LarClassFile = ""): 34 | if (midClassFile != ""): 35 | try: 36 | self.mid_f.close() 37 | except: 38 | pass 39 | self.mid_f = open(midClassFile, "r") 40 | self.mid_f_csv = csv.reader(self.mid_f) 41 | if (LarClassFile != ""): 42 | try: 43 | self.lar_f.close() 44 | except: 45 | pass 46 | self.lar_f = open(LarClassFile, "r") 47 | self.lar_f_csv = csv.reader(self.lar_f) 48 | 49 | def closeFiles(self): 50 | try: 51 | self.mid_f.close() 52 | except: 53 | pass 54 | try: 55 | self.lar_f.close() 56 | except: 57 | pass 58 | 59 | def setSize(self, train, test = 0, skip = 0): 60 | self.trainCount = train 61 | self.testCount = test 62 | self.skipCount = skip 63 | 64 | def setMidClassFeature(self, feature=[], suffix=[]): 65 | self.midClassFeature = feature 66 | self.midSuffix = suffix 67 | 68 | def setLarClassFeature(self, feature=[], suffix=[]): 69 | self.larClassFeature = feature 70 | self.larSuffix = suffix 71 | 72 | def getNextMidClass(self): 73 | trainData = [] 74 | testData = [] 75 | trainLabel = [] 76 | testLabel = [] 77 | try: 78 | for x in range(0, self.trainCount): 79 | row = next(self.mid_f_csv) 80 | data = [] 81 | for y in self.midClassFeature: 82 | data.append(float(row[y])) 83 | data = data + self.midSuffix 84 | trainData.append(data) 85 | trainLabel.append(float(row[-1])) 86 | 87 | for x in range(0, self.testCount): 88 | row = next(self.mid_f_csv) 89 | data = [] 90 | for y in self.midClassFeature: 91 | data.append(float(row[y])) 92 | data = data + self.midSuffix 93 | testData.append(data) 94 | testLabel.append(float(row[-1])) 95 | 96 | for x in range(0, self.skipCount): 97 | next(self.mid_f_csv) 98 | return int(row[0]), trainData, trainLabel, testData, testLabel 99 | except StopIteration: 100 | return 0, [], [], [], [] 101 | 102 | def getNextLarClass(self): 103 | trainData = [] 104 | testData = [] 105 | trainLabel = [] 106 | testLabel = [] 107 | try: 108 | for x in range(0, self.trainCount): 109 | row = next(self.lar_f_csv) 110 | data = [] 111 | for y in self.larClassFeature: 112 | data.append(float(row[y])) 113 | data = data + self.larSuffix 114 | trainData.append(data) 115 | trainLabel.append(float(row[-1])) 116 | 117 | for x in range(0, self.testCount): 118 | row = next(self.lar_f_csv) 119 | data = [] 120 | for y in self.larClassFeature: 121 | data.append(float(row[y])) 122 | data = data + self.larSuffix 123 | testData.append(data) 124 | testLabel.append(float(row[-1])) 125 | 126 | for x in range(0, self.skipCount): 127 | next(self.lar_f_csv) 128 | return int(row[0]), trainData, trainLabel, testData, testLabel 129 | except StopIteration: 130 | return 0, [], [], [], [] -------------------------------------------------------------------------------- /doc/compare.docx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/doc/compare.docx
--------------------------------------------------------------------------------
/doc/manual.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Tue Nov 28 15:14:17 2017
4 | 
5 | @author: wangjun
6 | """
7 | 
8 | # Quick-start guide for xgboostPredicter and arimaPredicter
9 | # Last updated 20171128
10 | 
11 | import arimaPredicter
12 | import dataLoader
13 | import xgboostPredicter
14 | 
15 | import matplotlib.pylab as plt
16 | import datetime as dt
17 | 
18 | # Load the training data; any other way of reading it works too
19 | loader = dataLoader.loader("datam.csv")
20 | loader.setSize(200, 43, 0)
21 | midclass, trainData, trainLabel, testData, testLabel = loader.getNextMidClass()
22 | 
23 | plt.plot(trainLabel)
24 | plt.title('Train Label')
25 | plt.show(block=False)
26 | 
27 | def arimaPredict():
28 |     # First, create a class instance
29 |     ap = arimaPredicter.predicter()
30 |     # Set the index: the first argument is the date the training data starts on, the
31 |     # second is the index length; the index just needs to be no shorter than the training data
32 |     ap.createIndex(dt.datetime(2015,1,1), 243)
33 |     # sarimaTrain can be called directly to fit an ARIMA model; only the training labels are required
34 |     model = ap.sarimaTrain(trainLabel)
35 |     # With the model, sarimaPredict forecasts the days immediately following the training data;
36 |     # the two arguments are the model obtained above and the length of the forecast
37 |     # This is a static function and can also be called through the class name
38 |     predictLabel = ap.sarimaPredict(model, 43)
39 |     # This yields the result
40 |     plt.plot(testLabel, color='blue',label='actual')
41 |     plt.plot(predictLabel, color='red',label='predict')
42 |     plt.title('ARIMA(default)')
43 |     plt.show(block=False)
44 | 
45 |     # In fact, sarimaTrain also lets you specify the two ARIMA parameters (ar, ma)
46 |     model = ap.sarimaTrain(trainLabel, para=(2, 2))
47 |     # Well-chosen parameters improve the result; poorly chosen ones make it worse
48 |     predictLabel = ap.sarimaPredict(model, 43)
49 |     plt.plot(testLabel, color='blue',label='actual')
50 |     plt.plot(predictLabel, color='red',label='predict')
51 |     plt.title('ARIMA(2, 2)')
52 |     plt.show(block=False)
53 | 
54 |     # If you do not know which parameters to pick, sarimaParaSelect can select them; its
55 |     # inputs are the class name, a training set, a test set, and a boolean deciding whether AIC is considered
56 |     # So far, selecting with or without AIC gives similar results
57 |     # The function returns the best parameters and the predictions on the test set, and the
58 |     # instance also stores the best parameters under the class name as key
59 |     para, _ = ap.sarimaParaSelect(1001, trainLabel[:-50], trainLabel[-50:], True)
60 | 
61 |     # Since the best parameters are now stored, later training runs only need the class name
62 |     model = ap.sarimaTrain(trainLabel, classNo=1001)
63 |     # Prediction always works the same way
64 |     predictLabel = ap.sarimaPredict(model, 43)
65 |     plt.plot(testLabel, color='blue',label='actual')
66 |     plt.plot(predictLabel, color='red',label='predict')
67 |     plt.title('ARIMA(%d, %d)' % (para[0], para[1]))
68 |     plt.show(block=False)
69 | 
70 |     # Note that both sarimaTrain and sarimaParaSelect may throw an exception when the
71 |     # model fails to converge
72 | 
73 | def xgboostPredict():
74 |     # First, create a class instance
75 |     xgp = xgboostPredicter.predicter()
76 | 
77 |     # xgboostTrain can be called directly to train an xgboost model; the inputs are the training features and their labels
78 |     model = xgp.xgboostTrain(trainData, trainLabel)
79 | 
80 |     # With the model, xgboostPredict produces predictions from the test-set features
81 |     # This is a static function and can also be called through the class name
82 |     predictLabel = xgp.xgboostPredict(model, testData)
83 |     # This yields the result
84 |     plt.plot(testLabel, color='blue',label='actual')
85 |     plt.plot(predictLabel, color='red',label='predict')
86 |     plt.title('xgboost(default)')
87 |     plt.show(block=False)
88 | 
89 |     # The predicter class also provides two static utility functions:
90 |     # simulateFeature clears certain positions of the feature vectors; for example
91 |     xgp.simulateFeature(testData, [-2, -1])
92 |     # clears the last two positions of every test feature vector (promotion information in
93 |     # my feature layout), which makes the test-set results more realistic
94 |     predictLabel = xgp.xgboostPredict(model, testData)
95 |     plt.plot(testLabel, color='blue',label='actual')
96 |     plt.plot(predictLabel, color='red',label='predict')
97 |     plt.title('xgboost(default)')
98 |     plt.show(block=False)
99 | 
100 |     # createFeature builds feature vectors for testing, but it can only be used when your
101 |     # feature layout matches mine
102 |     # Its arguments are (start date, length, number of suffix zeros, holiday list, pre-holiday list, workday list)
103 |     # The lists hold day indices counted from 1; note that weekends automatically count as
104 |     # holidays and Fridays as pre-holiday days, for example
105 |     data = xgp.createFeature(dt.datetime(2015,9,1), 7, 1, [4], [3], [6])
106 |     # whose output is printed by
107 |     for x in data:
108 |         print(x)
109 | 
110 | arimaPredict()
111 | xgboostPredict()
--------------------------------------------------------------------------------
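For reference, assuming xgboostPredicter.createFeature behaves like the standalone createFeature in arima_xgboost/arima_xgboost.py, the last example above (2015-09-01 falls on a Tuesday) should print one [day-of-month, weekday, pre-holiday flag, holiday flag, suffix zero] vector per day:

    [1, 2, 0, 0, 0]
    [2, 3, 0, 0, 0]
    [3, 4, 1, 0, 0]   # day 3 is in the pre-holiday list
    [4, 5, 0, 1, 0]   # day 4 is in the holiday list
    [5, 6, 0, 1, 0]   # Saturday counts as a holiday by default
    [6, 0, 0, 0, 0]   # Sunday, but day 6 is declared a workday
    [7, 1, 0, 0, 0]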
-------------------------------------------------------------------------------- /doc/report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/doc/report.pdf -------------------------------------------------------------------------------- /doc/report.txt: --------------------------------------------------------------------------------
(Midclass 1001 select SARIMA, accuracy: 4.164104)
(Midclass 1002 select SARIMA, accuracy: 0.530263)
(Midclass 1004 select SARIMA, accuracy: 2.009439)
(Midclass 1005 select ZERO, accuracy: 0.377964)
(Midclass 1099 select SARIMA, accuracy: 0.195678)
(Midclass 1101 select SARIMA, accuracy: 1.562947)
(Midclass 1102 select SARIMA, accuracy: 0.875281)
(Midclass 1103 select SARIMA, accuracy: 0.415788)
(Midclass 1201 select SARIMA, accuracy: 22.207918)
(Midclass 1202 select SARIMA, accuracy: 2.736437)
(Midclass 1203 select SARIMA, accuracy: 11.184439)
(Midclass 1205 select SARIMA, accuracy: 0.846538)
(Midclass 1301 select XGBOOST, accuracy: 1.802102)
(Midclass 1302 select SARIMA, accuracy: 1.113745)
(Midclass 1306 select XGBOOST, accuracy: 0.668731)
(Midclass 1308 select SARIMA, accuracy: 3.409499)
(Midclass 1501 select XGBOOST, accuracy: 1.607822)
(Midclass 1502 select SARIMA, accuracy: 0.649425)
(Midclass 1503 select XGBOOST, accuracy: 0.854608)
(Midclass 1504 select ZERO, accuracy: 0.577350)
(Midclass 1505 select SARIMA, accuracy: 5.885219)
(Midclass 1507 select ZERO, accuracy: 0.000000)
(Midclass 1508 select SARIMA, accuracy: 0.998574)
(Midclass 1510 select SARIMA, accuracy: 1.002930)
(Midclass 1511 select SARIMA, accuracy: 0.207669)
(Midclass 1512 select XGBOOST, accuracy: 0.536687)
(Midclass 1513 select SARIMA, accuracy: 1.473901)
(Midclass 1515 select SARIMA, accuracy: 1.343283)
(Midclass 1516 select XGBOOST, accuracy: 1.640316)
(Midclass 1517 select XGBOOST, accuracy: 2.080571)
(Midclass 1518 select SARIMA, accuracy: 5.853074)
(Midclass 1519 select XGBOOST, accuracy: 1.276925)
(Midclass 1521 select SARIMA, accuracy: 3.578035)
(Midclass 2001 select ZERO, accuracy: 2.203893)
(Midclass 2002 select SARIMA, accuracy: 1.330328)
(Midclass 2003 select XGBOOST, accuracy: 1.401267)
(Midclass 2004 select SARIMA, accuracy: 0.204031)
(Midclass 2005 select SARIMA, accuracy: 1.984291)
(Midclass 2006 select XGBOOST, accuracy: 3.567574)
(Midclass 2007 select SARIMA, accuracy: 2.002037)
(Midclass 2008 select SARIMA, accuracy: 2.831727)
(Midclass 2009 select SARIMA, accuracy: 2.368810)
(Midclass 2010 select XGBOOST, accuracy: 1.422014)
(Midclass 2011 select SARIMA, accuracy: 7.890765)
(Midclass 2012 select XGBOOST, accuracy: 0.779130)
(Midclass 2013 select SARIMA, accuracy: 3.032853)
(Midclass 2014 select SARIMA, accuracy: 4.112845)
(Midclass 2015 select XGBOOST, accuracy: 1.717115)
(Midclass 2101 select XGBOOST, accuracy: 0.531832)
(Midclass 2102 select ZERO, accuracy: 0.377964)
(Midclass 2103 select SARIMA, accuracy: 0.951623)
(Midclass 2104 select SARIMA, accuracy: 1.087212)
(Midclass 2105 select SARIMA, accuracy: 1.209021)
(Midclass 2106 select XGBOOST, accuracy: 0.528829)
(Midclass 2107 select ZERO, accuracy: 0.000000)
(Midclass 2201 select SARIMA, accuracy: 5.789172)
(Midclass 2202 select SARIMA, accuracy: 7.402780)
(Midclass 2203 select SARIMA, accuracy: 7.110155)
(Midclass 2204 select SARIMA, accuracy: 3.052818)
(Midclass 2205 select SARIMA, accuracy: 3.604804)
(Midclass 2206 select SARIMA, accuracy: 4.279676)
(Midclass 2207 select SARIMA, accuracy: 2.793577)
(Midclass 2208 select SARIMA, accuracy: 1.795416)
(Midclass 2209 select XGBOOST, accuracy: 1.610905)
(Midclass 2210 select SARIMA, accuracy: 3.808497)
(Midclass 2211 select ZERO, accuracy: 0.377964)
(Midclass 2212 select XGBOOST, accuracy: 1.402607)
(Midclass 2301 select XGBOOST, accuracy: 1.707330)
(Midclass 2302 select SARIMA, accuracy: 1.730345)
(Midclass 2303 select XGBOOST, accuracy: 2.240427)
(Midclass 2304 select XGBOOST, accuracy: 0.650331)
(Midclass 2305 select SARIMA, accuracy: 1.866917)
(Midclass 2306 select XGBOOST, accuracy: 3.693004)
(Midclass 2307 select SARIMA, accuracy: 1.606624)
(Midclass 2309 select XGBOOST, accuracy: 1.696085)
(Midclass 2310 select ZERO, accuracy: 0.617213)
(Midclass 2311 select XGBOOST, accuracy: 1.108243)
(Midclass 2312 select SARIMA, accuracy: 0.478464)
(Midclass 2313 select ZERO, accuracy: 0.308607)
(Midclass 2314 select XGBOOST, accuracy: 1.468442)
(Midclass 2316 select XGBOOST, accuracy: 1.258186)
(Midclass 2317 select XGBOOST, accuracy: 0.640845)
(Midclass 3001 select ZERO, accuracy: 0.308607)
(Midclass 3002 select SARIMA, accuracy: 1.270771)
(Midclass 3003 select SARIMA, accuracy: 0.708163)
(Midclass 3004 select XGBOOST, accuracy: 0.470117)
(Midclass 3005 select XGBOOST, accuracy: 0.946125)
(Midclass 3006 select SARIMA, accuracy: 2.675442)
(Midclass 3007 select SARIMA, accuracy: 1.639240)
(Midclass 3008 select SARIMA, accuracy: 1.766410)
(Midclass 3010 select SARIMA, accuracy: 0.838993)
(Midclass 3011 select SARIMA, accuracy: 0.640106)
(Midclass 3013 select SARIMA, accuracy: 2.473312)
(Midclass 3014 select ZERO, accuracy: 0.218218)
(Midclass 3016 select SARIMA, accuracy: 3.851208)
(Midclass 3017 select SARIMA, accuracy: 0.888357)
(Midclass 3018 select SARIMA, accuracy: 3.428816)
(Midclass 3102 select ZERO, accuracy: 0.218218)
(Midclass 3105 select XGBOOST, accuracy: 0.214763)
(Midclass 3107 select SARIMA, accuracy: 0.215656)
(Midclass 3109 select ZERO, accuracy: 0.218218)
(Midclass 3110 select XGBOOST, accuracy: 0.681508)
(Midclass 3112 select SARIMA, accuracy: 0.673105)
(Midclass 3113 select SARIMA, accuracy: 0.196265)
(Midclass 3114 select SARIMA, accuracy: 0.820782)
(Midclass 3116 select SARIMA, accuracy: 0.859559)
(Midclass 3117 select ZERO, accuracy: 0.690066)
(Midclass 3118 select XGBOOST, accuracy: 1.138621)
(Midclass 3119 select ZERO, accuracy: 0.308607)
(Midclass 3125 select ZERO, accuracy: 0.000000)
(Midclass 3126 select SARIMA, accuracy: 0.710316)
(Midclass 3208 select ZERO, accuracy: 0.000000)
(Midclass 3217 select ZERO, accuracy: 0.000000)
(Midclass 3227 select SARIMA, accuracy: 0.470535)
(Midclass 3311 select ZERO, accuracy: 0.000000)
(Midclass 3316 select ZERO, accuracy: 0.000000)
(Midclass 3319 select SARIMA, accuracy: 1.418897)
(Midclass 3320 select XGBOOST, accuracy: 0.681791)
(Midclass 3321 select ZERO, accuracy: 0.845154)
(Midclass 3322 select ZERO, accuracy: 0.218218)
(Midclass 3323 select ZERO, accuracy: 0.308607)
(Midclass 3325 select SARIMA, accuracy: 0.306011)
(Midclass 3326 select ZERO, accuracy: 0.000000)
(Midclass 3402 select SARIMA, accuracy: 0.505201)
(Midclass 3403 select ZERO, accuracy: 0.218218)
(Midclass 3407 select XGBOOST, accuracy: 1.583984)
(Midclass 3408 select ZERO, accuracy: 0.218218)
(Midclass 3413 select ZERO, accuracy: 0.000000)
(Midclass 3415 select SARIMA, accuracy: 0.871680)
(Midclass 3417 select XGBOOST, accuracy: 0.377431)
(Midclass 3423 select SARIMA, accuracy: 0.438360)
(Midclass 3424 select XGBOOST, accuracy: 1.109004)
(Midclass 3426 select XGBOOST, accuracy: 0.215652)
(Midclass 3431 select SARIMA, accuracy: 0.555214)
(Larclass 10 select SUM, accuracy: 5.288813)
(Larclass 11 select SUM, accuracy: 1.967995)
(Larclass 12 select SARIMA, accuracy: 29.097950)
(Larclass 13 select SARIMA, accuracy: 3.669651)
(Larclass 15 select SARIMA, accuracy: 15.189662)
(Larclass 20 select SARIMA, accuracy: 13.969971)
(Larclass 21 select SUM, accuracy: 2.007923)
(Larclass 22 select SUM, accuracy: 22.782286)
(Larclass 23 select SARIMA, accuracy: 9.731009)
(Larclass 30 select SARIMA, accuracy: 8.978236)
(Larclass 31 select SUM, accuracy: 2.468272)
(Larclass 32 select SARIMA, accuracy: 0.447503)
(Larclass 33 select SARIMA, accuracy: 2.195191)
(Larclass 34 select SARIMA, accuracy: 2.465107)

1502: failed to use arima, use xgboost instead
2302: failed to use arima, use xgboost instead
3017: failed to use arima, use xgboost instead
3018: failed to use arima, use xgboost instead
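The log above is the output of a per-class model selection: each candidate predictor (SARIMA, XGBOOST, ZERO, and SUM for larclasses) is scored on a validation window and one winner is recorded per class. A minimal sketch of that selection step, assuming the "accuracy" column is an error measure where lower is better (the 0.000000 ZERO rows suggest this); the score values below are illustrative, not taken from the repo:

# Hypothetical reconstruction of the selection report.txt logs; select_model
# and the candidate scores are stand-ins, not the repo's actual code.
def select_model(scores):
    """scores: dict mapping model name -> validation error (lower wins)."""
    best = min(scores, key=scores.get)
    return best, scores[best]

candidates = {"SARIMA": 4.164104, "XGBOOST": 5.02, "ZERO": 9.3}
name, err = select_model(candidates)
print("(Midclass 1001 select %s, accuracy: %f)" % (name, err))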
-------------------------------------------------------------------------------- /doc/~$ompare.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/doc/~$ompare.docx -------------------------------------------------------------------------------- /doc/特征选择.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/doc/特征选择.xlsx -------------------------------------------------------------------------------- /plot_pic/1001_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1001_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1002_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1002_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1004_customers.jpg: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1004_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1005_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1005_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1006_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1006_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1007_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1007_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1099_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1099_customers.jpg -------------------------------------------------------------------------------- /plot_pic/10_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/10_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1101_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1101_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1102_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1102_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1103_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1103_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1104_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1104_customers.jpg -------------------------------------------------------------------------------- /plot_pic/11_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/11_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1201_customers.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1201_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1202_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1202_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1203_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1203_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1205_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1205_customers.jpg -------------------------------------------------------------------------------- /plot_pic/12_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/12_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1301_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1301_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1302_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1302_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1306_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1306_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1308_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1308_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1399_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1399_customers.jpg -------------------------------------------------------------------------------- /plot_pic/13_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/13_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1401_customers.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1401_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1402_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1402_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1403_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1403_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1404_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1404_customers.jpg -------------------------------------------------------------------------------- /plot_pic/14_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/14_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1501_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1501_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1502_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1502_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1503_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1503_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1504_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1504_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1505_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1505_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1507_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1507_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1508_customers.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1508_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1509_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1509_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1510_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1510_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1511_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1511_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1512_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1512_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1513_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1513_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1514_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1514_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1515_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1515_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1516_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1516_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1517_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1517_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1518_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1518_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1519_customers.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1519_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1520_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1520_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1521_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1521_customers.jpg -------------------------------------------------------------------------------- /plot_pic/15_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/15_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2001_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2001_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2002_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2002_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2003_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2003_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2004_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2004_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2005_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2005_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2006_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2006_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2007_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2007_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2008_customers.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2008_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2009_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2009_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2010_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2010_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2011_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2011_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2012_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2012_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2013_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2013_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2014_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2014_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2015_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2015_customers.jpg -------------------------------------------------------------------------------- /plot_pic/20_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/20_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2101_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2101_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2102_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2102_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2103_customers.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2103_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2104_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2104_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2105_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2105_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2106_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2106_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2107_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2107_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2108_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2108_customers.jpg -------------------------------------------------------------------------------- /plot_pic/21_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/21_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2201_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2201_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2202_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2202_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2203_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2203_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2204_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2204_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2205_customers.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2205_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2206_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2206_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2207_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2207_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2208_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2208_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2209_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2209_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2210_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2210_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2211_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2211_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2212_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2212_customers.jpg -------------------------------------------------------------------------------- /plot_pic/22_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/22_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2301_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2301_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2302_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2302_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2303_customers.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2303_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2304_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2304_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2305_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2305_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2306_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2306_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2307_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2307_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2308_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2308_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2309_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2309_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2310_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2310_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2311_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2311_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2312_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2312_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2313_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2313_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2314_customers.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2314_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2315_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2315_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2316_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2316_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2317_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2317_customers.jpg -------------------------------------------------------------------------------- /plot_pic/23_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/23_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3001_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3001_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3002_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3002_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3003_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3003_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3004_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3004_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3005_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3005_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3006_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3006_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3007_customers.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3007_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3008_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3008_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3009_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3009_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3010_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3010_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3011_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3011_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3012_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3012_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3013_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3013_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3014_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3014_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3015_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3015_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3016_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3016_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3017_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3017_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3018_customers.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3018_customers.jpg -------------------------------------------------------------------------------- /plot_pic/30_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/30_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3101_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3101_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3102_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3102_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3104_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3104_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3105_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3105_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3106_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3106_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3107_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3107_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3108_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3108_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3109_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3109_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3110_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3110_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3111_customers.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3111_customers.jpg
--------------------------------------------------------------------------------
(The remaining /plot_pic/*_customers.jpg entries, 3112_customers.jpg through 34_customers.jpg, are binary image files: per-class plots of daily customer counts. Their raw-file links all follow the same pattern as the one above and are omitted here.)
--------------------------------------------------------------------------------
/plot_pic/异常日期.txt ("anomalous dates"):
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/异常日期.txt
--------------------------------------------------------------------------------
/rnn/test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Thu Nov 30 22:01:26 2017
4 | 
5 | @author: wangjun
6 | """
7 | 
8 | # Naive LSTM to learn three-char window to one-char mapping
9 | import numpy
10 | from keras.models import Sequential
11 | from keras.layers import Dense
12 | from keras.layers import LSTM
13 | from keras.utils import np_utils
14 | 
15 | # fix random seed for reproducibility
16 | numpy.random.seed(7)
17 | # define the raw dataset
18 | alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
19 | # create mapping of characters to integers (0-25) and the reverse
20 | char_to_int = dict((c, i) for i, c in enumerate(alphabet))
21 | int_to_char = dict((i, c) for i, c in enumerate(alphabet))
22 | # prepare the dataset of input to output pairs encoded as integers
23 | seq_length = 3
24 | dataX = []
25 | dataY = []
26 | for i in range(0, len(alphabet) - seq_length, 1):
27 |     seq_in = alphabet[i:i + seq_length]
28 |     seq_out = alphabet[i + seq_length]
29 |     dataX.append([char_to_int[char] for char in seq_in])
30 |     dataY.append(char_to_int[seq_out])
31 |     print(seq_in, '->', seq_out)
32 | # reshape X to be [samples, time steps, features]
33 | X = numpy.reshape(dataX, (len(dataX), seq_length, 1))
34 | # normalize
35 | X = X / float(len(alphabet))
36 | # one hot encode the output variable
37 | y = np_utils.to_categorical(dataY)
38 | # create and fit the model
39 | model = Sequential()
40 | model.add(LSTM(32, input_shape=(X.shape[1], X.shape[2])))
41 | model.add(Dense(y.shape[1], activation='softmax'))
42 | model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
43 | model.fit(X, y, nb_epoch=500, batch_size=1, verbose=2)
44 | # summarize performance of the model
45 | scores = model.evaluate(X, y, verbose=0)
46 | print("Model Accuracy: %.2f%%" % (scores[1]*100))
47 | # demonstrate some model predictions
48 | for pattern in dataX:
49 |     x = numpy.reshape(pattern, (1, len(pattern), 1))
50 |     x = x / float(len(alphabet))
51 |     prediction = model.predict(x, verbose=0)
52 |     index = numpy.argmax(prediction)
53 |     result = int_to_char[index]
54 |     seq_in = [int_to_char[value] for value in pattern]
55 |     print(seq_in, "->", result)
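A quick sanity check of the tensor convention used above (a minimal illustrative sketch, not a file from this repository; the window values are made up): Keras LSTM layers expect input shaped [samples, timesteps, features], so a three-character window must be reshaped to (1, 3, 1) at prediction time, matching the training tensor X built with shape (len(dataX), seq_length, 1).

import numpy

pattern = [0, 1, 2]  # an integer-encoded three-char window, e.g. "ABC"
x = numpy.reshape(pattern, (1, len(pattern), 1))  # [samples, timesteps, features]
x = x / float(26)  # same normalization as training (alphabet length)
print(x.shape)  # (1, 3, 1), consistent with input_shape=(3, 1)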
--------------------------------------------------------------------------------
/rnn/test2.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Thu Nov 30 22:16:06 2017
4 | 
5 | @author: wangjun
6 | """
7 | 
8 | import numpy as np
9 | import math
10 | 
11 | from keras.models import Sequential
12 | from keras.layers import Dense
13 | from keras.layers import LSTM
14 | import dataLoader
15 | 
16 | import matplotlib.pyplot as plt
17 | import xgboostPredicter
18 | 
19 | loader = dataLoader.loader("datam.csv")
20 | loader.setSize(200, 43, 0)
21 | midclass, trainData, trainLabel, testData, testLabel = loader.getNextMidClass()
22 | loader.closeFiles()
23 | 
24 | seq_length = 0
25 | data_max = 35
26 | dataX = []
27 | dataY = []
28 | 
29 | trainLabelN = []
30 | for i in range(0, len(trainLabel)):
31 |     trainLabelN.append(trainLabel[i] / data_max)
32 | 
33 | for i in range(0, len(trainLabelN) - seq_length):
34 |     dataX.append(trainData[i+seq_length]+trainLabelN[i:i+seq_length])
35 |     dataY.append(trainLabelN[i+seq_length])
36 | 
37 | X = np.reshape(dataX, (len(dataX), 1, len(trainData[0])+seq_length))
38 | Y = np.reshape(dataY, (len(dataY), 1))
39 | 
40 | model = Sequential()
41 | model.add(LSTM(6, input_shape=(X.shape[1], X.shape[2]), batch_size=1, stateful=True))
42 | model.add(Dense(1))
43 | model.compile(loss='mean_squared_error', optimizer='adam')
44 | model.fit(X, Y, nb_epoch=300, batch_size=1, verbose=1)
45 | 
46 | #history = trainLabelN[-1*seq_length:]
47 | predLabel = []
48 | for i in range(0, len(testLabel)):
49 |     #feature = np.array(testData[i]+history).reshape(1, 1, len(trainData[0])+seq_length)
50 |     feature = np.array(testData[i]).reshape(1, 1, len(trainData[0]))
51 |     predict = model.predict(feature)
52 |     predLabel.append(predict[0][0]*data_max)
53 |     #history.pop(0)
54 |     #history.append(predict)
55 | 
56 | predLabel = np.array(predLabel)
57 | testLabel = np.array(testLabel)
58 | bias = sum((predLabel-testLabel)*(predLabel-testLabel))
59 | bias = math.sqrt(bias/len(testLabel))
60 | print(bias)
61 | plt.plot(predLabel, color='blue', label='predict')
62 | plt.plot(testLabel, color='red', label='original')
63 | plt.show(block=False)
64 | 
65 | def xgboostPredict(trainData, trainLabel, testData):
66 | 
67 |     xgp = xgboostPredicter.predicter()
68 |     model = xgp.xgboostTrain(trainData, trainLabel)
69 |     predLabel = xgp.xgboostPredict(model, testData)
70 |     return predLabel
71 | 
72 | predLabel = xgboostPredict(trainData, trainLabel, testData)
73 | predLabel = np.array(predLabel)
74 | testLabel = np.array(testLabel)
75 | bias = sum((predLabel-testLabel)*(predLabel-testLabel))
76 | bias = math.sqrt(bias/len(testLabel))
77 | print(bias)
78 | plt.plot(predLabel, color='blue', label='predict')
79 | plt.plot(testLabel, color='red', label='original')
80 | plt.show(block=False)
81 | 
--------------------------------------------------------------------------------
/rnn/test3.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 |
'''Example script showing how to use a stateful LSTM model 4 | and how its stateless counterpart performs. 5 | 6 | More documentation about the Keras LSTM model can be found at 7 | https://keras.io/layers/recurrent/#lstm 8 | 9 | The models are trained on an input/output pair, where 10 | the input is a generated uniformly distributed 11 | random sequence of length = "input_len", 12 | and the output is a moving average of the input with window length = "tsteps". 13 | Both "input_len" and "tsteps" are defined in the "editable parameters" section. 14 | A larger "tsteps" value means that the LSTM will need more memory 15 | to figure out the input-output relationship. 16 | This memory length is controlled by the "lahead" variable (more details below). 17 | The rest of the parameters are: 18 | - input_len: the length of the generated input sequence 19 | - lahead: the input sequence length that the LSTM 20 | is trained on for each output point 21 | - batch_size, epochs: same parameters as in the model.fit(...) function 22 | When lahead > 1, the model input is preprocessed to a "rolling window view" 23 | of the data, with the window length = "lahead". 24 | This is similar to sklearn's "view_as_windows" 25 | with "window_shape" being a single number 26 | Ref: http://scikit-image.org/docs/0.10.x/api/skimage.util.html#view-as-windows 27 | When lahead < tsteps, only the stateful LSTM converges because its 28 | statefulness allows it to see beyond the capability that lahead 29 | gave it to fit the n-point average. The stateless LSTM does not have 30 | this capability, and hence is limited by its "lahead" parameter, 31 | which is not sufficient to see the n-point average. 32 | When lahead >= tsteps, both the stateful and stateless LSTM converge. 33 | ''' 34 | from __future__ import print_function 35 | import numpy as np 36 | import matplotlib.pyplot as plt 37 | import pandas as pd 38 | from keras.models import Sequential 39 | from keras.layers import Dense, LSTM 40 | 41 | # ---------------------------------------------------------- 42 | # EDITABLE PARAMETERS 43 | # Read the documentation in the script head for more details 44 | # ---------------------------------------------------------- 45 | 46 | # length of input 47 | input_len = 1000 48 | 49 | # The window length of the moving average used to generate 50 | # the output from the input in the input/output pair used 51 | # to train the LSTM 52 | # e.g. 
if tsteps=2 and input=[1, 2, 3, 4, 5], 53 | # then output=[1.5, 2.5, 3.5, 4.5] 54 | tsteps = 2 55 | 56 | # The input sequence length that the LSTM is trained on for each output point 57 | lahead = 5 58 | 59 | # training parameters passed to "model.fit(...)" 60 | batch_size = 1 61 | epochs = 10 62 | 63 | # ------------ 64 | # MAIN PROGRAM 65 | # ------------ 66 | 67 | print("*" * 33) 68 | if lahead >= tsteps: 69 | print("STATELESS LSTM WILL ALSO CONVERGE") 70 | else: 71 | print("STATELESS LSTM WILL NOT CONVERGE") 72 | print("*" * 33) 73 | 74 | np.random.seed(1986) 75 | 76 | print('Generating Data...') 77 | 78 | 79 | def gen_uniform_amp(amp=1, xn=10000): 80 | """Generates uniform random data between 81 | -amp and +amp 82 | and of length xn 83 | Arguments: 84 | amp: maximum/minimum range of uniform data 85 | xn: length of series 86 | """ 87 | data_input = np.random.uniform(-1 * amp, +1 * amp, xn) 88 | data_input = pd.DataFrame(data_input) 89 | return data_input 90 | 91 | # Since the output is a moving average of the input, 92 | # the first few points of output will be NaN 93 | # and will be dropped from the generated data 94 | # before training the LSTM. 95 | # Also, when lahead > 1, 96 | # the preprocessing step later of "rolling window view" 97 | # will also cause some points to be lost. 98 | # For aesthetic reasons, 99 | # in order to maintain generated data length = input_len after pre-processing, 100 | # add a few points to account for the values that will be lost. 101 | to_drop = max(tsteps - 1, lahead - 1) 102 | data_input = gen_uniform_amp(amp=0.1, xn=input_len + to_drop) 103 | 104 | # set the target to be a N-point average of the input 105 | expected_output = data_input.rolling(window=tsteps, center=False).mean() 106 | 107 | # when lahead > 1, need to convert the input to "rolling window view" 108 | # https://docs.scipy.org/doc/numpy/reference/generated/numpy.repeat.html 109 | if lahead > 1: 110 | data_input = np.repeat(data_input.values, repeats=lahead, axis=1) 111 | data_input = pd.DataFrame(data_input) 112 | for i, c in enumerate(data_input.columns): 113 | data_input[c] = data_input[c].shift(i) 114 | 115 | # drop the nan 116 | expected_output = expected_output[to_drop:] 117 | data_input = data_input[to_drop:] 118 | 119 | print('Input shape:', data_input.shape) 120 | print('Output shape:', expected_output.shape) 121 | print('Input head: ') 122 | print(data_input.head()) 123 | print('Output head: ') 124 | print(expected_output.head()) 125 | print('Input tail: ') 126 | print(data_input.tail()) 127 | print('Output tail: ') 128 | print(expected_output.tail()) 129 | 130 | print('Plotting input and expected output') 131 | plt.plot(data_input[0][:10], '.') 132 | plt.plot(expected_output[0][:10], '-') 133 | plt.legend(['Input', 'Expected output']) 134 | plt.title('Input') 135 | plt.show() 136 | 137 | 138 | def create_model(stateful: bool): 139 | model = Sequential() 140 | model.add(LSTM(20, 141 | input_shape=(lahead, 1), 142 | batch_size=batch_size, 143 | stateful=stateful)) 144 | model.add(Dense(1)) 145 | model.compile(loss='mse', optimizer='adam') 146 | return model 147 | 148 | print('Creating Stateful Model...') 149 | model_stateful = create_model(stateful=True) 150 | 151 | 152 | # split train/test data 153 | def split_data(x, y, ratio: int = 0.8): 154 | to_train = int(input_len * ratio) 155 | # tweak to match with batch_size 156 | to_train -= to_train % batch_size 157 | 158 | x_train = x[:to_train] 159 | y_train = y[:to_train] 160 | x_test = x[to_train:] 161 | y_test = y[to_train:] 162 
| 163 | # tweak to match with batch_size 164 | to_drop = x.shape[0] % batch_size 165 | if to_drop > 0: 166 | x_test = x_test[:-1 * to_drop] 167 | y_test = y_test[:-1 * to_drop] 168 | 169 | # some reshaping 170 | reshape_3 = lambda x: x.values.reshape((x.shape[0], x.shape[1], 1)) 171 | x_train = reshape_3(x_train) 172 | x_test = reshape_3(x_test) 173 | 174 | reshape_2 = lambda x: x.values.reshape((x.shape[0], 1)) 175 | y_train = reshape_2(y_train) 176 | y_test = reshape_2(y_test) 177 | 178 | return (x_train, y_train), (x_test, y_test) 179 | 180 | 181 | (x_train, y_train), (x_test, y_test) = split_data(data_input, expected_output) 182 | print('x_train.shape: ', x_train.shape) 183 | print('y_train.shape: ', y_train.shape) 184 | print('x_test.shape: ', x_test.shape) 185 | print('y_test.shape: ', y_test.shape) 186 | 187 | print('Training') 188 | for i in range(epochs): 189 | print('Epoch', i + 1, '/', epochs) 190 | # Note that the last state for sample i in a batch will 191 | # be used as initial state for sample i in the next batch. 192 | # Thus we are simultaneously training on batch_size series with 193 | # lower resolution than the original series contained in data_input. 194 | # Each of these series are offset by one step and can be 195 | # extracted with data_input[i::batch_size]. 196 | model_stateful.fit(x_train, 197 | y_train, 198 | batch_size=batch_size, 199 | epochs=1, 200 | verbose=1, 201 | validation_data=(x_test, y_test), 202 | shuffle=False) 203 | model_stateful.reset_states() 204 | 205 | print('Predicting') 206 | predicted_stateful = model_stateful.predict(x_test, batch_size=batch_size) 207 | 208 | print('Creating Stateless Model...') 209 | model_stateless = create_model(stateful=False) 210 | 211 | print('Training') 212 | model_stateless.fit(x_train, 213 | y_train, 214 | batch_size=batch_size, 215 | epochs=epochs, 216 | verbose=1, 217 | validation_data=(x_test, y_test), 218 | shuffle=False) 219 | 220 | print('Predicting') 221 | predicted_stateless = model_stateless.predict(x_test, batch_size=batch_size) 222 | 223 | # ---------------------------- 224 | 225 | print('Plotting Results') 226 | plt.subplot(3, 1, 1) 227 | plt.plot(y_test) 228 | plt.title('Expected') 229 | plt.subplot(3, 1, 2) 230 | # drop the first "tsteps-1" because it is not possible to predict them 231 | # since the "previous" timesteps to use do not exist 232 | plt.plot((y_test - predicted_stateful).flatten()[tsteps - 1:]) 233 | plt.title('Stateful: Expected - Predicted') 234 | plt.subplot(3, 1, 3) 235 | plt.plot((y_test - predicted_stateless).flatten()) 236 | plt.title('Stateless: Expected - Predicted') 237 | plt.show() -------------------------------------------------------------------------------- /tools/backup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Spyder Editor 4 | 5 | This is a temporary script file. 
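The script compares a per-midclass SARIMA model, an XGBoost model and an
all-zero baseline on a hold-out window, then writes the selected
predictions to submit1.csv.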
6 | """
7 | 
8 | import xgboost as xgb
9 | from numpy import array
10 | from numpy import zeros
11 | import csv
12 | import math
13 | 
14 | import pandas as pd
15 | from statsmodels.tsa.statespace.sarimax import SARIMAX
16 | import statsmodels.api as sm
17 | import datetime as dt
18 | import matplotlib.pylab as plt
19 | from statsmodels.tsa.stattools import adfuller
20 | 
21 | larclasPred = {}
22 | larclasLabl = {}
23 | totalBias = 0
24 | totalCount = 0
25 | 
26 | dtIndex = [dt.datetime(2015,1,x) for x in range(1, 32)]
27 | dtIndex = dtIndex + [dt.datetime(2015,2,x) for x in (range(1, 29))]
28 | dtIndex = dtIndex + [dt.datetime(2015,3,x) for x in range(1, 32)]
29 | dtIndex = dtIndex + [dt.datetime(2015,4,x) for x in (range(1, 31))]
30 | 
31 | modelChoose = []
32 | 
33 | def getData(csvReader, trainCount, testCount):
34 |     trainData = []
35 |     testData = []
36 |     trainLabel = []
37 |     testLabel = []
38 |     try:
39 |         for x in range(0, trainCount):
40 |             row = csvReader.next()
41 |             """
42 |             data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]),
43 |                     float(row[7]), float(row[8]), float(row[9]), float(row[10]),
44 |                     float(row[11]), float(row[12])]
45 |             """
46 |             data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]),
47 |                     float(row[7]), float(row[8])]
48 |             trainData.append(data)
49 |             trainLabel.append(float(row[15]))
50 |         for x in range(0, testCount):
51 |             row = csvReader.next()
52 |             """
53 |             data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]),
54 |                     float(row[7]), float(row[8]), float(row[9]), float(row[10]),
55 |                     float(row[11]), float(row[12])]
56 |             """
57 |             data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]),
58 |                     float(row[7]), float(row[8])]
59 |             testData.append(data)
60 |             testLabel.append(float(row[15]))
61 |         return int(row[0]), trainData, trainLabel, testData, testLabel
62 |     except StopIteration:
63 |         return 0, [], [], [], []
64 | 
65 | def dataLog(midclass, accuracy, trainLabl, testPred, testLabl):
66 |     with open('compare.csv', 'ab') as f:
67 |         writer = csv.writer(f)
68 |         count = 1
69 |         writer.writerow([midclass, accuracy])
70 |         for x in trainLabl:
71 |             writer.writerow([count, x])
72 |             count += 1
73 |         for x in range(0, len(testPred)):
74 |             writer.writerow([count, testLabl[x], testPred[x]])
75 |             count += 1
76 | 
77 | def xgboostPredict(trainData, trainLabel, dataToPredict):
78 |     dtrain = xgb.DMatrix(trainData, trainLabel)
79 |     params = {"objective": "reg:linear"}
80 |     gbm = xgb.train(dtrain=dtrain, params=params)
81 |     return gbm.predict(xgb.DMatrix(dataToPredict))
82 | 
83 | def test_stationarity(timeseries):
84 | 
85 |     #Determining rolling statistics
86 |     rolmean = timeseries.rolling(window=12,center=False).mean()
87 |     rolstd = timeseries.rolling(window=12,center=False).std()
88 | 
89 |     #Plot rolling statistics:
90 |     plt.plot(timeseries, color='blue',label='Original')
91 |     plt.plot(rolmean, color='red', label='Rolling Mean')
92 |     plt.plot(rolstd, color='black', label = 'Rolling Std')
93 |     plt.legend(loc='best')
94 |     plt.title('Rolling Mean & Standard Deviation')
95 |     plt.show(block=False)
96 | 
97 |     #Perform Dickey-Fuller test:
98 |     print 'Results of Dickey-Fuller Test:'
99 |     dftest = adfuller(timeseries, autolag='AIC')
100 |     dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
101 |     for key,value in dftest[4].items():
102 |         dfoutput['Critical Value (%s)'%key] = value
103 |     print dfoutput
104 | 
105 |     #Get AR and MA parameter
106 |     fig = plt.figure(figsize=(12,8))
107 |     ax1=fig.add_subplot(211)
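    # Reading the two plots below (the standard Box-Jenkins heuristic): a sharp
    # cutoff in the PACF suggests the AR order p, and a sharp cutoff in the ACF
    # suggests the MA order q; these plots motivate the fixed
    # SARIMAX(1,1,1)x(0,1,1,7) orders used in sarimaTrain() below.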
108 | fig = sm.graphics.tsa.plot_acf(timeseries, lags=20, ax=ax1) 109 | ax2 = fig.add_subplot(212) 110 | fig = sm.graphics.tsa.plot_pacf(timeseries, lags=20, ax=ax2) 111 | plt.show(block=False) 112 | 113 | def sarimaTrain(trainLabel): 114 | dataLength = trainLabel.__len__() 115 | data = pd.Series(trainLabel) 116 | index = dtIndex[0:dataLength] 117 | data.index = pd.Index(index) 118 | 119 | model = SARIMAX(data, order=(1,1,1), seasonal_order=(0,1,1,7)) 120 | return model.fit() 121 | 122 | def sarimaPredict(model, predictLength): 123 | output = model.forecast(predictLength) 124 | return array(output) 125 | 126 | def sarimaBias(model, trainLabel): 127 | dataLength = trainLabel.__len__() 128 | data = pd.Series(trainLabel) 129 | index = dtIndex[0:dataLength] 130 | data.index = pd.Index(index) 131 | 132 | pred = model.predict() 133 | """ 134 | plt.plot(data, color='blue',label='Original') 135 | plt.plot(pred, color='red', label='Predicted') 136 | plt.show(block=False) 137 | """ 138 | return list(data - pred) 139 | 140 | def modelselect(trainSize, testSize): 141 | global larclasPred, larclasLabl, totalBias, totalCount 142 | larclasPred = {} 143 | larclasLabl = {} 144 | totalBias = 0 145 | totalCount = 0 146 | modelChoose = [] 147 | f = open("data.csv", "r") 148 | f_csv = csv.reader(f) 149 | 150 | teD = [] 151 | for i in range(31-testSize, 31): 152 | x = [i, (i+2)%7, 0, 0, 0, 0] 153 | if (x[1] == 6 or x[1]==0): 154 | x[3] = 1 155 | elif (x[1] == 5): 156 | x[2] = 1 157 | teD.append(x) 158 | 159 | while (True): 160 | midclass, trD, trL, _, teL = getData(f_csv, trainSize, testSize) 161 | if (midclass == 0): 162 | break 163 | else: 164 | 165 | # sarima model 166 | try: 167 | model = sarimaTrain(trL) 168 | teP1 = sarimaPredict(model, testSize) 169 | except: 170 | teP1 = zeros(testSize) 171 | 172 | # xgboost model 173 | try: 174 | teP2 = xgboostPredict(array(trD), array(trL), array(teD)) 175 | except: 176 | teP2 = zeros(testSize) 177 | 178 | # just zero 179 | teP3 = zeros(testSize) 180 | 181 | # count bias of midclass and update larclass 182 | label = array(teL) 183 | larclass = int(midclass/100) 184 | totalCount += testSize 185 | 186 | bias1 = sum((teP1-label)*(teP1-label)) 187 | bias2 = sum((teP2-label)*(teP2-label)) 188 | bias3 = sum((teP3-label)*(teP3-label)) 189 | if (bias3 < bias1 and bias3 < bias2): 190 | totalBias += bias3 191 | bias3 = math.sqrt(bias3/testSize) 192 | print "(Midclass %d select ZERO, accuracy: %f)" % (midclass, bias3) 193 | modelChoose.append(3) 194 | if (larclass in larclasPred): 195 | larclasPred[larclass] += teP3 196 | else: 197 | larclasPred[larclass] = teP3 198 | elif (bias1 < bias2): 199 | totalBias += bias1 200 | bias1 = math.sqrt(bias1/testSize) 201 | print "(Midclass %d select SARIMA, accuracy: %f)" % (midclass, bias1) 202 | modelChoose.append(1) 203 | if (larclass in larclasPred): 204 | larclasPred[larclass] += teP1 205 | else: 206 | larclasPred[larclass] = teP1 207 | else: 208 | totalBias += bias2 209 | bias2 = math.sqrt(bias2/testSize) 210 | print "(Midclass %d select XGBOOST, accuracy: %f)" % (midclass, bias2) 211 | modelChoose.append(2) 212 | if (larclass in larclasPred): 213 | larclasPred[larclass] += teP2 214 | else: 215 | larclasPred[larclass] = teP2 216 | 217 | if (larclass in larclasLabl): 218 | larclasLabl[larclass] += label 219 | else: 220 | larclasLabl[larclass] = label 221 | #dataLog(midclass, bias, trL, teP, teL) 222 | 223 | # print bias of large class 224 | for larclass in larclasPred: 225 | bias = sum((larclasLabl[larclass] - larclasPred[larclass])* 226 
| (larclasLabl[larclass] - larclasPred[larclass])) 227 | totalBias += bias 228 | totalCount += testSize 229 | bias = math.sqrt(bias/testSize) 230 | print "(Larclass %d predict finished, accuracy: %f)" % (larclass, bias) 231 | 232 | totalBias = math.sqrt(totalBias/totalCount) 233 | print "(Predict finished, accuracy: %f)" % (totalBias) 234 | f.close() 235 | 236 | def test(trainSize, testSize): 237 | global larclasPred, larclasLabl, totalBias, totalCount 238 | larclasPred = {} 239 | larclasLabl = {} 240 | totalBias = 0 241 | totalCount = 0 242 | f = open("data.csv", "r") 243 | f_csv = csv.reader(f) 244 | 245 | teD = [] 246 | for i in range(31-testSize, 31): 247 | x = [i, (i+2)%7, 0, 0, 0, 0] 248 | if (x[1] == 6 or x[1]==0): 249 | x[3] = 1 250 | elif (x[1] == 5): 251 | x[2] = 1 252 | teD.append(x) 253 | 254 | while (True): 255 | midclass, trD, trL, _, teL = getData(f_csv, trainSize, testSize) 256 | if (midclass == 0): 257 | break 258 | else: 259 | try: 260 | model = sarimaTrain(trL) 261 | teP = sarimaPredict(model, testSize) 262 | except: 263 | teP = xgboostPredict(array(trD), array(trL), array(teD)) 264 | 265 | # count bias of midclass 266 | bias = 0.0 267 | for i in range(0, testSize): 268 | bias += (teP[i]-teL[i])*(teP[i]-teL[i]); 269 | totalBias += bias 270 | totalCount += testSize 271 | bias = math.sqrt(bias/testSize) 272 | print "(Midclass %d predict finished, accuracy: %f)" % (midclass, bias) 273 | # update bias of large class 274 | larclass = int(midclass/100) 275 | if (larclass in larclasPred): 276 | for i in range(0, testSize): 277 | larclasPred[larclass][i] += teP[i] 278 | larclasLabl[larclass][i] += teL[i] 279 | else: 280 | larclasPred[larclass] = teP 281 | larclasLabl[larclass] = teL 282 | #dataLog(midclass, bias, trL, teP, teL) 283 | # print bias of large class 284 | for larclass in larclasPred: 285 | bias = 0.0 286 | for i in range(0, testSize): 287 | d = larclasLabl[larclass][i] - larclasPred[larclass][i] 288 | bias += d*d; 289 | totalBias += bias 290 | totalCount += testSize 291 | bias = math.sqrt(bias/testSize) 292 | print "(Larclass %d predict finished, accuracy: %f)" % (larclass, bias) 293 | 294 | totalBias = math.sqrt(totalBias/totalCount) 295 | print "(Predict finished, accuracy: %f)" % (totalBias) 296 | f.close() 297 | 298 | def submit(trainSize): 299 | global larclasPred 300 | larclasPred = {} 301 | f1 = open("data.csv", "r") 302 | data_csv = csv.reader(f1) 303 | f2 = open("submit.csv", "r") 304 | submit_csv = csv.reader(f2) 305 | submit_csv.next() 306 | 307 | # generate feature 308 | goal = [] 309 | for i in range(1, 31): 310 | x = [i, (i+4)%7, 0, 0, 0, 0] 311 | if (x[1] == 6 or x[1]==0): 312 | x[3] = 1 313 | elif (x[1] == 5): 314 | x[2] = 1 315 | goal.append(x) 316 | goal[0][3] = 1 317 | goal[0][2] = 0 318 | 319 | current = 0 320 | 321 | while (True): 322 | midclass, trD, trL, teD, teL = getData(data_csv, trainSize, 0) 323 | if (midclass == 0): 324 | break 325 | else: 326 | 327 | if (modelChoose[current] == 1): 328 | try: 329 | model = sarimaTrain(trL) 330 | teP = sarimaPredict(model, 30) 331 | except: 332 | teP = xgboostPredict(array(trD), array(trL), array(goal)) 333 | elif (modelChoose[current] == 2): 334 | teP = xgboostPredict(array(trD), array(trL), array(goal)) 335 | else: 336 | teP = zeros(30) 337 | current += 1 338 | 339 | # write file - midclass 340 | for x in teP: 341 | if (x < 0): 342 | x = 0 343 | row = submit_csv.next() 344 | if (int(row[0]) != midclass): 345 | raise KeyError 346 | with open('submit1.csv', 'ab') as f: 347 | writer = csv.writer(f) 348 | 
writer.writerow([row[0], row[1], x]) 349 | 350 | # count larclass 351 | larclass = int(midclass/100) 352 | if (larclass in larclasPred): 353 | for i in range(0, 30): 354 | larclasPred[larclass][i] += teP[i] 355 | else: 356 | larclasPred[larclass] = teP 357 | 358 | # write file - larcalss 359 | oldLC = 0 360 | for row in submit_csv: 361 | larclass = int(row[0]) 362 | if larclass != oldLC: 363 | oldLC = larclass 364 | i = 0 365 | with open('submit1.csv', 'ab') as f: 366 | writer = csv.writer(f) 367 | writer.writerow([row[0], row[1], larclasPred[larclass][i]]) 368 | i+=1 369 | f1.close() 370 | f2.close() 371 | 372 | test(106, 14) 373 | modelselect(106, 14) 374 | #submit(120) -------------------------------------------------------------------------------- /tools/csvloader.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue Oct 24 18:48:57 2017 4 | 5 | @author: wangjun 6 | 7 | 用于从给定的数据集生成训练数据; 8 | 由于训练程序是按照中类顺序(而非日期顺序)训练的,生成的训练数据需使用Excel按中类 9 | 进行排序:) 10 | """ 11 | 12 | import csv 13 | import datetime 14 | 15 | DictHoilday = [1,2,3,49,50,51,52,53,54,55,96,121,173] 16 | DictBeforeHoilday = [46,47,48,120] 17 | DictWorkday = [46, 58, 59] 18 | midClasses = {} 19 | 20 | date = datetime.datetime(2015, 1, 1) 21 | dailyData = {} 22 | # index -0 -1 23 | # middle class-sales count-promotions 24 | # string -float -int 25 | promotions = [] 26 | totalCount = 0 27 | totalPay = 0 28 | lineNum = 1 29 | dayCount = 1 30 | 31 | dataLog = [{}, {}, {}, {}, {}, {}, {}] 32 | 33 | def getHistory(midclass): 34 | total = 0 35 | log = [] 36 | for i in range(0, 7): 37 | try: 38 | temp = dataLog[i][midclass][0] 39 | total += temp 40 | log.append(temp) 41 | except KeyError: 42 | log.append(0) 43 | return log[0], log[1], log[2], total/7 44 | 45 | def writeData(): 46 | global dailyData, promotions, totalCount, totalPay, dayCount, dataLog 47 | day = date.day 48 | month = date.month 49 | week = (date.weekday() + 1) % 7 50 | if (dayCount in DictHoilday): 51 | holiday = 1 52 | beforeHoliday = 0 53 | elif (dayCount in DictBeforeHoilday): 54 | holiday = 0 55 | beforeHoliday = 1 56 | elif (dayCount in DictWorkday): 57 | holiday = 0 58 | if (week==6 or ((dayCount+1) in DictHoilday)): 59 | beforeHoliday = 1 60 | else: 61 | beforeHoliday = 0 62 | elif (week==0 or week==6): 63 | holiday = 1 64 | beforeHoliday = 0 65 | elif (week==5): 66 | holiday = 0 67 | beforeHoliday = 1 68 | else: 69 | holiday = 0 70 | beforeHoliday = 0 71 | promotionClass = {} 72 | for midclass in promotions: 73 | larclass = int(midclass)/100 74 | if larclass in promotionClass: 75 | promotionClass[larclass] = promotionClass[larclass] + 1; 76 | else: 77 | promotionClass[larclass] = 1; 78 | with open('output.csv', 'ab') as f: 79 | writer = csv.writer(f) 80 | for midclass in dailyData: 81 | l1, l2, l3, la = getHistory(midclass) 82 | if (midclass not in midClasses): 83 | continue 84 | else: 85 | midClasses[midclass] = 1 86 | try: 87 | larclass = int(midclass) / 100 88 | if (larclass in promotionClass): 89 | writer.writerow([midclass, dayCount, month, 90 | day, week, beforeHoliday, holiday, 91 | dailyData[midclass][1], 92 | promotionClass[larclass]-dailyData[midclass][1], 93 | l1, l2, l3, la, 94 | totalCount, totalPay, dailyData[midclass][0]]) 95 | else: 96 | writer.writerow([midclass, dayCount, month, 97 | day, week, beforeHoliday, holiday, 98 | 0, 0, l1, l2, l3, la, 99 | totalCount, totalPay, dailyData[midclass][0]]) 100 | except ZeroDivisionError: 101 | pass 102 | #just neglect it 
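        # The pass below writes an explicit zero-sales row for every known
        # middle class that did not appear in today's transactions, so the
        # generated training file stays dense (one row per class per day).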
103 | for midclass in midClasses: 104 | l1, l2, l3, la = getHistory(midclass) 105 | if (midClasses[midclass] == 0): 106 | larclass = int(midclass) / 100 107 | if (larclass in promotionClass): 108 | writer.writerow([midclass, dayCount, month, 109 | day, week, beforeHoliday, holiday, 110 | 0, promotionClass[larclass], 111 | l1, l2, l3, la, 112 | totalCount, totalPay, 0]) 113 | else: 114 | writer.writerow([midclass, dayCount, month, 115 | day, week, beforeHoliday, holiday, 0, 0, 116 | l1, l2, l3, la, 117 | totalCount, totalPay, 0]) 118 | dataLog.insert(0, dailyData) 119 | dataLog.pop() 120 | dailyData = {} 121 | promotions = [] 122 | totalCount = 0 123 | totalPay = 0 124 | dayCount += 1 125 | for midclass in midClasses: 126 | midClasses[midclass] = 0 127 | 128 | with open('example.csv') as f: 129 | f_csv = csv.reader(f) 130 | f_csv.next() 131 | for row in f_csv: 132 | if (int(row[0]) > 100): 133 | midClasses[row[0]] = 0; 134 | 135 | with open('train.csv') as f: 136 | f_csv = csv.reader(f) 137 | f_csv.next() 138 | for row in f_csv: 139 | lineNum += 1 140 | 141 | # check date 142 | day = int(row[7]) % 100 143 | month = int(row[7]) / 100 % 100 144 | tempdate = datetime.datetime(2015, month, day) 145 | while (date != tempdate): 146 | writeData() 147 | date = date.__add__(datetime.timedelta(1)) 148 | 149 | midclass = row[3] 150 | if (midclass in dailyData): 151 | #float(row[13]) or 1 152 | dailyData[midclass][0] = dailyData[midclass][0]+1 153 | totalCount=totalCount+1 154 | try: 155 | totalPay=totalPay+float(row[14]) 156 | except: 157 | pass 158 | else: 159 | dailyData[midclass] = [1, 0] 160 | totalCount=totalCount+1 161 | try: 162 | totalPay=totalPay+float(row[14]) 163 | except: 164 | pass 165 | if (row[16]!='\xb7\xf1'): 166 | dailyData[midclass][1] = 1 167 | if (midclass not in promotions): 168 | promotions.append(midclass) 169 | writeData(); 170 | 171 | 172 | 173 | -------------------------------------------------------------------------------- /tools/csvloader_largeClass.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue Oct 24 18:48:57 2017 4 | 5 | @author: wangjun 6 | 7 | 用于从给定的数据集生成大类训练数据; 8 | 由于训练程序是按照大类顺序(而非日期顺序)训练的,生成的训练数据需使用Excel按中类 9 | 进行排序:) 10 | """ 11 | 12 | import csv 13 | import datetime 14 | 15 | DictHoilday = [1,2,3,49,50,51,52,53,54,55,96,121,173] 16 | DictBeforeHoilday = [46,47,48,120] 17 | DictWorkday = [46, 58, 59] 18 | larClasses = {} 19 | 20 | date = datetime.datetime(2015, 1, 1) 21 | dailyData = {} 22 | # index -0 -1 23 | # large class -sales count-promotions 24 | # string -float -int 25 | totalCount = 0 26 | totalPay = 0 27 | lineNum = 1 28 | dayCount = 1 29 | 30 | dataLog = [{}, {}, {}, {}, {}, {}, {}] 31 | 32 | def getHistory(larclass): 33 | total = 0 34 | log = [] 35 | for i in range(0, 7): 36 | try: 37 | temp = dataLog[i][larclass][0] 38 | total += temp 39 | log.append(temp) 40 | except KeyError: 41 | log.append(0) 42 | return log[0], log[1], log[2], total/7 43 | 44 | def writeData(): 45 | global dailyData, totalCount, totalPay, dayCount, dataLog 46 | day = date.day 47 | month = date.month 48 | week = (date.weekday() + 1) % 7 49 | if (dayCount in DictHoilday): 50 | holiday = 1 51 | beforeHoliday = 0 52 | elif (dayCount in DictBeforeHoilday): 53 | holiday = 0 54 | beforeHoliday = 1 55 | elif (dayCount in DictWorkday): 56 | holiday = 0 57 | if (week==6 or ((dayCount+1) in DictHoilday)): 58 | beforeHoliday = 1 59 | else: 60 | beforeHoliday = 0 61 | elif (week==0 or week==6): 62 | 
holiday = 1 63 | beforeHoliday = 0 64 | elif (week==5): 65 | holiday = 0 66 | beforeHoliday = 1 67 | else: 68 | holiday = 0 69 | beforeHoliday = 0 70 | with open('lcoutput.csv', 'ab') as f: 71 | writer = csv.writer(f) 72 | for larclass in dailyData: 73 | l1, l2, l3, la = getHistory(larclass) 74 | if (larclass not in larClasses): 75 | continue 76 | else: 77 | larClasses[larclass] = 1 78 | try: 79 | writer.writerow([larclass, dayCount, month, 80 | day, week, beforeHoliday, holiday, 81 | dailyData[larclass][1], 82 | l1, l2, l3, la, 83 | totalCount, totalPay, dailyData[larclass][0]]) 84 | except ZeroDivisionError: 85 | pass 86 | #just neglect it 87 | for larclass in larClasses: 88 | l1, l2, l3, la = getHistory(larclass) 89 | if (larClasses[larclass] == 0): 90 | writer.writerow([larclass, dayCount, month, 91 | day, week, beforeHoliday, holiday, 92 | 0, 93 | l1, l2, l3, la, 94 | totalCount, totalPay, 0]) 95 | dataLog.insert(0, dailyData) 96 | dataLog.pop() 97 | dailyData = {} 98 | totalCount = 0 99 | totalPay = 0 100 | dayCount += 1 101 | for larclass in larClasses: 102 | larClasses[larclass] = 0 103 | 104 | with open('example.csv') as f: 105 | f_csv = csv.reader(f) 106 | f_csv.next() 107 | for row in f_csv: 108 | if (int(row[0]) < 100): 109 | larClasses[row[0]] = 0; 110 | 111 | with open('train.csv') as f: 112 | f_csv = csv.reader(f) 113 | f_csv.next() 114 | for row in f_csv: 115 | lineNum += 1 116 | 117 | # check date 118 | day = int(row[7]) % 100 119 | month = int(row[7]) / 100 % 100 120 | tempdate = datetime.datetime(2015, month, day) 121 | while (date != tempdate): 122 | writeData() 123 | date = date.__add__(datetime.timedelta(1)) 124 | 125 | larclass = row[1] 126 | if (larclass in dailyData): 127 | #float(row[13]) or 1 128 | dailyData[larclass][0] = dailyData[larclass][0]+1 129 | totalCount=totalCount+1 130 | try: 131 | totalPay=totalPay+float(row[14]) 132 | except: 133 | pass 134 | else: 135 | dailyData[larclass] = [1, 0] 136 | totalCount=totalCount+1 137 | try: 138 | totalPay=totalPay+float(row[14]) 139 | except: 140 | pass 141 | if (row[16]!='\xb7\xf1'): 142 | dailyData[larclass][1] = 1 143 | writeData(); 144 | 145 | 146 | 147 | -------------------------------------------------------------------------------- /tools/dataModify.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Fri Nov 03 20:30:09 2017 4 | 5 | @author: wangjun 6 | """ 7 | 8 | import csv 9 | import pandas as pd 10 | from statsmodels.tsa.statespace.sarimax import SARIMAX 11 | import datetime as dt 12 | import arimaPredicter 13 | 14 | dateToModify = [34, 44, 89, 98, 105, 150, 211] 15 | 16 | ap = arimaPredicter.predicter() 17 | index = ap.createIndex(dt.datetime(2015,1,1), 243) 18 | 19 | def getData(csvReader, count): 20 | data = [] 21 | label = [] 22 | try: 23 | for x in range(0, count): 24 | row = csvReader.next() 25 | data.append(row[:-1]) 26 | label.append(int(row[-1])) 27 | return int(row[0]), data, label 28 | except StopIteration: 29 | return 0, [], [] 30 | 31 | def modifyFile(reader, writer, count): 32 | global dateToModify, index 33 | while (True): 34 | clas, data, label = getData(reader, count) 35 | if (clas == 0): 36 | break 37 | data0 = pd.Series(label) 38 | data0.index = pd.Index(index) 39 | try: 40 | model = SARIMAX(data0, order=(1,1,1), seasonal_order=(0,1,1,7)) 41 | result = model.fit() 42 | except: 43 | print("%d: failed to train sarimax model, abort" % clas) 44 | for i in range(0, count): 45 | writer.writerow(data[i] + 
[label[i]]) 46 | continue 47 | for i in dateToModify: 48 | label[i] = round(result.predict(i, i)[0]) 49 | if (label[i] < 0): 50 | label[i] = 0 51 | for i in range(0, count): 52 | writer.writerow(data[i] + [label[i]]) 53 | 54 | f1 = open("data.csv", "r") 55 | reader = csv.reader(f1) 56 | f2 = open('datam.csv', 'wb') 57 | writer = csv.writer(f2) 58 | modifyFile(reader, writer, 243) 59 | f1.close() 60 | f2.close() 61 | 62 | f1 = open("lcdata.csv", "r") 63 | reader = csv.reader(f1) 64 | f2 = open('lcdatam.csv', 'wb') 65 | writer = csv.writer(f2) 66 | modifyFile(reader, writer, 243) 67 | f1.close() 68 | f2.close() 69 | 70 | -------------------------------------------------------------------------------- /tools/fileChecker.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Fri Nov 10 18:55:56 2017 4 | 5 | @author: wangjun 6 | """ 7 | 8 | import csv 9 | 10 | f1 = open("submit1.csv", "r") 11 | f1_csv = csv.reader(f1) 12 | 13 | f2 = open("submit3.csv", "r") 14 | f2_csv = csv.reader(f2) 15 | 16 | lineNo = 2 17 | row1 = f1_csv.next() 18 | row2 = f2_csv.next() 19 | 20 | while (True): 21 | try: 22 | row1 = f1_csv.next() 23 | row2 = f2_csv.next() 24 | except StopIteration: 25 | break 26 | if (int(row1[2])!=int(row2[2])): 27 | print lineNo 28 | i = input() 29 | lineNo += 1 -------------------------------------------------------------------------------- /v3/Readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/v3/Readme.txt -------------------------------------------------------------------------------- /v3/Version_3.py: -------------------------------------------------------------------------------- 1 | from sklearn.ensemble import RandomForestRegressor 2 | import numpy as np 3 | import csv 4 | 5 | 6 | mid_class_num = 134 7 | large_class_num = 14 8 | class_codes = [] 9 | train_set_x = {} 10 | train_set_y = {} 11 | test_set_x = {} 12 | test_set_y = {} 13 | May_set_x = {} 14 | May_set_y = {} 15 | large_codes = ['10', '11', '12', '13', '15', '20', '21', '22', '23', '30', '31', '32', '33', '34'] 16 | commit_codes = [] 17 | 18 | accumulate_err = 0 19 | 20 | 21 | # 载入训练和测试模型的数据(不包括5月份的) 22 | def load_data(): 23 | with open('train.csv') as input_file: 24 | input_csv = csv.reader(input_file) 25 | day = 0 26 | for row in input_csv: 27 | code = row[0] 28 | if day == 0: 29 | class_codes.append(code) 30 | train_set_x[code] = [] 31 | train_set_y[code] = [] 32 | x = list(map(float, row[1:-1])) 33 | # 将大类的feature增加一项:预测的当天的对应中类customer之和, 初始化为0 34 | if code in large_codes: 35 | x.append(0) 36 | train_set_x[code].append(x) 37 | train_set_y[code].append(float(row[-1])) 38 | day = (day + 1) % 100 39 | with open('test.csv') as input_file: 40 | input_csv = csv.reader(input_file) 41 | day = 0 42 | for row in input_csv: 43 | code = row[0] 44 | if day == 0: 45 | test_set_x[code] = [] 46 | test_set_y[code] = [] 47 | x = list(map(float, row[1:-1])) 48 | # 将大类的feature增加一项:预测的当天的对应中类customer之和, 初始化为0 49 | if code in large_codes: 50 | x.append(0) 51 | test_set_x[code].append(x) 52 | test_set_y[code].append(float(row[-1])) 53 | day = (day + 1) % 20 54 | 55 | 56 | def load_May_data(): 57 | with open('May_input.csv') as input_file: 58 | input_csv = csv.reader(input_file) 59 | day = 0 60 | for row in input_csv: 61 | code = row[0] 62 | if code in commit_codes: 63 | if day == 0: 64 | May_set_x[code] = [] 65 | 
May_set_x[code].append(list(map(float, row[1:]))) 66 | day = (day + 1) % 30 67 | 68 | 69 | # 修改大类feature的最后一项(大类中中类的预测销量和) 70 | def modify_large_feature(type, class_code, pred): 71 | class_code = class_code[:2] 72 | if type == 'train': 73 | for day in range(len(train_set_x[class_code])): 74 | train_set_x[class_code][day][-1] += pred[day] 75 | if type == 'test': 76 | for day in range(len(test_set_x[class_code])): 77 | test_set_x[class_code][day][-1] += pred[day] 78 | 79 | 80 | def train_test_eval(train_x, train_y, test_x, test_y, params=None): 81 | # train 82 | if params is None: 83 | rf = RandomForestRegressor() 84 | else: 85 | rf = RandomForestRegressor(n_estimators=params['n_estimators'], oob_score=params['oob_score']) 86 | rf.fit(train_x, train_y) 87 | 88 | # test 89 | ypred = np.asarray(list(map(round, rf.predict(test_x)))) 90 | 91 | # evaluation 92 | rmse = np.sqrt(((test_y - ypred) ** 2).mean()) 93 | global accumulate_err 94 | accumulate_err += np.sum((test_y - ypred) ** 2) 95 | 96 | # this is used for modifying large class feature 97 | train_predict = rf.predict(train_x) 98 | 99 | return rf, ypred, rmse, train_predict 100 | 101 | 102 | # 为每一个类训练一个模型,如果params为None,则预测5月份的销量;否则用params测试,不预测5月份,并将结果RMSE写到 调参.txt 中 103 | def run_for_classes(params=None): 104 | output = [] 105 | for code in class_codes: 106 | if code not in commit_codes: 107 | continue 108 | model, ypred, rmse, train_predict = train_test_eval(train_set_x[code], train_set_y[code], test_set_x[code], test_set_y[code], params) 109 | if code in large_codes: 110 | modify_large_feature('train', code, train_predict) 111 | modify_large_feature('test', code, ypred) 112 | if params is None: 113 | print('class: ', code, ' RMSE: ', rmse) 114 | 115 | # prediction for May 116 | predict_May(model, code) 117 | 118 | else: 119 | output.append('class: ' + code + ' RMSE: ' + str(rmse) + '\n') 120 | 121 | if params is not None: 122 | global accumulate_err 123 | with open('调参.txt', 'a') as output_file: 124 | output_file.write('n_estimators=' + str(params['n_estimators']) + ' oob_score=' + str(params['oob_score']) + '\n') 125 | output_file.writelines(output) 126 | output_file.write('total RMSE: ' + str(accumulate_err / 2960)) 127 | accumulate_err = 0 128 | 129 | 130 | # 调参 131 | def run_for_classes_params(): 132 | for n_estimators in range(50, 160, 10): 133 | params = {'n_estimators': n_estimators, 'oob_score': False} 134 | run_for_classes(params) 135 | params = {'n_estimators': n_estimators, 'oob_score': True} 136 | run_for_classes(params) 137 | 138 | 139 | def predict_May(rfmodel, code): 140 | ypred = rfmodel.predict(May_set_x[code]) 141 | ypred = list(map(round, ypred)) 142 | May_set_y[code] = ypred 143 | large_code = code[:2] 144 | for day in range(30): 145 | May_set_x[large_code][day][-1] += ypred[day] 146 | 147 | 148 | # 获取提交文件中需要提交的codes,保存在commit_codes中 149 | def codes_list_out(): 150 | global commit_codes 151 | codes = [0] 152 | with open('commit_empty.csv') as native_set_file: 153 | native_csv = csv.reader(native_set_file) 154 | next(native_csv) 155 | for row in native_csv: 156 | if row[0] != codes[-1]: 157 | codes.append(row[0]) 158 | commit_codes = codes[1:] 159 | 160 | if __name__ == '__main__': 161 | load_data() 162 | codes_list_out() 163 | load_May_data() 164 | 165 | # 不调参,用默认参数预测5月份,结果保存在字典May_set_y中 166 | run_for_classes() 167 | # write the predicted results of May 168 | with open('submit.csv', 'w', newline='') as output_file: 169 | output_csv = csv.writer(output_file) 170 | output_csv.writerow(['编码', '日期', '销量']) 171 | for 
-------------------------------------------------------------------------------- /v3/commit_empty.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/v3/commit_empty.csv
-------------------------------------------------------------------------------- /v3/five_fold.py: -------------------------------------------------------------------------------- 1 | # Five-fold cross-prediction, used for the v5 stacking 2 | 3 | import csv 4 | import numpy as np 5 | from sklearn.ensemble import RandomForestRegressor 6 | 7 | 8 | commit_codes = [] 9 | all_x = {} 10 | all_y = {} 11 | all_pred = {} 12 | 13 | 14 | def load_all_data(): 15 | global all_x, all_y 16 | with open('features.csv') as input_file: 17 | input_csv = csv.reader(input_file) 18 | next(input_csv) 19 | for row in input_csv: 20 | feature = list(map(float, row[1:-1])) 21 | if len(row[0]) == 2: 22 | feature.append(0) 23 | if row[0] not in all_x: 24 | all_x[row[0]] = [feature] 25 | all_y[row[0]] = [float(row[-1])] 26 | else: 27 | all_x[row[0]].append(feature) 28 | all_y[row[0]].append(float(row[-1])) 29 | 30 | 31 | def get_day(date): 32 | date = int(date) 33 | if date < 20150132: 34 | return date - 20150100 35 | elif date < 20150229: 36 | return date - 20150200 + 31 37 | elif date < 20150332: 38 | return date - 20150300 + 59 39 | else: 40 | return date - 20150400 + 90 # Jan + Feb + Mar = 31 + 28 + 31 = 90 days 41 | 42 | 43 | # Call five_fold_params_pred with different parameters 44 | def five_fold_pred(): 45 | global all_pred 46 | for n_estimators in range(50, 160, 10): 47 | print('n_estimators= ', n_estimators) 48 | params = {'n_estimators': n_estimators, 'oob_score': False} 49 | five_fold_params_pred(params) 50 | # write back to file 51 | output = [] 52 | with open('five_fold_feature.csv') as input_file: 53 | input_csv = csv.reader(input_file) 54 | output.append(next(input_csv)) 55 | for row in input_csv: 56 | output.append(row + [str(all_pred[row[0]][get_day(row[1])-1])]) 57 | with open('five_fold_feature_v3.csv', 'w', newline='') as output_file: 58 | output_csv = csv.writer(output_file) 59 | for row in output: 60 | output_csv.writerow(row) 61 | 62 | # clear all_pred 63 | all_pred = {} 64 | 65 | 66 | # Five-fold cross-prediction with the given parameters 67 | def five_fold_params_pred(params): 68 | global commit_codes, all_pred 69 | for code in commit_codes: 70 | if code not in all_pred: 71 | all_pred[code] = np.zeros(120) 72 | if code not in all_x: # some classes are absent from the raw data 73 | continue 74 | for i in range(5): 75 | train_x, train_y, test_x = get_fold_set(code, i) 76 | rf = RandomForestRegressor(n_estimators=params['n_estimators'], oob_score=params['oob_score']) 77 | rf.fit(train_x, train_y) 78 | ypred = rf.predict(test_x) 79 | # store into all_pred 80 | for index in range(24): 81 | all_pred[code][i*24+index] = ypred[index] 82 | 83 | # update the last feature of the corresponding large class 84 | large_code = code[:2] 85 | for day in range(120): 86 | all_x[large_code][day][-1] += all_pred[code][day] 87 | 88 | 89 | def get_fold_set(code, fold_index): 90 | train_x, train_y, test_x = [], [], [] 91 | for i in range(120): 92 | if (i >= fold_index * 24) and (i < (fold_index + 1) * 24): 93 | test_x.append(all_x[code][i]) 94 | else: 95 | train_x.append(all_x[code][i]) 96 | train_y.append(all_y[code][i]) 97 | return train_x, train_y, test_x 98 |
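# A small self-check (an editorial addition) for get_day above; the April
# branch is easy to get wrong because Jan + Feb + Mar = 90 days. The expected
# values follow directly from the 2015 calendar.
def _check_get_day():
    assert get_day('20150101') == 1
    assert get_day('20150228') == 59
    assert get_day('20150301') == 60
    assert get_day('20150401') == 91
    assert get_day('20150430') == 120  # matches the 120 rows per code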
99 | 100 | # Collect the codes required by the submission file into commit_codes 101 | def codes_list_out(): 102 | global commit_codes 103 | codes = [0] 104 | with open('commit_empty.csv') as native_set_file: 105 | native_csv = csv.reader(native_set_file) 106 | next(native_csv) 107 | for row in native_csv: 108 | if row[0] != codes[-1]: 109 | codes.append(row[0]) 110 | commit_codes = codes[1:] 111 | 112 | 113 | # Initialize the result file 114 | def initialize_file(): 115 | global commit_codes 116 | with open('five_fold_feature.csv', 'w', newline='') as output_file: 117 | output_csv = csv.writer(output_file) 118 | output_csv.writerow(['code', 'date', 'models']) 119 | for code in commit_codes: 120 | for date in range(20150101, 20150132): 121 | output_csv.writerow([code, str(date)]) 122 | for date in range(20150201, 20150229): 123 | output_csv.writerow([code, str(date)]) 124 | for date in range(20150301, 20150332): 125 | output_csv.writerow([code, str(date)]) 126 | for date in range(20150401, 20150431): 127 | output_csv.writerow([code, str(date)]) 128 | 129 | 130 | if __name__ == '__main__': 131 | codes_list_out() 132 | initialize_file() 133 | load_all_data() 134 | five_fold_pred() 135 |
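The five folds above are contiguous 24-day blocks of the 120-day window, each predicted by a forest trained on the remaining 96 days. A minimal sketch of that split, mirroring get_fold_set (an editorial illustration, not part of the script):

def fold_ranges(n_days=120, n_folds=5):
    block = n_days // n_folds
    return [(i * block, (i + 1) * block - 1) for i in range(n_folds)]

print(fold_ranges())  # [(0, 23), (24, 47), (48, 71), (72, 95), (96, 119)]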
-------------------------------------------------------------------------------- /v3/preparedata.py: -------------------------------------------------------------------------------- 1 | import csv 2 | 3 | 4 | codes = [] 5 | 6 | 7 | # Compute the January-April features and save them to features.csv 8 | def get_features(): 9 | holidays = [0, 1, 2, 41, 44, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 93, 94, 95] 10 | 11 | def get_date_in_month(day): 12 | if day <= 31: 13 | return day 14 | elif day <= 59: 15 | return day - 31 16 | elif day <= 90: 17 | return day - 59 18 | else: 19 | return day - 90 20 | 21 | with open('timeseries_customers.csv') as customers_file,\ 22 | open('timeseries_discounts.csv') as discounts_file,\ 23 | open('features.csv', 'w', newline='') as output_file: 24 | input_customers = csv.reader(customers_file) 25 | input_discounts = csv.reader(discounts_file) 26 | output_csv = csv.writer(output_file) 27 | next(input_customers) 28 | next(input_discounts) 29 | output_csv.writerow(['class', 'day_in_week', 'day_in_month', 'holiday', 'discount', 'label']) # mid-class features 30 | for row in input_customers: 31 | class_code = row[0] 32 | discount_row = next(input_discounts) 33 | for day in range(1, 121): 34 | feature_row = [] 35 | feature_row.append(class_code) 36 | day_in_week = day % 7 + 4 37 | feature_row.append(str(day_in_week)) 38 | feature_row.append(str(get_date_in_month(day))) 39 | if day in holidays: 40 | feature_row.append('1') 41 | else: 42 | feature_row.append('0') 43 | feature_row.append(discount_row[day]) 44 | feature_row.append(row[day]) 45 | output_csv.writerow(feature_row) 46 | 47 | 48 | def divide_train_test_set(): 49 | with open('features.csv') as input_file,\ 50 | open('train.csv', 'w', newline='') as train_file,\ 51 | open('test.csv', 'w', newline='') as test_file: 52 | input_csv = csv.reader(input_file) 53 | train_csv = csv.writer(train_file) 54 | test_csv = csv.writer(test_file) 55 | next(input_csv) 56 | day = 0 57 | for row in input_csv: 58 | if day < 100: 59 | train_csv.writerow(row) 60 | day += 1 61 | else: 62 | test_csv.writerow(row) 63 | day = (day + 1) % 120 64 | 65 | 66 | # Compute the May features and save them to May_input.csv; the last feature of a large class (the predicted sales sum of its mid classes) has to be updated on the fly while predicting 67 | def compute_May_features(): 68 | def codes_list_out(): 69 | global codes 70 | codes = [0] 71 | with open('commit_empty.csv') as native_set_file: 72 | native_csv = csv.reader(native_set_file) 73 | next(native_csv) 74 | for row in native_csv: 75 | if row[0] != codes[-1]: 76 | codes.append(row[0]) 77 | codes = codes[1:] 78 | print(codes) 79 | 80 | codes_list_out() 81 | with open('May_input.csv', 'w', newline='') as output_file: 82 | output_csv = csv.writer(output_file) 83 | for code in codes: 84 | for day in range(1, 31): 85 | feature = [code, str(day % 7 + 4), str(day), '0', '0'] 86 | if len(code) == 2: # large class 87 | feature.append('0') 88 | output_csv.writerow(feature) 89 | 90 | 91 | if __name__ == '__main__': 92 | get_features() 93 | divide_train_test_set() 94 | compute_May_features() 95 |
-------------------------------------------------------------------------------- /v3/submit.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/v3/submit.csv
-------------------------------------------------------------------------------- /v3/timeseries_customers.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/v3/timeseries_customers.csv
-------------------------------------------------------------------------------- /v3/timeseries_discounts.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/v3/timeseries_discounts.csv
-------------------------------------------------------------------------------- /v5/KNN_interface.py: -------------------------------------------------------------------------------- 1 | # Calling interface for the version 5 KNN algorithm 2 | 3 | from sklearn.neighbors import KNeighborsRegressor 4 | import numpy as np 5 | import csv 6 | 7 | 8 | # data: array of float, the sales series 9 | # pred_length: int, the number of days to predict 10 | # D_window (window length) and max_k (largest k to try) 11 | def knn(data, pred_length, D_window=14, max_k=7): 12 | if pred_length + D_window >= len(data): 13 | print('ERROR: pred_length or D_window too long') 14 | return None 15 | 16 | ret_ypred = [] 17 | for h in range(4): 18 | train_feature, train_label = get_train_set(data, h, D_window, pred_length) 19 | 20 | e_LOO_arr = np.zeros(max_k) 21 | for k in range(2, max_k + 1): 22 | model = KNeighborsRegressor(n_neighbors=k, weights='uniform', algorithm='auto') 23 | model.fit(train_feature, train_label) 24 | 25 | # Get the k nearest neighbours 26 | dist_list, index_list = model.kneighbors([data[0 - D_window:]]) 27 | k_neighbor_label = [] 28 | for i in index_list[0]: 29 | k_neighbor_label.append(train_label[i]) 30 | 31 | # Prediction based on the k nearest neighbours 32 | ypred = model.predict([data[0-D_window:]]) 33 | ypred = np.asarray(list(map(round, ypred[0]))) 34 | 35 | # Compute e_LOO 36 | e_LOO_arr[k-1] = LOO(k_neighbor_label, ypred, k) 37 | 38 | # Pick the k with the smallest e_LOO 39 | k_min = np.argmin(e_LOO_arr[1:]) + 2 40 | model = KNeighborsRegressor(n_neighbors=k_min, weights='uniform', algorithm='auto') 41 | model.fit(train_feature, train_label) 42 | ypred = model.predict([data[0 - D_window:]]) 43 | ret_ypred += list(map(round, ypred[0])) 44 | 45 | return np.asarray(ret_ypred) 46 | 47 | 48 | def get_train_set(train_data, h, D, pred_length): 49 | feature, label = [], [] 50 | block_len = int(pred_length / 4) 51 | if h != 3: 52 | for i in range(len(train_data) - D - block_len * (h + 1) + 1): 53 | feature.append(train_data[i:i + D]) 54 | label.append(train_data[i + D + block_len * h:i + D + block_len * h + block_len]) 55 | else: 56 | for i in range(len(train_data) - D - pred_length + 1): 57 | feature.append(train_data[i:i + D]) 58 | label.append(train_data[i + D + 3 * block_len:i + D + pred_length]) 59 | return np.array(feature), np.array(label) 60 |
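# A shape sketch (an editorial addition) for get_train_set above. With a
# 120-day series, D_window=14 and pred_length=30 (so block_len=7), horizons
# h=0..2 map 14-day input windows to 7-day output blocks, while h=3 maps them
# to the final 9-day block (days 22-30 of the horizon).
def _mimo_shapes_example():
    data = np.arange(120.0)
    for h in range(4):
        feature, label = get_train_set(data, h, 14, 30)
        print(h, feature.shape, label.shape)  # label width is 7, 7, 7, then 9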
61 | 62 | # Compute LOO, used to choose k (the number of neighbours) 63 | def LOO(k_neighbor_label, ypred, k): 64 | ret = 0 65 | for neighbor in k_neighbor_label: 66 | ret = ret + ((neighbor - ypred) ** 2).sum() 67 | ret = ret * k / (k - 1)**2 68 | # ret = ret / (k)**2 69 | return ret 70 | 71 | 72 | def test(): 73 | with open('timeseries_customers_processed.csv') as input_file: 74 | input_csv = csv.reader(input_file) 75 | next(input_csv) 76 | row = next(input_csv) 77 | data = list(map(float, row[1:])) 78 | print(knn(data, 30)) 79 | 80 | 81 | if __name__ == '__main__': 82 | test() 83 |
-------------------------------------------------------------------------------- /v5/Readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/v5/Readme.txt
-------------------------------------------------------------------------------- /v5/Version_5.py: -------------------------------------------------------------------------------- 1 | import csv 2 | from sklearn.neighbors import KNeighborsRegressor 3 | import numpy as np 4 | 5 | from modify_submit import change_pred 6 | 7 | 8 | def main_fun(): 9 | class_codes = ['1201', '2011', '12', '15', '20', '22', '23', '30'] 10 | with open('timeseries_customers_processed.csv') as input_file: 11 | input_csv = csv.reader(input_file) 12 | next(input_csv) 13 | for row in input_csv: 14 | if row[0] in class_codes: 15 | # MIMO_KNN_test(row) 16 | # MIMO_KNN_LOO_test(row) 17 | MIMO_KNN_LOO_May(row) 18 | 19 | 20 | # Split off a test set to try different parameters (D_window, k); does not predict May 21 | def MIMO_KNN_test(data): 22 | code = data[0] 23 | data = list(map(float, data[1:])) 24 | train_data = data[:90] 25 | test_data = data[90:] 26 | 27 | # Train one model per time segment; the four segments are 7, 7, 7 and 9 days 28 | D_window = 14 29 | for h in range(4): 30 | train_feature, train_label = get_train_set(train_data, h, D_window) 31 | y_label = get_test_label(test_data, h) 32 | 33 | for k in range(1, 8): 34 | model = KNeighborsRegressor(n_neighbors=k, weights='uniform', algorithm='auto') 35 | model.fit(train_feature, train_label) 36 | 37 | ypred = model.predict([train_data[0-D_window:]]) 38 | ypred = np.array(list(map(round, ypred[0]))) 39 | 40 | rmse = np.sqrt(((ypred - y_label) ** 2).mean()) 41 | print(code, ' h=', h, ' k=', k, ' rmse=', rmse) 42 | 43 | 44 | # Split off a test set and implement the method from the paper; does not predict May 45 | def MIMO_KNN_LOO_test(data): 46 | code = data[0] 47 | data = list(map(float, data[1:])) 48 | train_data = data[:90] 49 | test_data = data[90:] 50 | 51 | # Train one model per time segment; the four segments are 7, 7, 7 and 9 days 52 | D_window = 14 53 | max_k = 7 54 | for h in range(4): 55 | train_feature, train_label = get_train_set(train_data, h, D_window) 56 | y_label = get_test_label(test_data, h) 57 | 58 | e_LOO_arr = np.zeros(max_k) 59 | for k in range(2, max_k + 1): 60 | model = KNeighborsRegressor(n_neighbors=k, weights='uniform', algorithm='auto') 61 | model.fit(train_feature, train_label) 62 | 63 | # Get the k nearest neighbours 64 | dist_list, index_list = model.kneighbors([train_data[0 - D_window:]]) 65 | k_neighbor_label = [] 66 | for i in index_list[0]: 67 | k_neighbor_label.append(train_label[i]) 68 | 69 | # Prediction based on the k nearest neighbours 70 | ypred = model.predict([train_data[0-D_window:]]) 71 | ypred = np.asarray(list(map(round, ypred[0]))) 72 | rmse = np.sqrt(((ypred - y_label) ** 2).mean()) 73 | print(code, ' h=', h, ' k=', k, ' rmse=', rmse) 74 | 75 | # Compute e_LOO 76 | e_LOO_arr[k-1] = LOO(k_neighbor_label, ypred, k) 77 | 78 | # Pick the k with the smallest e_LOO 79 | k_min = np.argmin(e_LOO_arr[1:]) + 2 80 | print('k_min=', k_min) 81 | 82 |
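# The four horizons used below carve May into 7/7/7/9-day segments; this tiny
# helper (an editorial addition) just documents that mapping.
def _may_segments():
    # h = 0..3 -> May days 1-7, 8-14, 15-21, 22-30, matching get_train_set
    return [(1, 7), (8, 14), (15, 21), (22, 30)]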
83 | # Use the whole dataset and implement the method from the paper; predicts the May sales 84 | def MIMO_KNN_LOO_May(data): 85 | code = data[0] 86 | data = list(map(float, data[1:])) 87 | 88 | D_window = 14 89 | max_k = 7 90 | pred_May = [] 91 | for h in range(4): 92 | train_feature, train_label = get_train_set(data, h, D_window) 93 | e_LOO_arr = np.zeros(max_k) 94 | for k in range(2, max_k + 1): 95 | model = KNeighborsRegressor(n_neighbors=k, weights='uniform', algorithm='auto') 96 | model.fit(train_feature, train_label) 97 | 98 | # Get the k nearest neighbours 99 | dist_list, index_list = model.kneighbors([data[0 - D_window:]]) 100 | k_neighbor_label = [] 101 | for i in index_list[0]: 102 | k_neighbor_label.append(train_label[i]) 103 | 104 | # Prediction based on the k nearest neighbours 105 | ypred = model.predict([data[0 - D_window:]]) 106 | ypred = np.asarray(list(map(round, ypred[0]))) 107 | 108 | # Compute e_LOO 109 | e_LOO_arr[k - 1] = LOO(k_neighbor_label, ypred, k) 110 | 111 | # Pick the k with the smallest e_LOO 112 | k_min = np.argmin(e_LOO_arr[1:]) + 2 113 | 114 | # Predict with k = k_min 115 | model = KNeighborsRegressor(n_neighbors=k_min, weights='uniform', algorithm='auto') 116 | model.fit(train_feature, train_label) 117 | ypred = model.predict([data[0 - D_window:]]) 118 | ypred = list(map(round, ypred[0])) 119 | pred_May = pred_May + ypred 120 | 121 | print(pred_May) 122 | # Replace the predictions for this code in the submission file 123 | change_pred(code, pred_May) 124 | 125 | 126 | # Compute LOO, used to choose k (the number of neighbours) 127 | def LOO(k_neighbor_label, ypred, k): 128 | ret = 0 129 | for neighbor in k_neighbor_label: 130 | ret = ret + ((neighbor - ypred) ** 2).sum() 131 | ret = ret * k / (k - 1)**2 132 | # ret = ret / (k)**2 133 | return ret 134 | 135 | 136 | def get_train_set(train_data, h, D): 137 | feature, label = [], [] 138 | if h != 3: 139 | for i in range(len(train_data) - D - 7 * (h+1) + 1): 140 | feature.append(train_data[i:i+D]) 141 | label.append(train_data[i+D+7*h:i+D+7*h+7]) 142 | else: 143 | for i in range(len(train_data) - D - 30 + 1): 144 | feature.append(train_data[i:i+D]) 145 | label.append(train_data[i+D+21:i+D+30]) 146 | return np.array(feature), np.array(label) 147 | 148 | 149 | def get_test_label(test_data, h): 150 | if h != 3: 151 | return test_data[7*h:7*h+7] 152 | else: 153 | return test_data[21:] 154 | 155 | 156 | if __name__ == '__main__': 157 | main_fun() 158 |
-------------------------------------------------------------------------------- /v5/commit_empty.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/v5/commit_empty.csv
-------------------------------------------------------------------------------- /v5/modify_submit.py: -------------------------------------------------------------------------------- 1 | import csv 2 | 3 | 4 | # Read the original prediction file, round the predictions and write them back 5 | def get_round(): 6 | rows = [] 7 | with open('submit.csv') as input_file: 8 | input_csv = csv.reader(input_file) 9 | rows.append(next(input_csv)) 10 | for row in input_csv: 11 | row[2] = str(int(round(float(row[2])))) 12 | rows.append(row) 13 | with open('submit.csv', 'w', newline='') as output_file: 14 | output_csv = csv.writer(output_file) 15 | for row in rows: 16 | output_csv.writerow(row) 17 | 18 | 19 | # Replace the predictions of class `code` in the prediction file with pred 20 | def change_pred(code, pred): 21 | rows = [] 22 | file_name = 'submit_WJ_2.csv' 23 | with open(file_name) as input_file: 24 | input_csv = csv.reader(input_file) 25 | rows.append(next(input_csv)) 26 | i = 0 27 | for row in input_csv: 28 | if row[0] == code: 29 | rows.append([code, row[1], str(pred[i])]) 30 | i
+= 1 31 | else: 32 | rows.append(row) 33 | with open(file_name, 'w', newline='') as output_file: 34 | output_csv = csv.writer(output_file) 35 | for row in rows: 36 | output_csv.writerow(row) 37 | 38 | 39 | if __name__ == '__main__': 40 | get_round() 41 | -------------------------------------------------------------------------------- /v5/submit.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/v5/submit.csv -------------------------------------------------------------------------------- /v5/timeseries_customers_processed.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/v5/timeseries_customers_processed.csv -------------------------------------------------------------------------------- /v5/调参.txt: -------------------------------------------------------------------------------- 1 | 1201 h= 0 k= 1 rmse= 42.6430366113 2 | 1201 h= 0 k= 2 rmse= 38.3723718188 3 | 1201 h= 0 k= 3 rmse= 32.4761047772 4 | 1201 h= 0 k= 4 rmse= 24.5028359411 5 | 1201 h= 0 k= 5 rmse= 24.2706714272 6 | 1201 h= 0 k= 6 rmse= 23.3642368486 7 | 1201 h= 0 k= 7 rmse= 25.0468866937 8 | 1201 h= 1 k= 1 rmse= 31.5300310362 9 | 1201 h= 1 k= 2 rmse= 28.4413534227 10 | 1201 h= 1 k= 3 rmse= 23.3969834544 11 | 1201 h= 1 k= 4 rmse= 21.5311928775 12 | 1201 h= 1 k= 5 rmse= 17.0535644291 13 | 1201 h= 1 k= 6 rmse= 15.5524493595 14 | 1201 h= 1 k= 7 rmse= 17.4633370662 15 | 1201 h= 2 k= 1 rmse= 38.0844925321 16 | 1201 h= 2 k= 2 rmse= 28.5215343758 17 | 1201 h= 2 k= 3 rmse= 23.8729502535 18 | 1201 h= 2 k= 4 rmse= 24.2934772971 19 | 1201 h= 2 k= 5 rmse= 24.4475628222 20 | 1201 h= 2 k= 6 rmse= 26.420499459 21 | 1201 h= 2 k= 7 rmse= 24.835233271 22 | 1201 h= 3 k= 1 rmse= 36.0986303218 23 | 1201 h= 3 k= 2 rmse= 29.8369045237 24 | 1201 h= 3 k= 3 rmse= 29.5872383746 25 | 1201 h= 3 k= 4 rmse= 33.2802005146 26 | 1201 h= 3 k= 5 rmse= 31.1755055639 27 | 1201 h= 3 k= 6 rmse= 31.1048216768 28 | 1201 h= 3 k= 7 rmse= 29.6407753314 29 | 12 h= 0 k= 1 rmse= 53.8582796394 30 | 12 h= 0 k= 2 rmse= 44.5673280603 31 | 12 h= 0 k= 3 rmse= 34.0183858095 32 | 12 h= 0 k= 4 rmse= 32.9145546344 33 | 12 h= 0 k= 5 rmse= 29.392663649 34 | 12 h= 0 k= 6 rmse= 26.9584608416 35 | 12 h= 0 k= 7 rmse= 28.3394613257 36 | 12 h= 1 k= 1 rmse= 69.6593753305 37 | 12 h= 1 k= 2 rmse= 40.9992685927 38 | 12 h= 1 k= 3 rmse= 40.0368671351 39 | 12 h= 1 k= 4 rmse= 41.3502184792 40 | 12 h= 1 k= 5 rmse= 36.9253007747 41 | 12 h= 1 k= 6 rmse= 34.0657271442 42 | 12 h= 1 k= 7 rmse= 33.3902866927 43 | 12 h= 2 k= 1 rmse= 44.7931755006 44 | 12 h= 2 k= 2 rmse= 52.5043373322 45 | 12 h= 2 k= 3 rmse= 47.9634527977 46 | 12 h= 2 k= 4 rmse= 45.7079428355 47 | 12 h= 2 k= 5 rmse= 41.747281894 48 | 12 h= 2 k= 6 rmse= 40.4535372974 49 | 12 h= 2 k= 7 rmse= 39.1421180701 50 | 12 h= 3 k= 1 rmse= 43.1470354331 51 | 12 h= 3 k= 2 rmse= 44.093202437 52 | 12 h= 3 k= 3 rmse= 36.2472381216 53 | 12 h= 3 k= 4 rmse= 34.4079060788 54 | 12 h= 3 k= 5 rmse= 38.8412886812 55 | 12 h= 3 k= 6 rmse= 38.0493486071 56 | 12 h= 3 k= 7 rmse= 37.641989556 57 | 15 h= 0 k= 1 rmse= 14.0813960347 58 | 15 h= 0 k= 2 rmse= 12.2485595361 59 | 15 h= 0 k= 3 rmse= 13.2287257726 60 | 15 h= 0 k= 4 rmse= 12.7065671652 61 | 15 h= 0 k= 5 rmse= 11.87624956 62 | 15 h= 0 k= 6 rmse= 13.4246432771 63 | 15 h= 0 k= 7 rmse= 13.2146747442 64 | 15 h= 1 k= 1 rmse= 9.57675757834 65 | 15 h= 1 k= 2 rmse= 
10.9716280299 66 | 15 h= 1 k= 3 rmse= 11.3157687678 67 | 15 h= 1 k= 4 rmse= 10.8028703988 68 | 15 h= 1 k= 5 rmse= 12.8554179205 69 | 15 h= 1 k= 6 rmse= 12.5323010033 70 | 15 h= 1 k= 7 rmse= 12.388370937 71 | 15 h= 2 k= 1 rmse= 20.4904158781 72 | 15 h= 2 k= 2 rmse= 12.0169917472 73 | 15 h= 2 k= 3 rmse= 11.8937080614 74 | 15 h= 2 k= 4 rmse= 11.1624704725 75 | 15 h= 2 k= 5 rmse= 11.7760027097 76 | 15 h= 2 k= 6 rmse= 12.1629264637 77 | 15 h= 2 k= 7 rmse= 13.0868435863 78 | 15 h= 3 k= 1 rmse= 31.3581462037 79 | 15 h= 3 k= 2 rmse= 20.8123719358 80 | 15 h= 3 k= 3 rmse= 18.6966951072 81 | 15 h= 3 k= 4 rmse= 13.589438938 82 | 15 h= 3 k= 5 rmse= 12.2614526733 83 | 15 h= 3 k= 6 rmse= 13.2781398948 84 | 15 h= 3 k= 7 rmse= 12.7837448865 85 | 20 h= 0 k= 1 rmse= 19.6650523954 86 | 20 h= 0 k= 2 rmse= 17.5970236087 87 | 20 h= 0 k= 3 rmse= 16.9153042486 88 | 20 h= 0 k= 4 rmse= 16.7590587119 89 | 20 h= 0 k= 5 rmse= 16.8402285481 90 | 20 h= 0 k= 6 rmse= 16.5004886991 91 | 20 h= 0 k= 7 rmse= 15.6761515174 92 | 20 h= 1 k= 1 rmse= 11.5201686496 93 | 20 h= 1 k= 2 rmse= 9.63450635631 94 | 20 h= 1 k= 3 rmse= 7.58729930368 95 | 20 h= 1 k= 4 rmse= 8.10614695274 96 | 20 h= 1 k= 5 rmse= 9.25628043697 97 | 20 h= 1 k= 6 rmse= 7.5033965945 98 | 20 h= 1 k= 7 rmse= 7.24532229897 99 | 20 h= 2 k= 1 rmse= 22.5134880207 100 | 20 h= 2 k= 2 rmse= 19.3281403697 101 | 20 h= 2 k= 3 rmse= 17.5829934327 102 | 20 h= 2 k= 4 rmse= 16.5980608769 103 | 20 h= 2 k= 5 rmse= 17.2165487439 104 | 20 h= 2 k= 6 rmse= 16.4409280022 105 | 20 h= 2 k= 7 rmse= 15.524009627 106 | 20 h= 3 k= 1 rmse= 26.1023626006 107 | 20 h= 3 k= 2 rmse= 33.697276393 108 | 20 h= 3 k= 3 rmse= 29.951316307 109 | 20 h= 3 k= 4 rmse= 32.3881422814 110 | 20 h= 3 k= 5 rmse= 25.7046785895 111 | 20 h= 3 k= 6 rmse= 25.2773090489 112 | 20 h= 3 k= 7 rmse= 22.2029257966 113 | 22 h= 0 k= 1 rmse= 43.8438788691 114 | 22 h= 0 k= 2 rmse= 41.6797254107 115 | 22 h= 0 k= 3 rmse= 38.884937818 116 | 22 h= 0 k= 4 rmse= 32.1492490424 117 | 22 h= 0 k= 5 rmse= 31.6650243057 118 | 22 h= 0 k= 6 rmse= 33.4756367623 119 | 22 h= 0 k= 7 rmse= 33.0505710863 120 | 22 h= 1 k= 1 rmse= 29.3841551083 121 | 22 h= 1 k= 2 rmse= 23.6434437635 122 | 22 h= 1 k= 3 rmse= 19.4591808033 123 | 22 h= 1 k= 4 rmse= 19.5137028576 124 | 22 h= 1 k= 5 rmse= 16.9122752479 125 | 22 h= 1 k= 6 rmse= 17.1294598104 126 | 22 h= 1 k= 7 rmse= 16.8492357209 127 | 22 h= 2 k= 1 rmse= 38.7243002335 128 | 22 h= 2 k= 2 rmse= 32.7553576964 129 | 22 h= 2 k= 3 rmse= 33.8979605757 130 | 22 h= 2 k= 4 rmse= 30.356791558 131 | 22 h= 2 k= 5 rmse= 28.7923710234 132 | 22 h= 2 k= 6 rmse= 25.7167889433 133 | 22 h= 2 k= 7 rmse= 31.4764977499 134 | 22 h= 3 k= 1 rmse= 127.657092765 135 | 22 h= 3 k= 2 rmse= 109.449161924 136 | 22 h= 3 k= 3 rmse= 86.6052707902 137 | 22 h= 3 k= 4 rmse= 75.037769277 138 | 22 h= 3 k= 5 rmse= 69.8723081655 139 | 22 h= 3 k= 6 rmse= 62.136255331 140 | 22 h= 3 k= 7 rmse= 57.9065491146 141 | 23 h= 0 k= 1 rmse= 5.63154381269 142 | 23 h= 0 k= 2 rmse= 4.63173518114 143 | 23 h= 0 k= 3 rmse= 4.83828724199 144 | 23 h= 0 k= 4 rmse= 5.80938123219 145 | 23 h= 0 k= 5 rmse= 5.68562994596 146 | 23 h= 0 k= 6 rmse= 4.35150609445 147 | 23 h= 0 k= 7 rmse= 3.76632918272 148 | 23 h= 1 k= 1 rmse= 6.4142698059 149 | 23 h= 1 k= 2 rmse= 6.7005057888 150 | 23 h= 1 k= 3 rmse= 7.15141892803 151 | 23 h= 1 k= 4 rmse= 6.56498281641 152 | 23 h= 1 k= 5 rmse= 9.63436179487 153 | 23 h= 1 k= 6 rmse= 10.8737237449 154 | 23 h= 1 k= 7 rmse= 10.0319859996 155 | 23 h= 2 k= 1 rmse= 5.8064004094 156 | 23 h= 2 k= 2 rmse= 5.92811066778 157 | 23 h= 2 k= 3 rmse= 7.75757150579 
158 | 23 h= 2 k= 4 rmse= 7.67441476216 159 | 23 h= 2 k= 5 rmse= 7.50348547005 160 | 23 h= 2 k= 6 rmse= 6.80538086404 161 | 23 h= 2 k= 7 rmse= 7.34530943247 162 | 23 h= 3 k= 1 rmse= 31.3209195267 163 | 23 h= 3 k= 2 rmse= 26.1043465949 164 | 23 h= 3 k= 3 rmse= 26.5938520968 165 | 23 h= 3 k= 4 rmse= 20.6796134379 166 | 23 h= 3 k= 5 rmse= 18.5126222927 167 | 23 h= 3 k= 6 rmse= 17.3251045002 168 | 23 h= 3 k= 7 rmse= 15.8480509754 169 | 30 h= 0 k= 1 rmse= 17.4396920025 170 | 30 h= 0 k= 2 rmse= 15.286577467 171 | 30 h= 0 k= 3 rmse= 14.799299084 172 | 30 h= 0 k= 4 rmse= 13.4640063851 173 | 30 h= 0 k= 5 rmse= 12.5409692762 174 | 30 h= 0 k= 6 rmse= 12.2409109388 175 | 30 h= 0 k= 7 rmse= 11.5748669357 176 | 30 h= 1 k= 1 rmse= 12.5470542929 177 | 30 h= 1 k= 2 rmse= 11.1238979074 178 | 30 h= 1 k= 3 rmse= 10.0442549022 179 | 30 h= 1 k= 4 rmse= 10.4489658474 180 | 30 h= 1 k= 5 rmse= 9.76706560335 181 | 30 h= 1 k= 6 rmse= 9.5492204658 182 | 30 h= 1 k= 7 rmse= 8.02405103911 183 | 30 h= 2 k= 1 rmse= 13.7995859151 184 | 30 h= 2 k= 2 rmse= 11.9178406627 185 | 30 h= 2 k= 3 rmse= 10.9421559178 186 | 30 h= 2 k= 4 rmse= 11.4155579741 187 | 30 h= 2 k= 5 rmse= 11.1698815423 188 | 30 h= 2 k= 6 rmse= 10.3885622264 189 | 30 h= 2 k= 7 rmse= 9.75646178754 190 | 30 h= 3 k= 1 rmse= 18.5082564159 191 | 30 h= 3 k= 2 rmse= 16.269989379 192 | 30 h= 3 k= 3 rmse= 13.5020710817 193 | 30 h= 3 k= 4 rmse= 13.4553609332 194 | 30 h= 3 k= 5 rmse= 12.1959416121 195 | 30 h= 3 k= 6 rmse= 12.5498267393 196 | 30 h= 3 k= 7 rmse= 12.7345557171 -------------------------------------------------------------------------------- /v6_stacking/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /v6_stacking/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /v6_stacking/.idea/v6_stacking.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 12 | -------------------------------------------------------------------------------- /v6_stacking/Version6_stacking.py: -------------------------------------------------------------------------------- 1 | import csv 2 | from sklearn import linear_model 3 | from sklearn.ensemble import RandomForestRegressor 4 | import xgboost as xgb 5 | import numpy as np 6 | 7 | 8 | train_set_x = {} 9 | train_set_y = {} 10 | test_set_x = {} 11 | test_set_y = {} 12 | commit_codes = [] 13 | 14 | 15 | # 载入训练和测试模型的数据(不包括5月份的) 16 | def load_data(): 17 | with open('train.csv') as input_file: 18 | input_csv = csv.reader(input_file) 19 | day = 0 20 | for row in input_csv: 21 | code = row[0] 22 | if day == 0: 23 | train_set_x[code] = [] 24 | train_set_y[code] = [] 25 | x = list(map(float, row[2:-1])) 26 | train_set_x[code].append(x) 27 | train_set_y[code].append(float(row[-1])) 28 | day = (day + 1) % 100 29 | with open('test.csv') as input_file: 30 | input_csv = csv.reader(input_file) 31 | day = 0 32 | for row in input_csv: 33 | code = row[0] 34 | if day == 0: 35 | test_set_x[code] = [] 36 | test_set_y[code] = [] 37 | x = list(map(float, row[2:-1])) 38 | test_set_x[code].append(x) 39 | test_set_y[code].append(float(row[-1])) 40 | day = (day + 1) % 20 41 | 42 | 43 | # 获取提交文件中需要提交的codes,保存在commit_codes中 44 | def codes_list_out(): 45 | global commit_codes 46 | codes = [0] 47 
| with open('commit_empty.csv') as native_set_file: 48 | native_csv = csv.reader(native_set_file) 49 | next(native_csv) 50 | for row in native_csv: 51 | if row[0] != codes[-1]: 52 | codes.append(row[0]) 53 | commit_codes = codes[1:] 54 | 55 | 56 | def train_test_eval(): 57 | for code in commit_codes: 58 | # model = linear_model.LinearRegression() 59 | model = RandomForestRegressor() 60 | model.fit(train_set_x[code], train_set_y[code]) 61 | ypred = model.predict(test_set_x[code]) 62 | ypred = np.array(list(map(round, ypred))) 63 | rmse = np.sqrt(((test_set_y[code] - ypred) ** 2).mean()) 64 | print(code, ' rmse=', rmse) 65 | 66 | 67 | if __name__ == '__main__': 68 | codes_list_out() 69 | load_data() 70 | train_test_eval() 71 | -------------------------------------------------------------------------------- /v6_stacking/commit_empty.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/v6_stacking/commit_empty.csv -------------------------------------------------------------------------------- /v6_stacking/cv/arima_cv.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Spyder Editor 4 | 5 | This is a temporary script file. 6 | """ 7 | 8 | import datetime as dt 9 | import numpy as np 10 | 11 | import csv 12 | import arimaPredicter 13 | import dataLoader 14 | 15 | index = [dt.datetime(2015,1,x) for x in range(1, 32)] 16 | index = index + [dt.datetime(2015,2,x) for x in (range(1 ,29))] 17 | index = index + [dt.datetime(2015,3,x) for x in range(1, 32)] 18 | index = index + [dt.datetime(2015,4,x) for x in range(1, 31)] 19 | 20 | def sariamOutput(): 21 | loader = dataLoader.loader("datam.csv", "lcdatam.csv") 22 | loader.setSize(120, 0, 0) 23 | 24 | f1 = open("result01.csv", "wb") 25 | writer1 = csv.writer(f1) 26 | f2 = open("result11.csv", "wb") 27 | writer2 = csv.writer(f2) 28 | f3 = open("result12.csv", "wb") 29 | writer3 = csv.writer(f3) 30 | 31 | ap = arimaPredicter.predicter(); 32 | ap.setIndex(index) 33 | 34 | while (True): 35 | midclass, _, trainData, _, _ = loader.getNextMidClass() 36 | if (midclass == 0): 37 | break 38 | 39 | ap.setPara(midclass, (0, 1)) 40 | try: 41 | model = ap.sarimaTrain(midclass, trainData) 42 | result = ap.sarimaPredict(model, 30) 43 | except: 44 | result = np.zeros(30) 45 | for i in range(0, 30): 46 | writer1.writerow([midclass, "201505%02d" % (i+1), result[i]]) 47 | 48 | 49 | ap.setPara(midclass, (1, 1)) 50 | try: 51 | model = ap.sarimaTrain(midclass, trainData) 52 | result = ap.sarimaPredict(model, 30) 53 | except: 54 | result = np.zeros(30) 55 | for i in range(0, 30): 56 | writer2.writerow([midclass, "201505%02d" % (i+1), result[i]]) 57 | 58 | ap.setPara(midclass, (1, 2)) 59 | try: 60 | model = ap.sarimaTrain(midclass, trainData) 61 | result = ap.sarimaPredict(model, 30) 62 | except: 63 | result = np.zeros(30) 64 | for i in range(0, 30): 65 | writer3.writerow([midclass, "201505%02d" % (i+1), result[i]]) 66 | 67 | 68 | while (True): 69 | larclass, _, trainData, _, _ = loader.getNextLarClass() 70 | if (larclass == 0): 71 | break 72 | 73 | ap.setPara(larclass, (0, 1)) 74 | try: 75 | model = ap.sarimaTrain(larclass, trainData) 76 | result = ap.sarimaPredict(model, 30) 77 | except: 78 | result = np.zeros(30) 79 | for i in range(0, 30): 80 | writer1.writerow([larclass, "201505%02d" % (i+1), result[i]]) 81 | 82 | 83 | ap.setPara(larclass, (1, 1)) 84 | try: 85 | model = 
ap.sarimaTrain(larclass, trainData) 86 | result = ap.sarimaPredict(model, 30) 87 | except: 88 | result = np.zeros(30) 89 | for i in range(0, 30): 90 | writer2.writerow([larclass, "201505%02d" % (i+1), result[i]]) 91 | 92 | ap.setPara(larclass, (1, 2)) 93 | try: 94 | model = ap.sarimaTrain(larclass, trainData) 95 | result = ap.sarimaPredict(model, 30) 96 | except: 97 | result = np.zeros(30) 98 | for i in range(0, 30): 99 | writer3.writerow([larclass, "201505%02d" % (i+1), result[i]]) 100 | 101 | f1.close() 102 | f2.close() 103 | f3.close() 104 | loader.closeFiles() 105 | 106 | sariamOutput() -------------------------------------------------------------------------------- /v6_stacking/cv/xgboost_cv.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Spyder Editor 4 | 5 | This is a temporary script file. 6 | """ 7 | 8 | import xgboost as xgb 9 | from numpy import array 10 | import csv 11 | import datetime as dt 12 | 13 | larclasPred = {} 14 | larclasLabl = {} 15 | totalBias = 0 16 | totalCount = 0 17 | 18 | dtIndex = [dt.datetime(2015,1,x) for x in range(1, 32)] 19 | dtIndex = dtIndex + [dt.datetime(2015,2,x) for x in (range(1, 29))] 20 | dtIndex = dtIndex + [dt.datetime(2015,3,x) for x in range(1, 32)] 21 | dtIndex = dtIndex + [dt.datetime(2015,4,x) for x in (range(1, 31))] 22 | 23 | modelChoose = [] 24 | lcModelChoose = [] 25 | arimaParaChoose = {} 26 | 27 | def getData(csvReader, trainCount, testCount): 28 | trainData = [] 29 | testData = [] 30 | trainLabel = [] 31 | testLabel = [] 32 | try: 33 | for x in range(0, trainCount): 34 | row = csvReader.next() 35 | """ 36 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]), 37 | float(row[7]), float(row[8]), float(row[9]), float(row[10]), 38 | float(row[11]), float(row[12])] 39 | """ 40 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]), 41 | float(row[7]), float(row[8])] 42 | trainData.append(data) 43 | trainLabel.append(float(row[15])) 44 | for x in range(0, testCount): 45 | row = csvReader.next() 46 | """ 47 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]), 48 | float(row[7]), float(row[8]), float(row[9]), float(row[10]), 49 | float(row[11]), float(row[12])] 50 | """ 51 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]), 52 | float(row[7]), float(row[8])] 53 | testData.append(data) 54 | testLabel.append(float(row[15])) 55 | return int(row[0]), trainData, trainLabel, testData, testLabel 56 | except StopIteration: 57 | return 0, [], [], [], [] 58 | 59 | def getLCData(csvReader, trainCount, testCount): 60 | trainData = [] 61 | testData = [] 62 | trainLabel = [] 63 | testLabel = [] 64 | try: 65 | for x in range(0, trainCount): 66 | row = csvReader.next() 67 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]), 68 | float(row[7])] 69 | trainData.append(data) 70 | trainLabel.append(float(row[14])) 71 | for x in range(0, testCount): 72 | row = csvReader.next() 73 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]), 74 | float(row[7])] 75 | testData.append(data) 76 | testLabel.append(float(row[14])) 77 | return int(row[0]), trainData, trainLabel, testData, testLabel 78 | except StopIteration: 79 | return 0, [], [], [], [] 80 | 81 | def xgboostPredict(trainData, trainLabel, dataToPredict): 82 | dtrain = xgb.DMatrix(trainData, trainLabel) 83 | params = {"objective": "reg:linear"} 84 | gbm = xgb.train(dtrain=dtrain, params=params) 85 | return 
gbm.predict(xgb.DMatrix(dataToPredict)) 86 | 87 | def simData(data): 88 | ret = data[:] 89 | for i in range(0, len(ret)): 90 | for j in range(4, len(ret[i])): 91 | ret[i][j] = 0 92 | return ret 93 | 94 | def xgboostCV(trainSize): 95 | global larclasPred 96 | larclasPred = {} 97 | f1 = open("datam.csv", "r") 98 | data_csv = csv.reader(f1) 99 | f3 = open("lcdatam.csv", "r") 100 | lc_data_csv = csv.reader(f3) 101 | f4 = open('xgboost_cv.csv', 'wb') 102 | writer = csv.writer(f4) 103 | 104 | split = [int(trainSize/5), int(2*trainSize/5), 105 | int(3*trainSize/5), int(4*trainSize/5)] 106 | 107 | while (True): 108 | midclass, trD, trL, teD, teL = getData(data_csv, trainSize, 0) 109 | if (midclass == 0): 110 | break 111 | else: 112 | trd1 = trD[split[0]:] 113 | trl1 = trL[split[0]:] 114 | ted1 = simData(trD[:split[0]]) 115 | tep1 = xgboostPredict(array(trd1), array(trl1), array(ted1)) 116 | 117 | trd2 = trD[:split[0]]+trD[split[1]:] 118 | trl2 = trL[:split[0]]+trL[split[1]:] 119 | ted2 = simData(trD[split[0]:split[1]]) 120 | tep2 = xgboostPredict(array(trd2), array(trl2), array(ted2)) 121 | 122 | trd3 = trD[:split[1]]+trD[split[2]:] 123 | trl3 = trL[:split[1]]+trL[split[2]:] 124 | ted3 = simData(trD[split[1]:split[2]]) 125 | tep3 = xgboostPredict(array(trd3), array(trl3), array(ted3)) 126 | 127 | trd4 = trD[:split[2]]+trD[split[3]:] 128 | trl4 = trL[:split[2]]+trL[split[3]:] 129 | ted4 = simData(trD[split[2]:split[3]]) 130 | tep4 = xgboostPredict(array(trd4), array(trl4), array(ted4)) 131 | 132 | trd5 = trD[:split[3]] 133 | trl5 = trL[:split[3]] 134 | ted5 = simData(trD[split[3]:]) 135 | tep5 = xgboostPredict(array(trd5), array(trl5), array(ted5)) 136 | 137 | ans = list(tep1) + list(tep2) + list(tep3) + list(tep4) + list(tep5) 138 | 139 | for i in range(0, trainSize): 140 | writer.writerow([midclass, dtIndex[i].strftime("%Y%m%d"), 141 | ans[i]]) 142 | 143 | while (True): 144 | larclass, trD, trL, teD, teL = getLCData(lc_data_csv, trainSize, 0) 145 | if (larclass == 0): 146 | break 147 | else: 148 | trd1 = trD[split[0]:] 149 | trl1 = trL[split[0]:] 150 | ted1 = simData(trD[:split[0]]) 151 | tep1 = xgboostPredict(array(trd1), array(trl1), array(ted1)) 152 | 153 | trd2 = trD[:split[0]]+trD[split[1]:] 154 | trl2 = trL[:split[0]]+trL[split[1]:] 155 | ted2 = simData(trD[split[0]:split[1]]) 156 | tep2 = xgboostPredict(array(trd2), array(trl2), array(ted2)) 157 | 158 | trd3 = trD[:split[1]]+trD[split[2]:] 159 | trl3 = trL[:split[1]]+trL[split[2]:] 160 | ted3 = simData(trD[split[1]:split[2]]) 161 | tep3 = xgboostPredict(array(trd3), array(trl3), array(ted3)) 162 | 163 | trd4 = trD[:split[2]]+trD[split[3]:] 164 | trl4 = trL[:split[2]]+trL[split[3]:] 165 | ted4 = simData(trD[split[2]:split[3]]) 166 | tep4 = xgboostPredict(array(trd4), array(trl4), array(ted4)) 167 | 168 | trd5 = trD[:split[3]] 169 | trl5 = trL[:split[3]] 170 | ted5 = simData(trD[split[3]:]) 171 | tep5 = xgboostPredict(array(trd5), array(trl5), array(ted5)) 172 | 173 | ans = list(tep1) + list(tep2) + list(tep3) + list(tep4) + list(tep5) 174 | 175 | for i in range(0, trainSize): 176 | writer.writerow([larclass, dtIndex[i].strftime("%Y%m%d"), 177 | ans[i]]) 178 | 179 | f1.close() 180 | f3.close() 181 | f4.close() 182 | 183 | xgboostCV(120) -------------------------------------------------------------------------------- /v6_stacking/prepare_data.py: -------------------------------------------------------------------------------- 1 | # 把不同模型结果合并在一个文件中 2 | 3 | import csv 4 | 5 | commit_codes = [] 6 | 7 | 8 | # 获取提交文件中需要提交的codes,保存在commit_codes中 9 
| def codes_list_out(): 10 | global commit_codes 11 | codes = [0] 12 | with open('commit_empty.csv') as native_set_file: 13 | native_csv = csv.reader(native_set_file) 14 | next(native_csv) 15 | for row in native_csv: 16 | if row[0] != codes[-1]: 17 | codes.append(row[0]) 18 | commit_codes = codes[1:] 19 | 20 | 21 | def get_day(date): 22 | date = int(date) 23 | if date < 20150132: 24 | return date - 20150100 25 | elif date < 20150229: 26 | return date - 20150200 + 31 27 | elif date < 20150332: 28 | return date - 20150300 + 59 29 | else: 30 | return date - 20150400 + 90 31 | 32 | 33 | def merge_file(): 34 | features = {} 35 | with open('five_fold_feature_v3.csv') as input_file: 36 | input_csv = csv.reader(input_file) 37 | next(input_csv) 38 | for row in input_csv: 39 | if row[0] not in features: 40 | features[row[0]] = [row] 41 | else: 42 | features[row[0]].append(row) 43 | with open('five_fold_feature_xgboost.csv') as input_file: 44 | input_csv = csv.reader(input_file) 45 | for row in input_csv: 46 | if row[0] in features: 47 | features[row[0]][get_day(row[1])-1] = features[row[0]][get_day(row[1])-1] + row[2:] 48 | # 最后一列是label 49 | with open('timeseries_customers.csv') as input_file: 50 | input_csv = csv.reader(input_file) 51 | for row in input_csv: 52 | if row[0] in features: 53 | for day in range(120): 54 | features[row[0]][day].append(row[day+1]) 55 | with open('merged_feature.csv', 'w', newline='') as output_file: 56 | output_csv = csv.writer(output_file) 57 | for code in commit_codes: 58 | for row in features[code]: 59 | output_csv.writerow(row) 60 | 61 | 62 | def divide_train_test_set(): 63 | with open('merged_feature.csv') as input_file,\ 64 | open('train.csv', 'w', newline='') as train_file,\ 65 | open('test.csv', 'w', newline='') as test_file: 66 | input_csv = csv.reader(input_file) 67 | train_csv = csv.writer(train_file) 68 | test_csv = csv.writer(test_file) 69 | day = 0 70 | for row in input_csv: 71 | if day < 100: 72 | train_csv.writerow(row) 73 | day += 1 74 | else: 75 | test_csv.writerow(row) 76 | day = (day + 1) % 120 77 | 78 | 79 | if __name__ == '__main__': 80 | codes_list_out() 81 | merge_file() 82 | divide_train_test_set() 83 | -------------------------------------------------------------------------------- /v6_stacking/timeseries_customers.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/v6_stacking/timeseries_customers.csv -------------------------------------------------------------------------------- /xgboostPredicter.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Nov 27 21:53:16 2017 4 | 5 | @author: wangjun 6 | """ 7 | 8 | import xgboost as xgb 9 | from numpy import array 10 | import datetime as dt 11 | import numpy 12 | 13 | class predicter: 14 | def __init__(self, params = {"objective":"reg:linear", "max_depth":1, "gamma":2}): 15 | self.params = params 16 | 17 | def setDefaultParams(self, params): 18 | self.params = params 19 | 20 | def xgboostTrain(self, trainData, trainLabel, params = {}): 21 | if (type(trainData)!=numpy.ndarray): 22 | trainData = array(trainData) 23 | if (type(trainLabel)!=numpy.ndarray): 24 | trainLabel = array(trainLabel) 25 | dTrain = xgb.DMatrix(trainData, trainLabel) 26 | if (len(params)==0): 27 | params = self.params 28 | model = xgb.train(dtrain=dTrain, params=params) 29 | return model 30 | 31 | @staticmethod 32 | 
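# Note (editorial addition): xgboostPredict below only wraps the conversion to
# xgb.DMatrix; it accepts plain lists or numpy arrays and returns the raw
# float predictions from the booster.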
def xgboostPredict(model, dataToPredict): 33 | if (type(dataToPredict)!=numpy.ndarray): 34 | dataToPredict = array(dataToPredict) 35 | return model.predict(xgb.DMatrix(dataToPredict)) 36 | 37 | @staticmethod 38 | def simulateFeature(trainData, musk): 39 | for feature in trainData: 40 | for i in musk: 41 | feature[i] = 0 42 | 43 | @staticmethod 44 | def createFeature(date_from, length, zeros, DictHoilday, DictBeforeHoilday, 45 | DictWorkday): 46 | delta = dt.timedelta(days=1) 47 | now = date_from 48 | index = [] 49 | for i in range(0, length): 50 | index.append(now) 51 | now = now + delta 52 | feature = [] 53 | empty = [0 for x in range(0, zeros+4)] 54 | for i in range(0, length): 55 | x = empty[:] 56 | x[0] = index[i].day 57 | x[1] = (index[i].weekday() + 1) % 7 58 | dayCount = i + 1 59 | if (dayCount in DictHoilday): 60 | x[3] = 1 61 | elif (dayCount in DictBeforeHoilday): 62 | x[2] = 1 63 | elif (dayCount in DictWorkday): 64 | if (x[1]==6 or ((dayCount+1) in DictHoilday)): 65 | x[2] = 1 66 | elif (x[1]==0 or x[1]==6): 67 | x[3] = 1 68 | elif (x[1]==5): 69 | x[2] = 1 70 | feature.append(x) 71 | return feature --------------------------------------------------------------------------------
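For reference, a minimal usage sketch of the predicter class above (an editorial addition; the empty holiday dicts are placeholders, and trainData/trainLabel are assumed to exist elsewhere):

import datetime as dt
import xgboostPredicter

p = xgboostPredicter.predicter()
# 30 feature rows for May 2015, with 2 extra zero-padded slots and no holiday info
features = p.createFeature(dt.datetime(2015, 5, 1), 30, 2, {}, {}, {})
print(features[0])  # [day_in_month, weekday, before-holiday flag, holiday flag, 0, 0]
# model = p.xgboostTrain(trainData, trainLabel)
# preds = p.xgboostPredict(model, features)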