├── .gitignore ├── README.md ├── arimaPredicter.py ├── arima_knn ├── KNN_interface.py ├── KNN_interface.pyc ├── Version_5.py ├── arima_knn.py └── modify_submit.py ├── arima_xgboost ├── arima_xgboost.py ├── arima_xgboost_imporve.py ├── arima_xgboost_multi.py └── statanalysis.py ├── data ├── datam.csv ├── example.csv ├── lcdatam.csv ├── submit0.csv ├── submit1.csv └── train.csv ├── dataLoader.py ├── doc ├── compare.docx ├── manual.py ├── parareport.txt ├── report.pdf ├── report.txt ├── ~$ompare.docx └── 特征选择.xlsx ├── plot_pic ├── 1001_customers.jpg ├── 1002_customers.jpg ├── 1004_customers.jpg ├── 1005_customers.jpg ├── 1006_customers.jpg ├── 1007_customers.jpg ├── 1099_customers.jpg ├── 10_customers.jpg ├── 1101_customers.jpg ├── 1102_customers.jpg ├── 1103_customers.jpg ├── 1104_customers.jpg ├── 11_customers.jpg ├── 1201_customers.jpg ├── 1202_customers.jpg ├── 1203_customers.jpg ├── 1205_customers.jpg ├── 12_customers.jpg ├── 1301_customers.jpg ├── 1302_customers.jpg ├── 1306_customers.jpg ├── 1308_customers.jpg ├── 1399_customers.jpg ├── 13_customers.jpg ├── 1401_customers.jpg ├── 1402_customers.jpg ├── 1403_customers.jpg ├── 1404_customers.jpg ├── 14_customers.jpg ├── 1501_customers.jpg ├── 1502_customers.jpg ├── 1503_customers.jpg ├── 1504_customers.jpg ├── 1505_customers.jpg ├── 1507_customers.jpg ├── 1508_customers.jpg ├── 1509_customers.jpg ├── 1510_customers.jpg ├── 1511_customers.jpg ├── 1512_customers.jpg ├── 1513_customers.jpg ├── 1514_customers.jpg ├── 1515_customers.jpg ├── 1516_customers.jpg ├── 1517_customers.jpg ├── 1518_customers.jpg ├── 1519_customers.jpg ├── 1520_customers.jpg ├── 1521_customers.jpg ├── 15_customers.jpg ├── 2001_customers.jpg ├── 2002_customers.jpg ├── 2003_customers.jpg ├── 2004_customers.jpg ├── 2005_customers.jpg ├── 2006_customers.jpg ├── 2007_customers.jpg ├── 2008_customers.jpg ├── 2009_customers.jpg ├── 2010_customers.jpg ├── 2011_customers.jpg ├── 2012_customers.jpg ├── 2013_customers.jpg ├── 2014_customers.jpg ├── 2015_customers.jpg ├── 20_customers.jpg ├── 2101_customers.jpg ├── 2102_customers.jpg ├── 2103_customers.jpg ├── 2104_customers.jpg ├── 2105_customers.jpg ├── 2106_customers.jpg ├── 2107_customers.jpg ├── 2108_customers.jpg ├── 21_customers.jpg ├── 2201_customers.jpg ├── 2202_customers.jpg ├── 2203_customers.jpg ├── 2204_customers.jpg ├── 2205_customers.jpg ├── 2206_customers.jpg ├── 2207_customers.jpg ├── 2208_customers.jpg ├── 2209_customers.jpg ├── 2210_customers.jpg ├── 2211_customers.jpg ├── 2212_customers.jpg ├── 22_customers.jpg ├── 2301_customers.jpg ├── 2302_customers.jpg ├── 2303_customers.jpg ├── 2304_customers.jpg ├── 2305_customers.jpg ├── 2306_customers.jpg ├── 2307_customers.jpg ├── 2308_customers.jpg ├── 2309_customers.jpg ├── 2310_customers.jpg ├── 2311_customers.jpg ├── 2312_customers.jpg ├── 2313_customers.jpg ├── 2314_customers.jpg ├── 2315_customers.jpg ├── 2316_customers.jpg ├── 2317_customers.jpg ├── 23_customers.jpg ├── 3001_customers.jpg ├── 3002_customers.jpg ├── 3003_customers.jpg ├── 3004_customers.jpg ├── 3005_customers.jpg ├── 3006_customers.jpg ├── 3007_customers.jpg ├── 3008_customers.jpg ├── 3009_customers.jpg ├── 3010_customers.jpg ├── 3011_customers.jpg ├── 3012_customers.jpg ├── 3013_customers.jpg ├── 3014_customers.jpg ├── 3015_customers.jpg ├── 3016_customers.jpg ├── 3017_customers.jpg ├── 3018_customers.jpg ├── 30_customers.jpg ├── 3101_customers.jpg ├── 3102_customers.jpg ├── 3104_customers.jpg ├── 3105_customers.jpg ├── 3106_customers.jpg ├── 3107_customers.jpg ├── 3108_customers.jpg ├── 
3109_customers.jpg ├── 3110_customers.jpg ├── 3111_customers.jpg ├── 3112_customers.jpg ├── 3113_customers.jpg ├── 3114_customers.jpg ├── 3115_customers.jpg ├── 3116_customers.jpg ├── 3117_customers.jpg ├── 3118_customers.jpg ├── 3119_customers.jpg ├── 3120_customers.jpg ├── 3121_customers.jpg ├── 3122_customers.jpg ├── 3125_customers.jpg ├── 3126_customers.jpg ├── 3128_customers.jpg ├── 31_customers.jpg ├── 3208_customers.jpg ├── 3212_customers.jpg ├── 3213_customers.jpg ├── 3215_customers.jpg ├── 3216_customers.jpg ├── 3217_customers.jpg ├── 3218_customers.jpg ├── 3227_customers.jpg ├── 32_customers.jpg ├── 3301_customers.jpg ├── 3303_customers.jpg ├── 3311_customers.jpg ├── 3313_customers.jpg ├── 3314_customers.jpg ├── 3315_customers.jpg ├── 3316_customers.jpg ├── 3317_customers.jpg ├── 3319_customers.jpg ├── 3320_customers.jpg ├── 3321_customers.jpg ├── 3322_customers.jpg ├── 3323_customers.jpg ├── 3325_customers.jpg ├── 3326_customers.jpg ├── 3328_customers.jpg ├── 3330_customers.jpg ├── 33_customers.jpg ├── 3401_customers.jpg ├── 3402_customers.jpg ├── 3403_customers.jpg ├── 3404_customers.jpg ├── 3405_customers.jpg ├── 3406_customers.jpg ├── 3407_customers.jpg ├── 3408_customers.jpg ├── 3409_customers.jpg ├── 3410_customers.jpg ├── 3412_customers.jpg ├── 3413_customers.jpg ├── 3414_customers.jpg ├── 3415_customers.jpg ├── 3416_customers.jpg ├── 3417_customers.jpg ├── 3419_customers.jpg ├── 3421_customers.jpg ├── 3423_customers.jpg ├── 3424_customers.jpg ├── 3426_customers.jpg ├── 3427_customers.jpg ├── 3428_customers.jpg ├── 3429_customers.jpg ├── 3431_customers.jpg ├── 3432_customers.jpg ├── 3436_customers.jpg ├── 34_customers.jpg └── 异常日期.txt ├── rnn ├── test.py ├── test2.py └── test3.py ├── tools ├── backup.py ├── csvloader.py ├── csvloader_largeClass.py ├── dataModify.py └── fileChecker.py ├── v3 ├── May_input.csv ├── Readme.txt ├── Version_3.py ├── commit_empty.csv ├── features.csv ├── five_fold.py ├── five_fold_feature_v3.csv ├── preparedata.py ├── submit.csv ├── test.csv ├── timeseries_customers.csv ├── timeseries_discounts.csv ├── train.csv └── 调参.txt ├── v5 ├── KNN_interface.py ├── Readme.txt ├── Version_5.py ├── commit_empty.csv ├── modify_submit.py ├── submit.csv ├── timeseries_customers_processed.csv └── 调参.txt ├── v6_stacking ├── .idea │ ├── misc.xml │ ├── modules.xml │ ├── v6_stacking.iml │ └── workspace.xml ├── Version6_stacking.py ├── commit_empty.csv ├── cv │ ├── arima01.csv │ ├── arima11.csv │ ├── arima12.csv │ ├── arima_cv.py │ ├── result01.csv │ ├── result11.csv │ ├── result12.csv │ ├── xgboost_cv.csv │ └── xgboost_cv.py ├── five_fold_feature_v3.csv ├── five_fold_feature_xgboost.csv ├── merged_feature.csv ├── prepare_data.py ├── test.csv ├── timeseries_customers.csv └── train.csv └── xgboostPredicter.py
/.gitignore:
--------------------------------------------------------------------------------
1 | 
2 | *.pyc
3 | report
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # BDCI2017
2 | 
3 | 2017 CCF Big Data & Computing Intelligence Contest (BDCI) - supply, sales and inventory management optimization for a small supermarket
4 | 
5 | ## Team members
6 | 
7 | Wang Jun cnwj@mail.ustc.edu.cn
8 | Wang Fei wf314159@mail.ustc.edu.cn
9 | 
10 | ## Algorithm description and results report
11 | 
12 | See report.pdf in the doc folder
13 | 
14 | ## File descriptions
15 | 
16 | ### arimaPredicter.py
17 | 
18 | The encapsulated SARIMA predictor
19 | 
20 | ### xgboostPredicter.py
21 | 
22 | The encapsulated xgboost predictor
23 | 
24 | ### dataLoader.py
25 | 
26 | The encapsulated data-loading class
27 | 
28 | ### data folder
29 | 
30 | Competition data
31 | 
32 | train.csv: the training data provided by the competition
33 | 
34 | example.csv: the sample submission provided by the competition
35 | 
36 | datam.csv: the preprocessed mid-class samples
37 | 
38 | lcdatam.csv: the preprocessed large-class samples
39 | 
40 | submit0.csv, submit1.csv: the two files submitted during the competition
41 | 
42 | ### tools folder
43 | 
44 | Preprocessing utilities
45 | 
46 | ### doc folder
47 | 
48 | Related documents
49 | 
50 | report.pdf: the experiment report
51 | 
52 | manual.py: a usage guide for the SARIMA and xgboost predictors
53 | 
54 | ### arima_knn folder
55 | 
56 | Ensemble learning based on ARIMA and kNN
57 | 
58 | ### arima_xgboost folder
59 | 
60 | Ensemble learning based on ARIMA and xgboost
61 | 
62 | arima_xgboost_multi.py is the ensemble predictor that produced the final predictions
63 | 
64 | ### plot_pic folder
65 | 
66 | Sales-over-time plots
67 | 
68 | ### rnn folder
69 | 
70 | An LSTM-based learner (not encapsulated, not used in the end)
71 | 
72 | ### v3 folder
73 | 
74 | A random-forest-based learner (not encapsulated, not used in the end)
75 | 
76 | ### v5 folder
77 | 
78 | A kNN-based learner (not encapsulated, not used in the end)
79 | 
80 | ### v6_stacking folder
81 | 
82 | A stacking-based ensemble predictor
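## Quick start

A minimal usage sketch, condensed from doc/manual.py; it assumes datam.csv sits in the working directory and uses only the loader and predictor APIs documented there:

```python
import datetime as dt
import dataLoader
import arimaPredicter

# Read the first mid-class series: 200 days of training data, 43 of test data
loader = dataLoader.loader("datam.csv")
loader.setSize(200, 43, 0)
midclass, trainData, trainLabel, testData, testLabel = loader.getNextMidClass()

# Fit a SARIMA model on a daily index starting 2015-01-01, then forecast the test window
ap = arimaPredicter.predicter()
ap.createIndex(dt.datetime(2015, 1, 1), 243)
model = ap.sarimaTrain(trainLabel)
predicted = ap.sarimaPredict(model, 43)
loader.closeFiles()
```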
83 | 84 | 85 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /arimaPredicter.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Wed Nov 15 12:25:37 2017 4 | 5 | @author: wangjun 6 | """ 7 | 8 | from numpy import array 9 | from numpy import log 10 | from numpy import exp 11 | import math 12 | 13 | import datetime as dt 14 | import pandas as pd 15 | from statsmodels.tsa.statespace.sarimax import SARIMAX 16 | import statsmodels.api as sm 17 | 18 | import matplotlib.pylab as plt 19 | from statsmodels.tsa.stattools import adfuller 20 | 21 | class predicter(): 22 | def __init__(self): 23 | self.ParaChoose = {} 24 | self.dtIndex = [] 25 | 26 | def setIndex(self, index): 27 | self.dtIndex = index[:] 28 | 29 | def getIndex(self): 30 | return self.dtIndex 31 | 32 | def createIndex(self, date_from, length): 33 | delta = dt.timedelta(days=1) 34 | now = date_from 35 | self.dtIndex = [] 36 | for i in range(0, length): 37 | self.dtIndex.append(now) 38 | now = now + delta 39 | return self.dtIndex 40 | 41 | def setPara(self, clas, para): 42 | if (type(para)!=tuple or len(para)!=2): 43 | raise TypeError("para should be a tuple (ar, ma)") 44 | self.ParaChoose[clas] = para 45 | 46 | def getPara(self): 47 | return self.ParaChoose 48 | 49 | def test_stationarity(self, timeseries): 50 | #Determining rolling statistics 51 | if (type(timeseries) == list): 52 | length = len(timeseries) 53 | timeseries = pd.Series(timeseries) 54 | timeseries.index = pd.Index(self.dtIndex[0:length]) 55 | elif (type(timeseries) != pd.core.series.Series): 56 | raise TypeError("timeseries should be a list or Series") 57 | rolmean = timeseries.rolling(window=12,center=False).mean() 58 | rolstd = timeseries.rolling(window=12,center=False).std() 59 | 60 | #Plot rolling statistics: 61 | plt.plot(timeseries, color='blue',label='Original') 62 | plt.plot(rolmean, color='red', label='Rolling Mean') 63 | plt.plot(rolstd, color='black', label = 'Rolling Std') 64 | plt.legend(loc='best') 65 | plt.title('Rolling Mean & Standard Deviation') 66 | plt.show(block=False) 67 | 68 | #Perform Dickey-Fuller test: 69 | print('Results of Dickey-Fuller Test:') 70 | dftest = adfuller(timeseries, autolag='AIC') 71 | dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used']) 72 | for key,value in dftest[4].items(): 73 | dfoutput['Critical Value (%s)'%key] = value 74 | print(dfoutput) 75 | 76 | #Get AR and MA parameter 77 | fig = plt.figure(figsize=(12,8)) 78 | ax1=fig.add_subplot(211) 79 | fig = sm.graphics.tsa.plot_acf(timeseries, lags=20, ax=ax1) 80 | ax2 = fig.add_subplot(212) 81 | fig = sm.graphics.tsa.plot_pacf(timeseries, lags=20, ax=ax2) 82 | plt.show(block=False) 83 | 84 | def sarimaTrain(self, trainLabel, 
classNo=0, para=()): 85 | dataLength = len(trainLabel) 86 | data = pd.Series(trainLabel) 87 | for i in range(0, dataLength): 88 | data[i] = log(data[i] + 1) 89 | index = self.dtIndex[0:dataLength] 90 | data.index = pd.Index(index) 91 | 92 | if (len(para) != 2): 93 | try: 94 | (ar, ma) = self.ParaChoose[classNo] 95 | except KeyError: 96 | print("%d: parameter not set, use (1,1) default" % classNo) 97 | (ar, ma) = (1, 1) 98 | return SARIMAX(data, order=(ar,1,ma), seasonal_order=(0,1,1,7)).fit() 99 | else: 100 | return SARIMAX(data, order=(para[0], 1, para[1]), seasonal_order=(0,1,1,7)).fit() 101 | 102 | def sarimaParaSelect(self, classNo, trainLabel, testLabel, useAic=False): 103 | dataLength = len(trainLabel) 104 | data = pd.Series(trainLabel) 105 | for i in range(0, dataLength): 106 | data[i] = log(data[i] + 1) 107 | index = self.dtIndex[0:dataLength] 108 | data.index = pd.Index(index) 109 | 110 | minBias = 99999.0 111 | minAic = 99999.0 112 | (ar, ma) = (0, 0) 113 | label = array(testLabel) 114 | for p, q in [(1, 1), (0, 1), (1, 2), (2, 0), (2, 1), (2, 2)]: 115 | try: 116 | model = SARIMAX(data, order=(p,1,q), seasonal_order=(0,1,1,7)).fit() 117 | output = array(model.forecast(len(testLabel))) 118 | for i in range(0, len(testLabel)): 119 | output[i] = exp(output[i]) - 1 120 | bias = math.sqrt(sum((output-label)*(output-label))/len(testLabel)) 121 | if (bias < minBias and (useAic == False or model.aic < minAic)): 122 | (ar, ma) = (p, q) 123 | minBias = bias 124 | minAic = model.aic 125 | bestOutput = output 126 | except: 127 | pass 128 | 129 | if (minBias < 90000.0): 130 | self.ParaChoose[classNo] = (ar, ma) 131 | return ((ar, ma), bestOutput) 132 | else: 133 | raise ValueError 134 | 135 | def checkBias(self, model, trainLabel): 136 | dataLength = len(trainLabel) 137 | data = pd.Series(trainLabel) 138 | index = self.dtIndex[0:dataLength] 139 | data.index = pd.Index(index) 140 | 141 | pred = model.predict() 142 | plt.plot(data, color='blue',label='Original') 143 | plt.plot(pred, color='red', label='Predicted') 144 | plt.show(block=False) 145 | return list(data - pred) 146 | 147 | @staticmethod 148 | def sarimaPredict(model, predictLength): 149 | output = model.forecast(predictLength) 150 | for i in range(0, predictLength): 151 | output[i] = exp(output[i]) - 1 152 | return array(output) 153 | -------------------------------------------------------------------------------- /arima_knn/KNN_interface.py: -------------------------------------------------------------------------------- 1 | # IDE not support Chinese 2 | 3 | from sklearn.neighbors import KNeighborsRegressor 4 | import numpy as np 5 | import csv 6 | 7 | def knn(data, pred_length, D_window=14, max_k=7): 8 | if pred_length + D_window >= len(data): 9 | print('ERROR: pred_length or D_window too long') 10 | return None 11 | 12 | ret_ypred = [] 13 | for h in range(4): 14 | train_feature, train_label = get_train_set(data, h, D_window, pred_length) 15 | 16 | e_LOO_arr = np.zeros(max_k) 17 | for k in range(2, max_k + 1): 18 | model = KNeighborsRegressor(n_neighbors=k, weights='uniform', algorithm='auto') 19 | model.fit(train_feature, train_label) 20 | 21 | dist_list, index_list = model.kneighbors([data[0 - D_window:]]) 22 | k_neighbor_label = [] 23 | for i in index_list[0]: 24 | k_neighbor_label.append(train_label[i]) 25 | 26 | ypred = model.predict([data[0-D_window:]]) 27 | ypred = np.asarray(list(map(round, ypred[0]))) 28 | 29 | e_LOO_arr[k-1] = LOO(k_neighbor_label, ypred, k) 30 | 31 | k_min = np.argmin(e_LOO_arr[1:]) + 2 32 | model = 
KNeighborsRegressor(n_neighbors=k_min, weights='uniform', algorithm='auto') 33 | model.fit(train_feature, train_label) 34 | ypred = model.predict([data[0 - D_window:]]) 35 | ret_ypred += list(map(round, ypred[0])) 36 | 37 | return np.asarray(ret_ypred) 38 | 39 | 40 | def get_train_set(train_data, h, D, pred_length): 41 | feature, label = [], [] 42 | block_len = int(pred_length / 4) 43 | if h != 3: 44 | for i in range(len(train_data) - D - block_len * (h + 1) + 1): 45 | feature.append(train_data[i:i + D]) 46 | label.append(train_data[i + D + block_len * h:i + D + block_len * h + block_len]) 47 | else: 48 | for i in range(len(train_data) - D - pred_length + 1): 49 | feature.append(train_data[i:i + D]) 50 | label.append(train_data[i + D + 3 * block_len:i + D + pred_length]) 51 | return np.array(feature), np.array(label) 52 | 53 | def LOO(k_neighbor_label, ypred, k): 54 | ret = 0 55 | for neighbor in k_neighbor_label: 56 | ret = ret + ((neighbor - ypred) ** 2).sum() 57 | ret = ret * k / (k - 1)**2 58 | # ret = ret / (k)**2 59 | return ret 60 | 61 | 62 | def test(): 63 | with open('timeseries_customers_processed.csv') as input_file: 64 | input_csv = csv.reader(input_file) 65 | next(input_csv) 66 | row = next(input_csv) 67 | data = list(map(float, row[1:])) 68 | print(knn(data, 30)) 69 | 70 | 71 | if __name__ == '__main__': 72 | test() 73 | -------------------------------------------------------------------------------- /arima_knn/KNN_interface.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/arima_knn/KNN_interface.pyc --------------------------------------------------------------------------------
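KNN_interface.knn is the reusable entry point of this approach: it uses the last D_window days of the series as the query vector and trains four MIMO kNN regressors, one per horizon block (7/7/7/9 days for a 30-day forecast), choosing each block's k by the leave-one-out estimate above. A minimal sketch of calling it on synthetic data (the synthetic series is only an illustration, not repo data):

    import numpy as np
    import KNN_interface

    # 120 days of fake sales with a weekly cycle, just to exercise the API
    rng = np.random.default_rng(0)
    data = list(10 + 5 * np.sin(np.arange(120) * 2 * np.pi / 7) + rng.random(120))

    # Forecast the next 30 days; requires pred_length + D_window < len(data)
    pred = KNN_interface.knn(data, 30, D_window=14, max_k=7)
    print(pred)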
/arima_knn/Version_5.py:
--------------------------------------------------------------------------------
1 | import csv
2 | from sklearn.neighbors import KNeighborsRegressor
3 | import numpy as np
4 | 
5 | from modify_submit import change_pred
6 | 
7 | 
8 | def main_fun():
9 |     class_codes = ['1201', '2011', '12', '15', '20', '22', '23', '30']
10 |     with open('timeseries_customers_processed.csv') as input_file:
11 |         input_csv = csv.reader(input_file)
12 |         next(input_csv)
13 |         for row in input_csv:
14 |             if row[0] in class_codes:
15 |                 # MIMO_KNN_test(row)
16 |                 # MIMO_KNN_LOO_test(row)
17 |                 MIMO_KNN_LOO_May(row)
18 | 
19 | 
20 | # Split off a test set to try different parameters (D_window, k); does not predict May sales
21 | def MIMO_KNN_test(data):
22 |     code = data[0]
23 |     data = list(map(float, data[1:]))
24 |     train_data = data[:90]
25 |     test_data = data[90:]
26 | 
27 |     # Train one model per horizon block; the blocks are 7, 7, 7 and 9 days long
28 |     D_window = 14
29 |     for h in range(4):
30 |         train_feature, train_label = get_train_set(train_data, h, D_window)
31 |         y_label = get_test_label(test_data, h)
32 | 
33 |         for k in range(1, 8):
34 |             model = KNeighborsRegressor(n_neighbors=k, weights='uniform', algorithm='auto')
35 |             model.fit(train_feature, train_label)
36 | 
37 |             ypred = model.predict([train_data[0-D_window:]])
38 |             ypred = np.array(list(map(round, ypred[0])))
39 | 
40 |             rmse = np.sqrt(((ypred - y_label) ** 2).mean())
41 |             print(code, ' h=', h, ' k=', k, ' rmse=', rmse)
42 | 
43 | 
44 | # Split off a test set and implement the method from the paper; does not predict May sales
45 | def MIMO_KNN_LOO_test(data):
46 |     code = data[0]
47 |     data = list(map(float, data[1:]))
48 |     train_data = data[:90]
49 |     test_data = data[90:]
50 | 
51 |     # Train one model per horizon block; the blocks are 7, 7, 7 and 9 days long
52 |     D_window = 14
53 |     max_k = 7
54 |     for h in range(4):
55 |         train_feature, train_label = get_train_set(train_data, h, D_window)
56 |         y_label = get_test_label(test_data, h)
57 | 
58 |         e_LOO_arr = np.zeros(max_k)
59 |         for k in range(2, max_k + 1):
60 |             model = KNeighborsRegressor(n_neighbors=k, weights='uniform', algorithm='auto')
61 |             model.fit(train_feature, train_label)
62 | 
63 |             # Get the k nearest neighbors
64 |             dist_list, index_list = model.kneighbors([train_data[0 - D_window:]])
65 |             k_neighbor_label = []
66 |             for i in index_list[0]:
67 |                 k_neighbor_label.append(train_label[i])
68 | 
69 |             # Prediction based on the k nearest neighbors
70 |             ypred = model.predict([train_data[0-D_window:]])
71 |             ypred = np.asarray(list(map(round, ypred[0])))
72 |             rmse = np.sqrt(((ypred - y_label) ** 2).mean())
73 |             print(code, ' h=', h, ' k=', k, ' rmse=', rmse)
74 | 
75 |             # Compute e_LOO
76 |             e_LOO_arr[k-1] = LOO(k_neighbor_label, ypred, k)
77 | 
78 |         # Take the k with the smallest e_LOO
79 |         k_min = np.argmin(e_LOO_arr[1:]) + 2
80 |         print('k_min=', k_min)
81 | 
82 | 
83 | # Use the whole data set, implement the method from the paper, and predict May sales
84 | def MIMO_KNN_LOO_May(data):
85 |     code = data[0]
86 |     data = list(map(float, data[1:]))
87 | 
88 |     D_window = 14
89 |     max_k = 7
90 |     pred_May = []
91 |     for h in range(4):
92 |         train_feature, train_label = get_train_set(data, h, D_window)
93 |         e_LOO_arr = np.zeros(max_k)
94 |         for k in range(2, max_k + 1):
95 |             model = KNeighborsRegressor(n_neighbors=k, weights='uniform', algorithm='auto')
96 |             model.fit(train_feature, train_label)
97 | 
98 |             # Get the k nearest neighbors
99 |             dist_list, index_list = model.kneighbors([data[0 - D_window:]])
100 |             k_neighbor_label = []
101 |             for i in index_list[0]:
102 |                 k_neighbor_label.append(train_label[i])
103 | 
104 |             # Prediction based on the k nearest neighbors
105 |             ypred = model.predict([data[0 - D_window:]])
106 |             ypred = np.asarray(list(map(round, ypred[0])))
107 | 
108 |             # Compute e_LOO
109 |             e_LOO_arr[k - 1] = LOO(k_neighbor_label, ypred, k)
110 | 
111 |         # Take the k with the smallest e_LOO
112 |         k_min = np.argmin(e_LOO_arr[1:]) + 2
113 | 
114 |         # Set k = k_min and predict
115 |         model = KNeighborsRegressor(n_neighbors=k_min, weights='uniform', algorithm='auto')
116 |         model.fit(train_feature, train_label)
117 |         ypred = model.predict([data[0 - D_window:]])
118 |         ypred = list(map(round, ypred[0]))
119 |         pred_May = pred_May + ypred
120 | 
121 |     print(pred_May)
122 |     # Replace the predicted values for class code in the file
123 |     change_pred(code, pred_May)
124 | 
125 | 
126 | # Compute LOO, used to choose k (the number of neighbors)
127 | def LOO(k_neighbor_label, ypred, k):
128 |     ret = 0
129 |     for neighbor in k_neighbor_label:
130 |         ret = ret + ((neighbor - ypred) ** 2).sum()
131 |     ret = ret * k / (k - 1)**2
132 |     # ret = ret / (k)**2
133 |     return ret
134 | 
135 | 
136 | def get_train_set(train_data, h, D):
137 |     feature, label = [], []
138 |     if h != 3:
139 |         for i in range(len(train_data) - D - 7 * (h+1) + 1):
140 |             feature.append(train_data[i:i+D])
141 |             label.append(train_data[i+D+7*h:i+D+7*h+7])
142 |     else:
143 |         for i in range(len(train_data) - D - 30 + 1):
144 |             feature.append(train_data[i:i+D])
145 |             label.append(train_data[i+D+21:i+D+30])
146 |     return np.array(feature), np.array(label)
147 | 
148 | 
149 | def get_test_label(test_data, h):
150 |     if h != 3:
151 |         return test_data[7*h:7*h+7]
152 |     else:
153 |         return test_data[21:]
154 | 
155 | 
156 | if __name__ == '__main__':
157 |     main_fun()
-------------------------------------------------------------------------------- /arima_knn/arima_knn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Spyder Editor 4 | 5 | This is a temporary script file.
6 | """ 7 | 8 | from numpy import array 9 | from numpy import zeros 10 | import csv 11 | import math 12 | import datetime as dt 13 | 14 | import arimaPredicter 15 | import dataLoader 16 | import KNN_interface 17 | 18 | larclasPred = {} 19 | larclasLabl = {} 20 | totalBias = 0 21 | totalCount = 0 22 | 23 | dtIndex = [dt.datetime(2015,1,x) for x in range(1, 32)] 24 | dtIndex = dtIndex + [dt.datetime(2015,2,x) for x in (range(1, 29))] 25 | dtIndex = dtIndex + [dt.datetime(2015,3,x) for x in range(1, 32)] 26 | dtIndex = dtIndex + [dt.datetime(2015,4,x) for x in (range(1, 31))] 27 | 28 | modelChoose = [] 29 | lcModelChoose = [] 30 | 31 | ap = arimaPredicter.predicter() 32 | ap.setIndex(dtIndex) 33 | 34 | def dataLog(midclass, accuracy, trainLabl, testPred, testLabl): 35 | with open('compare.csv', 'ab') as f: 36 | writer = csv.writer(f) 37 | count = 1 38 | writer.writerow([midclass, accuracy]) 39 | for x in trainLabl: 40 | writer.writerow([count, x]) 41 | count += 1 42 | for x in range(0, len(testPred)): 43 | writer.writerow([count, testLabl[x], testPred[x]]) 44 | count += 1 45 | 46 | def modelselect(trainSize, testSize, skipSize = 0): 47 | global larclasPred, totalBias, totalCount, modelChoose, lcModelChoose, ap 48 | larclasPred = {} 49 | totalBias = 0 50 | totalCount = 0 51 | modelChoose = [] 52 | lcModelChoose = [] 53 | 54 | loader = dataLoader.loader("datam.csv", "lcdatam.csv") 55 | loader.setSize(trainSize, testSize, skipSize) 56 | 57 | # middle class 58 | while (True): 59 | midclass, trD, trL, _, teL = loader.getNextMidClass() 60 | if (midclass == 0): 61 | break 62 | else: 63 | # sarima model 64 | try: 65 | model = ap.sarimaTrain(midclass, trL, teL) 66 | teP1 = ap.sarimaPredict(model, testSize) 67 | except: 68 | teP1 = zeros(testSize) 69 | 70 | # kNN model 71 | try: 72 | teP2 = KNN_interface.knn(trL, testSize) 73 | except: 74 | print("Warning: kNN train fail") 75 | teP2 = zeros(testSize) 76 | 77 | # just zero 78 | teP3 = zeros(testSize) 79 | 80 | # count bias of midclass and update larclass 81 | label = array(teL) 82 | larclass = int(midclass/100) 83 | totalCount += testSize 84 | 85 | bias1 = sum((teP1-label)*(teP1-label)) 86 | bias2 = sum((teP2-label)*(teP2-label)) 87 | bias3 = sum((teP3-label)*(teP3-label)) 88 | if (bias3 <= bias1 and bias3 <= bias2): 89 | totalBias += bias3 90 | bias3 = math.sqrt(bias3/testSize) 91 | print "(Midclass %d select ZERO, accuracy: %f)" % (midclass, bias3) 92 | modelChoose.append(3) 93 | if (larclass in larclasPred): 94 | larclasPred[larclass] += teP3 95 | else: 96 | larclasPred[larclass] = teP3 97 | elif (bias1 <= bias2): 98 | totalBias += bias1 99 | bias1 = math.sqrt(bias1/testSize) 100 | print "(Midclass %d select SARIMA, accuracy: %f)" % (midclass, bias1) 101 | modelChoose.append(1) 102 | if (larclass in larclasPred): 103 | larclasPred[larclass] += teP1 104 | else: 105 | larclasPred[larclass] = teP1 106 | else: 107 | totalBias += bias2 108 | bias2 = math.sqrt(bias2/testSize) 109 | print "(Midclass %d select kNN, accuracy: %f)" % (midclass, bias2) 110 | modelChoose.append(2) 111 | if (larclass in larclasPred): 112 | larclasPred[larclass] += teP2 113 | else: 114 | larclasPred[larclass] = teP2 115 | 116 | # large class 117 | while (True): 118 | larclass, trD, trL, _, teL = loader.getNextLarClass() 119 | if (larclass == 0): 120 | break 121 | else: 122 | # sarima model 123 | try: 124 | model = ap.sarimaTrain(larclass, trL, teL) 125 | teP1 = ap.sarimaPredict(model, testSize) 126 | except: 127 | teP1 = zeros(testSize) 128 | 129 | # knn model 130 | try: 131 | 
teP2 = KNN_interface.knn(trL, testSize) 132 | except: 133 | print("Warning: kNN train fail") 134 | teP2 = zeros(testSize) 135 | 136 | # sum of midclasses 137 | teP3 = larclasPred[larclass] 138 | 139 | # count bias of midclass and update larclass 140 | label = array(teL) 141 | totalCount += testSize 142 | 143 | bias1 = sum((teP1-label)*(teP1-label)) 144 | bias2 = sum((teP2-label)*(teP2-label)) 145 | bias3 = sum((teP3-label)*(teP3-label)) 146 | if (bias3 <= bias1 and bias3 <= bias2): 147 | totalBias += bias3 148 | bias3 = math.sqrt(bias3/testSize) 149 | print "(Larclass %d select SUM, accuracy: %f)" % (larclass, bias3) 150 | lcModelChoose.append(3) 151 | elif (bias1 <= bias2): 152 | totalBias += bias1 153 | bias1 = math.sqrt(bias1/testSize) 154 | print "(Larclass %d select SARIMA, accuracy: %f)" % (larclass, bias1) 155 | lcModelChoose.append(1) 156 | else: 157 | totalBias += bias2 158 | bias2 = math.sqrt(bias2/testSize) 159 | print "(Larclass %d select kNN, accuracy: %f)" % (larclass, bias2) 160 | lcModelChoose.append(2) 161 | 162 | totalBias = math.sqrt(totalBias/totalCount) 163 | print "(Predict finished, accuracy: %f)" % (totalBias) 164 | loader.closeFiles() 165 | 166 | def submit(trainSize): 167 | global larclasPred, ap 168 | larclasPred = {} 169 | 170 | f1 = open("submit.csv", "r") 171 | submit_csv = csv.reader(f1) 172 | submit_csv.next() 173 | f2 = open('submit1.csv', 'wb') 174 | writer = csv.writer(f2) 175 | 176 | loader = dataLoader.loader("datam.csv", "lcdatam.csv") 177 | loader.setSize(trainSize) 178 | 179 | # middle class 180 | current = 0 181 | while (True): 182 | midclass, trD, trL, teD, teL = loader.getNextMidClass() 183 | if (midclass == 0): 184 | break 185 | else: 186 | if (modelChoose[current] == 1): 187 | try: 188 | model = ap.sarimaTrain(midclass, trL) 189 | teP = ap.sarimaPredict(model, 30) 190 | except: 191 | print("%d: failed to use arima, use kNN instead" % midclass) 192 | teP = KNN_interface.knn(trL, 30) 193 | elif (modelChoose[current] == 2): 194 | teP = KNN_interface.knn(trL, 30) 195 | else: 196 | teP = zeros(30) 197 | current += 1 198 | 199 | for x in teP: 200 | x_int = round(x) 201 | row = submit_csv.next() 202 | if (int(row[0]) != midclass): 203 | raise KeyError 204 | writer.writerow([row[0], row[1], x_int]) 205 | 206 | # count larclass 207 | larclass = int(midclass/100) 208 | if (larclass in larclasPred): 209 | larclasPred[larclass] += teP 210 | else: 211 | larclasPred[larclass] = teP 212 | 213 | # large class 214 | current = 0 215 | while (True): 216 | larclass, trD, trL, teD, teL = loader.getNextLarClass() 217 | if (larclass == 0): 218 | break 219 | else: 220 | if (lcModelChoose[current] == 1): 221 | try: 222 | model = ap.sarimaTrain(larclass, trL) 223 | teP = ap.sarimaPredict(model, 30) 224 | except: 225 | print("%d: failed to use arima, use kNN instead" % larclass) 226 | teP = KNN_interface.knn(trL, 30) 227 | elif (lcModelChoose[current] == 2): 228 | teP = KNN_interface.knn(trL, 30) 229 | else: 230 | teP = larclasPred[larclass] 231 | current += 1 232 | 233 | # write file - larclass 234 | for x in teP: 235 | x_int = round(x) 236 | row = submit_csv.next() 237 | if (int(row[0]) != larclass): 238 | raise KeyError 239 | writer.writerow([row[0], row[1], x_int]) 240 | 241 | f1.close() 242 | f2.close() 243 | loader.closeFiles() 244 | 245 | modelselect(75, 30, 15) 246 | """ 247 | with open("report.txt", "w") as f: 248 | for clas in arimaParaChoose: 249 | f.writelines("class %d: (%d,%d)\n" % (clas, arimaParaChoose[clas][0], arimaParaChoose[clas][1])) 250 | """ 251 | 
submit(120)
--------------------------------------------------------------------------------
/arima_knn/modify_submit.py:
--------------------------------------------------------------------------------
1 | import csv
2 | 
3 | 
4 | # Read the original prediction file, round the predictions, and write them back
5 | def get_round():
6 |     rows = []
7 |     with open('submit.csv') as input_file:
8 |         input_csv = csv.reader(input_file)
9 |         rows.append(next(input_csv))
10 |         for row in input_csv:
11 |             row[2] = str(int(round(float(row[2]))))
12 |             rows.append(row)
13 |     with open('submit.csv', 'w', newline='') as output_file:
14 |         output_csv = csv.writer(output_file)
15 |         for row in rows:
16 |             output_csv.writerow(row)
17 | 
18 | 
19 | # Replace the predictions of the class whose code equals code with pred in the prediction file
20 | def change_pred(code, pred):
21 |     rows = []
22 |     file_name = 'submit_WJ_2.csv'
23 |     with open(file_name) as input_file:
24 |         input_csv = csv.reader(input_file)
25 |         rows.append(next(input_csv))
26 |         i = 0
27 |         for row in input_csv:
28 |             if row[0] == code:
29 |                 rows.append([code, row[1], str(pred[i])])
30 |                 i += 1
31 |             else:
32 |                 rows.append(row)
33 |     with open(file_name, 'w', newline='') as output_file:
34 |         output_csv = csv.writer(output_file)
35 |         for row in rows:
36 |             output_csv.writerow(row)
37 | 
38 | 
39 | if __name__ == '__main__':
40 |     get_round()
41 | 
-------------------------------------------------------------------------------- /arima_xgboost/arima_xgboost.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Spyder Editor 4 | 5 | This is a temporary script file. 6 | """ 7 | 8 | import xgboost as xgb 9 | import arimaPredicter 10 | import dataLoader 11 | 12 | from numpy import array 13 | from numpy import zeros 14 | import csv 15 | import math 16 | import datetime as dt 17 | 18 | ap = arimaPredicter.predicter() 19 | ap.createIndex(dt.datetime(2015,1,1), 243) 20 | 21 | modelChoose = {} 22 | 23 | def dataLog(midclass, accuracy, trainLabl, testPred, testLabl): 24 | with open('compare.csv', 'ab') as f: 25 | writer = csv.writer(f) 26 | count = 1 27 | writer.writerow([midclass, accuracy]) 28 | for x in trainLabl: 29 | writer.writerow([count, x]) 30 | count += 1 31 | for x in range(0, len(testPred)): 32 | writer.writerow([count, testLabl[x], testPred[x]]) 33 | count += 1 34 | 35 | def xgboostPredict(trainData, trainLabel, dataToPredict, 36 | params = {"objective":"reg:linear", "max_depth":1, "gamma":2}): 37 | dtrain = xgb.DMatrix(trainData, trainLabel) 38 | gbm = xgb.train(dtrain=dtrain, params=params) 39 | return gbm.predict(xgb.DMatrix(dataToPredict)) 40 | 41 | def simulateFeature(trainData, musk): 42 | for feature in trainData: 43 | for i in musk: 44 | feature[i] = 0 45 | 46 | def createFeature(date_from, length, zeros, DictHoilday, DictBeforeHoilday, 47 | DictWorkday): 48 | delta = dt.timedelta(days=1) 49 | now = date_from 50 | index = [] 51 | for i in range(0, length): 52 | index.append(now) 53 | now = now + delta 54 | feature = [] 55 | empty = [0 for x in range(0, zeros+4)] 56 | for i in range(0, length): 57 | x = empty[:] 58 | x[0] = index[i].day 59 | x[1] = (index[i].weekday() + 1) % 7 60 | dayCount = i + 1 61 | if (dayCount in DictHoilday): 62 | x[3] = 1 63 | elif (dayCount in DictBeforeHoilday): 64 | x[2] = 1 65 | elif (dayCount in DictWorkday): 66 | if (x[1]==6 or ((dayCount+1) in DictHoilday)): 67 | x[2] = 1 68 | elif (x[1]==0 or x[1]==6): 69 | x[3] = 1 70 | elif (x[1]==5): 71 | x[2] = 1 72 | feature.append(x) 73 | return feature 74 | 75 | def setModel(clas, model): 76 | global modelChoose 77 | if (clas not in 
modelChoose): 78 | modelChoose[clas] = model 79 | elif (model < modelChoose[clas]): 80 | modelChoose[clas] = model 81 | 82 | def modelselect(trainSize, testSize, skipSize = 0): 83 | larclasPred = {} 84 | totalBias = 0 85 | totalCount = 0 86 | 87 | loader = dataLoader.loader("datam.csv", "lcdatam.csv") 88 | loader.setSize(trainSize, testSize, skipSize) 89 | 90 | # middle class 91 | while (True): 92 | midclass, trD, trL, teD, teL = loader.getNextMidClass() 93 | if (midclass == 0): 94 | break 95 | else: 96 | 97 | # sarima model 98 | try: 99 | model = ap.sarimaTrain(midclass, trL, teL) 100 | teP1 = ap.sarimaPredict(model, testSize) 101 | except: 102 | teP1 = zeros(testSize) 103 | 104 | # xgboost model 105 | simulateFeature(teD, [-2, -1]) 106 | try: 107 | teP2 = xgboostPredict(array(trD), array(trL), array(teD)) 108 | except: 109 | teP2 = zeros(testSize) 110 | 111 | # just zero 112 | teP3 = zeros(testSize) 113 | 114 | # count bias of midclass and update larclass 115 | label = array(teL) 116 | larclass = int(midclass/100) 117 | totalCount += testSize 118 | 119 | bias1 = sum((teP1-label)*(teP1-label)) 120 | bias2 = sum((teP2-label)*(teP2-label)) 121 | bias3 = sum((teP3-label)*(teP3-label)) 122 | if (bias3 <= bias1 and bias3 <= bias2): 123 | totalBias += bias3 124 | bias3 = math.sqrt(bias3/testSize) 125 | print "(Midclass %d select ZERO, accuracy: %f)" % (midclass, bias3) 126 | setModel(midclass, 3) 127 | if (larclass in larclasPred): 128 | larclasPred[larclass] += teP3 129 | else: 130 | larclasPred[larclass] = teP3 131 | elif (bias1 <= bias2): 132 | totalBias += bias1 133 | bias1 = math.sqrt(bias1/testSize) 134 | print "(Midclass %d select SARIMA, accuracy: %f)" % (midclass, bias1) 135 | setModel(midclass, 1) 136 | if (larclass in larclasPred): 137 | larclasPred[larclass] += teP1 138 | else: 139 | larclasPred[larclass] = teP1 140 | else: 141 | totalBias += bias2 142 | bias2 = math.sqrt(bias2/testSize) 143 | print "(Midclass %d select XGBOOST, accuracy: %f)" % (midclass, bias2) 144 | setModel(midclass, 2) 145 | if (larclass in larclasPred): 146 | larclasPred[larclass] += teP2 147 | else: 148 | larclasPred[larclass] = teP2 149 | 150 | # large class 151 | while (True): 152 | larclass, trD, trL, teD, teL = loader.getNextLarClass() 153 | if (larclass == 0): 154 | break 155 | else: 156 | 157 | # sarima model 158 | try: 159 | model = ap.sarimaTrain(larclass, trL, teL) 160 | teP1 = ap.sarimaPredict(model, testSize) 161 | except: 162 | teP1 = zeros(testSize) 163 | 164 | # xgboost model 165 | simulateFeature(teD, [-2, -1]) 166 | try: 167 | teP2 = xgboostPredict(array(trD), array(trL), array(teD)) 168 | except: 169 | teP2 = zeros(testSize) 170 | 171 | # sum of midclasses 172 | try: 173 | teP3 = larclasPred[larclass] 174 | except: 175 | teP3 = zeros(testSize) 176 | 177 | # count bias of midclass and update larclass 178 | label = array(teL) 179 | totalCount += testSize 180 | 181 | bias1 = sum((teP1-label)*(teP1-label)) 182 | bias2 = sum((teP2-label)*(teP2-label)) 183 | bias3 = sum((teP3-label)*(teP3-label)) 184 | if (bias3 <= bias1 and bias3 <= bias2): 185 | totalBias += bias3 186 | bias3 = math.sqrt(bias3/testSize) 187 | print "(Larclass %d select SUM, accuracy: %f)" % (larclass, bias3) 188 | setModel(larclass, 3) 189 | elif (bias1 <= bias2): 190 | totalBias += bias1 191 | bias1 = math.sqrt(bias1/testSize) 192 | print "(Larclass %d select SARIMA, accuracy: %f)" % (larclass, bias1) 193 | setModel(larclass, 1) 194 | else: 195 | totalBias += bias2 196 | bias2 = math.sqrt(bias2/testSize) 197 | print "(Larclass 
%d select XGBOOST, accuracy: %f)" % (larclass, bias2) 198 | setModel(larclass, 2) 199 | 200 | totalBias = math.sqrt(totalBias/totalCount) 201 | print "(Predict finished, accuracy: %f)" % (totalBias) 202 | loader.closeFiles() 203 | 204 | def submit(trainSize): 205 | global larclasPred 206 | larclasPred = {} 207 | f1 = open("example.csv", "r") 208 | submit_csv = csv.reader(f1) 209 | row = submit_csv.next() 210 | f2 = open('submit.csv', 'wb') 211 | writer = csv.writer(f2) 212 | writer.writerow(row) 213 | 214 | loader = dataLoader.loader("datam.csv", "lcdatam.csv") 215 | loader.setSize(trainSize) 216 | 217 | preDate = range(0, 9) + range(10, 59) 218 | 219 | # middle class 220 | goal = createFeature(dt.datetime(2015,9,1), 59, 2, 221 | range(31, 38), [30], [39, 40]) 222 | 223 | while (True): 224 | midclass, trD, trL, teD, teL = loader.getNextMidClass() 225 | if (midclass == 0): 226 | break 227 | else: 228 | if (modelChoose[midclass] == 1): 229 | try: 230 | model = ap.sarimaTrain(midclass, trL) 231 | teP = ap.sarimaPredict(model, 59) 232 | except: 233 | print("%d: failed to use arima, use xgboost instead" % midclass) 234 | teP = xgboostPredict(array(trD), array(trL), array(goal)) 235 | elif (modelChoose[midclass] == 2): 236 | teP = xgboostPredict(array(trD), array(trL), array(goal)) 237 | else: 238 | teP = zeros(59) 239 | 240 | for i in preDate: 241 | x_int = round(teP[i]) 242 | if (x_int < 0): 243 | x_int = 0 244 | row = submit_csv.next() 245 | if (int(row[0]) != midclass): 246 | raise KeyError 247 | writer.writerow([row[0], row[1], x_int]) 248 | 249 | # count larclass 250 | larclass = int(midclass/100) 251 | if (larclass in larclasPred): 252 | larclasPred[larclass] += teP 253 | else: 254 | larclasPred[larclass] = teP 255 | 256 | # large class 257 | goal = createFeature(dt.datetime(2015,9,1), 59, 1, 258 | range(31, 38), [30], [39, 40]) 259 | 260 | while (True): 261 | larclass, trD, trL, teD, teL = loader.getNextLarClass() 262 | if (larclass == 0): 263 | break 264 | else: 265 | if (modelChoose[larclass] == 1): 266 | try: 267 | model = ap.sarimaTrain(larclass, trL) 268 | teP = ap.sarimaPredict(model, 59) 269 | except: 270 | print("%d: failed to use arima, use xgboost instead" % larclass) 271 | teP = xgboostPredict(array(trD), array(trL), array(goal)) 272 | elif (modelChoose[larclass] == 2): 273 | teP = xgboostPredict(array(trD), array(trL), array(goal)) 274 | else: 275 | try: 276 | teP = larclasPred[larclass] 277 | except: 278 | teP = zeros(59) 279 | 280 | # write file - midclass 281 | for i in preDate: 282 | x_int = round(teP[i]) 283 | if (x_int < 0): 284 | x_int = 0 285 | row = submit_csv.next() 286 | if (int(row[0]) != larclass): 287 | raise KeyError 288 | writer.writerow([row[0], row[1], x_int]) 289 | 290 | f1.close() 291 | f2.close() 292 | loader.closeFiles() 293 | 294 | modelselect(200, 43, 0) 295 | para = ap.getPara() 296 | submit(243) -------------------------------------------------------------------------------- /arima_xgboost/arima_xgboost_multi.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Spyder Editor 4 | 5 | This is a temporary script file. 
6 | """ 7 | 8 | import arimaPredicter 9 | import dataLoader 10 | import xgboostPredicter 11 | 12 | from numpy import array 13 | from numpy import zeros 14 | import csv 15 | import math 16 | import datetime as dt 17 | 18 | aps = [] 19 | for i in range(0, 3): 20 | ap = arimaPredicter.predicter() 21 | ap.createIndex(dt.datetime(2015,1,1), 243) 22 | aps.append(ap) 23 | 24 | xgp = xgboostPredicter.predicter() 25 | 26 | modelChoose = {} 27 | 28 | def dataLog(midclass, accuracy, trainLabl, testPred, testLabl): 29 | with open('compare.csv', 'ab') as f: 30 | writer = csv.writer(f) 31 | count = 1 32 | writer.writerow([midclass, accuracy]) 33 | for x in trainLabl: 34 | writer.writerow([count, x]) 35 | count += 1 36 | for x in range(0, len(testPred)): 37 | writer.writerow([count, testLabl[x], testPred[x]]) 38 | count += 1 39 | 40 | def setModel(clas, model): 41 | global modelChoose 42 | if (clas not in modelChoose): 43 | modelChoose[clas] = [model] 44 | elif (model < modelChoose[clas]): 45 | modelChoose[clas].append(model) 46 | 47 | def trainAndCompare(ap, clas, trD, trL, teD, teL, teP3): 48 | testSize = len(teL) 49 | # sarima model 50 | try: 51 | (_, teP1) = ap.sarimaParaSelect(clas, trL, teL) 52 | except: 53 | teP1 = zeros(testSize) 54 | 55 | # xgboost model 56 | xgp.simulateFeature(teD, [-2, -1]) 57 | try: 58 | model = xgp.xgboostTrain(trD, trL) 59 | teP2 = xgp.xgboostPredict(model, teD) 60 | except: 61 | teP2 = zeros(testSize) 62 | 63 | label = array(teL) 64 | bias1 = sum((teP1-label)*(teP1-label)) 65 | bias2 = sum((teP2-label)*(teP2-label)) 66 | bias3 = sum((teP3-label)*(teP3-label)) 67 | if (bias3 <= bias1 and bias3 <= bias2): 68 | return (3, bias3, teP3) 69 | elif (bias1 <= bias2): 70 | return (1, bias1, teP1) 71 | else: 72 | return (2, bias2, teP2) 73 | 74 | def modelselect(ap, trainSize, testSize, skipSize = 0): 75 | larclasPred = {} 76 | totalBias = 0 77 | totalCount = 0 78 | 79 | loader = dataLoader.loader("datam.csv", "lcdatam.csv") 80 | loader.setSize(trainSize, testSize, skipSize) 81 | 82 | # middle class 83 | while (True): 84 | midclass, trD, trL, teD, teL = loader.getNextMidClass() 85 | if (midclass == 0): 86 | break 87 | else: 88 | (model, bias, teP) = trainAndCompare(ap, midclass, trD, trL, teD, teL, zeros(testSize)) 89 | 90 | larclass = int(midclass/100) 91 | totalCount += testSize 92 | totalBias += bias 93 | bias = math.sqrt(bias/testSize) 94 | print("(Midclass %d select model %d, accuracy: %f)" % (midclass, model, bias)) 95 | setModel(midclass, model) 96 | if (larclass in larclasPred): 97 | larclasPred[larclass] += teP 98 | else: 99 | larclasPred[larclass] = teP 100 | 101 | # large class 102 | while (True): 103 | larclass, trD, trL, teD, teL = loader.getNextLarClass() 104 | if (larclass == 0): 105 | break 106 | else: 107 | if (larclass in larclasPred): 108 | (model, bias, teP) = trainAndCompare(ap, larclass, trD, trL, teD, teL, larclasPred[larclass]) 109 | else: 110 | (model, bias, teP) = trainAndCompare(ap, larclass, trD, trL, teD, teL, zeros(testSize)) 111 | 112 | totalCount += testSize 113 | totalBias += bias 114 | bias = math.sqrt(bias/testSize) 115 | print("(Larclass %d select model %d, accuracy: %f)" % (larclass, model, bias)) 116 | setModel(larclass, model) 117 | 118 | totalBias = math.sqrt(totalBias/totalCount) 119 | print("(Predict finished, accuracy: %f)" % (totalBias)) 120 | loader.closeFiles() 121 | 122 | def writeClass(clas, result, dates, checker, writer): 123 | for i in dates: 124 | x_int = round(result[i]) 125 | if (x_int < 0): 126 | x_int = 0 127 | row = 
/arima_xgboost/statanalysis.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Spyder Editor 4 | 5 | This is a temporary script file.
6 | """ 7 | 8 | import datetime as dt 9 | import pandas as pd 10 | import numpy as np 11 | 12 | import csv 13 | import math 14 | import arimaPredicter 15 | 16 | temp = [] 17 | 18 | index = [dt.datetime(2015,1,x) for x in range(1, 32)] 19 | index = index + [dt.datetime(2015,2,x) for x in (range(1 ,29))] 20 | index = index + [dt.datetime(2015,3,x) for x in range(1, 32)] 21 | index = index + [dt.datetime(2015,4,x) for x in range(1, 31)] 22 | 23 | def getData(csvReader, trainCount, testCount): 24 | trainData = [] 25 | testData = [] 26 | trainLabel = [] 27 | testLabel = [] 28 | try: 29 | for x in range(0, trainCount): 30 | row = next(csvReader) 31 | """ 32 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]), 33 | float(row[7]), float(row[8]), float(row[9]), float(row[10]), 34 | float(row[11]), float(row[12])] 35 | """ 36 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]), 37 | float(row[7]), float(row[8])] 38 | trainData.append(data) 39 | trainLabel.append(float(row[15])) 40 | for x in range(0, testCount): 41 | row = next(csvReader) 42 | """ 43 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]), 44 | float(row[7]), float(row[8]), float(row[9]), float(row[10]), 45 | float(row[11]), float(row[12])] 46 | """ 47 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]), 48 | float(row[7]), float(row[8])] 49 | testData.append(data) 50 | testLabel.append(float(row[15])) 51 | return int(row[0]), trainData, trainLabel, testData, testLabel 52 | except StopIteration: 53 | return 0, [], [], [], [] 54 | 55 | def getLCData(csvReader, trainCount, testCount): 56 | trainData = [] 57 | testData = [] 58 | trainLabel = [] 59 | testLabel = [] 60 | try: 61 | for x in range(0, trainCount): 62 | row = next(csvReader) 63 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]), 64 | float(row[7])] 65 | trainData.append(data) 66 | trainLabel.append(float(row[14])) 67 | for x in range(0, testCount): 68 | row = next(csvReader) 69 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]), 70 | float(row[7])] 71 | testData.append(data) 72 | testLabel.append(float(row[14])) 73 | return int(row[0]), trainData, trainLabel, testData, testLabel 74 | except StopIteration: 75 | return 0, [], [], [], [] 76 | 77 | def getBias(label, pred): 78 | a1 = np.array(label) 79 | a2 = np.array(pred) 80 | if (a1.__len__() != a2.__len__()): 81 | raise ValueError("length not equel") 82 | m = a1 - a2 83 | return math.sqrt(sum(m*m)/a1.__len__()) 84 | 85 | def sariamTest(): 86 | f = open("datam.csv", "r") 87 | f_csv = csv.reader(f) 88 | 89 | # writer = open("report.txt", "w") 90 | 91 | ap = arimaPredicter.predicter(); 92 | ap.setIndex(index) 93 | 94 | for i in range(0, 10): 95 | midclass, trD, trL, teD, teL = getData(f_csv, 120, 0) 96 | if (midclass == 0): 97 | break 98 | 99 | trainData = trL[:99] 100 | testData = trL[99:] 101 | 102 | ap.test_stationarity(trL) 103 | 104 | greatfit = (0, 0, 0) 105 | minaic = 99999 106 | 107 | for p in range(0, 3): 108 | for q in range(0, 3): 109 | try: 110 | ap.setPara(midclass, (p, q)) 111 | model = ap.sarimaTrain(midclass, trainData) 112 | if (model.aic < minaic): 113 | minaic = model.aic 114 | greatfit = (p, 1, q) 115 | result = ap.sarimaPredict(model, len(testData)) 116 | print("(%d,%d) %f %f\n" % (p, q, model.aic, getBias(testData, result))) 117 | 118 | except: 119 | pass 120 | 121 | print("midclass %d: %d %d\n" % (midclass, greatfit[0], greatfit[2])) 122 | 123 | f.close() 124 | #writer.close() 125 | """ 126 | def 
test_Ljung_Box(timeseries, l): 127 | acf, q, p = sm.tsa.acf(timeseries, nlags=l, qstat=True) 128 | out = np.c_[range(1, l+1), acf[1:], q, p] 129 | output=pd.DataFrame(out, columns=['lag', "AC", "Q", "P-value"]) 130 | output = output.set_index('lag') 131 | print output 132 | 133 | import arch 134 | 135 | def sariamGarchTest(): 136 | global larclasPred, larclasLabl, totalBias, totalCount, temp 137 | f = open("datam.csv", "r") 138 | f_csv = csv.reader(f) 139 | 140 | for i in range(0, 1): 141 | midclass, trD, trL, teD, teL = getData(f_csv, 120, 0) 142 | if (midclass == 0): 143 | break 144 | # print trL 145 | data0 = pd.Series(trL) 146 | data0.index = pd.Index(index) 147 | 148 | trainData = data0[:dt.datetime(2015,4,9)] 149 | testData = data0[dt.datetime(2015,4,10):] 150 | 151 | model = SARIMAX(trainData, order=(1,1,1), seasonal_order=(0,1,1,7)) 152 | result = model.fit() 153 | 154 | at = trainData - result.fittedvalues 155 | #plt.plot(at, color='red') 156 | #plt.show(block=False) 157 | 158 | at2 = np.square(at) 159 | plt.plot(at2, color='red') 160 | plt.show(block=False) 161 | #test_Ljung_Box(at2, 10) 162 | 163 | amodel = arch.arch_model(at2) 164 | aresult = amodel.fit(disp='off') 165 | aresult.summary() 166 | temp.append(aresult) 167 | output1 = result.forecast(trL.__len__()-trainData.__len__()) 168 | forecasts = aresult.forecast(horizon=5, start=dt.datetime(2015,4,9)) 169 | print forecasts.mean[dt.datetime(2015,4,9):] 170 | print forecasts.variance[dt.datetime(2015,4,9):] 171 | f.close() 172 | """ 173 | sariamTest() -------------------------------------------------------------------------------- /data/train.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/data/train.csv -------------------------------------------------------------------------------- /dataLoader.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Spyder Editor 4 | 5 | This is a temporary script file. 
6 | """ 7 | 8 | import csv 9 | 10 | class loader(): 11 | def __init__(self, midClassFile = "", LarClassFile = ""): 12 | if (midClassFile != ""): 13 | self.mid_f = open(midClassFile, "r") 14 | self.mid_f_csv = csv.reader(self.mid_f) 15 | else: 16 | self.mid_f = None 17 | self.mid_f_csv = None 18 | if (LarClassFile != ""): 19 | self.lar_f = open(LarClassFile, "r") 20 | self.lar_f_csv = csv.reader(self.lar_f) 21 | else: 22 | self.lar_f = None 23 | self.lar_f_csv = None 24 | self.trainCount = 120 25 | self.testCount = 0 26 | self.skipCount = 0 27 | self.midClassFeature = range(3, 9) 28 | self.midSuffix = [] 29 | self.larClassFeature = range(3, 8) 30 | self.larSuffix = [] 31 | 32 | 33 | def setFile(self, midClassFile = "", LarClassFile = ""): 34 | if (midClassFile != ""): 35 | try: 36 | self.mid_f.close() 37 | except: 38 | pass 39 | self.mid_f = open(midClassFile, "r") 40 | self.mid_f_csv = csv.reader(self.mid_f) 41 | if (LarClassFile != ""): 42 | try: 43 | self.lar_f.close() 44 | except: 45 | pass 46 | self.lar_f = open(LarClassFile, "r") 47 | self.lar_f_csv = csv.reader(self.lar_f) 48 | 49 | def closeFiles(self): 50 | try: 51 | self.mid_f.close() 52 | except: 53 | pass 54 | try: 55 | self.lar_f.close() 56 | except: 57 | pass 58 | 59 | def setSize(self, train, test = 0, skip = 0): 60 | self.trainCount = train 61 | self.testCount = test 62 | self.skipCount = skip 63 | 64 | def setMidClassFeature(self, feature=[], suffix=[]): 65 | self.midClassFeature = feature 66 | self.midSuffix = suffix 67 | 68 | def setLarClassFeature(self, feature=[], suffix=[]): 69 | self.larClassFeature = feature 70 | self.larSuffix = suffix 71 | 72 | def getNextMidClass(self): 73 | trainData = [] 74 | testData = [] 75 | trainLabel = [] 76 | testLabel = [] 77 | try: 78 | for x in range(0, self.trainCount): 79 | row = next(self.mid_f_csv) 80 | data = [] 81 | for y in self.midClassFeature: 82 | data.append(float(row[y])) 83 | data = data + self.midSuffix 84 | trainData.append(data) 85 | trainLabel.append(float(row[-1])) 86 | 87 | for x in range(0, self.testCount): 88 | row = next(self.mid_f_csv) 89 | data = [] 90 | for y in self.midClassFeature: 91 | data.append(float(row[y])) 92 | data = data + self.midSuffix 93 | testData.append(data) 94 | testLabel.append(float(row[-1])) 95 | 96 | for x in range(0, self.skipCount): 97 | next(self.mid_f_csv) 98 | return int(row[0]), trainData, trainLabel, testData, testLabel 99 | except StopIteration: 100 | return 0, [], [], [], [] 101 | 102 | def getNextLarClass(self): 103 | trainData = [] 104 | testData = [] 105 | trainLabel = [] 106 | testLabel = [] 107 | try: 108 | for x in range(0, self.trainCount): 109 | row = next(self.lar_f_csv) 110 | data = [] 111 | for y in self.larClassFeature: 112 | data.append(float(row[y])) 113 | data = data + self.larSuffix 114 | trainData.append(data) 115 | trainLabel.append(float(row[-1])) 116 | 117 | for x in range(0, self.testCount): 118 | row = next(self.lar_f_csv) 119 | data = [] 120 | for y in self.larClassFeature: 121 | data.append(float(row[y])) 122 | data = data + self.larSuffix 123 | testData.append(data) 124 | testLabel.append(float(row[-1])) 125 | 126 | for x in range(0, self.skipCount): 127 | next(self.lar_f_csv) 128 | return int(row[0]), trainData, trainLabel, testData, testLabel 129 | except StopIteration: 130 | return 0, [], [], [], [] -------------------------------------------------------------------------------- /doc/compare.docx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/doc/compare.docx
--------------------------------------------------------------------------------
/doc/manual.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Tue Nov 28 15:14:17 2017
4 | 
5 | @author: wangjun
6 | """
7 | 
8 | # Quick-start guide for xgboostPredicter and arimaPredicter
9 | # Last updated 20171128
10 | 
11 | import arimaPredicter
12 | import dataLoader
13 | import xgboostPredicter
14 | 
15 | import matplotlib.pylab as plt
16 | import datetime as dt
17 | 
18 | # Load the training data; any other way of reading it works too
19 | loader = dataLoader.loader("datam.csv")
20 | loader.setSize(200, 43, 0)
21 | midclass, trainData, trainLabel, testData, testLabel = loader.getNextMidClass()
22 | 
23 | plt.plot(trainLabel)
24 | plt.title('Train Label')
25 | plt.show(block=False)
26 | 
27 | def arimaPredict():
28 |     # First, create a class instance
29 |     ap = arimaPredicter.predicter()
30 |     # Set the index: the first argument is the date the training data starts on, the
31 |     # second is the index length; the index just needs to be no shorter than the training data
32 |     ap.createIndex(dt.datetime(2015,1,1), 243)
33 |     # sarimaTrain can be called directly to fit an ARIMA model; only the training labels are required
34 |     model = ap.sarimaTrain(trainLabel)
35 |     # With the model, sarimaPredict forecasts the days immediately following the training data;
36 |     # the two arguments are the model obtained above and the length of the forecast
37 |     # This is a static function and can also be called through the class name
38 |     predictLabel = ap.sarimaPredict(model, 43)
39 |     # This yields the result
40 |     plt.plot(testLabel, color='blue',label='actual')
41 |     plt.plot(predictLabel, color='red',label='predict')
42 |     plt.title('ARIMA(default)')
43 |     plt.show(block=False)
44 | 
45 |     # In fact, sarimaTrain also lets you specify the two ARIMA parameters (ar, ma)
46 |     model = ap.sarimaTrain(trainLabel, para=(2, 2))
47 |     # Well-chosen parameters improve the result; poorly chosen ones make it worse
48 |     predictLabel = ap.sarimaPredict(model, 43)
49 |     plt.plot(testLabel, color='blue',label='actual')
50 |     plt.plot(predictLabel, color='red',label='predict')
51 |     plt.title('ARIMA(2, 2)')
52 |     plt.show(block=False)
53 | 
54 |     # If you do not know which parameters to pick, sarimaParaSelect can select them; its
55 |     # inputs are the class name, a training set, a test set, and a boolean deciding whether AIC is considered
56 |     # So far, selecting with or without AIC gives similar results
57 |     # The function returns the best parameters and the predictions on the test set, and the
58 |     # instance also stores the best parameters under the class name as key
59 |     para, _ = ap.sarimaParaSelect(1001, trainLabel[:-50], trainLabel[-50:], True)
60 | 
61 |     # Since the best parameters are now stored, later training runs only need the class name
62 |     model = ap.sarimaTrain(trainLabel, classNo=1001)
63 |     # Prediction always works the same way
64 |     predictLabel = ap.sarimaPredict(model, 43)
65 |     plt.plot(testLabel, color='blue',label='actual')
66 |     plt.plot(predictLabel, color='red',label='predict')
67 |     plt.title('ARIMA(%d, %d)' % (para[0], para[1]))
68 |     plt.show(block=False)
69 | 
70 |     # Note that both sarimaTrain and sarimaParaSelect may throw an exception when the
71 |     # model fails to converge
72 | 
73 | def xgboostPredict():
74 |     # First, create a class instance
75 |     xgp = xgboostPredicter.predicter()
76 | 
77 |     # xgboostTrain can be called directly to train an xgboost model; the inputs are the training features and their labels
78 |     model = xgp.xgboostTrain(trainData, trainLabel)
79 | 
80 |     # With the model, xgboostPredict produces predictions from the test-set features
81 |     # This is a static function and can also be called through the class name
82 |     predictLabel = xgp.xgboostPredict(model, testData)
83 |     # This yields the result
84 |     plt.plot(testLabel, color='blue',label='actual')
85 |     plt.plot(predictLabel, color='red',label='predict')
86 |     plt.title('xgboost(default)')
87 |     plt.show(block=False)
88 | 
89 |     # The predicter class also provides two static utility functions:
90 |     # simulateFeature clears certain positions of the feature vectors; for example
91 |     xgp.simulateFeature(testData, [-2, -1])
92 |     # clears the last two positions of every test feature vector (promotion information in
93 |     # my feature layout), which makes the test-set results more realistic
94 |     predictLabel = xgp.xgboostPredict(model, testData)
95 |     plt.plot(testLabel, color='blue',label='actual')
96 |     plt.plot(predictLabel, color='red',label='predict')
97 |     plt.title('xgboost(default)')
98 |     plt.show(block=False)
99 | 
100 |     # createFeature builds feature vectors for testing, but it can only be used when your
101 |     # feature layout matches mine
102 |     # Its arguments are (start date, length, number of suffix zeros, holiday list, pre-holiday list, workday list)
103 |     # The lists hold day indices counted from 1; note that weekends automatically count as
104 |     # holidays and Fridays as pre-holiday days, for example
105 |     data = xgp.createFeature(dt.datetime(2015,9,1), 7, 1, [4], [3], [6])
106 |     # whose output is printed by
107 |     for x in data:
108 |         print(x)
109 | 
110 | arimaPredict()
111 | xgboostPredict()
--------------------------------------------------------------------------------
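For reference, assuming xgboostPredicter.createFeature behaves like the standalone createFeature in arima_xgboost/arima_xgboost.py, the last example above (2015-09-01 falls on a Tuesday) should print one [day-of-month, weekday, pre-holiday flag, holiday flag, suffix zero] vector per day:

    [1, 2, 0, 0, 0]
    [2, 3, 0, 0, 0]
    [3, 4, 1, 0, 0]   # day 3 is in the pre-holiday list
    [4, 5, 0, 1, 0]   # day 4 is in the holiday list
    [5, 6, 0, 1, 0]   # Saturday counts as a holiday by default
    [6, 0, 0, 0, 0]   # Sunday, but day 6 is declared a workday
    [7, 1, 0, 0, 0]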
-------------------------------------------------------------------------------- /doc/report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/doc/report.pdf -------------------------------------------------------------------------------- /doc/report.txt: --------------------------------------------------------------------------------
(Midclass 1001 select SARIMA, accuracy: 4.164104)
(Midclass 1002 select SARIMA, accuracy: 0.530263)
(Midclass 1004 select SARIMA, accuracy: 2.009439)
(Midclass 1005 select ZERO, accuracy: 0.377964)
(Midclass 1099 select SARIMA, accuracy: 0.195678)
(Midclass 1101 select SARIMA, accuracy: 1.562947)
(Midclass 1102 select SARIMA, accuracy: 0.875281)
(Midclass 1103 select SARIMA, accuracy: 0.415788)
(Midclass 1201 select SARIMA, accuracy: 22.207918)
(Midclass 1202 select SARIMA, accuracy: 2.736437)
(Midclass 1203 select SARIMA, accuracy: 11.184439)
(Midclass 1205 select SARIMA, accuracy: 0.846538)
(Midclass 1301 select XGBOOST, accuracy: 1.802102)
(Midclass 1302 select SARIMA, accuracy: 1.113745)
(Midclass 1306 select XGBOOST, accuracy: 0.668731)
(Midclass 1308 select SARIMA, accuracy: 3.409499)
(Midclass 1501 select XGBOOST, accuracy: 1.607822)
(Midclass 1502 select SARIMA, accuracy: 0.649425)
(Midclass 1503 select XGBOOST, accuracy: 0.854608)
(Midclass 1504 select ZERO, accuracy: 0.577350)
(Midclass 1505 select SARIMA, accuracy: 5.885219)
(Midclass 1507 select ZERO, accuracy: 0.000000)
(Midclass 1508 select SARIMA, accuracy: 0.998574)
(Midclass 1510 select SARIMA, accuracy: 1.002930)
(Midclass 1511 select SARIMA, accuracy: 0.207669)
(Midclass 1512 select XGBOOST, accuracy: 0.536687)
(Midclass 1513 select SARIMA, accuracy: 1.473901)
(Midclass 1515 select SARIMA, accuracy: 1.343283)
(Midclass 1516 select XGBOOST, accuracy: 1.640316)
(Midclass 1517 select XGBOOST, accuracy: 2.080571)
(Midclass 1518 select SARIMA, accuracy: 5.853074)
(Midclass 1519 select XGBOOST, accuracy: 1.276925)
(Midclass 1521 select SARIMA, accuracy: 3.578035)
(Midclass 2001 select ZERO, accuracy: 2.203893)
(Midclass 2002 select SARIMA, accuracy: 1.330328)
(Midclass 2003 select XGBOOST, accuracy: 1.401267)
(Midclass 2004 select SARIMA, accuracy: 0.204031)
(Midclass 2005 select SARIMA, accuracy: 1.984291)
(Midclass 2006 select XGBOOST, accuracy: 3.567574)
(Midclass 2007 select SARIMA, accuracy: 2.002037)
(Midclass 2008 select SARIMA, accuracy: 2.831727)
(Midclass 2009 select SARIMA, accuracy: 2.368810)
(Midclass 2010 select XGBOOST, accuracy: 1.422014)
(Midclass 2011 select SARIMA, accuracy: 7.890765)
(Midclass 2012 select XGBOOST, accuracy: 0.779130)
(Midclass 2013 select SARIMA, accuracy: 3.032853)
(Midclass 2014 select SARIMA, accuracy: 4.112845)
(Midclass 2015 select XGBOOST, accuracy: 1.717115)
(Midclass 2101 select XGBOOST, accuracy: 0.531832)
(Midclass 2102 select ZERO, accuracy: 0.377964)
(Midclass 2103 select SARIMA, accuracy: 0.951623)
(Midclass 2104 select SARIMA, accuracy: 1.087212)
(Midclass 2105 select SARIMA, accuracy: 1.209021)
(Midclass 2106 select XGBOOST, accuracy: 0.528829)
(Midclass 2107 select ZERO, accuracy: 0.000000)
(Midclass 2201 select SARIMA, accuracy: 5.789172)
(Midclass 2202 select SARIMA, accuracy: 7.402780)
(Midclass 2203 select SARIMA, accuracy: 7.110155)
(Midclass 2204 select SARIMA, accuracy: 3.052818)
(Midclass 2205 select SARIMA, accuracy: 3.604804)
(Midclass 2206 select SARIMA, accuracy: 4.279676)
(Midclass 2207 select SARIMA, accuracy: 2.793577)
(Midclass 2208 select SARIMA, accuracy: 1.795416)
(Midclass 2209 select XGBOOST, accuracy: 1.610905)
(Midclass 2210 select SARIMA, accuracy: 3.808497)
(Midclass 2211 select ZERO, accuracy: 0.377964)
(Midclass 2212 select XGBOOST, accuracy: 1.402607)
(Midclass 2301 select XGBOOST, accuracy: 1.707330)
(Midclass 2302 select SARIMA, accuracy: 1.730345)
(Midclass 2303 select XGBOOST, accuracy: 2.240427)
(Midclass 2304 select XGBOOST, accuracy: 0.650331)
(Midclass 2305 select SARIMA, accuracy: 1.866917)
(Midclass 2306 select XGBOOST, accuracy: 3.693004)
(Midclass 2307 select SARIMA, accuracy: 1.606624)
(Midclass 2309 select XGBOOST, accuracy: 1.696085)
(Midclass 2310 select ZERO, accuracy: 0.617213)
(Midclass 2311 select XGBOOST, accuracy: 1.108243)
(Midclass 2312 select SARIMA, accuracy: 0.478464)
(Midclass 2313 select ZERO, accuracy: 0.308607)
(Midclass 2314 select XGBOOST, accuracy: 1.468442)
(Midclass 2316 select XGBOOST, accuracy: 1.258186)
(Midclass 2317 select XGBOOST, accuracy: 0.640845)
(Midclass 3001 select ZERO, accuracy: 0.308607)
(Midclass 3002 select SARIMA, accuracy: 1.270771)
(Midclass 3003 select SARIMA, accuracy: 0.708163)
(Midclass 3004 select XGBOOST, accuracy: 0.470117)
(Midclass 3005 select XGBOOST, accuracy: 0.946125)
(Midclass 3006 select SARIMA, accuracy: 2.675442)
(Midclass 3007 select SARIMA, accuracy: 1.639240)
(Midclass 3008 select SARIMA, accuracy: 1.766410)
(Midclass 3010 select SARIMA, accuracy: 0.838993)
(Midclass 3011 select SARIMA, accuracy: 0.640106)
(Midclass 3013 select SARIMA, accuracy: 2.473312)
(Midclass 3014 select ZERO, accuracy: 0.218218)
(Midclass 3016 select SARIMA, accuracy: 3.851208)
(Midclass 3017 select SARIMA, accuracy: 0.888357)
(Midclass 3018 select SARIMA, accuracy: 3.428816)
(Midclass 3102 select ZERO, accuracy: 0.218218)
(Midclass 3105 select XGBOOST, accuracy: 0.214763)
(Midclass 3107 select SARIMA, accuracy: 0.215656)
(Midclass 3109 select ZERO, accuracy: 0.218218)
(Midclass 3110 select XGBOOST, accuracy: 0.681508)
(Midclass 3112 select SARIMA, accuracy: 0.673105)
(Midclass 3113 select SARIMA, accuracy: 0.196265)
(Midclass 3114 select SARIMA, accuracy: 0.820782)
(Midclass 3116 select SARIMA, accuracy: 0.859559)
(Midclass 3117 select ZERO, accuracy: 0.690066)
(Midclass 3118 select XGBOOST, accuracy: 1.138621)
(Midclass 3119 select ZERO, accuracy: 0.308607)
(Midclass 3125 select ZERO, accuracy: 0.000000)
(Midclass 3126 select SARIMA, accuracy: 0.710316)
(Midclass 3208 select ZERO, accuracy: 0.000000)
(Midclass 3217 select ZERO, accuracy: 0.000000)
(Midclass 3227 select SARIMA, accuracy: 0.470535)
(Midclass 3311 select ZERO, accuracy: 0.000000)
(Midclass 3316 select ZERO, accuracy: 0.000000)
(Midclass 3319 select SARIMA, accuracy: 1.418897)
(Midclass 3320 select XGBOOST, accuracy: 0.681791)
(Midclass 3321 select ZERO, accuracy: 0.845154)
(Midclass 3322 select ZERO, accuracy: 0.218218)
(Midclass 3323 select ZERO, accuracy: 0.308607)
(Midclass 3325 select SARIMA, accuracy: 0.306011)
(Midclass 3326 select ZERO, accuracy: 0.000000)
(Midclass 3402 select SARIMA, accuracy: 0.505201)
(Midclass 3403 select ZERO, accuracy: 0.218218)
(Midclass 3407 select XGBOOST, accuracy: 1.583984)
(Midclass 3408 select ZERO, accuracy: 0.218218)
(Midclass 3413 select ZERO, accuracy: 0.000000)
(Midclass 3415 select SARIMA, accuracy: 0.871680)
(Midclass 3417 select XGBOOST, accuracy: 0.377431)
(Midclass 3423 select SARIMA, accuracy: 0.438360)
(Midclass 3424 select XGBOOST, accuracy: 1.109004)
(Midclass 3426 select XGBOOST, accuracy: 0.215652)
(Midclass 3431 select SARIMA, accuracy: 0.555214)
(Larclass 10 select SUM, accuracy: 5.288813)
(Larclass 11 select SUM, accuracy: 1.967995)
(Larclass 12 select SARIMA, accuracy: 29.097950)
(Larclass 13 select SARIMA, accuracy: 3.669651)
(Larclass 15 select SARIMA, accuracy: 15.189662)
(Larclass 20 select SARIMA, accuracy: 13.969971)
(Larclass 21 select SUM, accuracy: 2.007923)
(Larclass 22 select SUM, accuracy: 22.782286)
(Larclass 23 select SARIMA, accuracy: 9.731009)
(Larclass 30 select SARIMA, accuracy: 8.978236)
(Larclass 31 select SUM, accuracy: 2.468272)
(Larclass 32 select SARIMA, accuracy: 0.447503)
(Larclass 33 select SARIMA, accuracy: 2.195191)
(Larclass 34 select SARIMA, accuracy: 2.465107)

1502: failed to use arima, use xgboost instead
2302: failed to use arima, use xgboost instead
3017: failed to use arima, use xgboost instead
3018: failed to use arima, use xgboost instead
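The log above is the output of a per-class model selection: each candidate predictor (SARIMA, XGBOOST, ZERO, and SUM for larclasses) is scored on a validation window and one winner is recorded per class. A minimal sketch of that selection step, assuming the "accuracy" column is an error measure where lower is better (the 0.000000 ZERO rows suggest this); the score values below are illustrative, not taken from the repo:

# Hypothetical reconstruction of the selection report.txt logs; select_model
# and the candidate scores are stand-ins, not the repo's actual code.
def select_model(scores):
    """scores: dict mapping model name -> validation error (lower wins)."""
    best = min(scores, key=scores.get)
    return best, scores[best]

candidates = {"SARIMA": 4.164104, "XGBOOST": 5.02, "ZERO": 9.3}
name, err = select_model(candidates)
print("(Midclass 1001 select %s, accuracy: %f)" % (name, err))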
-------------------------------------------------------------------------------- /doc/~$ompare.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/doc/~$ompare.docx -------------------------------------------------------------------------------- /doc/特征选择.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/doc/特征选择.xlsx -------------------------------------------------------------------------------- /plot_pic/1001_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1001_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1002_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1002_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1004_customers.jpg: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1004_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1005_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1005_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1006_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1006_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1007_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1007_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1099_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1099_customers.jpg -------------------------------------------------------------------------------- /plot_pic/10_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/10_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1101_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1101_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1102_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1102_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1103_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1103_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1104_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1104_customers.jpg -------------------------------------------------------------------------------- /plot_pic/11_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/11_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1201_customers.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1201_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1202_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1202_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1203_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1203_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1205_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1205_customers.jpg -------------------------------------------------------------------------------- /plot_pic/12_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/12_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1301_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1301_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1302_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1302_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1306_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1306_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1308_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1308_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1399_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1399_customers.jpg -------------------------------------------------------------------------------- /plot_pic/13_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/13_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1401_customers.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1401_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1402_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1402_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1403_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1403_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1404_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1404_customers.jpg -------------------------------------------------------------------------------- /plot_pic/14_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/14_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1501_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1501_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1502_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1502_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1503_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1503_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1504_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1504_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1505_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1505_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1507_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1507_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1508_customers.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1508_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1509_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1509_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1510_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1510_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1511_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1511_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1512_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1512_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1513_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1513_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1514_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1514_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1515_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1515_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1516_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1516_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1517_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1517_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1518_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1518_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1519_customers.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1519_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1520_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1520_customers.jpg -------------------------------------------------------------------------------- /plot_pic/1521_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1521_customers.jpg -------------------------------------------------------------------------------- /plot_pic/15_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/15_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2001_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2001_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2002_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2002_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2003_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2003_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2004_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2004_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2005_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2005_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2006_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2006_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2007_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2007_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2008_customers.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2008_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2009_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2009_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2010_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2010_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2011_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2011_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2012_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2012_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2013_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2013_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2014_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2014_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2015_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2015_customers.jpg -------------------------------------------------------------------------------- /plot_pic/20_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/20_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2101_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2101_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2102_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2102_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2103_customers.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2103_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2104_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2104_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2105_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2105_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2106_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2106_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2107_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2107_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2108_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2108_customers.jpg -------------------------------------------------------------------------------- /plot_pic/21_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/21_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2201_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2201_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2202_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2202_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2203_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2203_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2204_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2204_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2205_customers.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2205_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2206_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2206_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2207_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2207_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2208_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2208_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2209_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2209_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2210_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2210_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2211_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2211_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2212_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2212_customers.jpg -------------------------------------------------------------------------------- /plot_pic/22_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/22_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2301_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2301_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2302_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2302_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2303_customers.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2303_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2304_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2304_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2305_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2305_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2306_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2306_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2307_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2307_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2308_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2308_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2309_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2309_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2310_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2310_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2311_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2311_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2312_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2312_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2313_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2313_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2314_customers.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2314_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2315_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2315_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2316_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2316_customers.jpg -------------------------------------------------------------------------------- /plot_pic/2317_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2317_customers.jpg -------------------------------------------------------------------------------- /plot_pic/23_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/23_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3001_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3001_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3002_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3002_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3003_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3003_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3004_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3004_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3005_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3005_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3006_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3006_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3007_customers.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3007_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3008_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3008_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3009_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3009_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3010_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3010_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3011_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3011_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3012_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3012_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3013_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3013_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3014_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3014_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3015_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3015_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3016_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3016_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3017_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3017_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3018_customers.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3018_customers.jpg -------------------------------------------------------------------------------- /plot_pic/30_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/30_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3101_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3101_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3102_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3102_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3104_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3104_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3105_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3105_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3106_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3106_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3107_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3107_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3108_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3108_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3109_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3109_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3110_customers.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3110_customers.jpg -------------------------------------------------------------------------------- /plot_pic/3111_customers.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3111_customers.jpg
--------------------------------------------------------------------------------
(The remaining /plot_pic/*_customers.jpg entries, 3112_customers.jpg through 34_customers.jpg, are binary image files: per-class plots of daily customer counts. Their raw-file links all follow the same pattern as the one above and are omitted here.)
--------------------------------------------------------------------------------
/plot_pic/异常日期.txt ("anomalous dates"):
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/异常日期.txt
--------------------------------------------------------------------------------
/rnn/test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Thu Nov 30 22:01:26 2017
4 | 
5 | @author: wangjun
6 | """
7 | 
8 | # Naive LSTM to learn three-char window to one-char mapping
9 | import numpy
10 | from keras.models import Sequential
11 | from keras.layers import Dense
12 | from keras.layers import LSTM
13 | from keras.utils import np_utils
14 | 
15 | # fix random seed for reproducibility
16 | numpy.random.seed(7)
17 | # define the raw dataset
18 | alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
19 | # create mapping of characters to integers (0-25) and the reverse
20 | char_to_int = dict((c, i) for i, c in enumerate(alphabet))
21 | int_to_char = dict((i, c) for i, c in enumerate(alphabet))
22 | # prepare the dataset of input to output pairs encoded as integers
23 | seq_length = 3
24 | dataX = []
25 | dataY = []
26 | for i in range(0, len(alphabet) - seq_length, 1):
27 |     seq_in = alphabet[i:i + seq_length]
28 |     seq_out = alphabet[i + seq_length]
29 |     dataX.append([char_to_int[char] for char in seq_in])
30 |     dataY.append(char_to_int[seq_out])
31 |     print(seq_in, '->', seq_out)
32 | # reshape X to be [samples, time steps, features]
33 | X = numpy.reshape(dataX, (len(dataX), seq_length, 1))
34 | # normalize
35 | X = X / float(len(alphabet))
36 | # one hot encode the output variable
37 | y = np_utils.to_categorical(dataY)
38 | # create and fit the model
39 | model = Sequential()
40 | model.add(LSTM(32, input_shape=(X.shape[1], X.shape[2])))
41 | model.add(Dense(y.shape[1], activation='softmax'))
42 | model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
43 | model.fit(X, y, nb_epoch=500, batch_size=1, verbose=2)
44 | # summarize performance of the model
45 | scores = model.evaluate(X, y, verbose=0)
46 | print("Model Accuracy: %.2f%%" % (scores[1]*100))
47 | # demonstrate some model predictions
48 | for pattern in dataX:
49 |     x = numpy.reshape(pattern, (1, len(pattern), 1))
50 |     x = x / float(len(alphabet))
51 |     prediction = model.predict(x, verbose=0)
52 |     index = numpy.argmax(prediction)
53 |     result = int_to_char[index]
54 |     seq_in = [int_to_char[value] for value in pattern]
55 |     print(seq_in, "->", result)
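A quick sanity check of the tensor convention used above (a minimal illustrative sketch, not a file from this repository; the window values are made up): Keras LSTM layers expect input shaped [samples, timesteps, features], so a three-character window must be reshaped to (1, 3, 1) at prediction time, matching the training tensor X built with shape (len(dataX), seq_length, 1).

import numpy

pattern = [0, 1, 2]  # an integer-encoded three-char window, e.g. "ABC"
x = numpy.reshape(pattern, (1, len(pattern), 1))  # [samples, timesteps, features]
x = x / float(26)  # same normalization as training (alphabet length)
print(x.shape)  # (1, 3, 1), consistent with input_shape=(3, 1)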
--------------------------------------------------------------------------------
/rnn/test2.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Thu Nov 30 22:16:06 2017
4 | 
5 | @author: wangjun
6 | """
7 | 
8 | import numpy as np
9 | import math
10 | 
11 | from keras.models import Sequential
12 | from keras.layers import Dense
13 | from keras.layers import LSTM
14 | import dataLoader
15 | 
16 | import matplotlib.pyplot as plt
17 | import xgboostPredicter
18 | 
19 | loader = dataLoader.loader("datam.csv")
20 | loader.setSize(200, 43, 0)
21 | midclass, trainData, trainLabel, testData, testLabel = loader.getNextMidClass()
22 | loader.closeFiles()
23 | 
24 | seq_length = 0
25 | data_max = 35
26 | dataX = []
27 | dataY = []
28 | 
29 | trainLabelN = []
30 | for i in range(0, len(trainLabel)):
31 |     trainLabelN.append(trainLabel[i] / data_max)
32 | 
33 | for i in range(0, len(trainLabelN) - seq_length):
34 |     dataX.append(trainData[i+seq_length]+trainLabelN[i:i+seq_length])
35 |     dataY.append(trainLabelN[i+seq_length])
36 | 
37 | X = np.reshape(dataX, (len(dataX), 1, len(trainData[0])+seq_length))
38 | Y = np.reshape(dataY, (len(dataY), 1))
39 | 
40 | model = Sequential()
41 | model.add(LSTM(6, input_shape=(X.shape[1], X.shape[2]), batch_size=1, stateful=True))
42 | model.add(Dense(1))
43 | model.compile(loss='mean_squared_error', optimizer='adam')
44 | model.fit(X, Y, nb_epoch=300, batch_size=1, verbose=1)
45 | 
46 | #history = trainLabelN[-1*seq_length:]
47 | predLabel = []
48 | for i in range(0, len(testLabel)):
49 |     #feature = np.array(testData[i]+history).reshape(1, 1, len(trainData[0])+seq_length)
50 |     feature = np.array(testData[i]).reshape(1, 1, len(trainData[0]))
51 |     predict = model.predict(feature)
52 |     predLabel.append(predict[0][0]*data_max)
53 |     #history.pop(0)
54 |     #history.append(predict)
55 | 
56 | predLabel = np.array(predLabel)
57 | testLabel = np.array(testLabel)
58 | bias = sum((predLabel-testLabel)*(predLabel-testLabel))
59 | bias = math.sqrt(bias/len(testLabel))
60 | print(bias)
61 | plt.plot(predLabel, color='blue', label='predict')
62 | plt.plot(testLabel, color='red', label='original')
63 | plt.show(block=False)
64 | 
65 | def xgboostPredict(trainData, trainLabel, testData):
66 | 
67 |     xgp = xgboostPredicter.predicter()
68 |     model = xgp.xgboostTrain(trainData, trainLabel)
69 |     predLabel = xgp.xgboostPredict(model, testData)
70 |     return predLabel
71 | 
72 | predLabel = xgboostPredict(trainData, trainLabel, testData)
73 | predLabel = np.array(predLabel)
74 | testLabel = np.array(testLabel)
75 | bias = sum((predLabel-testLabel)*(predLabel-testLabel))
76 | bias = math.sqrt(bias/len(testLabel))
77 | print(bias)
78 | plt.plot(predLabel, color='blue', label='predict')
79 | plt.plot(testLabel, color='red', label='original')
80 | plt.show(block=False)
81 | 
--------------------------------------------------------------------------------
/rnn/test3.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 |
'''Example script showing how to use a stateful LSTM model 4 | and how its stateless counterpart performs. 5 | 6 | More documentation about the Keras LSTM model can be found at 7 | https://keras.io/layers/recurrent/#lstm 8 | 9 | The models are trained on an input/output pair, where 10 | the input is a generated uniformly distributed 11 | random sequence of length = "input_len", 12 | and the output is a moving average of the input with window length = "tsteps". 13 | Both "input_len" and "tsteps" are defined in the "editable parameters" section. 14 | A larger "tsteps" value means that the LSTM will need more memory 15 | to figure out the input-output relationship. 16 | This memory length is controlled by the "lahead" variable (more details below). 17 | The rest of the parameters are: 18 | - input_len: the length of the generated input sequence 19 | - lahead: the input sequence length that the LSTM 20 | is trained on for each output point 21 | - batch_size, epochs: same parameters as in the model.fit(...) function 22 | When lahead > 1, the model input is preprocessed to a "rolling window view" 23 | of the data, with the window length = "lahead". 24 | This is similar to sklearn's "view_as_windows" 25 | with "window_shape" being a single number 26 | Ref: http://scikit-image.org/docs/0.10.x/api/skimage.util.html#view-as-windows 27 | When lahead < tsteps, only the stateful LSTM converges because its 28 | statefulness allows it to see beyond the capability that lahead 29 | gave it to fit the n-point average. The stateless LSTM does not have 30 | this capability, and hence is limited by its "lahead" parameter, 31 | which is not sufficient to see the n-point average. 32 | When lahead >= tsteps, both the stateful and stateless LSTM converge. 33 | ''' 34 | from __future__ import print_function 35 | import numpy as np 36 | import matplotlib.pyplot as plt 37 | import pandas as pd 38 | from keras.models import Sequential 39 | from keras.layers import Dense, LSTM 40 | 41 | # ---------------------------------------------------------- 42 | # EDITABLE PARAMETERS 43 | # Read the documentation in the script head for more details 44 | # ---------------------------------------------------------- 45 | 46 | # length of input 47 | input_len = 1000 48 | 49 | # The window length of the moving average used to generate 50 | # the output from the input in the input/output pair used 51 | # to train the LSTM 52 | # e.g. 
if tsteps=2 and input=[1, 2, 3, 4, 5], 53 | # then output=[1.5, 2.5, 3.5, 4.5] 54 | tsteps = 2 55 | 56 | # The input sequence length that the LSTM is trained on for each output point 57 | lahead = 5 58 | 59 | # training parameters passed to "model.fit(...)" 60 | batch_size = 1 61 | epochs = 10 62 | 63 | # ------------ 64 | # MAIN PROGRAM 65 | # ------------ 66 | 67 | print("*" * 33) 68 | if lahead >= tsteps: 69 | print("STATELESS LSTM WILL ALSO CONVERGE") 70 | else: 71 | print("STATELESS LSTM WILL NOT CONVERGE") 72 | print("*" * 33) 73 | 74 | np.random.seed(1986) 75 | 76 | print('Generating Data...') 77 | 78 | 79 | def gen_uniform_amp(amp=1, xn=10000): 80 | """Generates uniform random data between 81 | -amp and +amp 82 | and of length xn 83 | Arguments: 84 | amp: maximum/minimum range of uniform data 85 | xn: length of series 86 | """ 87 | data_input = np.random.uniform(-1 * amp, +1 * amp, xn) 88 | data_input = pd.DataFrame(data_input) 89 | return data_input 90 | 91 | # Since the output is a moving average of the input, 92 | # the first few points of output will be NaN 93 | # and will be dropped from the generated data 94 | # before training the LSTM. 95 | # Also, when lahead > 1, 96 | # the preprocessing step later of "rolling window view" 97 | # will also cause some points to be lost. 98 | # For aesthetic reasons, 99 | # in order to maintain generated data length = input_len after pre-processing, 100 | # add a few points to account for the values that will be lost. 101 | to_drop = max(tsteps - 1, lahead - 1) 102 | data_input = gen_uniform_amp(amp=0.1, xn=input_len + to_drop) 103 | 104 | # set the target to be a N-point average of the input 105 | expected_output = data_input.rolling(window=tsteps, center=False).mean() 106 | 107 | # when lahead > 1, need to convert the input to "rolling window view" 108 | # https://docs.scipy.org/doc/numpy/reference/generated/numpy.repeat.html 109 | if lahead > 1: 110 | data_input = np.repeat(data_input.values, repeats=lahead, axis=1) 111 | data_input = pd.DataFrame(data_input) 112 | for i, c in enumerate(data_input.columns): 113 | data_input[c] = data_input[c].shift(i) 114 | 115 | # drop the nan 116 | expected_output = expected_output[to_drop:] 117 | data_input = data_input[to_drop:] 118 | 119 | print('Input shape:', data_input.shape) 120 | print('Output shape:', expected_output.shape) 121 | print('Input head: ') 122 | print(data_input.head()) 123 | print('Output head: ') 124 | print(expected_output.head()) 125 | print('Input tail: ') 126 | print(data_input.tail()) 127 | print('Output tail: ') 128 | print(expected_output.tail()) 129 | 130 | print('Plotting input and expected output') 131 | plt.plot(data_input[0][:10], '.') 132 | plt.plot(expected_output[0][:10], '-') 133 | plt.legend(['Input', 'Expected output']) 134 | plt.title('Input') 135 | plt.show() 136 | 137 | 138 | def create_model(stateful: bool): 139 | model = Sequential() 140 | model.add(LSTM(20, 141 | input_shape=(lahead, 1), 142 | batch_size=batch_size, 143 | stateful=stateful)) 144 | model.add(Dense(1)) 145 | model.compile(loss='mse', optimizer='adam') 146 | return model 147 | 148 | print('Creating Stateful Model...') 149 | model_stateful = create_model(stateful=True) 150 | 151 | 152 | # split train/test data 153 | def split_data(x, y, ratio: int = 0.8): 154 | to_train = int(input_len * ratio) 155 | # tweak to match with batch_size 156 | to_train -= to_train % batch_size 157 | 158 | x_train = x[:to_train] 159 | y_train = y[:to_train] 160 | x_test = x[to_train:] 161 | y_test = y[to_train:] 162 
| 163 | # tweak to match with batch_size 164 | to_drop = x.shape[0] % batch_size 165 | if to_drop > 0: 166 | x_test = x_test[:-1 * to_drop] 167 | y_test = y_test[:-1 * to_drop] 168 | 169 | # some reshaping 170 | reshape_3 = lambda x: x.values.reshape((x.shape[0], x.shape[1], 1)) 171 | x_train = reshape_3(x_train) 172 | x_test = reshape_3(x_test) 173 | 174 | reshape_2 = lambda x: x.values.reshape((x.shape[0], 1)) 175 | y_train = reshape_2(y_train) 176 | y_test = reshape_2(y_test) 177 | 178 | return (x_train, y_train), (x_test, y_test) 179 | 180 | 181 | (x_train, y_train), (x_test, y_test) = split_data(data_input, expected_output) 182 | print('x_train.shape: ', x_train.shape) 183 | print('y_train.shape: ', y_train.shape) 184 | print('x_test.shape: ', x_test.shape) 185 | print('y_test.shape: ', y_test.shape) 186 | 187 | print('Training') 188 | for i in range(epochs): 189 | print('Epoch', i + 1, '/', epochs) 190 | # Note that the last state for sample i in a batch will 191 | # be used as initial state for sample i in the next batch. 192 | # Thus we are simultaneously training on batch_size series with 193 | # lower resolution than the original series contained in data_input. 194 | # Each of these series are offset by one step and can be 195 | # extracted with data_input[i::batch_size]. 196 | model_stateful.fit(x_train, 197 | y_train, 198 | batch_size=batch_size, 199 | epochs=1, 200 | verbose=1, 201 | validation_data=(x_test, y_test), 202 | shuffle=False) 203 | model_stateful.reset_states() 204 | 205 | print('Predicting') 206 | predicted_stateful = model_stateful.predict(x_test, batch_size=batch_size) 207 | 208 | print('Creating Stateless Model...') 209 | model_stateless = create_model(stateful=False) 210 | 211 | print('Training') 212 | model_stateless.fit(x_train, 213 | y_train, 214 | batch_size=batch_size, 215 | epochs=epochs, 216 | verbose=1, 217 | validation_data=(x_test, y_test), 218 | shuffle=False) 219 | 220 | print('Predicting') 221 | predicted_stateless = model_stateless.predict(x_test, batch_size=batch_size) 222 | 223 | # ---------------------------- 224 | 225 | print('Plotting Results') 226 | plt.subplot(3, 1, 1) 227 | plt.plot(y_test) 228 | plt.title('Expected') 229 | plt.subplot(3, 1, 2) 230 | # drop the first "tsteps-1" because it is not possible to predict them 231 | # since the "previous" timesteps to use do not exist 232 | plt.plot((y_test - predicted_stateful).flatten()[tsteps - 1:]) 233 | plt.title('Stateful: Expected - Predicted') 234 | plt.subplot(3, 1, 3) 235 | plt.plot((y_test - predicted_stateless).flatten()) 236 | plt.title('Stateless: Expected - Predicted') 237 | plt.show() -------------------------------------------------------------------------------- /tools/backup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Spyder Editor 4 | 5 | This is a temporary script file. 
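The script compares a per-midclass SARIMA model, an XGBoost model and an
all-zero baseline on a hold-out window, then writes the selected
predictions to submit1.csv.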
6 | """
7 | 
8 | import xgboost as xgb
9 | from numpy import array
10 | from numpy import zeros
11 | import csv
12 | import math
13 | 
14 | import pandas as pd
15 | from statsmodels.tsa.statespace.sarimax import SARIMAX
16 | import statsmodels.api as sm
17 | import datetime as dt
18 | import matplotlib.pylab as plt
19 | from statsmodels.tsa.stattools import adfuller
20 | 
21 | larclasPred = {}
22 | larclasLabl = {}
23 | totalBias = 0
24 | totalCount = 0
25 | 
26 | dtIndex = [dt.datetime(2015,1,x) for x in range(1, 32)]
27 | dtIndex = dtIndex + [dt.datetime(2015,2,x) for x in (range(1, 29))]
28 | dtIndex = dtIndex + [dt.datetime(2015,3,x) for x in range(1, 32)]
29 | dtIndex = dtIndex + [dt.datetime(2015,4,x) for x in (range(1, 31))]
30 | 
31 | modelChoose = []
32 | 
33 | def getData(csvReader, trainCount, testCount):
34 |     trainData = []
35 |     testData = []
36 |     trainLabel = []
37 |     testLabel = []
38 |     try:
39 |         for x in range(0, trainCount):
40 |             row = csvReader.next()
41 |             """
42 |             data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]),
43 |                     float(row[7]), float(row[8]), float(row[9]), float(row[10]),
44 |                     float(row[11]), float(row[12])]
45 |             """
46 |             data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]),
47 |                     float(row[7]), float(row[8])]
48 |             trainData.append(data)
49 |             trainLabel.append(float(row[15]))
50 |         for x in range(0, testCount):
51 |             row = csvReader.next()
52 |             """
53 |             data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]),
54 |                     float(row[7]), float(row[8]), float(row[9]), float(row[10]),
55 |                     float(row[11]), float(row[12])]
56 |             """
57 |             data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]),
58 |                     float(row[7]), float(row[8])]
59 |             testData.append(data)
60 |             testLabel.append(float(row[15]))
61 |         return int(row[0]), trainData, trainLabel, testData, testLabel
62 |     except StopIteration:
63 |         return 0, [], [], [], []
64 | 
65 | def dataLog(midclass, accuracy, trainLabl, testPred, testLabl):
66 |     with open('compare.csv', 'ab') as f:
67 |         writer = csv.writer(f)
68 |         count = 1
69 |         writer.writerow([midclass, accuracy])
70 |         for x in trainLabl:
71 |             writer.writerow([count, x])
72 |             count += 1
73 |         for x in range(0, len(testPred)):
74 |             writer.writerow([count, testLabl[x], testPred[x]])
75 |             count += 1
76 | 
77 | def xgboostPredict(trainData, trainLabel, dataToPredict):
78 |     dtrain = xgb.DMatrix(trainData, trainLabel)
79 |     params = {"objective": "reg:linear"}
80 |     gbm = xgb.train(dtrain=dtrain, params=params)
81 |     return gbm.predict(xgb.DMatrix(dataToPredict))
82 | 
83 | def test_stationarity(timeseries):
84 | 
85 |     #Determining rolling statistics
86 |     rolmean = timeseries.rolling(window=12,center=False).mean()
87 |     rolstd = timeseries.rolling(window=12,center=False).std()
88 | 
89 |     #Plot rolling statistics:
90 |     plt.plot(timeseries, color='blue',label='Original')
91 |     plt.plot(rolmean, color='red', label='Rolling Mean')
92 |     plt.plot(rolstd, color='black', label = 'Rolling Std')
93 |     plt.legend(loc='best')
94 |     plt.title('Rolling Mean & Standard Deviation')
95 |     plt.show(block=False)
96 | 
97 |     #Perform Dickey-Fuller test:
98 |     print 'Results of Dickey-Fuller Test:'
99 |     dftest = adfuller(timeseries, autolag='AIC')
100 |     dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
101 |     for key,value in dftest[4].items():
102 |         dfoutput['Critical Value (%s)'%key] = value
103 |     print dfoutput
104 | 
105 |     #Get AR and MA parameter
106 |     fig = plt.figure(figsize=(12,8))
107 |     ax1=fig.add_subplot(211)
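    # Reading the two plots below (the standard Box-Jenkins heuristic): a sharp
    # cutoff in the PACF suggests the AR order p, and a sharp cutoff in the ACF
    # suggests the MA order q; these plots motivate the fixed
    # SARIMAX(1,1,1)x(0,1,1,7) orders used in sarimaTrain() below.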
108 | fig = sm.graphics.tsa.plot_acf(timeseries, lags=20, ax=ax1) 109 | ax2 = fig.add_subplot(212) 110 | fig = sm.graphics.tsa.plot_pacf(timeseries, lags=20, ax=ax2) 111 | plt.show(block=False) 112 | 113 | def sarimaTrain(trainLabel): 114 | dataLength = trainLabel.__len__() 115 | data = pd.Series(trainLabel) 116 | index = dtIndex[0:dataLength] 117 | data.index = pd.Index(index) 118 | 119 | model = SARIMAX(data, order=(1,1,1), seasonal_order=(0,1,1,7)) 120 | return model.fit() 121 | 122 | def sarimaPredict(model, predictLength): 123 | output = model.forecast(predictLength) 124 | return array(output) 125 | 126 | def sarimaBias(model, trainLabel): 127 | dataLength = trainLabel.__len__() 128 | data = pd.Series(trainLabel) 129 | index = dtIndex[0:dataLength] 130 | data.index = pd.Index(index) 131 | 132 | pred = model.predict() 133 | """ 134 | plt.plot(data, color='blue',label='Original') 135 | plt.plot(pred, color='red', label='Predicted') 136 | plt.show(block=False) 137 | """ 138 | return list(data - pred) 139 | 140 | def modelselect(trainSize, testSize): 141 | global larclasPred, larclasLabl, totalBias, totalCount 142 | larclasPred = {} 143 | larclasLabl = {} 144 | totalBias = 0 145 | totalCount = 0 146 | modelChoose = [] 147 | f = open("data.csv", "r") 148 | f_csv = csv.reader(f) 149 | 150 | teD = [] 151 | for i in range(31-testSize, 31): 152 | x = [i, (i+2)%7, 0, 0, 0, 0] 153 | if (x[1] == 6 or x[1]==0): 154 | x[3] = 1 155 | elif (x[1] == 5): 156 | x[2] = 1 157 | teD.append(x) 158 | 159 | while (True): 160 | midclass, trD, trL, _, teL = getData(f_csv, trainSize, testSize) 161 | if (midclass == 0): 162 | break 163 | else: 164 | 165 | # sarima model 166 | try: 167 | model = sarimaTrain(trL) 168 | teP1 = sarimaPredict(model, testSize) 169 | except: 170 | teP1 = zeros(testSize) 171 | 172 | # xgboost model 173 | try: 174 | teP2 = xgboostPredict(array(trD), array(trL), array(teD)) 175 | except: 176 | teP2 = zeros(testSize) 177 | 178 | # just zero 179 | teP3 = zeros(testSize) 180 | 181 | # count bias of midclass and update larclass 182 | label = array(teL) 183 | larclass = int(midclass/100) 184 | totalCount += testSize 185 | 186 | bias1 = sum((teP1-label)*(teP1-label)) 187 | bias2 = sum((teP2-label)*(teP2-label)) 188 | bias3 = sum((teP3-label)*(teP3-label)) 189 | if (bias3 < bias1 and bias3 < bias2): 190 | totalBias += bias3 191 | bias3 = math.sqrt(bias3/testSize) 192 | print "(Midclass %d select ZERO, accuracy: %f)" % (midclass, bias3) 193 | modelChoose.append(3) 194 | if (larclass in larclasPred): 195 | larclasPred[larclass] += teP3 196 | else: 197 | larclasPred[larclass] = teP3 198 | elif (bias1 < bias2): 199 | totalBias += bias1 200 | bias1 = math.sqrt(bias1/testSize) 201 | print "(Midclass %d select SARIMA, accuracy: %f)" % (midclass, bias1) 202 | modelChoose.append(1) 203 | if (larclass in larclasPred): 204 | larclasPred[larclass] += teP1 205 | else: 206 | larclasPred[larclass] = teP1 207 | else: 208 | totalBias += bias2 209 | bias2 = math.sqrt(bias2/testSize) 210 | print "(Midclass %d select XGBOOST, accuracy: %f)" % (midclass, bias2) 211 | modelChoose.append(2) 212 | if (larclass in larclasPred): 213 | larclasPred[larclass] += teP2 214 | else: 215 | larclasPred[larclass] = teP2 216 | 217 | if (larclass in larclasLabl): 218 | larclasLabl[larclass] += label 219 | else: 220 | larclasLabl[larclass] = label 221 | #dataLog(midclass, bias, trL, teP, teL) 222 | 223 | # print bias of large class 224 | for larclass in larclasPred: 225 | bias = sum((larclasLabl[larclass] - larclasPred[larclass])* 226 
| (larclasLabl[larclass] - larclasPred[larclass])) 227 | totalBias += bias 228 | totalCount += testSize 229 | bias = math.sqrt(bias/testSize) 230 | print "(Larclass %d predict finished, accuracy: %f)" % (larclass, bias) 231 | 232 | totalBias = math.sqrt(totalBias/totalCount) 233 | print "(Predict finished, accuracy: %f)" % (totalBias) 234 | f.close() 235 | 236 | def test(trainSize, testSize): 237 | global larclasPred, larclasLabl, totalBias, totalCount 238 | larclasPred = {} 239 | larclasLabl = {} 240 | totalBias = 0 241 | totalCount = 0 242 | f = open("data.csv", "r") 243 | f_csv = csv.reader(f) 244 | 245 | teD = [] 246 | for i in range(31-testSize, 31): 247 | x = [i, (i+2)%7, 0, 0, 0, 0] 248 | if (x[1] == 6 or x[1]==0): 249 | x[3] = 1 250 | elif (x[1] == 5): 251 | x[2] = 1 252 | teD.append(x) 253 | 254 | while (True): 255 | midclass, trD, trL, _, teL = getData(f_csv, trainSize, testSize) 256 | if (midclass == 0): 257 | break 258 | else: 259 | try: 260 | model = sarimaTrain(trL) 261 | teP = sarimaPredict(model, testSize) 262 | except: 263 | teP = xgboostPredict(array(trD), array(trL), array(teD)) 264 | 265 | # count bias of midclass 266 | bias = 0.0 267 | for i in range(0, testSize): 268 | bias += (teP[i]-teL[i])*(teP[i]-teL[i]); 269 | totalBias += bias 270 | totalCount += testSize 271 | bias = math.sqrt(bias/testSize) 272 | print "(Midclass %d predict finished, accuracy: %f)" % (midclass, bias) 273 | # update bias of large class 274 | larclass = int(midclass/100) 275 | if (larclass in larclasPred): 276 | for i in range(0, testSize): 277 | larclasPred[larclass][i] += teP[i] 278 | larclasLabl[larclass][i] += teL[i] 279 | else: 280 | larclasPred[larclass] = teP 281 | larclasLabl[larclass] = teL 282 | #dataLog(midclass, bias, trL, teP, teL) 283 | # print bias of large class 284 | for larclass in larclasPred: 285 | bias = 0.0 286 | for i in range(0, testSize): 287 | d = larclasLabl[larclass][i] - larclasPred[larclass][i] 288 | bias += d*d; 289 | totalBias += bias 290 | totalCount += testSize 291 | bias = math.sqrt(bias/testSize) 292 | print "(Larclass %d predict finished, accuracy: %f)" % (larclass, bias) 293 | 294 | totalBias = math.sqrt(totalBias/totalCount) 295 | print "(Predict finished, accuracy: %f)" % (totalBias) 296 | f.close() 297 | 298 | def submit(trainSize): 299 | global larclasPred 300 | larclasPred = {} 301 | f1 = open("data.csv", "r") 302 | data_csv = csv.reader(f1) 303 | f2 = open("submit.csv", "r") 304 | submit_csv = csv.reader(f2) 305 | submit_csv.next() 306 | 307 | # generate feature 308 | goal = [] 309 | for i in range(1, 31): 310 | x = [i, (i+4)%7, 0, 0, 0, 0] 311 | if (x[1] == 6 or x[1]==0): 312 | x[3] = 1 313 | elif (x[1] == 5): 314 | x[2] = 1 315 | goal.append(x) 316 | goal[0][3] = 1 317 | goal[0][2] = 0 318 | 319 | current = 0 320 | 321 | while (True): 322 | midclass, trD, trL, teD, teL = getData(data_csv, trainSize, 0) 323 | if (midclass == 0): 324 | break 325 | else: 326 | 327 | if (modelChoose[current] == 1): 328 | try: 329 | model = sarimaTrain(trL) 330 | teP = sarimaPredict(model, 30) 331 | except: 332 | teP = xgboostPredict(array(trD), array(trL), array(goal)) 333 | elif (modelChoose[current] == 2): 334 | teP = xgboostPredict(array(trD), array(trL), array(goal)) 335 | else: 336 | teP = zeros(30) 337 | current += 1 338 | 339 | # write file - midclass 340 | for x in teP: 341 | if (x < 0): 342 | x = 0 343 | row = submit_csv.next() 344 | if (int(row[0]) != midclass): 345 | raise KeyError 346 | with open('submit1.csv', 'ab') as f: 347 | writer = csv.writer(f) 348 | 
writer.writerow([row[0], row[1], x]) 349 | 350 | # count larclass 351 | larclass = int(midclass/100) 352 | if (larclass in larclasPred): 353 | for i in range(0, 30): 354 | larclasPred[larclass][i] += teP[i] 355 | else: 356 | larclasPred[larclass] = teP 357 | 358 | # write file - larcalss 359 | oldLC = 0 360 | for row in submit_csv: 361 | larclass = int(row[0]) 362 | if larclass != oldLC: 363 | oldLC = larclass 364 | i = 0 365 | with open('submit1.csv', 'ab') as f: 366 | writer = csv.writer(f) 367 | writer.writerow([row[0], row[1], larclasPred[larclass][i]]) 368 | i+=1 369 | f1.close() 370 | f2.close() 371 | 372 | test(106, 14) 373 | modelselect(106, 14) 374 | #submit(120) -------------------------------------------------------------------------------- /tools/csvloader.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue Oct 24 18:48:57 2017 4 | 5 | @author: wangjun 6 | 7 | 用于从给定的数据集生成训练数据; 8 | 由于训练程序是按照中类顺序(而非日期顺序)训练的,生成的训练数据需使用Excel按中类 9 | 进行排序:) 10 | """ 11 | 12 | import csv 13 | import datetime 14 | 15 | DictHoilday = [1,2,3,49,50,51,52,53,54,55,96,121,173] 16 | DictBeforeHoilday = [46,47,48,120] 17 | DictWorkday = [46, 58, 59] 18 | midClasses = {} 19 | 20 | date = datetime.datetime(2015, 1, 1) 21 | dailyData = {} 22 | # index -0 -1 23 | # middle class-sales count-promotions 24 | # string -float -int 25 | promotions = [] 26 | totalCount = 0 27 | totalPay = 0 28 | lineNum = 1 29 | dayCount = 1 30 | 31 | dataLog = [{}, {}, {}, {}, {}, {}, {}] 32 | 33 | def getHistory(midclass): 34 | total = 0 35 | log = [] 36 | for i in range(0, 7): 37 | try: 38 | temp = dataLog[i][midclass][0] 39 | total += temp 40 | log.append(temp) 41 | except KeyError: 42 | log.append(0) 43 | return log[0], log[1], log[2], total/7 44 | 45 | def writeData(): 46 | global dailyData, promotions, totalCount, totalPay, dayCount, dataLog 47 | day = date.day 48 | month = date.month 49 | week = (date.weekday() + 1) % 7 50 | if (dayCount in DictHoilday): 51 | holiday = 1 52 | beforeHoliday = 0 53 | elif (dayCount in DictBeforeHoilday): 54 | holiday = 0 55 | beforeHoliday = 1 56 | elif (dayCount in DictWorkday): 57 | holiday = 0 58 | if (week==6 or ((dayCount+1) in DictHoilday)): 59 | beforeHoliday = 1 60 | else: 61 | beforeHoliday = 0 62 | elif (week==0 or week==6): 63 | holiday = 1 64 | beforeHoliday = 0 65 | elif (week==5): 66 | holiday = 0 67 | beforeHoliday = 1 68 | else: 69 | holiday = 0 70 | beforeHoliday = 0 71 | promotionClass = {} 72 | for midclass in promotions: 73 | larclass = int(midclass)/100 74 | if larclass in promotionClass: 75 | promotionClass[larclass] = promotionClass[larclass] + 1; 76 | else: 77 | promotionClass[larclass] = 1; 78 | with open('output.csv', 'ab') as f: 79 | writer = csv.writer(f) 80 | for midclass in dailyData: 81 | l1, l2, l3, la = getHistory(midclass) 82 | if (midclass not in midClasses): 83 | continue 84 | else: 85 | midClasses[midclass] = 1 86 | try: 87 | larclass = int(midclass) / 100 88 | if (larclass in promotionClass): 89 | writer.writerow([midclass, dayCount, month, 90 | day, week, beforeHoliday, holiday, 91 | dailyData[midclass][1], 92 | promotionClass[larclass]-dailyData[midclass][1], 93 | l1, l2, l3, la, 94 | totalCount, totalPay, dailyData[midclass][0]]) 95 | else: 96 | writer.writerow([midclass, dayCount, month, 97 | day, week, beforeHoliday, holiday, 98 | 0, 0, l1, l2, l3, la, 99 | totalCount, totalPay, dailyData[midclass][0]]) 100 | except ZeroDivisionError: 101 | pass 102 | #just neglect it 
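        # The pass below writes an explicit zero-sales row for every known
        # middle class that did not appear in today's transactions, so the
        # generated training file stays dense (one row per class per day).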
103 | for midclass in midClasses: 104 | l1, l2, l3, la = getHistory(midclass) 105 | if (midClasses[midclass] == 0): 106 | larclass = int(midclass) / 100 107 | if (larclass in promotionClass): 108 | writer.writerow([midclass, dayCount, month, 109 | day, week, beforeHoliday, holiday, 110 | 0, promotionClass[larclass], 111 | l1, l2, l3, la, 112 | totalCount, totalPay, 0]) 113 | else: 114 | writer.writerow([midclass, dayCount, month, 115 | day, week, beforeHoliday, holiday, 0, 0, 116 | l1, l2, l3, la, 117 | totalCount, totalPay, 0]) 118 | dataLog.insert(0, dailyData) 119 | dataLog.pop() 120 | dailyData = {} 121 | promotions = [] 122 | totalCount = 0 123 | totalPay = 0 124 | dayCount += 1 125 | for midclass in midClasses: 126 | midClasses[midclass] = 0 127 | 128 | with open('example.csv') as f: 129 | f_csv = csv.reader(f) 130 | f_csv.next() 131 | for row in f_csv: 132 | if (int(row[0]) > 100): 133 | midClasses[row[0]] = 0; 134 | 135 | with open('train.csv') as f: 136 | f_csv = csv.reader(f) 137 | f_csv.next() 138 | for row in f_csv: 139 | lineNum += 1 140 | 141 | # check date 142 | day = int(row[7]) % 100 143 | month = int(row[7]) / 100 % 100 144 | tempdate = datetime.datetime(2015, month, day) 145 | while (date != tempdate): 146 | writeData() 147 | date = date.__add__(datetime.timedelta(1)) 148 | 149 | midclass = row[3] 150 | if (midclass in dailyData): 151 | #float(row[13]) or 1 152 | dailyData[midclass][0] = dailyData[midclass][0]+1 153 | totalCount=totalCount+1 154 | try: 155 | totalPay=totalPay+float(row[14]) 156 | except: 157 | pass 158 | else: 159 | dailyData[midclass] = [1, 0] 160 | totalCount=totalCount+1 161 | try: 162 | totalPay=totalPay+float(row[14]) 163 | except: 164 | pass 165 | if (row[16]!='\xb7\xf1'): 166 | dailyData[midclass][1] = 1 167 | if (midclass not in promotions): 168 | promotions.append(midclass) 169 | writeData(); 170 | 171 | 172 | 173 | -------------------------------------------------------------------------------- /tools/csvloader_largeClass.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue Oct 24 18:48:57 2017 4 | 5 | @author: wangjun 6 | 7 | 用于从给定的数据集生成大类训练数据; 8 | 由于训练程序是按照大类顺序(而非日期顺序)训练的,生成的训练数据需使用Excel按中类 9 | 进行排序:) 10 | """ 11 | 12 | import csv 13 | import datetime 14 | 15 | DictHoilday = [1,2,3,49,50,51,52,53,54,55,96,121,173] 16 | DictBeforeHoilday = [46,47,48,120] 17 | DictWorkday = [46, 58, 59] 18 | larClasses = {} 19 | 20 | date = datetime.datetime(2015, 1, 1) 21 | dailyData = {} 22 | # index -0 -1 23 | # large class -sales count-promotions 24 | # string -float -int 25 | totalCount = 0 26 | totalPay = 0 27 | lineNum = 1 28 | dayCount = 1 29 | 30 | dataLog = [{}, {}, {}, {}, {}, {}, {}] 31 | 32 | def getHistory(larclass): 33 | total = 0 34 | log = [] 35 | for i in range(0, 7): 36 | try: 37 | temp = dataLog[i][larclass][0] 38 | total += temp 39 | log.append(temp) 40 | except KeyError: 41 | log.append(0) 42 | return log[0], log[1], log[2], total/7 43 | 44 | def writeData(): 45 | global dailyData, totalCount, totalPay, dayCount, dataLog 46 | day = date.day 47 | month = date.month 48 | week = (date.weekday() + 1) % 7 49 | if (dayCount in DictHoilday): 50 | holiday = 1 51 | beforeHoliday = 0 52 | elif (dayCount in DictBeforeHoilday): 53 | holiday = 0 54 | beforeHoliday = 1 55 | elif (dayCount in DictWorkday): 56 | holiday = 0 57 | if (week==6 or ((dayCount+1) in DictHoilday)): 58 | beforeHoliday = 1 59 | else: 60 | beforeHoliday = 0 61 | elif (week==0 or week==6): 62 | 
holiday = 1 63 | beforeHoliday = 0 64 | elif (week==5): 65 | holiday = 0 66 | beforeHoliday = 1 67 | else: 68 | holiday = 0 69 | beforeHoliday = 0 70 | with open('lcoutput.csv', 'ab') as f: 71 | writer = csv.writer(f) 72 | for larclass in dailyData: 73 | l1, l2, l3, la = getHistory(larclass) 74 | if (larclass not in larClasses): 75 | continue 76 | else: 77 | larClasses[larclass] = 1 78 | try: 79 | writer.writerow([larclass, dayCount, month, 80 | day, week, beforeHoliday, holiday, 81 | dailyData[larclass][1], 82 | l1, l2, l3, la, 83 | totalCount, totalPay, dailyData[larclass][0]]) 84 | except ZeroDivisionError: 85 | pass 86 | #just neglect it 87 | for larclass in larClasses: 88 | l1, l2, l3, la = getHistory(larclass) 89 | if (larClasses[larclass] == 0): 90 | writer.writerow([larclass, dayCount, month, 91 | day, week, beforeHoliday, holiday, 92 | 0, 93 | l1, l2, l3, la, 94 | totalCount, totalPay, 0]) 95 | dataLog.insert(0, dailyData) 96 | dataLog.pop() 97 | dailyData = {} 98 | totalCount = 0 99 | totalPay = 0 100 | dayCount += 1 101 | for larclass in larClasses: 102 | larClasses[larclass] = 0 103 | 104 | with open('example.csv') as f: 105 | f_csv = csv.reader(f) 106 | f_csv.next() 107 | for row in f_csv: 108 | if (int(row[0]) < 100): 109 | larClasses[row[0]] = 0; 110 | 111 | with open('train.csv') as f: 112 | f_csv = csv.reader(f) 113 | f_csv.next() 114 | for row in f_csv: 115 | lineNum += 1 116 | 117 | # check date 118 | day = int(row[7]) % 100 119 | month = int(row[7]) / 100 % 100 120 | tempdate = datetime.datetime(2015, month, day) 121 | while (date != tempdate): 122 | writeData() 123 | date = date.__add__(datetime.timedelta(1)) 124 | 125 | larclass = row[1] 126 | if (larclass in dailyData): 127 | #float(row[13]) or 1 128 | dailyData[larclass][0] = dailyData[larclass][0]+1 129 | totalCount=totalCount+1 130 | try: 131 | totalPay=totalPay+float(row[14]) 132 | except: 133 | pass 134 | else: 135 | dailyData[larclass] = [1, 0] 136 | totalCount=totalCount+1 137 | try: 138 | totalPay=totalPay+float(row[14]) 139 | except: 140 | pass 141 | if (row[16]!='\xb7\xf1'): 142 | dailyData[larclass][1] = 1 143 | writeData(); 144 | 145 | 146 | 147 | -------------------------------------------------------------------------------- /tools/dataModify.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Fri Nov 03 20:30:09 2017 4 | 5 | @author: wangjun 6 | """ 7 | 8 | import csv 9 | import pandas as pd 10 | from statsmodels.tsa.statespace.sarimax import SARIMAX 11 | import datetime as dt 12 | import arimaPredicter 13 | 14 | dateToModify = [34, 44, 89, 98, 105, 150, 211] 15 | 16 | ap = arimaPredicter.predicter() 17 | index = ap.createIndex(dt.datetime(2015,1,1), 243) 18 | 19 | def getData(csvReader, count): 20 | data = [] 21 | label = [] 22 | try: 23 | for x in range(0, count): 24 | row = csvReader.next() 25 | data.append(row[:-1]) 26 | label.append(int(row[-1])) 27 | return int(row[0]), data, label 28 | except StopIteration: 29 | return 0, [], [] 30 | 31 | def modifyFile(reader, writer, count): 32 | global dateToModify, index 33 | while (True): 34 | clas, data, label = getData(reader, count) 35 | if (clas == 0): 36 | break 37 | data0 = pd.Series(label) 38 | data0.index = pd.Index(index) 39 | try: 40 | model = SARIMAX(data0, order=(1,1,1), seasonal_order=(0,1,1,7)) 41 | result = model.fit() 42 | except: 43 | print("%d: failed to train sarimax model, abort" % clas) 44 | for i in range(0, count): 45 | writer.writerow(data[i] + 
[label[i]]) 46 | continue 47 | for i in dateToModify: 48 | label[i] = round(result.predict(i, i)[0]) 49 | if (label[i] < 0): 50 | label[i] = 0 51 | for i in range(0, count): 52 | writer.writerow(data[i] + [label[i]]) 53 | 54 | f1 = open("data.csv", "r") 55 | reader = csv.reader(f1) 56 | f2 = open('datam.csv', 'wb') 57 | writer = csv.writer(f2) 58 | modifyFile(reader, writer, 243) 59 | f1.close() 60 | f2.close() 61 | 62 | f1 = open("lcdata.csv", "r") 63 | reader = csv.reader(f1) 64 | f2 = open('lcdatam.csv', 'wb') 65 | writer = csv.writer(f2) 66 | modifyFile(reader, writer, 243) 67 | f1.close() 68 | f2.close() 69 | 70 | -------------------------------------------------------------------------------- /tools/fileChecker.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Fri Nov 10 18:55:56 2017 4 | 5 | @author: wangjun 6 | """ 7 | 8 | import csv 9 | 10 | f1 = open("submit1.csv", "r") 11 | f1_csv = csv.reader(f1) 12 | 13 | f2 = open("submit3.csv", "r") 14 | f2_csv = csv.reader(f2) 15 | 16 | lineNo = 2 17 | row1 = f1_csv.next() 18 | row2 = f2_csv.next() 19 | 20 | while (True): 21 | try: 22 | row1 = f1_csv.next() 23 | row2 = f2_csv.next() 24 | except StopIteration: 25 | break 26 | if (int(row1[2])!=int(row2[2])): 27 | print lineNo 28 | i = input() 29 | lineNo += 1 -------------------------------------------------------------------------------- /v3/Readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/v3/Readme.txt -------------------------------------------------------------------------------- /v3/Version_3.py: -------------------------------------------------------------------------------- 1 | from sklearn.ensemble import RandomForestRegressor 2 | import numpy as np 3 | import csv 4 | 5 | 6 | mid_class_num = 134 7 | large_class_num = 14 8 | class_codes = [] 9 | train_set_x = {} 10 | train_set_y = {} 11 | test_set_x = {} 12 | test_set_y = {} 13 | May_set_x = {} 14 | May_set_y = {} 15 | large_codes = ['10', '11', '12', '13', '15', '20', '21', '22', '23', '30', '31', '32', '33', '34'] 16 | commit_codes = [] 17 | 18 | accumulate_err = 0 19 | 20 | 21 | # 载入训练和测试模型的数据(不包括5月份的) 22 | def load_data(): 23 | with open('train.csv') as input_file: 24 | input_csv = csv.reader(input_file) 25 | day = 0 26 | for row in input_csv: 27 | code = row[0] 28 | if day == 0: 29 | class_codes.append(code) 30 | train_set_x[code] = [] 31 | train_set_y[code] = [] 32 | x = list(map(float, row[1:-1])) 33 | # 将大类的feature增加一项:预测的当天的对应中类customer之和, 初始化为0 34 | if code in large_codes: 35 | x.append(0) 36 | train_set_x[code].append(x) 37 | train_set_y[code].append(float(row[-1])) 38 | day = (day + 1) % 100 39 | with open('test.csv') as input_file: 40 | input_csv = csv.reader(input_file) 41 | day = 0 42 | for row in input_csv: 43 | code = row[0] 44 | if day == 0: 45 | test_set_x[code] = [] 46 | test_set_y[code] = [] 47 | x = list(map(float, row[1:-1])) 48 | # 将大类的feature增加一项:预测的当天的对应中类customer之和, 初始化为0 49 | if code in large_codes: 50 | x.append(0) 51 | test_set_x[code].append(x) 52 | test_set_y[code].append(float(row[-1])) 53 | day = (day + 1) % 20 54 | 55 | 56 | def load_May_data(): 57 | with open('May_input.csv') as input_file: 58 | input_csv = csv.reader(input_file) 59 | day = 0 60 | for row in input_csv: 61 | code = row[0] 62 | if code in commit_codes: 63 | if day == 0: 64 | May_set_x[code] = [] 65 | 
May_set_x[code].append(list(map(float, row[1:]))) 66 | day = (day + 1) % 30 67 | 68 | 69 | # 修改大类feature的最后一项(大类中中类的预测销量和) 70 | def modify_large_feature(type, class_code, pred): 71 | class_code = class_code[:2] 72 | if type == 'train': 73 | for day in range(len(train_set_x[class_code])): 74 | train_set_x[class_code][day][-1] += pred[day] 75 | if type == 'test': 76 | for day in range(len(test_set_x[class_code])): 77 | test_set_x[class_code][day][-1] += pred[day] 78 | 79 | 80 | def train_test_eval(train_x, train_y, test_x, test_y, params=None): 81 | # train 82 | if params is None: 83 | rf = RandomForestRegressor() 84 | else: 85 | rf = RandomForestRegressor(n_estimators=params['n_estimators'], oob_score=params['oob_score']) 86 | rf.fit(train_x, train_y) 87 | 88 | # test 89 | ypred = np.asarray(list(map(round, rf.predict(test_x)))) 90 | 91 | # evaluation 92 | rmse = np.sqrt(((test_y - ypred) ** 2).mean()) 93 | global accumulate_err 94 | accumulate_err += np.sum((test_y - ypred) ** 2) 95 | 96 | # this is used for modifying large class feature 97 | train_predict = rf.predict(train_x) 98 | 99 | return rf, ypred, rmse, train_predict 100 | 101 | 102 | # 为每一个类训练一个模型,如果params为None,则预测5月份的销量;否则用params测试,不预测5月份,并将结果RMSE写到 调参.txt 中 103 | def run_for_classes(params=None): 104 | output = [] 105 | for code in class_codes: 106 | if code not in commit_codes: 107 | continue 108 | model, ypred, rmse, train_predict = train_test_eval(train_set_x[code], train_set_y[code], test_set_x[code], test_set_y[code], params) 109 | if code in large_codes: 110 | modify_large_feature('train', code, train_predict) 111 | modify_large_feature('test', code, ypred) 112 | if params is None: 113 | print('class: ', code, ' RMSE: ', rmse) 114 | 115 | # prediction for May 116 | predict_May(model, code) 117 | 118 | else: 119 | output.append('class: ' + code + ' RMSE: ' + str(rmse) + '\n') 120 | 121 | if params is not None: 122 | global accumulate_err 123 | with open('调参.txt', 'a') as output_file: 124 | output_file.write('n_estimators=' + str(params['n_estimators']) + ' oob_score=' + str(params['oob_score']) + '\n') 125 | output_file.writelines(output) 126 | output_file.write('total RMSE: ' + str(accumulate_err / 2960)) 127 | accumulate_err = 0 128 | 129 | 130 | # 调参 131 | def run_for_classes_params(): 132 | for n_estimators in range(50, 160, 10): 133 | params = {'n_estimators': n_estimators, 'oob_score': False} 134 | run_for_classes(params) 135 | params = {'n_estimators': n_estimators, 'oob_score': True} 136 | run_for_classes(params) 137 | 138 | 139 | def predict_May(rfmodel, code): 140 | ypred = rfmodel.predict(May_set_x[code]) 141 | ypred = list(map(round, ypred)) 142 | May_set_y[code] = ypred 143 | large_code = code[:2] 144 | for day in range(30): 145 | May_set_x[large_code][day][-1] += ypred[day] 146 | 147 | 148 | # 获取提交文件中需要提交的codes,保存在commit_codes中 149 | def codes_list_out(): 150 | global commit_codes 151 | codes = [0] 152 | with open('commit_empty.csv') as native_set_file: 153 | native_csv = csv.reader(native_set_file) 154 | next(native_csv) 155 | for row in native_csv: 156 | if row[0] != codes[-1]: 157 | codes.append(row[0]) 158 | commit_codes = codes[1:] 159 | 160 | if __name__ == '__main__': 161 | load_data() 162 | codes_list_out() 163 | load_May_data() 164 | 165 | # 不调参,用默认参数预测5月份,结果保存在字典May_set_y中 166 | run_for_classes() 167 | # write the predicted results of May 168 | with open('submit.csv', 'w', newline='') as output_file: 169 | output_csv = csv.writer(output_file) 170 | output_csv.writerow(['编码', '日期', '销量']) 171 | for 
-------------------------------------------------------------------------------- /v3/commit_empty.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/v3/commit_empty.csv
-------------------------------------------------------------------------------- /v3/five_fold.py: -------------------------------------------------------------------------------- 1 | # Five-fold cross-prediction, used for the v5 stacking 2 | 3 | import csv 4 | import numpy as np 5 | from sklearn.ensemble import RandomForestRegressor 6 | 7 | 8 | commit_codes = [] 9 | all_x = {} 10 | all_y = {} 11 | all_pred = {} 12 | 13 | 14 | def load_all_data(): 15 | global all_x, all_y 16 | with open('features.csv') as input_file: 17 | input_csv = csv.reader(input_file) 18 | next(input_csv) 19 | for row in input_csv: 20 | feature = list(map(float, row[1:-1])) 21 | if len(row[0]) == 2: 22 | feature.append(0) 23 | if row[0] not in all_x: 24 | all_x[row[0]] = [feature] 25 | all_y[row[0]] = [float(row[-1])] 26 | else: 27 | all_x[row[0]].append(feature) 28 | all_y[row[0]].append(float(row[-1])) 29 | 30 | 31 | def get_day(date): 32 | date = int(date) 33 | if date < 20150132: 34 | return date - 20150100 35 | elif date < 20150229: 36 | return date - 20150200 + 31 37 | elif date < 20150332: 38 | return date - 20150300 + 59 39 | else: 40 | return date - 20150400 + 90 # Jan + Feb + Mar = 31 + 28 + 31 = 90 days 41 | 42 | 43 | # Call five_fold_params_pred with different parameters 44 | def five_fold_pred(): 45 | global all_pred 46 | for n_estimators in range(50, 160, 10): 47 | print('n_estimators= ', n_estimators) 48 | params = {'n_estimators': n_estimators, 'oob_score': False} 49 | five_fold_params_pred(params) 50 | # write back to file 51 | output = [] 52 | with open('five_fold_feature.csv') as input_file: 53 | input_csv = csv.reader(input_file) 54 | output.append(next(input_csv)) 55 | for row in input_csv: 56 | output.append(row + [str(all_pred[row[0]][get_day(row[1])-1])]) 57 | with open('five_fold_feature_v3.csv', 'w', newline='') as output_file: 58 | output_csv = csv.writer(output_file) 59 | for row in output: 60 | output_csv.writerow(row) 61 | 62 | # clear all_pred 63 | all_pred = {} 64 | 65 | 66 | # Five-fold cross-prediction with the given parameters 67 | def five_fold_params_pred(params): 68 | global commit_codes, all_pred 69 | for code in commit_codes: 70 | if code not in all_pred: 71 | all_pred[code] = np.zeros(120) 72 | if code not in all_x: # some classes are absent from the raw data 73 | continue 74 | for i in range(5): 75 | train_x, train_y, test_x = get_fold_set(code, i) 76 | rf = RandomForestRegressor(n_estimators=params['n_estimators'], oob_score=params['oob_score']) 77 | rf.fit(train_x, train_y) 78 | ypred = rf.predict(test_x) 79 | # store into all_pred 80 | for index in range(24): 81 | all_pred[code][i*24+index] = ypred[index] 82 | 83 | # update the last feature of the corresponding large class 84 | large_code = code[:2] 85 | for day in range(120): 86 | all_x[large_code][day][-1] += all_pred[code][day] 87 | 88 | 89 | def get_fold_set(code, fold_index): 90 | train_x, train_y, test_x = [], [], [] 91 | for i in range(120): 92 | if (i >= fold_index * 24) and (i < (fold_index + 1) * 24): 93 | test_x.append(all_x[code][i]) 94 | else: 95 | train_x.append(all_x[code][i]) 96 | train_y.append(all_y[code][i]) 97 | return train_x, train_y, test_x 98 |
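# A small self-check (an editorial addition) for get_day above; the April
# branch is easy to get wrong because Jan + Feb + Mar = 90 days. The expected
# values follow directly from the 2015 calendar.
def _check_get_day():
    assert get_day('20150101') == 1
    assert get_day('20150228') == 59
    assert get_day('20150301') == 60
    assert get_day('20150401') == 91
    assert get_day('20150430') == 120  # matches the 120 rows per code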
99 | 100 | # Collect the codes required by the submission file into commit_codes 101 | def codes_list_out(): 102 | global commit_codes 103 | codes = [0] 104 | with open('commit_empty.csv') as native_set_file: 105 | native_csv = csv.reader(native_set_file) 106 | next(native_csv) 107 | for row in native_csv: 108 | if row[0] != codes[-1]: 109 | codes.append(row[0]) 110 | commit_codes = codes[1:] 111 | 112 | 113 | # Initialize the result file 114 | def initialize_file(): 115 | global commit_codes 116 | with open('five_fold_feature.csv', 'w', newline='') as output_file: 117 | output_csv = csv.writer(output_file) 118 | output_csv.writerow(['code', 'date', 'models']) 119 | for code in commit_codes: 120 | for date in range(20150101, 20150132): 121 | output_csv.writerow([code, str(date)]) 122 | for date in range(20150201, 20150229): 123 | output_csv.writerow([code, str(date)]) 124 | for date in range(20150301, 20150332): 125 | output_csv.writerow([code, str(date)]) 126 | for date in range(20150401, 20150431): 127 | output_csv.writerow([code, str(date)]) 128 | 129 | 130 | if __name__ == '__main__': 131 | codes_list_out() 132 | initialize_file() 133 | load_all_data() 134 | five_fold_pred() 135 |
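The five folds above are contiguous 24-day blocks of the 120-day window, each predicted by a forest trained on the remaining 96 days. A minimal sketch of that split, mirroring get_fold_set (an editorial illustration, not part of the script):

def fold_ranges(n_days=120, n_folds=5):
    block = n_days // n_folds
    return [(i * block, (i + 1) * block - 1) for i in range(n_folds)]

print(fold_ranges())  # [(0, 23), (24, 47), (48, 71), (72, 95), (96, 119)]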
-------------------------------------------------------------------------------- /v3/preparedata.py: -------------------------------------------------------------------------------- 1 | import csv 2 | 3 | 4 | codes = [] 5 | 6 | 7 | # Compute the January-April features and save them to features.csv 8 | def get_features(): 9 | holidays = [0, 1, 2, 41, 44, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 93, 94, 95] 10 | 11 | def get_date_in_month(day): 12 | if day <= 31: 13 | return day 14 | elif day <= 59: 15 | return day - 31 16 | elif day <= 90: 17 | return day - 59 18 | else: 19 | return day - 90 20 | 21 | with open('timeseries_customers.csv') as customers_file,\ 22 | open('timeseries_discounts.csv') as discounts_file,\ 23 | open('features.csv', 'w', newline='') as output_file: 24 | input_customers = csv.reader(customers_file) 25 | input_discounts = csv.reader(discounts_file) 26 | output_csv = csv.writer(output_file) 27 | next(input_customers) 28 | next(input_discounts) 29 | output_csv.writerow(['class', 'day_in_week', 'day_in_month', 'holiday', 'discount', 'label']) # mid-class features 30 | for row in input_customers: 31 | class_code = row[0] 32 | discount_row = next(input_discounts) 33 | for day in range(1, 121): 34 | feature_row = [] 35 | feature_row.append(class_code) 36 | day_in_week = day % 7 + 4 37 | feature_row.append(str(day_in_week)) 38 | feature_row.append(str(get_date_in_month(day))) 39 | if day in holidays: 40 | feature_row.append('1') 41 | else: 42 | feature_row.append('0') 43 | feature_row.append(discount_row[day]) 44 | feature_row.append(row[day]) 45 | output_csv.writerow(feature_row) 46 | 47 | 48 | def divide_train_test_set(): 49 | with open('features.csv') as input_file,\ 50 | open('train.csv', 'w', newline='') as train_file,\ 51 | open('test.csv', 'w', newline='') as test_file: 52 | input_csv = csv.reader(input_file) 53 | train_csv = csv.writer(train_file) 54 | test_csv = csv.writer(test_file) 55 | next(input_csv) 56 | day = 0 57 | for row in input_csv: 58 | if day < 100: 59 | train_csv.writerow(row) 60 | day += 1 61 | else: 62 | test_csv.writerow(row) 63 | day = (day + 1) % 120 64 | 65 | 66 | # Compute the May features and save them to May_input.csv; the last feature of a large class (the predicted sales sum of its mid classes) has to be updated on the fly while predicting 67 | def compute_May_features(): 68 | def codes_list_out(): 69 | global codes 70 | codes = [0] 71 | with open('commit_empty.csv') as native_set_file: 72 | native_csv = csv.reader(native_set_file) 73 | next(native_csv) 74 | for row in native_csv: 75 | if row[0] != codes[-1]: 76 | codes.append(row[0]) 77 | codes = codes[1:] 78 | print(codes) 79 | 80 | codes_list_out() 81 | with open('May_input.csv', 'w', newline='') as output_file: 82 | output_csv = csv.writer(output_file) 83 | for code in codes: 84 | for day in range(1, 31): 85 | feature = [code, str(day % 7 + 4), str(day), '0', '0'] 86 | if len(code) == 2: # large class 87 | feature.append('0') 88 | output_csv.writerow(feature) 89 | 90 | 91 | if __name__ == '__main__': 92 | get_features() 93 | divide_train_test_set() 94 | compute_May_features() 95 |
-------------------------------------------------------------------------------- /v3/submit.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/v3/submit.csv
-------------------------------------------------------------------------------- /v3/timeseries_customers.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/v3/timeseries_customers.csv
-------------------------------------------------------------------------------- /v3/timeseries_discounts.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/v3/timeseries_discounts.csv
-------------------------------------------------------------------------------- /v5/KNN_interface.py: -------------------------------------------------------------------------------- 1 | # Calling interface for the version 5 KNN algorithm 2 | 3 | from sklearn.neighbors import KNeighborsRegressor 4 | import numpy as np 5 | import csv 6 | 7 | 8 | # data: array of float, the sales series 9 | # pred_length: int, the number of days to predict 10 | # D_window (window length) and max_k (largest k to try) 11 | def knn(data, pred_length, D_window=14, max_k=7): 12 | if pred_length + D_window >= len(data): 13 | print('ERROR: pred_length or D_window too long') 14 | return None 15 | 16 | ret_ypred = [] 17 | for h in range(4): 18 | train_feature, train_label = get_train_set(data, h, D_window, pred_length) 19 | 20 | e_LOO_arr = np.zeros(max_k) 21 | for k in range(2, max_k + 1): 22 | model = KNeighborsRegressor(n_neighbors=k, weights='uniform', algorithm='auto') 23 | model.fit(train_feature, train_label) 24 | 25 | # Get the k nearest neighbours 26 | dist_list, index_list = model.kneighbors([data[0 - D_window:]]) 27 | k_neighbor_label = [] 28 | for i in index_list[0]: 29 | k_neighbor_label.append(train_label[i]) 30 | 31 | # Prediction based on the k nearest neighbours 32 | ypred = model.predict([data[0-D_window:]]) 33 | ypred = np.asarray(list(map(round, ypred[0]))) 34 | 35 | # Compute e_LOO 36 | e_LOO_arr[k-1] = LOO(k_neighbor_label, ypred, k) 37 | 38 | # Pick the k with the smallest e_LOO 39 | k_min = np.argmin(e_LOO_arr[1:]) + 2 40 | model = KNeighborsRegressor(n_neighbors=k_min, weights='uniform', algorithm='auto') 41 | model.fit(train_feature, train_label) 42 | ypred = model.predict([data[0 - D_window:]]) 43 | ret_ypred += list(map(round, ypred[0])) 44 | 45 | return np.asarray(ret_ypred) 46 | 47 | 48 | def get_train_set(train_data, h, D, pred_length): 49 | feature, label = [], [] 50 | block_len = int(pred_length / 4) 51 | if h != 3: 52 | for i in range(len(train_data) - D - block_len * (h + 1) + 1): 53 | feature.append(train_data[i:i + D]) 54 | label.append(train_data[i + D + block_len * h:i + D + block_len * h + block_len]) 55 | else: 56 | for i in range(len(train_data) - D - pred_length + 1): 57 | feature.append(train_data[i:i + D]) 58 | label.append(train_data[i + D + 3 * block_len:i + D + pred_length]) 59 | return np.array(feature), np.array(label) 60 |
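# A shape sketch (an editorial addition) for get_train_set above. With a
# 120-day series, D_window=14 and pred_length=30 (so block_len=7), horizons
# h=0..2 map 14-day input windows to 7-day output blocks, while h=3 maps them
# to the final 9-day block (days 22-30 of the horizon).
def _mimo_shapes_example():
    data = np.arange(120.0)
    for h in range(4):
        feature, label = get_train_set(data, h, 14, 30)
        print(h, feature.shape, label.shape)  # label width is 7, 7, 7, then 9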
61 | 62 | # Compute LOO, used to choose k (the number of neighbours) 63 | def LOO(k_neighbor_label, ypred, k): 64 | ret = 0 65 | for neighbor in k_neighbor_label: 66 | ret = ret + ((neighbor - ypred) ** 2).sum() 67 | ret = ret * k / (k - 1)**2 68 | # ret = ret / (k)**2 69 | return ret 70 | 71 | 72 | def test(): 73 | with open('timeseries_customers_processed.csv') as input_file: 74 | input_csv = csv.reader(input_file) 75 | next(input_csv) 76 | row = next(input_csv) 77 | data = list(map(float, row[1:])) 78 | print(knn(data, 30)) 79 | 80 | 81 | if __name__ == '__main__': 82 | test() 83 |
-------------------------------------------------------------------------------- /v5/Readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/v5/Readme.txt
-------------------------------------------------------------------------------- /v5/Version_5.py: -------------------------------------------------------------------------------- 1 | import csv 2 | from sklearn.neighbors import KNeighborsRegressor 3 | import numpy as np 4 | 5 | from modify_submit import change_pred 6 | 7 | 8 | def main_fun(): 9 | class_codes = ['1201', '2011', '12', '15', '20', '22', '23', '30'] 10 | with open('timeseries_customers_processed.csv') as input_file: 11 | input_csv = csv.reader(input_file) 12 | next(input_csv) 13 | for row in input_csv: 14 | if row[0] in class_codes: 15 | # MIMO_KNN_test(row) 16 | # MIMO_KNN_LOO_test(row) 17 | MIMO_KNN_LOO_May(row) 18 | 19 | 20 | # Split off a test set to try different parameters (D_window, k); does not predict May 21 | def MIMO_KNN_test(data): 22 | code = data[0] 23 | data = list(map(float, data[1:])) 24 | train_data = data[:90] 25 | test_data = data[90:] 26 | 27 | # Train one model per time segment; the four segments are 7, 7, 7 and 9 days 28 | D_window = 14 29 | for h in range(4): 30 | train_feature, train_label = get_train_set(train_data, h, D_window) 31 | y_label = get_test_label(test_data, h) 32 | 33 | for k in range(1, 8): 34 | model = KNeighborsRegressor(n_neighbors=k, weights='uniform', algorithm='auto') 35 | model.fit(train_feature, train_label) 36 | 37 | ypred = model.predict([train_data[0-D_window:]]) 38 | ypred = np.array(list(map(round, ypred[0]))) 39 | 40 | rmse = np.sqrt(((ypred - y_label) ** 2).mean()) 41 | print(code, ' h=', h, ' k=', k, ' rmse=', rmse) 42 | 43 | 44 | # Split off a test set and implement the method from the paper; does not predict May 45 | def MIMO_KNN_LOO_test(data): 46 | code = data[0] 47 | data = list(map(float, data[1:])) 48 | train_data = data[:90] 49 | test_data = data[90:] 50 | 51 | # Train one model per time segment; the four segments are 7, 7, 7 and 9 days 52 | D_window = 14 53 | max_k = 7 54 | for h in range(4): 55 | train_feature, train_label = get_train_set(train_data, h, D_window) 56 | y_label = get_test_label(test_data, h) 57 | 58 | e_LOO_arr = np.zeros(max_k) 59 | for k in range(2, max_k + 1): 60 | model = KNeighborsRegressor(n_neighbors=k, weights='uniform', algorithm='auto') 61 | model.fit(train_feature, train_label) 62 | 63 | # Get the k nearest neighbours 64 | dist_list, index_list = model.kneighbors([train_data[0 - D_window:]]) 65 | k_neighbor_label = [] 66 | for i in index_list[0]: 67 | k_neighbor_label.append(train_label[i]) 68 | 69 | # Prediction based on the k nearest neighbours 70 | ypred = model.predict([train_data[0-D_window:]]) 71 | ypred = np.asarray(list(map(round, ypred[0]))) 72 | rmse = np.sqrt(((ypred - y_label) ** 2).mean()) 73 | print(code, ' h=', h, ' k=', k, ' rmse=', rmse) 74 | 75 | # Compute e_LOO 76 | e_LOO_arr[k-1] = LOO(k_neighbor_label, ypred, k) 77 | 78 | # Pick the k with the smallest e_LOO 79 | k_min = np.argmin(e_LOO_arr[1:]) + 2 80 | print('k_min=', k_min) 81 | 82 |
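# The four horizons used below carve May into 7/7/7/9-day segments; this tiny
# helper (an editorial addition) just documents that mapping.
def _may_segments():
    # h = 0..3 -> May days 1-7, 8-14, 15-21, 22-30, matching get_train_set
    return [(1, 7), (8, 14), (15, 21), (22, 30)]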
83 | # Use the whole dataset and implement the method from the paper; predicts the May sales 84 | def MIMO_KNN_LOO_May(data): 85 | code = data[0] 86 | data = list(map(float, data[1:])) 87 | 88 | D_window = 14 89 | max_k = 7 90 | pred_May = [] 91 | for h in range(4): 92 | train_feature, train_label = get_train_set(data, h, D_window) 93 | e_LOO_arr = np.zeros(max_k) 94 | for k in range(2, max_k + 1): 95 | model = KNeighborsRegressor(n_neighbors=k, weights='uniform', algorithm='auto') 96 | model.fit(train_feature, train_label) 97 | 98 | # Get the k nearest neighbours 99 | dist_list, index_list = model.kneighbors([data[0 - D_window:]]) 100 | k_neighbor_label = [] 101 | for i in index_list[0]: 102 | k_neighbor_label.append(train_label[i]) 103 | 104 | # Prediction based on the k nearest neighbours 105 | ypred = model.predict([data[0 - D_window:]]) 106 | ypred = np.asarray(list(map(round, ypred[0]))) 107 | 108 | # Compute e_LOO 109 | e_LOO_arr[k - 1] = LOO(k_neighbor_label, ypred, k) 110 | 111 | # Pick the k with the smallest e_LOO 112 | k_min = np.argmin(e_LOO_arr[1:]) + 2 113 | 114 | # Predict with k = k_min 115 | model = KNeighborsRegressor(n_neighbors=k_min, weights='uniform', algorithm='auto') 116 | model.fit(train_feature, train_label) 117 | ypred = model.predict([data[0 - D_window:]]) 118 | ypred = list(map(round, ypred[0])) 119 | pred_May = pred_May + ypred 120 | 121 | print(pred_May) 122 | # Replace the predictions for this code in the submission file 123 | change_pred(code, pred_May) 124 | 125 | 126 | # Compute LOO, used to choose k (the number of neighbours) 127 | def LOO(k_neighbor_label, ypred, k): 128 | ret = 0 129 | for neighbor in k_neighbor_label: 130 | ret = ret + ((neighbor - ypred) ** 2).sum() 131 | ret = ret * k / (k - 1)**2 132 | # ret = ret / (k)**2 133 | return ret 134 | 135 | 136 | def get_train_set(train_data, h, D): 137 | feature, label = [], [] 138 | if h != 3: 139 | for i in range(len(train_data) - D - 7 * (h+1) + 1): 140 | feature.append(train_data[i:i+D]) 141 | label.append(train_data[i+D+7*h:i+D+7*h+7]) 142 | else: 143 | for i in range(len(train_data) - D - 30 + 1): 144 | feature.append(train_data[i:i+D]) 145 | label.append(train_data[i+D+21:i+D+30]) 146 | return np.array(feature), np.array(label) 147 | 148 | 149 | def get_test_label(test_data, h): 150 | if h != 3: 151 | return test_data[7*h:7*h+7] 152 | else: 153 | return test_data[21:] 154 | 155 | 156 | if __name__ == '__main__': 157 | main_fun() 158 |
-------------------------------------------------------------------------------- /v5/commit_empty.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/v5/commit_empty.csv
-------------------------------------------------------------------------------- /v5/modify_submit.py: -------------------------------------------------------------------------------- 1 | import csv 2 | 3 | 4 | # Read the original prediction file, round the predictions and write them back 5 | def get_round(): 6 | rows = [] 7 | with open('submit.csv') as input_file: 8 | input_csv = csv.reader(input_file) 9 | rows.append(next(input_csv)) 10 | for row in input_csv: 11 | row[2] = str(int(round(float(row[2])))) 12 | rows.append(row) 13 | with open('submit.csv', 'w', newline='') as output_file: 14 | output_csv = csv.writer(output_file) 15 | for row in rows: 16 | output_csv.writerow(row) 17 | 18 | 19 | # Replace the predictions of class `code` in the prediction file with pred 20 | def change_pred(code, pred): 21 | rows = [] 22 | file_name = 'submit_WJ_2.csv' 23 | with open(file_name) as input_file: 24 | input_csv = csv.reader(input_file) 25 | rows.append(next(input_csv)) 26 | i = 0 27 | for row in input_csv: 28 | if row[0] == code: 29 | rows.append([code, row[1], str(pred[i])]) 30 | i
+= 1 31 | else: 32 | rows.append(row) 33 | with open(file_name, 'w', newline='') as output_file: 34 | output_csv = csv.writer(output_file) 35 | for row in rows: 36 | output_csv.writerow(row) 37 | 38 | 39 | if __name__ == '__main__': 40 | get_round() 41 | -------------------------------------------------------------------------------- /v5/submit.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/v5/submit.csv -------------------------------------------------------------------------------- /v5/timeseries_customers_processed.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/v5/timeseries_customers_processed.csv -------------------------------------------------------------------------------- /v5/调参.txt: -------------------------------------------------------------------------------- 1 | 1201 h= 0 k= 1 rmse= 42.6430366113 2 | 1201 h= 0 k= 2 rmse= 38.3723718188 3 | 1201 h= 0 k= 3 rmse= 32.4761047772 4 | 1201 h= 0 k= 4 rmse= 24.5028359411 5 | 1201 h= 0 k= 5 rmse= 24.2706714272 6 | 1201 h= 0 k= 6 rmse= 23.3642368486 7 | 1201 h= 0 k= 7 rmse= 25.0468866937 8 | 1201 h= 1 k= 1 rmse= 31.5300310362 9 | 1201 h= 1 k= 2 rmse= 28.4413534227 10 | 1201 h= 1 k= 3 rmse= 23.3969834544 11 | 1201 h= 1 k= 4 rmse= 21.5311928775 12 | 1201 h= 1 k= 5 rmse= 17.0535644291 13 | 1201 h= 1 k= 6 rmse= 15.5524493595 14 | 1201 h= 1 k= 7 rmse= 17.4633370662 15 | 1201 h= 2 k= 1 rmse= 38.0844925321 16 | 1201 h= 2 k= 2 rmse= 28.5215343758 17 | 1201 h= 2 k= 3 rmse= 23.8729502535 18 | 1201 h= 2 k= 4 rmse= 24.2934772971 19 | 1201 h= 2 k= 5 rmse= 24.4475628222 20 | 1201 h= 2 k= 6 rmse= 26.420499459 21 | 1201 h= 2 k= 7 rmse= 24.835233271 22 | 1201 h= 3 k= 1 rmse= 36.0986303218 23 | 1201 h= 3 k= 2 rmse= 29.8369045237 24 | 1201 h= 3 k= 3 rmse= 29.5872383746 25 | 1201 h= 3 k= 4 rmse= 33.2802005146 26 | 1201 h= 3 k= 5 rmse= 31.1755055639 27 | 1201 h= 3 k= 6 rmse= 31.1048216768 28 | 1201 h= 3 k= 7 rmse= 29.6407753314 29 | 12 h= 0 k= 1 rmse= 53.8582796394 30 | 12 h= 0 k= 2 rmse= 44.5673280603 31 | 12 h= 0 k= 3 rmse= 34.0183858095 32 | 12 h= 0 k= 4 rmse= 32.9145546344 33 | 12 h= 0 k= 5 rmse= 29.392663649 34 | 12 h= 0 k= 6 rmse= 26.9584608416 35 | 12 h= 0 k= 7 rmse= 28.3394613257 36 | 12 h= 1 k= 1 rmse= 69.6593753305 37 | 12 h= 1 k= 2 rmse= 40.9992685927 38 | 12 h= 1 k= 3 rmse= 40.0368671351 39 | 12 h= 1 k= 4 rmse= 41.3502184792 40 | 12 h= 1 k= 5 rmse= 36.9253007747 41 | 12 h= 1 k= 6 rmse= 34.0657271442 42 | 12 h= 1 k= 7 rmse= 33.3902866927 43 | 12 h= 2 k= 1 rmse= 44.7931755006 44 | 12 h= 2 k= 2 rmse= 52.5043373322 45 | 12 h= 2 k= 3 rmse= 47.9634527977 46 | 12 h= 2 k= 4 rmse= 45.7079428355 47 | 12 h= 2 k= 5 rmse= 41.747281894 48 | 12 h= 2 k= 6 rmse= 40.4535372974 49 | 12 h= 2 k= 7 rmse= 39.1421180701 50 | 12 h= 3 k= 1 rmse= 43.1470354331 51 | 12 h= 3 k= 2 rmse= 44.093202437 52 | 12 h= 3 k= 3 rmse= 36.2472381216 53 | 12 h= 3 k= 4 rmse= 34.4079060788 54 | 12 h= 3 k= 5 rmse= 38.8412886812 55 | 12 h= 3 k= 6 rmse= 38.0493486071 56 | 12 h= 3 k= 7 rmse= 37.641989556 57 | 15 h= 0 k= 1 rmse= 14.0813960347 58 | 15 h= 0 k= 2 rmse= 12.2485595361 59 | 15 h= 0 k= 3 rmse= 13.2287257726 60 | 15 h= 0 k= 4 rmse= 12.7065671652 61 | 15 h= 0 k= 5 rmse= 11.87624956 62 | 15 h= 0 k= 6 rmse= 13.4246432771 63 | 15 h= 0 k= 7 rmse= 13.2146747442 64 | 15 h= 1 k= 1 rmse= 9.57675757834 65 | 15 h= 1 k= 2 rmse= 
10.9716280299 66 | 15 h= 1 k= 3 rmse= 11.3157687678 67 | 15 h= 1 k= 4 rmse= 10.8028703988 68 | 15 h= 1 k= 5 rmse= 12.8554179205 69 | 15 h= 1 k= 6 rmse= 12.5323010033 70 | 15 h= 1 k= 7 rmse= 12.388370937 71 | 15 h= 2 k= 1 rmse= 20.4904158781 72 | 15 h= 2 k= 2 rmse= 12.0169917472 73 | 15 h= 2 k= 3 rmse= 11.8937080614 74 | 15 h= 2 k= 4 rmse= 11.1624704725 75 | 15 h= 2 k= 5 rmse= 11.7760027097 76 | 15 h= 2 k= 6 rmse= 12.1629264637 77 | 15 h= 2 k= 7 rmse= 13.0868435863 78 | 15 h= 3 k= 1 rmse= 31.3581462037 79 | 15 h= 3 k= 2 rmse= 20.8123719358 80 | 15 h= 3 k= 3 rmse= 18.6966951072 81 | 15 h= 3 k= 4 rmse= 13.589438938 82 | 15 h= 3 k= 5 rmse= 12.2614526733 83 | 15 h= 3 k= 6 rmse= 13.2781398948 84 | 15 h= 3 k= 7 rmse= 12.7837448865 85 | 20 h= 0 k= 1 rmse= 19.6650523954 86 | 20 h= 0 k= 2 rmse= 17.5970236087 87 | 20 h= 0 k= 3 rmse= 16.9153042486 88 | 20 h= 0 k= 4 rmse= 16.7590587119 89 | 20 h= 0 k= 5 rmse= 16.8402285481 90 | 20 h= 0 k= 6 rmse= 16.5004886991 91 | 20 h= 0 k= 7 rmse= 15.6761515174 92 | 20 h= 1 k= 1 rmse= 11.5201686496 93 | 20 h= 1 k= 2 rmse= 9.63450635631 94 | 20 h= 1 k= 3 rmse= 7.58729930368 95 | 20 h= 1 k= 4 rmse= 8.10614695274 96 | 20 h= 1 k= 5 rmse= 9.25628043697 97 | 20 h= 1 k= 6 rmse= 7.5033965945 98 | 20 h= 1 k= 7 rmse= 7.24532229897 99 | 20 h= 2 k= 1 rmse= 22.5134880207 100 | 20 h= 2 k= 2 rmse= 19.3281403697 101 | 20 h= 2 k= 3 rmse= 17.5829934327 102 | 20 h= 2 k= 4 rmse= 16.5980608769 103 | 20 h= 2 k= 5 rmse= 17.2165487439 104 | 20 h= 2 k= 6 rmse= 16.4409280022 105 | 20 h= 2 k= 7 rmse= 15.524009627 106 | 20 h= 3 k= 1 rmse= 26.1023626006 107 | 20 h= 3 k= 2 rmse= 33.697276393 108 | 20 h= 3 k= 3 rmse= 29.951316307 109 | 20 h= 3 k= 4 rmse= 32.3881422814 110 | 20 h= 3 k= 5 rmse= 25.7046785895 111 | 20 h= 3 k= 6 rmse= 25.2773090489 112 | 20 h= 3 k= 7 rmse= 22.2029257966 113 | 22 h= 0 k= 1 rmse= 43.8438788691 114 | 22 h= 0 k= 2 rmse= 41.6797254107 115 | 22 h= 0 k= 3 rmse= 38.884937818 116 | 22 h= 0 k= 4 rmse= 32.1492490424 117 | 22 h= 0 k= 5 rmse= 31.6650243057 118 | 22 h= 0 k= 6 rmse= 33.4756367623 119 | 22 h= 0 k= 7 rmse= 33.0505710863 120 | 22 h= 1 k= 1 rmse= 29.3841551083 121 | 22 h= 1 k= 2 rmse= 23.6434437635 122 | 22 h= 1 k= 3 rmse= 19.4591808033 123 | 22 h= 1 k= 4 rmse= 19.5137028576 124 | 22 h= 1 k= 5 rmse= 16.9122752479 125 | 22 h= 1 k= 6 rmse= 17.1294598104 126 | 22 h= 1 k= 7 rmse= 16.8492357209 127 | 22 h= 2 k= 1 rmse= 38.7243002335 128 | 22 h= 2 k= 2 rmse= 32.7553576964 129 | 22 h= 2 k= 3 rmse= 33.8979605757 130 | 22 h= 2 k= 4 rmse= 30.356791558 131 | 22 h= 2 k= 5 rmse= 28.7923710234 132 | 22 h= 2 k= 6 rmse= 25.7167889433 133 | 22 h= 2 k= 7 rmse= 31.4764977499 134 | 22 h= 3 k= 1 rmse= 127.657092765 135 | 22 h= 3 k= 2 rmse= 109.449161924 136 | 22 h= 3 k= 3 rmse= 86.6052707902 137 | 22 h= 3 k= 4 rmse= 75.037769277 138 | 22 h= 3 k= 5 rmse= 69.8723081655 139 | 22 h= 3 k= 6 rmse= 62.136255331 140 | 22 h= 3 k= 7 rmse= 57.9065491146 141 | 23 h= 0 k= 1 rmse= 5.63154381269 142 | 23 h= 0 k= 2 rmse= 4.63173518114 143 | 23 h= 0 k= 3 rmse= 4.83828724199 144 | 23 h= 0 k= 4 rmse= 5.80938123219 145 | 23 h= 0 k= 5 rmse= 5.68562994596 146 | 23 h= 0 k= 6 rmse= 4.35150609445 147 | 23 h= 0 k= 7 rmse= 3.76632918272 148 | 23 h= 1 k= 1 rmse= 6.4142698059 149 | 23 h= 1 k= 2 rmse= 6.7005057888 150 | 23 h= 1 k= 3 rmse= 7.15141892803 151 | 23 h= 1 k= 4 rmse= 6.56498281641 152 | 23 h= 1 k= 5 rmse= 9.63436179487 153 | 23 h= 1 k= 6 rmse= 10.8737237449 154 | 23 h= 1 k= 7 rmse= 10.0319859996 155 | 23 h= 2 k= 1 rmse= 5.8064004094 156 | 23 h= 2 k= 2 rmse= 5.92811066778 157 | 23 h= 2 k= 3 rmse= 7.75757150579 
158 | 23 h= 2 k= 4 rmse= 7.67441476216 159 | 23 h= 2 k= 5 rmse= 7.50348547005 160 | 23 h= 2 k= 6 rmse= 6.80538086404 161 | 23 h= 2 k= 7 rmse= 7.34530943247 162 | 23 h= 3 k= 1 rmse= 31.3209195267 163 | 23 h= 3 k= 2 rmse= 26.1043465949 164 | 23 h= 3 k= 3 rmse= 26.5938520968 165 | 23 h= 3 k= 4 rmse= 20.6796134379 166 | 23 h= 3 k= 5 rmse= 18.5126222927 167 | 23 h= 3 k= 6 rmse= 17.3251045002 168 | 23 h= 3 k= 7 rmse= 15.8480509754 169 | 30 h= 0 k= 1 rmse= 17.4396920025 170 | 30 h= 0 k= 2 rmse= 15.286577467 171 | 30 h= 0 k= 3 rmse= 14.799299084 172 | 30 h= 0 k= 4 rmse= 13.4640063851 173 | 30 h= 0 k= 5 rmse= 12.5409692762 174 | 30 h= 0 k= 6 rmse= 12.2409109388 175 | 30 h= 0 k= 7 rmse= 11.5748669357 176 | 30 h= 1 k= 1 rmse= 12.5470542929 177 | 30 h= 1 k= 2 rmse= 11.1238979074 178 | 30 h= 1 k= 3 rmse= 10.0442549022 179 | 30 h= 1 k= 4 rmse= 10.4489658474 180 | 30 h= 1 k= 5 rmse= 9.76706560335 181 | 30 h= 1 k= 6 rmse= 9.5492204658 182 | 30 h= 1 k= 7 rmse= 8.02405103911 183 | 30 h= 2 k= 1 rmse= 13.7995859151 184 | 30 h= 2 k= 2 rmse= 11.9178406627 185 | 30 h= 2 k= 3 rmse= 10.9421559178 186 | 30 h= 2 k= 4 rmse= 11.4155579741 187 | 30 h= 2 k= 5 rmse= 11.1698815423 188 | 30 h= 2 k= 6 rmse= 10.3885622264 189 | 30 h= 2 k= 7 rmse= 9.75646178754 190 | 30 h= 3 k= 1 rmse= 18.5082564159 191 | 30 h= 3 k= 2 rmse= 16.269989379 192 | 30 h= 3 k= 3 rmse= 13.5020710817 193 | 30 h= 3 k= 4 rmse= 13.4553609332 194 | 30 h= 3 k= 5 rmse= 12.1959416121 195 | 30 h= 3 k= 6 rmse= 12.5498267393 196 | 30 h= 3 k= 7 rmse= 12.7345557171 -------------------------------------------------------------------------------- /v6_stacking/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /v6_stacking/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /v6_stacking/.idea/v6_stacking.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 12 | -------------------------------------------------------------------------------- /v6_stacking/Version6_stacking.py: -------------------------------------------------------------------------------- 1 | import csv 2 | from sklearn import linear_model 3 | from sklearn.ensemble import RandomForestRegressor 4 | import xgboost as xgb 5 | import numpy as np 6 | 7 | 8 | train_set_x = {} 9 | train_set_y = {} 10 | test_set_x = {} 11 | test_set_y = {} 12 | commit_codes = [] 13 | 14 | 15 | # 载入训练和测试模型的数据(不包括5月份的) 16 | def load_data(): 17 | with open('train.csv') as input_file: 18 | input_csv = csv.reader(input_file) 19 | day = 0 20 | for row in input_csv: 21 | code = row[0] 22 | if day == 0: 23 | train_set_x[code] = [] 24 | train_set_y[code] = [] 25 | x = list(map(float, row[2:-1])) 26 | train_set_x[code].append(x) 27 | train_set_y[code].append(float(row[-1])) 28 | day = (day + 1) % 100 29 | with open('test.csv') as input_file: 30 | input_csv = csv.reader(input_file) 31 | day = 0 32 | for row in input_csv: 33 | code = row[0] 34 | if day == 0: 35 | test_set_x[code] = [] 36 | test_set_y[code] = [] 37 | x = list(map(float, row[2:-1])) 38 | test_set_x[code].append(x) 39 | test_set_y[code].append(float(row[-1])) 40 | day = (day + 1) % 20 41 | 42 | 43 | # 获取提交文件中需要提交的codes,保存在commit_codes中 44 | def codes_list_out(): 45 | global commit_codes 46 | codes = [0] 47 
| with open('commit_empty.csv') as native_set_file: 48 | native_csv = csv.reader(native_set_file) 49 | next(native_csv) 50 | for row in native_csv: 51 | if row[0] != codes[-1]: 52 | codes.append(row[0]) 53 | commit_codes = codes[1:] 54 | 55 | 56 | def train_test_eval(): 57 | for code in commit_codes: 58 | # model = linear_model.LinearRegression() 59 | model = RandomForestRegressor() 60 | model.fit(train_set_x[code], train_set_y[code]) 61 | ypred = model.predict(test_set_x[code]) 62 | ypred = np.array(list(map(round, ypred))) 63 | rmse = np.sqrt(((test_set_y[code] - ypred) ** 2).mean()) 64 | print(code, ' rmse=', rmse) 65 | 66 | 67 | if __name__ == '__main__': 68 | codes_list_out() 69 | load_data() 70 | train_test_eval() 71 | -------------------------------------------------------------------------------- /v6_stacking/commit_empty.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/v6_stacking/commit_empty.csv -------------------------------------------------------------------------------- /v6_stacking/cv/arima_cv.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Spyder Editor 4 | 5 | This is a temporary script file. 6 | """ 7 | 8 | import datetime as dt 9 | import numpy as np 10 | 11 | import csv 12 | import arimaPredicter 13 | import dataLoader 14 | 15 | index = [dt.datetime(2015,1,x) for x in range(1, 32)] 16 | index = index + [dt.datetime(2015,2,x) for x in (range(1 ,29))] 17 | index = index + [dt.datetime(2015,3,x) for x in range(1, 32)] 18 | index = index + [dt.datetime(2015,4,x) for x in range(1, 31)] 19 | 20 | def sariamOutput(): 21 | loader = dataLoader.loader("datam.csv", "lcdatam.csv") 22 | loader.setSize(120, 0, 0) 23 | 24 | f1 = open("result01.csv", "wb") 25 | writer1 = csv.writer(f1) 26 | f2 = open("result11.csv", "wb") 27 | writer2 = csv.writer(f2) 28 | f3 = open("result12.csv", "wb") 29 | writer3 = csv.writer(f3) 30 | 31 | ap = arimaPredicter.predicter(); 32 | ap.setIndex(index) 33 | 34 | while (True): 35 | midclass, _, trainData, _, _ = loader.getNextMidClass() 36 | if (midclass == 0): 37 | break 38 | 39 | ap.setPara(midclass, (0, 1)) 40 | try: 41 | model = ap.sarimaTrain(midclass, trainData) 42 | result = ap.sarimaPredict(model, 30) 43 | except: 44 | result = np.zeros(30) 45 | for i in range(0, 30): 46 | writer1.writerow([midclass, "201505%02d" % (i+1), result[i]]) 47 | 48 | 49 | ap.setPara(midclass, (1, 1)) 50 | try: 51 | model = ap.sarimaTrain(midclass, trainData) 52 | result = ap.sarimaPredict(model, 30) 53 | except: 54 | result = np.zeros(30) 55 | for i in range(0, 30): 56 | writer2.writerow([midclass, "201505%02d" % (i+1), result[i]]) 57 | 58 | ap.setPara(midclass, (1, 2)) 59 | try: 60 | model = ap.sarimaTrain(midclass, trainData) 61 | result = ap.sarimaPredict(model, 30) 62 | except: 63 | result = np.zeros(30) 64 | for i in range(0, 30): 65 | writer3.writerow([midclass, "201505%02d" % (i+1), result[i]]) 66 | 67 | 68 | while (True): 69 | larclass, _, trainData, _, _ = loader.getNextLarClass() 70 | if (larclass == 0): 71 | break 72 | 73 | ap.setPara(larclass, (0, 1)) 74 | try: 75 | model = ap.sarimaTrain(larclass, trainData) 76 | result = ap.sarimaPredict(model, 30) 77 | except: 78 | result = np.zeros(30) 79 | for i in range(0, 30): 80 | writer1.writerow([larclass, "201505%02d" % (i+1), result[i]]) 81 | 82 | 83 | ap.setPara(larclass, (1, 1)) 84 | try: 85 | model = 
ap.sarimaTrain(larclass, trainData) 86 | result = ap.sarimaPredict(model, 30) 87 | except: 88 | result = np.zeros(30) 89 | for i in range(0, 30): 90 | writer2.writerow([larclass, "201505%02d" % (i+1), result[i]]) 91 | 92 | ap.setPara(larclass, (1, 2)) 93 | try: 94 | model = ap.sarimaTrain(larclass, trainData) 95 | result = ap.sarimaPredict(model, 30) 96 | except: 97 | result = np.zeros(30) 98 | for i in range(0, 30): 99 | writer3.writerow([larclass, "201505%02d" % (i+1), result[i]]) 100 | 101 | f1.close() 102 | f2.close() 103 | f3.close() 104 | loader.closeFiles() 105 | 106 | sariamOutput() -------------------------------------------------------------------------------- /v6_stacking/cv/xgboost_cv.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Spyder Editor 4 | 5 | This is a temporary script file. 6 | """ 7 | 8 | import xgboost as xgb 9 | from numpy import array 10 | import csv 11 | import datetime as dt 12 | 13 | larclasPred = {} 14 | larclasLabl = {} 15 | totalBias = 0 16 | totalCount = 0 17 | 18 | dtIndex = [dt.datetime(2015,1,x) for x in range(1, 32)] 19 | dtIndex = dtIndex + [dt.datetime(2015,2,x) for x in (range(1, 29))] 20 | dtIndex = dtIndex + [dt.datetime(2015,3,x) for x in range(1, 32)] 21 | dtIndex = dtIndex + [dt.datetime(2015,4,x) for x in (range(1, 31))] 22 | 23 | modelChoose = [] 24 | lcModelChoose = [] 25 | arimaParaChoose = {} 26 | 27 | def getData(csvReader, trainCount, testCount): 28 | trainData = [] 29 | testData = [] 30 | trainLabel = [] 31 | testLabel = [] 32 | try: 33 | for x in range(0, trainCount): 34 | row = csvReader.next() 35 | """ 36 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]), 37 | float(row[7]), float(row[8]), float(row[9]), float(row[10]), 38 | float(row[11]), float(row[12])] 39 | """ 40 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]), 41 | float(row[7]), float(row[8])] 42 | trainData.append(data) 43 | trainLabel.append(float(row[15])) 44 | for x in range(0, testCount): 45 | row = csvReader.next() 46 | """ 47 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]), 48 | float(row[7]), float(row[8]), float(row[9]), float(row[10]), 49 | float(row[11]), float(row[12])] 50 | """ 51 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]), 52 | float(row[7]), float(row[8])] 53 | testData.append(data) 54 | testLabel.append(float(row[15])) 55 | return int(row[0]), trainData, trainLabel, testData, testLabel 56 | except StopIteration: 57 | return 0, [], [], [], [] 58 | 59 | def getLCData(csvReader, trainCount, testCount): 60 | trainData = [] 61 | testData = [] 62 | trainLabel = [] 63 | testLabel = [] 64 | try: 65 | for x in range(0, trainCount): 66 | row = csvReader.next() 67 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]), 68 | float(row[7])] 69 | trainData.append(data) 70 | trainLabel.append(float(row[14])) 71 | for x in range(0, testCount): 72 | row = csvReader.next() 73 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]), 74 | float(row[7])] 75 | testData.append(data) 76 | testLabel.append(float(row[14])) 77 | return int(row[0]), trainData, trainLabel, testData, testLabel 78 | except StopIteration: 79 | return 0, [], [], [], [] 80 | 81 | def xgboostPredict(trainData, trainLabel, dataToPredict): 82 | dtrain = xgb.DMatrix(trainData, trainLabel) 83 | params = {"objective": "reg:linear"} 84 | gbm = xgb.train(dtrain=dtrain, params=params) 85 | return 
gbm.predict(xgb.DMatrix(dataToPredict)) 86 | 87 | def simData(data): 88 | ret = data[:] 89 | for i in range(0, len(ret)): 90 | for j in range(4, len(ret[i])): 91 | ret[i][j] = 0 92 | return ret 93 | 94 | def xgboostCV(trainSize): 95 | global larclasPred 96 | larclasPred = {} 97 | f1 = open("datam.csv", "r") 98 | data_csv = csv.reader(f1) 99 | f3 = open("lcdatam.csv", "r") 100 | lc_data_csv = csv.reader(f3) 101 | f4 = open('xgboost_cv.csv', 'wb') 102 | writer = csv.writer(f4) 103 | 104 | split = [int(trainSize/5), int(2*trainSize/5), 105 | int(3*trainSize/5), int(4*trainSize/5)] 106 | 107 | while (True): 108 | midclass, trD, trL, teD, teL = getData(data_csv, trainSize, 0) 109 | if (midclass == 0): 110 | break 111 | else: 112 | trd1 = trD[split[0]:] 113 | trl1 = trL[split[0]:] 114 | ted1 = simData(trD[:split[0]]) 115 | tep1 = xgboostPredict(array(trd1), array(trl1), array(ted1)) 116 | 117 | trd2 = trD[:split[0]]+trD[split[1]:] 118 | trl2 = trL[:split[0]]+trL[split[1]:] 119 | ted2 = simData(trD[split[0]:split[1]]) 120 | tep2 = xgboostPredict(array(trd2), array(trl2), array(ted2)) 121 | 122 | trd3 = trD[:split[1]]+trD[split[2]:] 123 | trl3 = trL[:split[1]]+trL[split[2]:] 124 | ted3 = simData(trD[split[1]:split[2]]) 125 | tep3 = xgboostPredict(array(trd3), array(trl3), array(ted3)) 126 | 127 | trd4 = trD[:split[2]]+trD[split[3]:] 128 | trl4 = trL[:split[2]]+trL[split[3]:] 129 | ted4 = simData(trD[split[2]:split[3]]) 130 | tep4 = xgboostPredict(array(trd4), array(trl4), array(ted4)) 131 | 132 | trd5 = trD[:split[3]] 133 | trl5 = trL[:split[3]] 134 | ted5 = simData(trD[split[3]:]) 135 | tep5 = xgboostPredict(array(trd5), array(trl5), array(ted5)) 136 | 137 | ans = list(tep1) + list(tep2) + list(tep3) + list(tep4) + list(tep5) 138 | 139 | for i in range(0, trainSize): 140 | writer.writerow([midclass, dtIndex[i].strftime("%Y%m%d"), 141 | ans[i]]) 142 | 143 | while (True): 144 | larclass, trD, trL, teD, teL = getLCData(lc_data_csv, trainSize, 0) 145 | if (larclass == 0): 146 | break 147 | else: 148 | trd1 = trD[split[0]:] 149 | trl1 = trL[split[0]:] 150 | ted1 = simData(trD[:split[0]]) 151 | tep1 = xgboostPredict(array(trd1), array(trl1), array(ted1)) 152 | 153 | trd2 = trD[:split[0]]+trD[split[1]:] 154 | trl2 = trL[:split[0]]+trL[split[1]:] 155 | ted2 = simData(trD[split[0]:split[1]]) 156 | tep2 = xgboostPredict(array(trd2), array(trl2), array(ted2)) 157 | 158 | trd3 = trD[:split[1]]+trD[split[2]:] 159 | trl3 = trL[:split[1]]+trL[split[2]:] 160 | ted3 = simData(trD[split[1]:split[2]]) 161 | tep3 = xgboostPredict(array(trd3), array(trl3), array(ted3)) 162 | 163 | trd4 = trD[:split[2]]+trD[split[3]:] 164 | trl4 = trL[:split[2]]+trL[split[3]:] 165 | ted4 = simData(trD[split[2]:split[3]]) 166 | tep4 = xgboostPredict(array(trd4), array(trl4), array(ted4)) 167 | 168 | trd5 = trD[:split[3]] 169 | trl5 = trL[:split[3]] 170 | ted5 = simData(trD[split[3]:]) 171 | tep5 = xgboostPredict(array(trd5), array(trl5), array(ted5)) 172 | 173 | ans = list(tep1) + list(tep2) + list(tep3) + list(tep4) + list(tep5) 174 | 175 | for i in range(0, trainSize): 176 | writer.writerow([larclass, dtIndex[i].strftime("%Y%m%d"), 177 | ans[i]]) 178 | 179 | f1.close() 180 | f3.close() 181 | f4.close() 182 | 183 | xgboostCV(120) -------------------------------------------------------------------------------- /v6_stacking/prepare_data.py: -------------------------------------------------------------------------------- 1 | # 把不同模型结果合并在一个文件中 2 | 3 | import csv 4 | 5 | commit_codes = [] 6 | 7 | 8 | # 获取提交文件中需要提交的codes,保存在commit_codes中 9 
| def codes_list_out(): 10 | global commit_codes 11 | codes = [0] 12 | with open('commit_empty.csv') as native_set_file: 13 | native_csv = csv.reader(native_set_file) 14 | next(native_csv) 15 | for row in native_csv: 16 | if row[0] != codes[-1]: 17 | codes.append(row[0]) 18 | commit_codes = codes[1:] 19 | 20 | 21 | def get_day(date): 22 | date = int(date) 23 | if date < 20150132: 24 | return date - 20150100 25 | elif date < 20150229: 26 | return date - 20150200 + 31 27 | elif date < 20150332: 28 | return date - 20150300 + 59 29 | else: 30 | return date - 20150400 + 90 31 | 32 | 33 | def merge_file(): 34 | features = {} 35 | with open('five_fold_feature_v3.csv') as input_file: 36 | input_csv = csv.reader(input_file) 37 | next(input_csv) 38 | for row in input_csv: 39 | if row[0] not in features: 40 | features[row[0]] = [row] 41 | else: 42 | features[row[0]].append(row) 43 | with open('five_fold_feature_xgboost.csv') as input_file: 44 | input_csv = csv.reader(input_file) 45 | for row in input_csv: 46 | if row[0] in features: 47 | features[row[0]][get_day(row[1])-1] = features[row[0]][get_day(row[1])-1] + row[2:] 48 | # 最后一列是label 49 | with open('timeseries_customers.csv') as input_file: 50 | input_csv = csv.reader(input_file) 51 | for row in input_csv: 52 | if row[0] in features: 53 | for day in range(120): 54 | features[row[0]][day].append(row[day+1]) 55 | with open('merged_feature.csv', 'w', newline='') as output_file: 56 | output_csv = csv.writer(output_file) 57 | for code in commit_codes: 58 | for row in features[code]: 59 | output_csv.writerow(row) 60 | 61 | 62 | def divide_train_test_set(): 63 | with open('merged_feature.csv') as input_file,\ 64 | open('train.csv', 'w', newline='') as train_file,\ 65 | open('test.csv', 'w', newline='') as test_file: 66 | input_csv = csv.reader(input_file) 67 | train_csv = csv.writer(train_file) 68 | test_csv = csv.writer(test_file) 69 | day = 0 70 | for row in input_csv: 71 | if day < 100: 72 | train_csv.writerow(row) 73 | day += 1 74 | else: 75 | test_csv.writerow(row) 76 | day = (day + 1) % 120 77 | 78 | 79 | if __name__ == '__main__': 80 | codes_list_out() 81 | merge_file() 82 | divide_train_test_set() 83 | -------------------------------------------------------------------------------- /v6_stacking/timeseries_customers.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/v6_stacking/timeseries_customers.csv -------------------------------------------------------------------------------- /xgboostPredicter.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Nov 27 21:53:16 2017 4 | 5 | @author: wangjun 6 | """ 7 | 8 | import xgboost as xgb 9 | from numpy import array 10 | import datetime as dt 11 | import numpy 12 | 13 | class predicter: 14 | def __init__(self, params = {"objective":"reg:linear", "max_depth":1, "gamma":2}): 15 | self.params = params 16 | 17 | def setDefaultParams(self, params): 18 | self.params = params 19 | 20 | def xgboostTrain(self, trainData, trainLabel, params = {}): 21 | if (type(trainData)!=numpy.ndarray): 22 | trainData = array(trainData) 23 | if (type(trainLabel)!=numpy.ndarray): 24 | trainLabel = array(trainLabel) 25 | dTrain = xgb.DMatrix(trainData, trainLabel) 26 | if (len(params)==0): 27 | params = self.params 28 | model = xgb.train(dtrain=dTrain, params=params) 29 | return model 30 | 31 | @staticmethod 32 | 
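# Note (editorial addition): xgboostPredict below only wraps the conversion to
# xgb.DMatrix; it accepts plain lists or numpy arrays and returns the raw
# float predictions from the booster.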
def xgboostPredict(model, dataToPredict): 33 | if (type(dataToPredict)!=numpy.ndarray): 34 | dataToPredict = array(dataToPredict) 35 | return model.predict(xgb.DMatrix(dataToPredict)) 36 | 37 | @staticmethod 38 | def simulateFeature(trainData, musk): 39 | for feature in trainData: 40 | for i in musk: 41 | feature[i] = 0 42 | 43 | @staticmethod 44 | def createFeature(date_from, length, zeros, DictHoilday, DictBeforeHoilday, 45 | DictWorkday): 46 | delta = dt.timedelta(days=1) 47 | now = date_from 48 | index = [] 49 | for i in range(0, length): 50 | index.append(now) 51 | now = now + delta 52 | feature = [] 53 | empty = [0 for x in range(0, zeros+4)] 54 | for i in range(0, length): 55 | x = empty[:] 56 | x[0] = index[i].day 57 | x[1] = (index[i].weekday() + 1) % 7 58 | dayCount = i + 1 59 | if (dayCount in DictHoilday): 60 | x[3] = 1 61 | elif (dayCount in DictBeforeHoilday): 62 | x[2] = 1 63 | elif (dayCount in DictWorkday): 64 | if (x[1]==6 or ((dayCount+1) in DictHoilday)): 65 | x[2] = 1 66 | elif (x[1]==0 or x[1]==6): 67 | x[3] = 1 68 | elif (x[1]==5): 69 | x[2] = 1 70 | feature.append(x) 71 | return feature --------------------------------------------------------------------------------
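For reference, a minimal usage sketch of the predicter class above (an editorial addition; the empty holiday dicts are placeholders, and trainData/trainLabel are assumed to exist elsewhere):

import datetime as dt
import xgboostPredicter

p = xgboostPredicter.predicter()
# 30 feature rows for May 2015, with 2 extra zero-padded slots and no holiday info
features = p.createFeature(dt.datetime(2015, 5, 1), 30, 2, {}, {}, {})
print(features[0])  # [day_in_month, weekday, before-holiday flag, holiday flag, 0, 0]
# model = p.xgboostTrain(trainData, trainLabel)
# preds = p.xgboostPredict(model, features)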