├── .gitignore
├── README.md
├── arimaPredicter.py
├── arima_knn
│   ├── KNN_interface.py
│   ├── KNN_interface.pyc
│   ├── Version_5.py
│   ├── arima_knn.py
│   └── modify_submit.py
├── arima_xgboost
│   ├── arima_xgboost.py
│   ├── arima_xgboost_imporve.py
│   ├── arima_xgboost_multi.py
│   └── statanalysis.py
├── data
│   ├── datam.csv
│   ├── example.csv
│   ├── lcdatam.csv
│   ├── submit0.csv
│   ├── submit1.csv
│   └── train.csv
├── dataLoader.py
├── doc
│   ├── compare.docx
│   ├── manual.py
│   ├── parareport.txt
│   ├── report.pdf
│   ├── report.txt
│   ├── ~$ompare.docx
│   └── 特征选择.xlsx
├── plot_pic
│   ├── 1001_customers.jpg
│   ├── 1002_customers.jpg
│   ├── 1004_customers.jpg
│   ├── 1005_customers.jpg
│   ├── 1006_customers.jpg
│   ├── 1007_customers.jpg
│   ├── 1099_customers.jpg
│   ├── 10_customers.jpg
│   ├── 1101_customers.jpg
│   ├── 1102_customers.jpg
│   ├── 1103_customers.jpg
│   ├── 1104_customers.jpg
│   ├── 11_customers.jpg
│   ├── 1201_customers.jpg
│   ├── 1202_customers.jpg
│   ├── 1203_customers.jpg
│   ├── 1205_customers.jpg
│   ├── 12_customers.jpg
│   ├── 1301_customers.jpg
│   ├── 1302_customers.jpg
│   ├── 1306_customers.jpg
│   ├── 1308_customers.jpg
│   ├── 1399_customers.jpg
│   ├── 13_customers.jpg
│   ├── 1401_customers.jpg
│   ├── 1402_customers.jpg
│   ├── 1403_customers.jpg
│   ├── 1404_customers.jpg
│   ├── 14_customers.jpg
│   ├── 1501_customers.jpg
│   ├── 1502_customers.jpg
│   ├── 1503_customers.jpg
│   ├── 1504_customers.jpg
│   ├── 1505_customers.jpg
│   ├── 1507_customers.jpg
│   ├── 1508_customers.jpg
│   ├── 1509_customers.jpg
│   ├── 1510_customers.jpg
│   ├── 1511_customers.jpg
│   ├── 1512_customers.jpg
│   ├── 1513_customers.jpg
│   ├── 1514_customers.jpg
│   ├── 1515_customers.jpg
│   ├── 1516_customers.jpg
│   ├── 1517_customers.jpg
│   ├── 1518_customers.jpg
│   ├── 1519_customers.jpg
│   ├── 1520_customers.jpg
│   ├── 1521_customers.jpg
│   ├── 15_customers.jpg
│   ├── 2001_customers.jpg
│   ├── 2002_customers.jpg
│   ├── 2003_customers.jpg
│   ├── 2004_customers.jpg
│   ├── 2005_customers.jpg
│   ├── 2006_customers.jpg
│   ├── 2007_customers.jpg
│   ├── 2008_customers.jpg
│   ├── 2009_customers.jpg
│   ├── 2010_customers.jpg
│   ├── 2011_customers.jpg
│   ├── 2012_customers.jpg
│   ├── 2013_customers.jpg
│   ├── 2014_customers.jpg
│   ├── 2015_customers.jpg
│   ├── 20_customers.jpg
│   ├── 2101_customers.jpg
│   ├── 2102_customers.jpg
│   ├── 2103_customers.jpg
│   ├── 2104_customers.jpg
│   ├── 2105_customers.jpg
│   ├── 2106_customers.jpg
│   ├── 2107_customers.jpg
│   ├── 2108_customers.jpg
│   ├── 21_customers.jpg
│   ├── 2201_customers.jpg
│   ├── 2202_customers.jpg
│   ├── 2203_customers.jpg
│   ├── 2204_customers.jpg
│   ├── 2205_customers.jpg
│   ├── 2206_customers.jpg
│   ├── 2207_customers.jpg
│   ├── 2208_customers.jpg
│   ├── 2209_customers.jpg
│   ├── 2210_customers.jpg
│   ├── 2211_customers.jpg
│   ├── 2212_customers.jpg
│   ├── 22_customers.jpg
│   ├── 2301_customers.jpg
│   ├── 2302_customers.jpg
│   ├── 2303_customers.jpg
│   ├── 2304_customers.jpg
│   ├── 2305_customers.jpg
│   ├── 2306_customers.jpg
│   ├── 2307_customers.jpg
│   ├── 2308_customers.jpg
│   ├── 2309_customers.jpg
│   ├── 2310_customers.jpg
│   ├── 2311_customers.jpg
│   ├── 2312_customers.jpg
│   ├── 2313_customers.jpg
│   ├── 2314_customers.jpg
│   ├── 2315_customers.jpg
│   ├── 2316_customers.jpg
│   ├── 2317_customers.jpg
│   ├── 23_customers.jpg
│   ├── 3001_customers.jpg
│   ├── 3002_customers.jpg
│   ├── 3003_customers.jpg
│   ├── 3004_customers.jpg
│   ├── 3005_customers.jpg
│   ├── 3006_customers.jpg
│   ├── 3007_customers.jpg
│   ├── 3008_customers.jpg
│   ├── 3009_customers.jpg
│   ├── 3010_customers.jpg
│   ├── 3011_customers.jpg
│   ├── 3012_customers.jpg
│   ├── 3013_customers.jpg
│   ├── 3014_customers.jpg
│   ├── 3015_customers.jpg
│   ├── 3016_customers.jpg
│   ├── 3017_customers.jpg
│   ├── 3018_customers.jpg
│   ├── 30_customers.jpg
│   ├── 3101_customers.jpg
│   ├── 3102_customers.jpg
│   ├── 3104_customers.jpg
│   ├── 3105_customers.jpg
│   ├── 3106_customers.jpg
│   ├── 3107_customers.jpg
│   ├── 3108_customers.jpg
│   ├── 3109_customers.jpg
│   ├── 3110_customers.jpg
│   ├── 3111_customers.jpg
│   ├── 3112_customers.jpg
│   ├── 3113_customers.jpg
│   ├── 3114_customers.jpg
│   ├── 3115_customers.jpg
│   ├── 3116_customers.jpg
│   ├── 3117_customers.jpg
│   ├── 3118_customers.jpg
│   ├── 3119_customers.jpg
│   ├── 3120_customers.jpg
│   ├── 3121_customers.jpg
│   ├── 3122_customers.jpg
│   ├── 3125_customers.jpg
│   ├── 3126_customers.jpg
│   ├── 3128_customers.jpg
│   ├── 31_customers.jpg
│   ├── 3208_customers.jpg
│   ├── 3212_customers.jpg
│   ├── 3213_customers.jpg
│   ├── 3215_customers.jpg
│   ├── 3216_customers.jpg
│   ├── 3217_customers.jpg
│   ├── 3218_customers.jpg
│   ├── 3227_customers.jpg
│   ├── 32_customers.jpg
│   ├── 3301_customers.jpg
│   ├── 3303_customers.jpg
│   ├── 3311_customers.jpg
│   ├── 3313_customers.jpg
│   ├── 3314_customers.jpg
│   ├── 3315_customers.jpg
│   ├── 3316_customers.jpg
│   ├── 3317_customers.jpg
│   ├── 3319_customers.jpg
│   ├── 3320_customers.jpg
│   ├── 3321_customers.jpg
│   ├── 3322_customers.jpg
│   ├── 3323_customers.jpg
│   ├── 3325_customers.jpg
│   ├── 3326_customers.jpg
│   ├── 3328_customers.jpg
│   ├── 3330_customers.jpg
│   ├── 33_customers.jpg
│   ├── 3401_customers.jpg
│   ├── 3402_customers.jpg
│   ├── 3403_customers.jpg
│   ├── 3404_customers.jpg
│   ├── 3405_customers.jpg
│   ├── 3406_customers.jpg
│   ├── 3407_customers.jpg
│   ├── 3408_customers.jpg
│   ├── 3409_customers.jpg
│   ├── 3410_customers.jpg
│   ├── 3412_customers.jpg
│   ├── 3413_customers.jpg
│   ├── 3414_customers.jpg
│   ├── 3415_customers.jpg
│   ├── 3416_customers.jpg
│   ├── 3417_customers.jpg
│   ├── 3419_customers.jpg
│   ├── 3421_customers.jpg
│   ├── 3423_customers.jpg
│   ├── 3424_customers.jpg
│   ├── 3426_customers.jpg
│   ├── 3427_customers.jpg
│   ├── 3428_customers.jpg
│   ├── 3429_customers.jpg
│   ├── 3431_customers.jpg
│   ├── 3432_customers.jpg
│   ├── 3436_customers.jpg
│   ├── 34_customers.jpg
│   └── 异常日期.txt
├── rnn
│   ├── test.py
│   ├── test2.py
│   └── test3.py
├── tools
│   ├── backup.py
│   ├── csvloader.py
│   ├── csvloader_largeClass.py
│   ├── dataModify.py
│   └── fileChecker.py
├── v3
│   ├── May_input.csv
│   ├── Readme.txt
│   ├── Version_3.py
│   ├── commit_empty.csv
│   ├── features.csv
│   ├── five_fold.py
│   ├── five_fold_feature_v3.csv
│   ├── preparedata.py
│   ├── submit.csv
│   ├── test.csv
│   ├── timeseries_customers.csv
│   ├── timeseries_discounts.csv
│   ├── train.csv
│   └── 调参.txt
├── v5
│   ├── KNN_interface.py
│   ├── Readme.txt
│   ├── Version_5.py
│   ├── commit_empty.csv
│   ├── modify_submit.py
│   ├── submit.csv
│   ├── timeseries_customers_processed.csv
│   └── 调参.txt
├── v6_stacking
│   ├── .idea
│   │   ├── misc.xml
│   │   ├── modules.xml
│   │   ├── v6_stacking.iml
│   │   └── workspace.xml
│   ├── Version6_stacking.py
│   ├── commit_empty.csv
│   ├── cv
│   │   ├── arima01.csv
│   │   ├── arima11.csv
│   │   ├── arima12.csv
│   │   ├── arima_cv.py
│   │   ├── result01.csv
│   │   ├── result11.csv
│   │   ├── result12.csv
│   │   ├── xgboost_cv.csv
│   │   └── xgboost_cv.py
│   ├── five_fold_feature_v3.csv
│   ├── five_fold_feature_xgboost.csv
│   ├── merged_feature.csv
│   ├── prepare_data.py
│   ├── test.csv
│   ├── timeseries_customers.csv
│   └── train.csv
└── xgboostPredicter.py
/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | *.pyc
3 | report
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # BDCI2017
2 | 
3 | The 2017 CCF Big Data & Computing Intelligence Contest (BDCI) - supply, sales and inventory management optimization for small supermarkets
4 | 
5 | ## Team members
6 | 
7 | - Wang Jun cnwj@mail.ustc.edu.cn
8 | - Wang Fei wf314159@mail.ustc.edu.cn
9 | 
10 | ## Algorithm description and results report
11 | 
12 | See report.pdf in the doc folder.
13 | 
14 | ## File descriptions
15 | 
16 | ### arimaPredicter.py
17 | 
18 | The wrapped SARIMA predictor
19 | 
20 | ### xgboostPredicter.py
21 | 
22 | The wrapped xgboost predictor
23 | 
24 | ### dataLoader.py
25 | 
26 | The wrapped data-loading class
27 | 
28 | ### data folder
29 | 
30 | Competition data
31 | 
32 | train.csv: the training data provided by the competition
33 | 
34 | example.csv: the sample submission provided by the competition
35 | 
36 | datam.csv: the preprocessed middle-class samples
37 | 
38 | lcdatam.csv: the preprocessed large-class samples
39 | 
40 | submit0.csv, submit1.csv: the two files submitted during the competition
41 | 
42 | ### tools folder
43 | 
44 | Utilities used for preprocessing
45 | 
46 | ### doc folder
47 | 
48 | Related documents
49 | 
50 | report.pdf: the experiment report
51 | 
52 | manual.py: a usage guide for the SARIMA predictor and the xgboost predictor
53 | 
54 | ### arima_knn folder
55 | 
56 | Ensemble learning based on ARIMA and kNN
57 | 
58 | ### arima_xgboost folder
59 | 
60 | Ensemble learning based on ARIMA and xgboost
61 | 
62 | arima_xgboost_multi.py is the ensemble predictor that produced the final competition predictions
63 | 
64 | ### plot_pic folder
65 | 
66 | Sales-versus-time plots
67 | 
68 | ### rnn folder
69 | 
70 | An LSTM-based learner (not wrapped; not used in the final solution)
71 | 
72 | ### v3 folder
73 | 
74 | A random-forest-based learner (not wrapped; not used in the final solution)
75 | 
76 | ### v5 folder
77 | 
78 | A kNN-based learner (not wrapped; not used in the final solution)
79 | 
80 | ### v6_stacking folder
81 | 
82 | A stacking-based ensemble predictor
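83 | 
84 | ## Quick start
85 | 
86 | A minimal sketch of how the wrapped classes fit together, following the calls demonstrated in doc/manual.py (it assumes datam.csv sits in the working directory):
87 | 
88 | ```python
89 | import datetime as dt
90 | 
91 | import arimaPredicter
92 | import dataLoader
93 | 
94 | # load one middle class: 200 days for training, 43 held out for testing
95 | loader = dataLoader.loader("datam.csv")
96 | loader.setSize(200, 43, 0)
97 | midclass, trainData, trainLabel, testData, testLabel = loader.getNextMidClass()
98 | 
99 | # fit a SARIMA model on the labels and forecast the held-out window
100 | ap = arimaPredicter.predicter()
101 | ap.createIndex(dt.datetime(2015, 1, 1), 243)
102 | model = ap.sarimaTrain(trainLabel)
103 | predictLabel = ap.sarimaPredict(model, 43)
104 | ```
105 | 
106 | See doc/manual.py for parameter selection (sarimaParaSelect) and the wrapped xgboost predictor.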
--------------------------------------------------------------------------------
/arimaPredicter.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Wed Nov 15 12:25:37 2017
4 |
5 | @author: wangjun
6 | """
7 |
8 | from numpy import array
9 | from numpy import log
10 | from numpy import exp
11 | import math
12 |
13 | import datetime as dt
14 | import pandas as pd
15 | from statsmodels.tsa.statespace.sarimax import SARIMAX
16 | import statsmodels.api as sm
17 |
18 | import matplotlib.pylab as plt
19 | from statsmodels.tsa.stattools import adfuller
20 |
21 | class predicter():
22 | def __init__(self):
23 | self.ParaChoose = {}
24 | self.dtIndex = []
25 |
26 | def setIndex(self, index):
27 | self.dtIndex = index[:]
28 |
29 | def getIndex(self):
30 | return self.dtIndex
31 |
32 | def createIndex(self, date_from, length):
33 | delta = dt.timedelta(days=1)
34 | now = date_from
35 | self.dtIndex = []
36 | for i in range(0, length):
37 | self.dtIndex.append(now)
38 | now = now + delta
39 | return self.dtIndex
40 |
41 | def setPara(self, clas, para):
42 |         if (type(para)!=tuple or len(para)!=2):
43 |             raise TypeError("para should be an (ar, ma) tuple")
44 | self.ParaChoose[clas] = para
45 |
46 | def getPara(self):
47 | return self.ParaChoose
48 |
49 | def test_stationarity(self, timeseries):
50 |         # Determine rolling statistics
51 | if (type(timeseries) == list):
52 | length = len(timeseries)
53 | timeseries = pd.Series(timeseries)
54 | timeseries.index = pd.Index(self.dtIndex[0:length])
55 |         elif (type(timeseries) != pd.core.series.Series):
56 |             raise TypeError("timeseries should be a list or a pandas Series")
57 | rolmean = timeseries.rolling(window=12,center=False).mean()
58 | rolstd = timeseries.rolling(window=12,center=False).std()
59 |
60 | #Plot rolling statistics:
61 | plt.plot(timeseries, color='blue',label='Original')
62 | plt.plot(rolmean, color='red', label='Rolling Mean')
63 | plt.plot(rolstd, color='black', label = 'Rolling Std')
64 | plt.legend(loc='best')
65 | plt.title('Rolling Mean & Standard Deviation')
66 | plt.show(block=False)
67 |
68 | #Perform Dickey-Fuller test:
69 | print('Results of Dickey-Fuller Test:')
70 | dftest = adfuller(timeseries, autolag='AIC')
71 | dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
72 | for key,value in dftest[4].items():
73 | dfoutput['Critical Value (%s)'%key] = value
74 | print(dfoutput)
75 |
76 | #Get AR and MA parameter
77 | fig = plt.figure(figsize=(12,8))
78 | ax1=fig.add_subplot(211)
79 | fig = sm.graphics.tsa.plot_acf(timeseries, lags=20, ax=ax1)
80 | ax2 = fig.add_subplot(212)
81 | fig = sm.graphics.tsa.plot_pacf(timeseries, lags=20, ax=ax2)
82 | plt.show(block=False)
83 |
84 | def sarimaTrain(self, trainLabel, classNo=0, para=()):
85 | dataLength = len(trainLabel)
86 | data = pd.Series(trainLabel)
87 | for i in range(0, dataLength):
88 | data[i] = log(data[i] + 1)
89 | index = self.dtIndex[0:dataLength]
90 | data.index = pd.Index(index)
91 |
92 | if (len(para) != 2):
93 | try:
94 | (ar, ma) = self.ParaChoose[classNo]
95 | except KeyError:
96 | print("%d: parameter not set, use (1,1) default" % classNo)
97 | (ar, ma) = (1, 1)
98 | return SARIMAX(data, order=(ar,1,ma), seasonal_order=(0,1,1,7)).fit()
99 | else:
100 | return SARIMAX(data, order=(para[0], 1, para[1]), seasonal_order=(0,1,1,7)).fit()
101 |
102 | def sarimaParaSelect(self, classNo, trainLabel, testLabel, useAic=False):
103 | dataLength = len(trainLabel)
104 | data = pd.Series(trainLabel)
105 | for i in range(0, dataLength):
106 | data[i] = log(data[i] + 1)
107 | index = self.dtIndex[0:dataLength]
108 | data.index = pd.Index(index)
109 |
110 | minBias = 99999.0
111 | minAic = 99999.0
112 | (ar, ma) = (0, 0)
113 | label = array(testLabel)
114 | for p, q in [(1, 1), (0, 1), (1, 2), (2, 0), (2, 1), (2, 2)]:
115 | try:
116 | model = SARIMAX(data, order=(p,1,q), seasonal_order=(0,1,1,7)).fit()
117 | output = array(model.forecast(len(testLabel)))
118 | for i in range(0, len(testLabel)):
119 | output[i] = exp(output[i]) - 1
120 | bias = math.sqrt(sum((output-label)*(output-label))/len(testLabel))
121 | if (bias < minBias and (useAic == False or model.aic < minAic)):
122 | (ar, ma) = (p, q)
123 | minBias = bias
124 | minAic = model.aic
125 | bestOutput = output
126 | except:
127 | pass
128 |
129 | if (minBias < 90000.0):
130 | self.ParaChoose[classNo] = (ar, ma)
131 | return ((ar, ma), bestOutput)
132 | else:
133 | raise ValueError
134 |
135 | def checkBias(self, model, trainLabel):
136 | dataLength = len(trainLabel)
137 | data = pd.Series(trainLabel)
138 | index = self.dtIndex[0:dataLength]
139 | data.index = pd.Index(index)
140 |
141 | pred = model.predict()
142 | plt.plot(data, color='blue',label='Original')
143 | plt.plot(pred, color='red', label='Predicted')
144 | plt.show(block=False)
145 | return list(data - pred)
146 |
147 | @staticmethod
148 | def sarimaPredict(model, predictLength):
149 | output = model.forecast(predictLength)
150 | for i in range(0, predictLength):
151 | output[i] = exp(output[i]) - 1
152 | return array(output)
153 |
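154 | 
155 | # A minimal self-test sketch (added annotation; not part of the original
156 | # module). It mirrors the usage shown in doc/manual.py, but fits a synthetic
157 | # series with weekly seasonality instead of the competition data.
158 | if __name__ == "__main__":
159 |     import numpy as np
160 | 
161 |     p = predicter()
162 |     p.createIndex(dt.datetime(2015, 1, 1), 120)
163 |     # 90 days of training data: period-7 seasonality plus noise
164 |     rng = np.random.RandomState(0)
165 |     train = list(50 + 10 * np.sin(2 * np.pi * np.arange(90) / 7) + rng.rand(90))
166 |     model = p.sarimaTrain(train)       # falls back to default parameters (1, 1)
167 |     print(p.sarimaPredict(model, 14))  # forecast the next two weeks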
--------------------------------------------------------------------------------
/arima_knn/KNN_interface.py:
--------------------------------------------------------------------------------
1 | # Note: comments are in English because the IDE does not support Chinese
2 |
3 | from sklearn.neighbors import KNeighborsRegressor
4 | import numpy as np
5 | import csv
6 |
7 | def knn(data, pred_length, D_window=14, max_k=7):
8 | if pred_length + D_window >= len(data):
9 | print('ERROR: pred_length or D_window too long')
10 | return None
11 |
12 | ret_ypred = []
13 | for h in range(4):
14 | train_feature, train_label = get_train_set(data, h, D_window, pred_length)
15 |
16 | e_LOO_arr = np.zeros(max_k)
17 | for k in range(2, max_k + 1):
18 | model = KNeighborsRegressor(n_neighbors=k, weights='uniform', algorithm='auto')
19 | model.fit(train_feature, train_label)
20 |
21 | dist_list, index_list = model.kneighbors([data[0 - D_window:]])
22 | k_neighbor_label = []
23 | for i in index_list[0]:
24 | k_neighbor_label.append(train_label[i])
25 |
26 | ypred = model.predict([data[0-D_window:]])
27 | ypred = np.asarray(list(map(round, ypred[0])))
28 |
29 | e_LOO_arr[k-1] = LOO(k_neighbor_label, ypred, k)
30 |
31 | k_min = np.argmin(e_LOO_arr[1:]) + 2
32 | model = KNeighborsRegressor(n_neighbors=k_min, weights='uniform', algorithm='auto')
33 | model.fit(train_feature, train_label)
34 | ypred = model.predict([data[0 - D_window:]])
35 | ret_ypred += list(map(round, ypred[0]))
36 |
37 | return np.asarray(ret_ypred)
38 |
39 |
40 | def get_train_set(train_data, h, D, pred_length):
41 | feature, label = [], []
42 | block_len = int(pred_length / 4)
43 | if h != 3:
44 | for i in range(len(train_data) - D - block_len * (h + 1) + 1):
45 | feature.append(train_data[i:i + D])
46 | label.append(train_data[i + D + block_len * h:i + D + block_len * h + block_len])
47 | else:
48 | for i in range(len(train_data) - D - pred_length + 1):
49 | feature.append(train_data[i:i + D])
50 | label.append(train_data[i + D + 3 * block_len:i + D + pred_length])
51 | return np.array(feature), np.array(label)
52 |
53 | def LOO(k_neighbor_label, ypred, k):
54 | ret = 0
55 | for neighbor in k_neighbor_label:
56 | ret = ret + ((neighbor - ypred) ** 2).sum()
57 | ret = ret * k / (k - 1)**2
58 | # ret = ret / (k)**2
59 | return ret
60 |
61 |
62 | def test():
63 | with open('timeseries_customers_processed.csv') as input_file:
64 | input_csv = csv.reader(input_file)
65 | next(input_csv)
66 | row = next(input_csv)
67 | data = list(map(float, row[1:]))
68 | print(knn(data, 30))
69 |
70 |
71 | if __name__ == '__main__':
72 | test()
73 |
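74 | 
75 | # Worked example of the windowing above (annotation added for clarity): with
76 | # pred_length=30 and D_window=14 the forecast is split into 4 blocks of
77 | # 7/7/7/9 days (block_len = 30 // 4 = 7). For h=0 a training pair is
78 | # (data[i:i+14], data[i+14:i+21]); for h=3 the label is the final 9 days,
79 | # (data[i:i+14], data[i+35:i+44]). The query feature is always the most
80 | # recent 14 days, data[-14:].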
--------------------------------------------------------------------------------
/arima_knn/KNN_interface.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/arima_knn/KNN_interface.pyc
--------------------------------------------------------------------------------
/arima_knn/Version_5.py:
--------------------------------------------------------------------------------
1 | import csv
2 | from sklearn.neighbors import KNeighborsRegressor
3 | import numpy as np
4 |
5 | from modify_submit import change_pred
6 |
7 |
8 | def main_fun():
9 | class_codes = ['1201', '2011', '12', '15', '20', '22', '23', '30']
10 | with open('timeseries_customers_processed.csv') as input_file:
11 | input_csv = csv.reader(input_file)
12 | next(input_csv)
13 | for row in input_csv:
14 | if row[0] in class_codes:
15 | # MIMO_KNN_test(row)
16 | # MIMO_KNN_LOO_test(row)
17 | MIMO_KNN_LOO_May(row)
18 |
19 |
20 | # Split the data set to test different parameters (D_window, k); does not forecast May sales
21 | def MIMO_KNN_test(data):
22 | code = data[0]
23 | data = list(map(float, data[1:]))
24 | train_data = data[:90]
25 | test_data = data[90:]
26 |
27 |     # Train a separate model for each of the 4 horizons: 7 days, 7 days, 7 days, 9 days
28 | D_window = 14
29 | for h in range(4):
30 | train_feature, train_label = get_train_set(train_data, h, D_window)
31 | y_label = get_test_label(test_data, h)
32 |
33 | for k in range(1, 8):
34 | model = KNeighborsRegressor(n_neighbors=k, weights='uniform', algorithm='auto')
35 | model.fit(train_feature, train_label)
36 |
37 | ypred = model.predict([train_data[0-D_window:]])
38 | ypred = np.array(list(map(round, ypred[0])))
39 |
40 | rmse = np.sqrt(((ypred - y_label) ** 2).mean())
41 | print(code, ' h=', h, ' k=', k, ' rmse=', rmse)
42 |
43 |
44 | # Split the data set and implement the method from the paper; does not forecast May sales
45 | def MIMO_KNN_LOO_test(data):
46 | code = data[0]
47 | data = list(map(float, data[1:]))
48 | train_data = data[:90]
49 | test_data = data[90:]
50 |
51 |     # Train a separate model for each of the 4 horizons: 7 days, 7 days, 7 days, 9 days
52 | D_window = 14
53 | max_k = 7
54 | for h in range(4):
55 | train_feature, train_label = get_train_set(train_data, h, D_window)
56 | y_label = get_test_label(test_data, h)
57 |
58 | e_LOO_arr = np.zeros(max_k)
59 | for k in range(2, max_k + 1):
60 | model = KNeighborsRegressor(n_neighbors=k, weights='uniform', algorithm='auto')
61 | model.fit(train_feature, train_label)
62 |
63 |             # Get the k nearest neighbors
64 | dist_list, index_list = model.kneighbors([train_data[0 - D_window:]])
65 | k_neighbor_label = []
66 | for i in index_list[0]:
67 | k_neighbor_label.append(train_label[i])
68 |
69 |             # Prediction based on the k nearest neighbors
70 | ypred = model.predict([train_data[0-D_window:]])
71 | ypred = np.asarray(list(map(round, ypred[0])))
72 | rmse = np.sqrt(((ypred - y_label) ** 2).mean())
73 | print(code, ' h=', h, ' k=', k, ' rmse=', rmse)
74 |
75 |             # Compute e_LOO
76 | e_LOO_arr[k-1] = LOO(k_neighbor_label, ypred, k)
77 |
78 |         # Take the k with the smallest e_LOO
79 | k_min = np.argmin(e_LOO_arr[1:]) + 2
80 | print('k_min=', k_min)
81 |
82 |
83 | # Use the whole data set, implement the method from the paper, and forecast May sales
84 | def MIMO_KNN_LOO_May(data):
85 | code = data[0]
86 | data = list(map(float, data[1:]))
87 |
88 | D_window = 14
89 | max_k = 7
90 | pred_May = []
91 | for h in range(4):
92 | train_feature, train_label = get_train_set(data, h, D_window)
93 | e_LOO_arr = np.zeros(max_k)
94 | for k in range(2, max_k + 1):
95 | model = KNeighborsRegressor(n_neighbors=k, weights='uniform', algorithm='auto')
96 | model.fit(train_feature, train_label)
97 |
98 |             # Get the k nearest neighbors
99 | dist_list, index_list = model.kneighbors([data[0 - D_window:]])
100 | k_neighbor_label = []
101 | for i in index_list[0]:
102 | k_neighbor_label.append(train_label[i])
103 |
104 |             # Prediction based on the k nearest neighbors
105 | ypred = model.predict([data[0 - D_window:]])
106 | ypred = np.asarray(list(map(round, ypred[0])))
107 |
108 |             # Compute e_LOO
109 | e_LOO_arr[k - 1] = LOO(k_neighbor_label, ypred, k)
110 |
111 |         # Take the k with the smallest e_LOO
112 | k_min = np.argmin(e_LOO_arr[1:]) + 2
113 |
114 |         # Set k = k_min and make the prediction
115 | model = KNeighborsRegressor(n_neighbors=k_min, weights='uniform', algorithm='auto')
116 | model.fit(train_feature, train_label)
117 | ypred = model.predict([data[0 - D_window:]])
118 | ypred = list(map(round, ypred[0]))
119 | pred_May = pred_May + ypred
120 |
121 | print(pred_May)
122 |     # Replace the predicted values for this class code in the submission file
123 | change_pred(code, pred_May)
124 |
125 |
126 | # Compute the LOO estimate, used to choose k (the number of neighbors)
127 | def LOO(k_neighbor_label, ypred, k):
128 | ret = 0
129 | for neighbor in k_neighbor_label:
130 | ret = ret + ((neighbor - ypred) ** 2).sum()
131 | ret = ret * k / (k - 1)**2
132 | # ret = ret / (k)**2
133 | return ret
134 |
135 |
136 | def get_train_set(train_data, h, D):
137 | feature, label = [], []
138 | if h != 3:
139 | for i in range(len(train_data) - D - 7 * (h+1) + 1):
140 | feature.append(train_data[i:i+D])
141 | label.append(train_data[i+D+7*h:i+D+7*h+7])
142 | else:
143 | for i in range(len(train_data) - D - 30 + 1):
144 | feature.append(train_data[i:i+D])
145 | label.append(train_data[i+D+21:i+D+30])
146 | return np.array(feature), np.array(label)
147 |
148 |
149 | def get_test_label(test_data, h):
150 | if h != 3:
151 | return test_data[7*h:7*h+7]
152 | else:
153 | return test_data[21:]
154 |
155 |
156 | if __name__ == '__main__':
157 | main_fun()
158 |
--------------------------------------------------------------------------------
/arima_knn/arima_knn.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Spyder Editor
4 |
5 | This is a temporary script file.
6 | """
7 |
8 | from numpy import array
9 | from numpy import zeros
10 | import csv
11 | import math
12 | import datetime as dt
13 |
14 | import arimaPredicter
15 | import dataLoader
16 | import KNN_interface
17 |
18 | larclasPred = {}
19 | larclasLabl = {}
20 | totalBias = 0
21 | totalCount = 0
22 |
23 | dtIndex = [dt.datetime(2015,1,x) for x in range(1, 32)]
24 | dtIndex = dtIndex + [dt.datetime(2015,2,x) for x in (range(1, 29))]
25 | dtIndex = dtIndex + [dt.datetime(2015,3,x) for x in range(1, 32)]
26 | dtIndex = dtIndex + [dt.datetime(2015,4,x) for x in (range(1, 31))]
27 |
28 | modelChoose = []
29 | lcModelChoose = []
30 |
31 | ap = arimaPredicter.predicter()
32 | ap.setIndex(dtIndex)
33 |
34 | def dataLog(midclass, accuracy, trainLabl, testPred, testLabl):
35 |     with open('compare.csv', 'a', newline='') as f:
36 | writer = csv.writer(f)
37 | count = 1
38 | writer.writerow([midclass, accuracy])
39 | for x in trainLabl:
40 | writer.writerow([count, x])
41 | count += 1
42 | for x in range(0, len(testPred)):
43 | writer.writerow([count, testLabl[x], testPred[x]])
44 | count += 1
45 |
46 | def modelselect(trainSize, testSize, skipSize = 0):
47 | global larclasPred, totalBias, totalCount, modelChoose, lcModelChoose, ap
48 | larclasPred = {}
49 | totalBias = 0
50 | totalCount = 0
51 | modelChoose = []
52 | lcModelChoose = []
53 |
54 | loader = dataLoader.loader("datam.csv", "lcdatam.csv")
55 | loader.setSize(trainSize, testSize, skipSize)
56 |
57 | # middle class
58 | while (True):
59 | midclass, trD, trL, _, teL = loader.getNextMidClass()
60 | if (midclass == 0):
61 | break
62 | else:
63 | # sarima model
64 | try:
65 |                 model = ap.sarimaTrain(trL, midclass)
66 | teP1 = ap.sarimaPredict(model, testSize)
67 | except:
68 | teP1 = zeros(testSize)
69 |
70 | # kNN model
71 | try:
72 | teP2 = KNN_interface.knn(trL, testSize)
73 | except:
74 | print("Warning: kNN train fail")
75 | teP2 = zeros(testSize)
76 |
77 | # just zero
78 | teP3 = zeros(testSize)
79 |
80 | # count bias of midclass and update larclass
81 | label = array(teL)
82 | larclass = int(midclass/100)
83 | totalCount += testSize
84 |
85 | bias1 = sum((teP1-label)*(teP1-label))
86 | bias2 = sum((teP2-label)*(teP2-label))
87 | bias3 = sum((teP3-label)*(teP3-label))
88 | if (bias3 <= bias1 and bias3 <= bias2):
89 | totalBias += bias3
90 | bias3 = math.sqrt(bias3/testSize)
91 | print "(Midclass %d select ZERO, accuracy: %f)" % (midclass, bias3)
92 | modelChoose.append(3)
93 | if (larclass in larclasPred):
94 | larclasPred[larclass] += teP3
95 | else:
96 | larclasPred[larclass] = teP3
97 | elif (bias1 <= bias2):
98 | totalBias += bias1
99 | bias1 = math.sqrt(bias1/testSize)
100 | print "(Midclass %d select SARIMA, accuracy: %f)" % (midclass, bias1)
101 | modelChoose.append(1)
102 | if (larclass in larclasPred):
103 | larclasPred[larclass] += teP1
104 | else:
105 | larclasPred[larclass] = teP1
106 | else:
107 | totalBias += bias2
108 | bias2 = math.sqrt(bias2/testSize)
109 | print "(Midclass %d select kNN, accuracy: %f)" % (midclass, bias2)
110 | modelChoose.append(2)
111 | if (larclass in larclasPred):
112 | larclasPred[larclass] += teP2
113 | else:
114 | larclasPred[larclass] = teP2
115 |
116 | # large class
117 | while (True):
118 | larclass, trD, trL, _, teL = loader.getNextLarClass()
119 | if (larclass == 0):
120 | break
121 | else:
122 | # sarima model
123 | try:
124 |                 model = ap.sarimaTrain(trL, larclass)
125 | teP1 = ap.sarimaPredict(model, testSize)
126 | except:
127 | teP1 = zeros(testSize)
128 |
129 | # knn model
130 | try:
131 | teP2 = KNN_interface.knn(trL, testSize)
132 | except:
133 | print("Warning: kNN train fail")
134 | teP2 = zeros(testSize)
135 |
136 | # sum of midclasses
137 | teP3 = larclasPred[larclass]
138 |
139 |             # count bias of larclass
140 | label = array(teL)
141 | totalCount += testSize
142 |
143 | bias1 = sum((teP1-label)*(teP1-label))
144 | bias2 = sum((teP2-label)*(teP2-label))
145 | bias3 = sum((teP3-label)*(teP3-label))
146 | if (bias3 <= bias1 and bias3 <= bias2):
147 | totalBias += bias3
148 | bias3 = math.sqrt(bias3/testSize)
149 | print "(Larclass %d select SUM, accuracy: %f)" % (larclass, bias3)
150 | lcModelChoose.append(3)
151 | elif (bias1 <= bias2):
152 | totalBias += bias1
153 | bias1 = math.sqrt(bias1/testSize)
154 | print "(Larclass %d select SARIMA, accuracy: %f)" % (larclass, bias1)
155 | lcModelChoose.append(1)
156 | else:
157 | totalBias += bias2
158 | bias2 = math.sqrt(bias2/testSize)
159 | print "(Larclass %d select kNN, accuracy: %f)" % (larclass, bias2)
160 | lcModelChoose.append(2)
161 |
162 | totalBias = math.sqrt(totalBias/totalCount)
163 | print "(Predict finished, accuracy: %f)" % (totalBias)
164 | loader.closeFiles()
165 |
166 | def submit(trainSize):
167 | global larclasPred, ap
168 | larclasPred = {}
169 |
170 | f1 = open("submit.csv", "r")
171 | submit_csv = csv.reader(f1)
172 |     next(submit_csv)
173 |     f2 = open('submit1.csv', 'w', newline='')
174 | writer = csv.writer(f2)
175 |
176 | loader = dataLoader.loader("datam.csv", "lcdatam.csv")
177 | loader.setSize(trainSize)
178 |
179 | # middle class
180 | current = 0
181 | while (True):
182 | midclass, trD, trL, teD, teL = loader.getNextMidClass()
183 | if (midclass == 0):
184 | break
185 | else:
186 | if (modelChoose[current] == 1):
187 | try:
188 |                     model = ap.sarimaTrain(trL, midclass)
189 | teP = ap.sarimaPredict(model, 30)
190 | except:
191 | print("%d: failed to use arima, use kNN instead" % midclass)
192 | teP = KNN_interface.knn(trL, 30)
193 | elif (modelChoose[current] == 2):
194 | teP = KNN_interface.knn(trL, 30)
195 | else:
196 | teP = zeros(30)
197 | current += 1
198 |
199 | for x in teP:
200 | x_int = round(x)
201 |                 row = next(submit_csv)
202 | if (int(row[0]) != midclass):
203 | raise KeyError
204 | writer.writerow([row[0], row[1], x_int])
205 |
206 | # count larclass
207 | larclass = int(midclass/100)
208 | if (larclass in larclasPred):
209 | larclasPred[larclass] += teP
210 | else:
211 | larclasPred[larclass] = teP
212 |
213 | # large class
214 | current = 0
215 | while (True):
216 | larclass, trD, trL, teD, teL = loader.getNextLarClass()
217 | if (larclass == 0):
218 | break
219 | else:
220 | if (lcModelChoose[current] == 1):
221 | try:
222 |                     model = ap.sarimaTrain(trL, larclass)
223 | teP = ap.sarimaPredict(model, 30)
224 | except:
225 | print("%d: failed to use arima, use kNN instead" % larclass)
226 | teP = KNN_interface.knn(trL, 30)
227 | elif (lcModelChoose[current] == 2):
228 | teP = KNN_interface.knn(trL, 30)
229 | else:
230 | teP = larclasPred[larclass]
231 | current += 1
232 |
233 | # write file - larclass
234 | for x in teP:
235 | x_int = round(x)
236 |                 row = next(submit_csv)
237 | if (int(row[0]) != larclass):
238 | raise KeyError
239 | writer.writerow([row[0], row[1], x_int])
240 |
241 | f1.close()
242 | f2.close()
243 | loader.closeFiles()
244 |
245 | modelselect(75, 30, 15)
246 | """
247 | with open("report.txt", "w") as f:
248 | for clas in arimaParaChoose:
249 | f.writelines("class %d: (%d,%d)\n" % (clas, arimaParaChoose[clas][0], arimaParaChoose[clas][1]))
250 | """
251 | submit(120)
--------------------------------------------------------------------------------
/arima_knn/modify_submit.py:
--------------------------------------------------------------------------------
1 | import csv
2 |
3 |
4 | # Read the existing prediction file, round the predictions, and write them back
5 | def get_round():
6 | rows = []
7 | with open('submit.csv') as input_file:
8 | input_csv = csv.reader(input_file)
9 | rows.append(next(input_csv))
10 | for row in input_csv:
11 | row[2] = str(int(round(float(row[2]))))
12 | rows.append(row)
13 | with open('submit.csv', 'w', newline='') as output_file:
14 | output_csv = csv.writer(output_file)
15 | for row in rows:
16 | output_csv.writerow(row)
17 |
18 |
19 | # Replace the predicted values of the class with the given code in the prediction file with pred
20 | def change_pred(code, pred):
21 | rows = []
22 | file_name = 'submit_WJ_2.csv'
23 | with open(file_name) as input_file:
24 | input_csv = csv.reader(input_file)
25 | rows.append(next(input_csv))
26 | i = 0
27 | for row in input_csv:
28 | if row[0] == code:
29 | rows.append([code, row[1], str(pred[i])])
30 | i += 1
31 | else:
32 | rows.append(row)
33 | with open(file_name, 'w', newline='') as output_file:
34 | output_csv = csv.writer(output_file)
35 | for row in rows:
36 | output_csv.writerow(row)
37 |
38 |
39 | if __name__ == '__main__':
40 | get_round()
41 |
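42 | 
43 | # Usage note (added annotation): change_pred is the hook used by
44 | # arima_knn/Version_5.py, e.g. change_pred('1201', pred_May) rewrites the 30
45 | # predicted rows of class 1201 in submit_WJ_2.csv with the kNN forecasts.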
--------------------------------------------------------------------------------
/arima_xgboost/arima_xgboost.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Spyder Editor
4 |
5 | This is a temporary script file.
6 | """
7 |
8 | import xgboost as xgb
9 | import arimaPredicter
10 | import dataLoader
11 |
12 | from numpy import array
13 | from numpy import zeros
14 | import csv
15 | import math
16 | import datetime as dt
17 |
18 | ap = arimaPredicter.predicter()
19 | ap.createIndex(dt.datetime(2015,1,1), 243)
20 |
21 | modelChoose = {}
22 |
23 | def dataLog(midclass, accuracy, trainLabl, testPred, testLabl):
24 |     with open('compare.csv', 'a', newline='') as f:
25 | writer = csv.writer(f)
26 | count = 1
27 | writer.writerow([midclass, accuracy])
28 | for x in trainLabl:
29 | writer.writerow([count, x])
30 | count += 1
31 | for x in range(0, len(testPred)):
32 | writer.writerow([count, testLabl[x], testPred[x]])
33 | count += 1
34 |
35 | def xgboostPredict(trainData, trainLabel, dataToPredict,
36 | params = {"objective":"reg:linear", "max_depth":1, "gamma":2}):
37 | dtrain = xgb.DMatrix(trainData, trainLabel)
38 | gbm = xgb.train(dtrain=dtrain, params=params)
39 | return gbm.predict(xgb.DMatrix(dataToPredict))
40 |
41 | def simulateFeature(trainData, mask):
42 |     for feature in trainData:
43 |         for i in mask:
44 |             feature[i] = 0
45 |
46 | def createFeature(date_from, length, zeros, DictHoliday, DictBeforeHoliday,
47 |                   DictWorkday):
48 |     delta = dt.timedelta(days=1)
49 |     now = date_from
50 |     index = []
51 |     for i in range(0, length):
52 |         index.append(now)
53 |         now = now + delta
54 |     feature = []
55 |     empty = [0 for x in range(0, zeros+4)]
56 |     for i in range(0, length):
57 |         x = empty[:]
58 |         x[0] = index[i].day
59 |         x[1] = (index[i].weekday() + 1) % 7
60 |         dayCount = i + 1
61 |         if (dayCount in DictHoliday):
62 |             x[3] = 1
63 |         elif (dayCount in DictBeforeHoliday):
64 |             x[2] = 1
65 |         elif (dayCount in DictWorkday):
66 |             if (x[1]==6 or ((dayCount+1) in DictHoliday)):
67 |                 x[2] = 1
68 |         elif (x[1]==0 or x[1]==6):
69 |             x[3] = 1
70 |         elif (x[1]==5):
71 |             x[2] = 1
72 |         feature.append(x)
73 |     return feature
74 |
75 | def setModel(clas, model):
76 | global modelChoose
77 | if (clas not in modelChoose):
78 | modelChoose[clas] = model
79 | elif (model < modelChoose[clas]):
80 | modelChoose[clas] = model
81 |
82 | def modelselect(trainSize, testSize, skipSize = 0):
83 | larclasPred = {}
84 | totalBias = 0
85 | totalCount = 0
86 |
87 | loader = dataLoader.loader("datam.csv", "lcdatam.csv")
88 | loader.setSize(trainSize, testSize, skipSize)
89 |
90 | # middle class
91 | while (True):
92 | midclass, trD, trL, teD, teL = loader.getNextMidClass()
93 | if (midclass == 0):
94 | break
95 | else:
96 |
97 | # sarima model
98 | try:
99 |                 model = ap.sarimaTrain(trL, midclass)
100 | teP1 = ap.sarimaPredict(model, testSize)
101 | except:
102 | teP1 = zeros(testSize)
103 |
104 | # xgboost model
105 | simulateFeature(teD, [-2, -1])
106 | try:
107 | teP2 = xgboostPredict(array(trD), array(trL), array(teD))
108 | except:
109 | teP2 = zeros(testSize)
110 |
111 | # just zero
112 | teP3 = zeros(testSize)
113 |
114 | # count bias of midclass and update larclass
115 | label = array(teL)
116 | larclass = int(midclass/100)
117 | totalCount += testSize
118 |
119 | bias1 = sum((teP1-label)*(teP1-label))
120 | bias2 = sum((teP2-label)*(teP2-label))
121 | bias3 = sum((teP3-label)*(teP3-label))
122 | if (bias3 <= bias1 and bias3 <= bias2):
123 | totalBias += bias3
124 | bias3 = math.sqrt(bias3/testSize)
125 | print "(Midclass %d select ZERO, accuracy: %f)" % (midclass, bias3)
126 | setModel(midclass, 3)
127 | if (larclass in larclasPred):
128 | larclasPred[larclass] += teP3
129 | else:
130 | larclasPred[larclass] = teP3
131 | elif (bias1 <= bias2):
132 | totalBias += bias1
133 | bias1 = math.sqrt(bias1/testSize)
134 | print "(Midclass %d select SARIMA, accuracy: %f)" % (midclass, bias1)
135 | setModel(midclass, 1)
136 | if (larclass in larclasPred):
137 | larclasPred[larclass] += teP1
138 | else:
139 | larclasPred[larclass] = teP1
140 | else:
141 | totalBias += bias2
142 | bias2 = math.sqrt(bias2/testSize)
143 | print "(Midclass %d select XGBOOST, accuracy: %f)" % (midclass, bias2)
144 | setModel(midclass, 2)
145 | if (larclass in larclasPred):
146 | larclasPred[larclass] += teP2
147 | else:
148 | larclasPred[larclass] = teP2
149 |
150 | # large class
151 | while (True):
152 | larclass, trD, trL, teD, teL = loader.getNextLarClass()
153 | if (larclass == 0):
154 | break
155 | else:
156 |
157 | # sarima model
158 | try:
159 |                 model = ap.sarimaTrain(trL, larclass)
160 | teP1 = ap.sarimaPredict(model, testSize)
161 | except:
162 | teP1 = zeros(testSize)
163 |
164 | # xgboost model
165 | simulateFeature(teD, [-2, -1])
166 | try:
167 | teP2 = xgboostPredict(array(trD), array(trL), array(teD))
168 | except:
169 | teP2 = zeros(testSize)
170 |
171 | # sum of midclasses
172 | try:
173 | teP3 = larclasPred[larclass]
174 | except:
175 | teP3 = zeros(testSize)
176 |
177 |             # count bias of larclass
178 | label = array(teL)
179 | totalCount += testSize
180 |
181 | bias1 = sum((teP1-label)*(teP1-label))
182 | bias2 = sum((teP2-label)*(teP2-label))
183 | bias3 = sum((teP3-label)*(teP3-label))
184 | if (bias3 <= bias1 and bias3 <= bias2):
185 | totalBias += bias3
186 | bias3 = math.sqrt(bias3/testSize)
187 | print "(Larclass %d select SUM, accuracy: %f)" % (larclass, bias3)
188 | setModel(larclass, 3)
189 | elif (bias1 <= bias2):
190 | totalBias += bias1
191 | bias1 = math.sqrt(bias1/testSize)
192 | print "(Larclass %d select SARIMA, accuracy: %f)" % (larclass, bias1)
193 | setModel(larclass, 1)
194 | else:
195 | totalBias += bias2
196 | bias2 = math.sqrt(bias2/testSize)
197 | print "(Larclass %d select XGBOOST, accuracy: %f)" % (larclass, bias2)
198 | setModel(larclass, 2)
199 |
200 | totalBias = math.sqrt(totalBias/totalCount)
201 | print "(Predict finished, accuracy: %f)" % (totalBias)
202 | loader.closeFiles()
203 |
204 | def submit(trainSize):
205 | global larclasPred
206 | larclasPred = {}
207 | f1 = open("example.csv", "r")
208 | submit_csv = csv.reader(f1)
209 |     row = next(submit_csv)
210 |     f2 = open('submit.csv', 'w', newline='')
211 | writer = csv.writer(f2)
212 | writer.writerow(row)
213 |
214 | loader = dataLoader.loader("datam.csv", "lcdatam.csv")
215 | loader.setSize(trainSize)
216 |
217 |     preDate = list(range(0, 9)) + list(range(10, 59))
218 |
219 | # middle class
220 | goal = createFeature(dt.datetime(2015,9,1), 59, 2,
221 | range(31, 38), [30], [39, 40])
222 |
223 | while (True):
224 | midclass, trD, trL, teD, teL = loader.getNextMidClass()
225 | if (midclass == 0):
226 | break
227 | else:
228 | if (modelChoose[midclass] == 1):
229 | try:
230 |                     model = ap.sarimaTrain(trL, midclass)
231 | teP = ap.sarimaPredict(model, 59)
232 | except:
233 | print("%d: failed to use arima, use xgboost instead" % midclass)
234 | teP = xgboostPredict(array(trD), array(trL), array(goal))
235 | elif (modelChoose[midclass] == 2):
236 | teP = xgboostPredict(array(trD), array(trL), array(goal))
237 | else:
238 | teP = zeros(59)
239 |
240 | for i in preDate:
241 | x_int = round(teP[i])
242 | if (x_int < 0):
243 | x_int = 0
244 |                 row = next(submit_csv)
245 | if (int(row[0]) != midclass):
246 | raise KeyError
247 | writer.writerow([row[0], row[1], x_int])
248 |
249 | # count larclass
250 | larclass = int(midclass/100)
251 | if (larclass in larclasPred):
252 | larclasPred[larclass] += teP
253 | else:
254 | larclasPred[larclass] = teP
255 |
256 | # large class
257 | goal = createFeature(dt.datetime(2015,9,1), 59, 1,
258 | range(31, 38), [30], [39, 40])
259 |
260 | while (True):
261 | larclass, trD, trL, teD, teL = loader.getNextLarClass()
262 | if (larclass == 0):
263 | break
264 | else:
265 | if (modelChoose[larclass] == 1):
266 | try:
267 |                     model = ap.sarimaTrain(trL, larclass)
268 | teP = ap.sarimaPredict(model, 59)
269 | except:
270 | print("%d: failed to use arima, use xgboost instead" % larclass)
271 | teP = xgboostPredict(array(trD), array(trL), array(goal))
272 | elif (modelChoose[larclass] == 2):
273 | teP = xgboostPredict(array(trD), array(trL), array(goal))
274 | else:
275 | try:
276 | teP = larclasPred[larclass]
277 | except:
278 | teP = zeros(59)
279 |
280 |             # write file - larclass
281 | for i in preDate:
282 | x_int = round(teP[i])
283 | if (x_int < 0):
284 | x_int = 0
285 |                 row = next(submit_csv)
286 | if (int(row[0]) != larclass):
287 | raise KeyError
288 | writer.writerow([row[0], row[1], x_int])
289 |
290 | f1.close()
291 | f2.close()
292 | loader.closeFiles()
293 |
294 | modelselect(200, 43, 0)
295 | para = ap.getPara()
296 | submit(243)
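297 | 
298 | # Annotation (added): createFeature emits vectors of length zeros+4 laid out as
299 | # [day of month, weekday (0=Sunday, 6=Saturday), before-holiday flag, holiday
300 | # flag, 0, ...]. Weekends are flagged as holidays and Fridays as before-holiday
301 | # unless overridden by the explicit lists, so the goal features above mark days
302 | # 31-37 (October 1-7) as holidays, day 30 as before-holiday, and days 39-40 as
303 | # make-up workdays.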
--------------------------------------------------------------------------------
/arima_xgboost/arima_xgboost_multi.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Spyder Editor
4 |
5 | This is a temporary script file.
6 | """
7 |
8 | import arimaPredicter
9 | import dataLoader
10 | import xgboostPredicter
11 |
12 | from numpy import array
13 | from numpy import zeros
14 | import csv
15 | import math
16 | import datetime as dt
17 |
18 | aps = []
19 | for i in range(0, 3):
20 | ap = arimaPredicter.predicter()
21 | ap.createIndex(dt.datetime(2015,1,1), 243)
22 | aps.append(ap)
23 |
24 | xgp = xgboostPredicter.predicter()
25 |
26 | modelChoose = {}
27 |
28 | def dataLog(midclass, accuracy, trainLabl, testPred, testLabl):
29 |     with open('compare.csv', 'a', newline='') as f:
30 | writer = csv.writer(f)
31 | count = 1
32 | writer.writerow([midclass, accuracy])
33 | for x in trainLabl:
34 | writer.writerow([count, x])
35 | count += 1
36 | for x in range(0, len(testPred)):
37 | writer.writerow([count, testLabl[x], testPred[x]])
38 | count += 1
39 |
40 | def setModel(clas, model):
41 |     global modelChoose
42 |     if (clas not in modelChoose):
43 |         modelChoose[clas] = [model]
44 |     else:
45 |         modelChoose[clas].append(model)
46 |
47 | def trainAndCompare(ap, clas, trD, trL, teD, teL, teP3):
48 | testSize = len(teL)
49 | # sarima model
50 | try:
51 | (_, teP1) = ap.sarimaParaSelect(clas, trL, teL)
52 | except:
53 | teP1 = zeros(testSize)
54 |
55 | # xgboost model
56 | xgp.simulateFeature(teD, [-2, -1])
57 | try:
58 | model = xgp.xgboostTrain(trD, trL)
59 | teP2 = xgp.xgboostPredict(model, teD)
60 | except:
61 | teP2 = zeros(testSize)
62 |
63 | label = array(teL)
64 | bias1 = sum((teP1-label)*(teP1-label))
65 | bias2 = sum((teP2-label)*(teP2-label))
66 | bias3 = sum((teP3-label)*(teP3-label))
67 | if (bias3 <= bias1 and bias3 <= bias2):
68 | return (3, bias3, teP3)
69 | elif (bias1 <= bias2):
70 | return (1, bias1, teP1)
71 | else:
72 | return (2, bias2, teP2)
73 |
74 | def modelselect(ap, trainSize, testSize, skipSize = 0):
75 | larclasPred = {}
76 | totalBias = 0
77 | totalCount = 0
78 |
79 | loader = dataLoader.loader("datam.csv", "lcdatam.csv")
80 | loader.setSize(trainSize, testSize, skipSize)
81 |
82 | # middle class
83 | while (True):
84 | midclass, trD, trL, teD, teL = loader.getNextMidClass()
85 | if (midclass == 0):
86 | break
87 | else:
88 | (model, bias, teP) = trainAndCompare(ap, midclass, trD, trL, teD, teL, zeros(testSize))
89 |
90 | larclass = int(midclass/100)
91 | totalCount += testSize
92 | totalBias += bias
93 | bias = math.sqrt(bias/testSize)
94 | print("(Midclass %d select model %d, accuracy: %f)" % (midclass, model, bias))
95 | setModel(midclass, model)
96 | if (larclass in larclasPred):
97 | larclasPred[larclass] += teP
98 | else:
99 | larclasPred[larclass] = teP
100 |
101 | # large class
102 | while (True):
103 | larclass, trD, trL, teD, teL = loader.getNextLarClass()
104 | if (larclass == 0):
105 | break
106 | else:
107 | if (larclass in larclasPred):
108 | (model, bias, teP) = trainAndCompare(ap, larclass, trD, trL, teD, teL, larclasPred[larclass])
109 | else:
110 | (model, bias, teP) = trainAndCompare(ap, larclass, trD, trL, teD, teL, zeros(testSize))
111 |
112 | totalCount += testSize
113 | totalBias += bias
114 | bias = math.sqrt(bias/testSize)
115 | print("(Larclass %d select model %d, accuracy: %f)" % (larclass, model, bias))
116 | setModel(larclass, model)
117 |
118 | totalBias = math.sqrt(totalBias/totalCount)
119 | print("(Predict finished, accuracy: %f)" % (totalBias))
120 | loader.closeFiles()
121 |
122 | def writeClass(clas, result, dates, checker, writer):
123 | for i in dates:
124 | x_int = round(result[i])
125 | if (x_int < 0):
126 | x_int = 0
127 |         row = next(checker)
128 | if (int(row[0]) != clas):
129 | raise KeyError
130 | writer.writerow([row[0], row[1], x_int])
131 |
132 | def predictClass(clas, cvSize, trD, trL, teD, teP3):
133 | teP = zeros(59)
134 | count = cvSize
135 | for i in range(0, cvSize):
136 | if (modelChoose[clas][i] == 1):
137 | try:
138 | model = aps[i].sarimaTrain(trL, clas)
139 | teP += aps[i].sarimaPredict(model, 59)
140 | except:
141 | print("%d: failed to use arima" % clas)
142 | count -= 1
143 | elif (modelChoose[clas][i] == 2):
144 | model = xgp.xgboostTrain(trD, trL)
145 | teP += xgp.xgboostPredict(model, teD)
146 | else:
147 | teP += teP3
148 |
149 | if (count == 0):
150 | print("%d: failed to use arima at all, only use xgboost" % clas)
151 | model = xgp.xgboostTrain(trD, trL)
152 | teP = xgp.xgboostPredict(model, teD)
153 | else:
154 | teP = teP / count
155 | return teP
156 |
157 |
158 | def submit(trainSize, cvSize):
159 | larclasPred = {}
160 | f1 = open("example.csv", "r")
161 | submit_csv = csv.reader(f1)
162 |     row = next(submit_csv)
163 |     f2 = open('submit.csv', 'w', newline='')
164 | writer = csv.writer(f2)
165 | writer.writerow(row)
166 |
167 | loader = dataLoader.loader("datam.csv", "lcdatam.csv")
168 | loader.setSize(trainSize)
169 |
170 |     preDate = list(range(0, 9)) + list(range(10, 59))
171 |
172 | # middle class
173 | goal = xgp.createFeature(dt.datetime(2015,9,1), 59, 2,
174 | range(31, 38), [30], [39, 40])
175 |
176 | while (True):
177 | midclass, trD, trL, teD, teL = loader.getNextMidClass()
178 | if (midclass == 0):
179 | break
180 | else:
181 | teP = predictClass(midclass, cvSize, trD, trL, goal, zeros(59))
182 | writeClass(midclass, teP, preDate, submit_csv, writer)
183 |
184 | # count larclass
185 | larclass = int(midclass/100)
186 | if (larclass in larclasPred):
187 | larclasPred[larclass] += teP
188 | else:
189 | larclasPred[larclass] = teP
190 |
191 | # large class
192 | goal = xgp.createFeature(dt.datetime(2015,9,1), 59, 1,
193 | range(31, 38), [30], [39, 40])
194 |
195 | while (True):
196 | larclass, trD, trL, teD, teL = loader.getNextLarClass()
197 | if (larclass == 0):
198 | break
199 | else:
200 | if (larclass in larclasPred):
201 | teP = predictClass(larclass, cvSize, trD, trL, goal, larclasPred[larclass])
202 | else:
203 | teP = predictClass(larclass, cvSize, trD, trL, goal, zeros(59))
204 | writeClass(larclass, teP, preDate, submit_csv, writer)
205 |
206 | f1.close()
207 | f2.close()
208 | loader.closeFiles()
209 |
210 | modelselect(aps[0], 210, 28, 5)
211 | modelselect(aps[1], 180, 28, 35)
212 | modelselect(aps[2], 150, 28, 65)
213 | submit(243, 3)
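214 | 
215 | # Annotation (added): model selection runs on three 28-day validation windows
216 | # that end 5, 35 and 65 days before the end of the data, one predicter per
217 | # window. submit(243, 3) then averages, per class, the forecasts of the models
218 | # chosen in each round (see predictClass), falling back to xgboost alone when
219 | # every ARIMA fit fails.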
--------------------------------------------------------------------------------
/arima_xgboost/statanalysis.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Spyder Editor
4 |
5 | This is a temporary script file.
6 | """
7 |
8 | import datetime as dt
9 | import pandas as pd
10 | import numpy as np
11 |
12 | import csv
13 | import math
14 | import arimaPredicter
15 |
16 | temp = []
17 |
18 | index = [dt.datetime(2015,1,x) for x in range(1, 32)]
19 | index = index + [dt.datetime(2015,2,x) for x in (range(1 ,29))]
20 | index = index + [dt.datetime(2015,3,x) for x in range(1, 32)]
21 | index = index + [dt.datetime(2015,4,x) for x in range(1, 31)]
22 |
23 | def getData(csvReader, trainCount, testCount):
24 | trainData = []
25 | testData = []
26 | trainLabel = []
27 | testLabel = []
28 | try:
29 | for x in range(0, trainCount):
30 | row = next(csvReader)
31 | """
32 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]),
33 | float(row[7]), float(row[8]), float(row[9]), float(row[10]),
34 | float(row[11]), float(row[12])]
35 | """
36 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]),
37 | float(row[7]), float(row[8])]
38 | trainData.append(data)
39 | trainLabel.append(float(row[15]))
40 | for x in range(0, testCount):
41 | row = next(csvReader)
42 | """
43 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]),
44 | float(row[7]), float(row[8]), float(row[9]), float(row[10]),
45 | float(row[11]), float(row[12])]
46 | """
47 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]),
48 | float(row[7]), float(row[8])]
49 | testData.append(data)
50 | testLabel.append(float(row[15]))
51 | return int(row[0]), trainData, trainLabel, testData, testLabel
52 | except StopIteration:
53 | return 0, [], [], [], []
54 |
55 | def getLCData(csvReader, trainCount, testCount):
56 | trainData = []
57 | testData = []
58 | trainLabel = []
59 | testLabel = []
60 | try:
61 | for x in range(0, trainCount):
62 | row = next(csvReader)
63 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]),
64 | float(row[7])]
65 | trainData.append(data)
66 | trainLabel.append(float(row[14]))
67 | for x in range(0, testCount):
68 | row = next(csvReader)
69 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]),
70 | float(row[7])]
71 | testData.append(data)
72 | testLabel.append(float(row[14]))
73 | return int(row[0]), trainData, trainLabel, testData, testLabel
74 | except StopIteration:
75 | return 0, [], [], [], []
76 |
77 | def getBias(label, pred):
78 | a1 = np.array(label)
79 | a2 = np.array(pred)
80 | if (a1.__len__() != a2.__len__()):
81 | raise ValueError("length not equel")
82 | m = a1 - a2
83 | return math.sqrt(sum(m*m)/a1.__len__())
84 |
85 | def sarimaTest():
86 |     f = open("datam.csv", "r")
87 |     f_csv = csv.reader(f)
88 | 
89 |     # writer = open("report.txt", "w")
90 | 
91 |     ap = arimaPredicter.predicter()
92 |     ap.setIndex(index)
93 |
94 | for i in range(0, 10):
95 | midclass, trD, trL, teD, teL = getData(f_csv, 120, 0)
96 | if (midclass == 0):
97 | break
98 |
99 | trainData = trL[:99]
100 | testData = trL[99:]
101 |
102 | ap.test_stationarity(trL)
103 |
104 | greatfit = (0, 0, 0)
105 | minaic = 99999
106 |
107 | for p in range(0, 3):
108 | for q in range(0, 3):
109 | try:
110 | ap.setPara(midclass, (p, q))
111 | model = ap.sarimaTrain(midclass, trainData)
112 | if (model.aic < minaic):
113 | minaic = model.aic
114 | greatfit = (p, 1, q)
115 | result = ap.sarimaPredict(model, len(testData))
116 | print("(%d,%d) %f %f\n" % (p, q, model.aic, getBias(testData, result)))
117 |
118 | except:
119 | pass
120 |
121 | print("midclass %d: %d %d\n" % (midclass, greatfit[0], greatfit[2]))
122 |
123 | f.close()
124 | #writer.close()
125 | """
126 | def test_Ljung_Box(timeseries, l):
127 | acf, q, p = sm.tsa.acf(timeseries, nlags=l, qstat=True)
128 | out = np.c_[range(1, l+1), acf[1:], q, p]
129 | output=pd.DataFrame(out, columns=['lag', "AC", "Q", "P-value"])
130 | output = output.set_index('lag')
131 |     print(output)
132 |
133 | import arch
134 |
135 | def sariamGarchTest():
136 | global larclasPred, larclasLabl, totalBias, totalCount, temp
137 | f = open("datam.csv", "r")
138 | f_csv = csv.reader(f)
139 |
140 | for i in range(0, 1):
141 | midclass, trD, trL, teD, teL = getData(f_csv, 120, 0)
142 | if (midclass == 0):
143 | break
144 | # print trL
145 | data0 = pd.Series(trL)
146 | data0.index = pd.Index(index)
147 |
148 | trainData = data0[:dt.datetime(2015,4,9)]
149 | testData = data0[dt.datetime(2015,4,10):]
150 |
151 | model = SARIMAX(trainData, order=(1,1,1), seasonal_order=(0,1,1,7))
152 | result = model.fit()
153 |
154 | at = trainData - result.fittedvalues
155 | #plt.plot(at, color='red')
156 | #plt.show(block=False)
157 |
158 | at2 = np.square(at)
159 | plt.plot(at2, color='red')
160 | plt.show(block=False)
161 | #test_Ljung_Box(at2, 10)
162 |
163 | amodel = arch.arch_model(at2)
164 | aresult = amodel.fit(disp='off')
165 | aresult.summary()
166 | temp.append(aresult)
167 | output1 = result.forecast(trL.__len__()-trainData.__len__())
168 | forecasts = aresult.forecast(horizon=5, start=dt.datetime(2015,4,9))
169 |         print(forecasts.mean[dt.datetime(2015,4,9):])
170 |         print(forecasts.variance[dt.datetime(2015,4,9):])
171 | f.close()
172 | """
173 | sarimaTest()
--------------------------------------------------------------------------------
/data/train.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/data/train.csv
--------------------------------------------------------------------------------
/dataLoader.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Spyder Editor
4 |
5 | This is a temporary script file.
6 | """
7 |
8 | import csv
9 |
10 | class loader():
11 | def __init__(self, midClassFile = "", LarClassFile = ""):
12 | if (midClassFile != ""):
13 | self.mid_f = open(midClassFile, "r")
14 | self.mid_f_csv = csv.reader(self.mid_f)
15 | else:
16 | self.mid_f = None
17 | self.mid_f_csv = None
18 | if (LarClassFile != ""):
19 | self.lar_f = open(LarClassFile, "r")
20 | self.lar_f_csv = csv.reader(self.lar_f)
21 | else:
22 | self.lar_f = None
23 | self.lar_f_csv = None
24 | self.trainCount = 120
25 | self.testCount = 0
26 | self.skipCount = 0
27 | self.midClassFeature = range(3, 9)
28 | self.midSuffix = []
29 | self.larClassFeature = range(3, 8)
30 | self.larSuffix = []
31 |
32 |
33 | def setFile(self, midClassFile = "", LarClassFile = ""):
34 | if (midClassFile != ""):
35 | try:
36 | self.mid_f.close()
37 | except:
38 | pass
39 | self.mid_f = open(midClassFile, "r")
40 | self.mid_f_csv = csv.reader(self.mid_f)
41 | if (LarClassFile != ""):
42 | try:
43 | self.lar_f.close()
44 | except:
45 | pass
46 | self.lar_f = open(LarClassFile, "r")
47 | self.lar_f_csv = csv.reader(self.lar_f)
48 |
49 | def closeFiles(self):
50 | try:
51 | self.mid_f.close()
52 | except:
53 | pass
54 | try:
55 | self.lar_f.close()
56 | except:
57 | pass
58 |
59 | def setSize(self, train, test = 0, skip = 0):
60 | self.trainCount = train
61 | self.testCount = test
62 | self.skipCount = skip
63 |
64 | def setMidClassFeature(self, feature=[], suffix=[]):
65 | self.midClassFeature = feature
66 | self.midSuffix = suffix
67 |
68 | def setLarClassFeature(self, feature=[], suffix=[]):
69 | self.larClassFeature = feature
70 | self.larSuffix = suffix
71 |
72 | def getNextMidClass(self):
73 | trainData = []
74 | testData = []
75 | trainLabel = []
76 | testLabel = []
77 | try:
78 | for x in range(0, self.trainCount):
79 | row = next(self.mid_f_csv)
80 | data = []
81 | for y in self.midClassFeature:
82 | data.append(float(row[y]))
83 | data = data + self.midSuffix
84 | trainData.append(data)
85 | trainLabel.append(float(row[-1]))
86 |
87 | for x in range(0, self.testCount):
88 | row = next(self.mid_f_csv)
89 | data = []
90 | for y in self.midClassFeature:
91 | data.append(float(row[y]))
92 | data = data + self.midSuffix
93 | testData.append(data)
94 | testLabel.append(float(row[-1]))
95 |
96 | for x in range(0, self.skipCount):
97 | next(self.mid_f_csv)
98 | return int(row[0]), trainData, trainLabel, testData, testLabel
99 | except StopIteration:
100 | return 0, [], [], [], []
101 |
102 | def getNextLarClass(self):
103 | trainData = []
104 | testData = []
105 | trainLabel = []
106 | testLabel = []
107 | try:
108 | for x in range(0, self.trainCount):
109 | row = next(self.lar_f_csv)
110 | data = []
111 | for y in self.larClassFeature:
112 | data.append(float(row[y]))
113 | data = data + self.larSuffix
114 | trainData.append(data)
115 | trainLabel.append(float(row[-1]))
116 |
117 | for x in range(0, self.testCount):
118 | row = next(self.lar_f_csv)
119 | data = []
120 | for y in self.larClassFeature:
121 | data.append(float(row[y]))
122 | data = data + self.larSuffix
123 | testData.append(data)
124 | testLabel.append(float(row[-1]))
125 |
126 | for x in range(0, self.skipCount):
127 | next(self.lar_f_csv)
128 | return int(row[0]), trainData, trainLabel, testData, testLabel
129 | except StopIteration:
130 | return 0, [], [], [], []
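131 | 
132 | # A minimal usage sketch (added annotation; it assumes datam.csv is in the
133 | # working directory), following the calls shown in doc/manual.py:
134 | if __name__ == "__main__":
135 |     l = loader("datam.csv")
136 |     l.setSize(200, 43, 0)  # 200 training rows, 43 test rows per class
137 |     midclass, trD, trL, teD, teL = l.getNextMidClass()
138 |     print(midclass, len(trL), len(teL))
139 |     l.closeFiles()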
--------------------------------------------------------------------------------
/doc/compare.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/doc/compare.docx
--------------------------------------------------------------------------------
/doc/manual.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Tue Nov 28 15:14:17 2017
4 |
5 | @author: wangjun
6 | """
7 |
8 | # Quick-start guide for xgboostPredicter and arimaPredicter
9 | # Last updated 2017-11-28
10 |
11 | import arimaPredicter
12 | import dataLoader
13 | import xgboostPredicter
14 |
15 | import matplotlib.pyplot as plt
16 | import datetime as dt
17 |
18 | # Load the training data (any other way of loading it works too)
19 | loader = dataLoader.loader("datam.csv")
20 | loader.setSize(200, 43, 0)
21 | midclass, trainData, trainLabel, testData, testLabel = loader.getNextMidClass()
22 |
23 | plt.plot(trainLabel)
24 | plt.title('Train Label')
25 | plt.show(block=False)
26 |
27 | def arimaPredict():
28 |     # First, create a class instance
29 |     ap = arimaPredicter.predicter()
30 |     # Build the date index: the first argument is the start date of the
31 |     # training data, the second is the index length, which only needs to
32 |     # be at least as long as the training data
33 |     ap.createIndex(dt.datetime(2015, 1, 1), 243)
34 |     # sarimaTrain fits an ARIMA model; just pass in the training labels
35 |     model = ap.sarimaTrain(trainLabel)
36 |     # Given a model, sarimaPredict forecasts the days immediately after
37 |     # the training data; its arguments are the model obtained above and
38 |     # the forecast length. It is a static function, so it can also be
39 |     # called through the class name
40 |     predictLabel = ap.sarimaPredict(model, 43)
41 |     # Plot the result
42 |     plt.plot(testLabel, color='blue', label='actual')
43 |     plt.plot(predictLabel, color='red', label='predict')
44 |     plt.title('ARIMA(default)')
45 |     plt.show(block=False)
46 |
47 |     # sarimaTrain also lets you specify the two ARIMA orders (ar, ma)
48 |     model = ap.sarimaTrain(trainLabel, para=(2, 2))
49 |     # Well-chosen orders improve the result; poorly chosen ones hurt it
50 |     predictLabel = ap.sarimaPredict(model, 43)
51 |     plt.plot(testLabel, color='blue', label='actual')
52 |     plt.plot(predictLabel, color='red', label='predict')
53 |     plt.title('ARIMA(2, 2)')
54 |     plt.show(block=False)
55 |
56 |     # If you don't know which orders to pick, sarimaParaSelect searches
57 |     # for them. Its inputs are the class name, a training set, a test set,
58 |     # and a boolean deciding whether AIC is consulted during the search
59 |     # (so far, the results are similar either way). It returns the best
60 |     # orders and the score on the test set, and the instance also stores
61 |     # the best orders under the class name as key
62 |     para, _ = ap.sarimaParaSelect(1001, trainLabel[:-50], trainLabel[-50:], True)
63 |
64 |     # Since the best orders are stored, later training runs only need to
65 |     # name the class
66 |     model = ap.sarimaTrain(trainLabel, classNo=1001)
67 |     # Prediction works the same as before
68 |     predictLabel = ap.sarimaPredict(model, 43)
69 |     plt.plot(testLabel, color='blue', label='actual')
70 |     plt.plot(predictLabel, color='red', label='predict')
71 |     plt.title('ARIMA(%d, %d)' % (para[0], para[1]))
72 |     plt.show(block=False)
73 |
74 |     # Note: when the model fails to converge, both sarimaTrain and
75 |     # sarimaParaSelect may raise an exception
76 |
77 | def xgboostPredict():
78 |     # First, create a class instance
79 |     xgp = xgboostPredicter.predicter()
80 |
81 |     # xgboostTrain fits an xgboost model; the inputs are the training
82 |     # features and the corresponding labels
83 |     model = xgp.xgboostTrain(trainData, trainLabel)
84 |
85 |     # Given a model, xgboostPredict turns test-set features into
86 |     # predictions. It is a static function, so it can also be called
87 |     # through the class name
88 |     predictLabel = xgp.xgboostPredict(model, testData)
89 |     # Plot the result
90 |     plt.plot(testLabel, color='blue', label='actual')
91 |     plt.plot(predictLabel, color='red', label='predict')
92 |     plt.title('xgboost(default)')
93 |     plt.show(block=False)
94 |
95 |     # The predicter class also ships two static utility functions:
96 |     # simulateFeature clears selected positions of feature vectors, e.g.
97 |     xgp.simulateFeature(testData, [-2, -1])
98 |     # clears the last two positions of every test feature vector (the
99 |     # promotion info in my feature layout), which makes the test-set
100 |     # results more realistic
101 |     predictLabel = xgp.xgboostPredict(model, testData)
102 |     plt.plot(testLabel, color='blue', label='actual')
103 |     plt.plot(predictLabel, color='red', label='predict')
104 |     plt.title('xgboost(default)')
105 |     plt.show(block=False)
106 |
107 |     # createFeature builds feature vectors for testing, but it only works
108 |     # if your feature layout matches mine. Its arguments are (start date,
109 |     # length, number of trailing zeros, holiday list, pre-holiday list,
110 |     # workday list); each list holds 1-based day indices. Weekends count
111 |     # as holidays automatically, and Fridays as pre-holiday days, e.g.
112 |     data = xgp.createFeature(dt.datetime(2015, 9, 1), 7, 1, [4], [3], [6])
113 |     # prints the following output:
114 |     for x in data:
115 |         print(x)
116 |
117 | arimaPredict()
118 | xgboostPredict()
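The comments above only describe simulateFeature's behaviour; its actual implementation lives in xgboostPredicter. A minimal sketch of what such a function plausibly does, assuming feature vectors are plain Python lists (the name and semantics come from manual.py; the body is illustrative, not the repository's code):

    def simulateFeature(data, positions):
        """Zero out the given positions of every feature vector, in place."""
        for row in data:            # data: list of feature vectors
            for p in positions:     # positions: indices such as [-2, -1]
                row[p] = 0.0        # e.g. clear the promotion-related fields

Clearing rather than deleting keeps each vector's length fixed, so a model trained on full-width vectors can still consume the simulated ones.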
--------------------------------------------------------------------------------
/doc/report.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/doc/report.pdf
--------------------------------------------------------------------------------
/doc/report.txt:
--------------------------------------------------------------------------------
1 | (Midclass 1001 select SARIMA, accuracy: 4.164104)
2 | (Midclass 1002 select SARIMA, accuracy: 0.530263)
3 | (Midclass 1004 select SARIMA, accuracy: 2.009439)
4 | (Midclass 1005 select ZERO, accuracy: 0.377964)
5 | (Midclass 1099 select SARIMA, accuracy: 0.195678)
6 | (Midclass 1101 select SARIMA, accuracy: 1.562947)
7 | (Midclass 1102 select SARIMA, accuracy: 0.875281)
8 | (Midclass 1103 select SARIMA, accuracy: 0.415788)
9 | (Midclass 1201 select SARIMA, accuracy: 22.207918)
10 | (Midclass 1202 select SARIMA, accuracy: 2.736437)
11 | (Midclass 1203 select SARIMA, accuracy: 11.184439)
12 | (Midclass 1205 select SARIMA, accuracy: 0.846538)
13 | (Midclass 1301 select XGBOOST, accuracy: 1.802102)
14 | (Midclass 1302 select SARIMA, accuracy: 1.113745)
15 | (Midclass 1306 select XGBOOST, accuracy: 0.668731)
16 | (Midclass 1308 select SARIMA, accuracy: 3.409499)
17 | (Midclass 1501 select XGBOOST, accuracy: 1.607822)
18 | (Midclass 1502 select SARIMA, accuracy: 0.649425)
19 | (Midclass 1503 select XGBOOST, accuracy: 0.854608)
20 | (Midclass 1504 select ZERO, accuracy: 0.577350)
21 | (Midclass 1505 select SARIMA, accuracy: 5.885219)
22 | (Midclass 1507 select ZERO, accuracy: 0.000000)
23 | (Midclass 1508 select SARIMA, accuracy: 0.998574)
24 | (Midclass 1510 select SARIMA, accuracy: 1.002930)
25 | (Midclass 1511 select SARIMA, accuracy: 0.207669)
26 | (Midclass 1512 select XGBOOST, accuracy: 0.536687)
27 | (Midclass 1513 select SARIMA, accuracy: 1.473901)
28 | (Midclass 1515 select SARIMA, accuracy: 1.343283)
29 | (Midclass 1516 select XGBOOST, accuracy: 1.640316)
30 | (Midclass 1517 select XGBOOST, accuracy: 2.080571)
31 | (Midclass 1518 select SARIMA, accuracy: 5.853074)
32 | (Midclass 1519 select XGBOOST, accuracy: 1.276925)
33 | (Midclass 1521 select SARIMA, accuracy: 3.578035)
34 | (Midclass 2001 select ZERO, accuracy: 2.203893)
35 | (Midclass 2002 select SARIMA, accuracy: 1.330328)
36 | (Midclass 2003 select XGBOOST, accuracy: 1.401267)
37 | (Midclass 2004 select SARIMA, accuracy: 0.204031)
38 | (Midclass 2005 select SARIMA, accuracy: 1.984291)
39 | (Midclass 2006 select XGBOOST, accuracy: 3.567574)
40 | (Midclass 2007 select SARIMA, accuracy: 2.002037)
41 | (Midclass 2008 select SARIMA, accuracy: 2.831727)
42 | (Midclass 2009 select SARIMA, accuracy: 2.368810)
43 | (Midclass 2010 select XGBOOST, accuracy: 1.422014)
44 | (Midclass 2011 select SARIMA, accuracy: 7.890765)
45 | (Midclass 2012 select XGBOOST, accuracy: 0.779130)
46 | (Midclass 2013 select SARIMA, accuracy: 3.032853)
47 | (Midclass 2014 select SARIMA, accuracy: 4.112845)
48 | (Midclass 2015 select XGBOOST, accuracy: 1.717115)
49 | (Midclass 2101 select XGBOOST, accuracy: 0.531832)
50 | (Midclass 2102 select ZERO, accuracy: 0.377964)
51 | (Midclass 2103 select SARIMA, accuracy: 0.951623)
52 | (Midclass 2104 select SARIMA, accuracy: 1.087212)
53 | (Midclass 2105 select SARIMA, accuracy: 1.209021)
54 | (Midclass 2106 select XGBOOST, accuracy: 0.528829)
55 | (Midclass 2107 select ZERO, accuracy: 0.000000)
56 | (Midclass 2201 select SARIMA, accuracy: 5.789172)
57 | (Midclass 2202 select SARIMA, accuracy: 7.402780)
58 | (Midclass 2203 select SARIMA, accuracy: 7.110155)
59 | (Midclass 2204 select SARIMA, accuracy: 3.052818)
60 | (Midclass 2205 select SARIMA, accuracy: 3.604804)
61 | (Midclass 2206 select SARIMA, accuracy: 4.279676)
62 | (Midclass 2207 select SARIMA, accuracy: 2.793577)
63 | (Midclass 2208 select SARIMA, accuracy: 1.795416)
64 | (Midclass 2209 select XGBOOST, accuracy: 1.610905)
65 | (Midclass 2210 select SARIMA, accuracy: 3.808497)
66 | (Midclass 2211 select ZERO, accuracy: 0.377964)
67 | (Midclass 2212 select XGBOOST, accuracy: 1.402607)
68 | (Midclass 2301 select XGBOOST, accuracy: 1.707330)
69 | (Midclass 2302 select SARIMA, accuracy: 1.730345)
70 | (Midclass 2303 select XGBOOST, accuracy: 2.240427)
71 | (Midclass 2304 select XGBOOST, accuracy: 0.650331)
72 | (Midclass 2305 select SARIMA, accuracy: 1.866917)
73 | (Midclass 2306 select XGBOOST, accuracy: 3.693004)
74 | (Midclass 2307 select SARIMA, accuracy: 1.606624)
75 | (Midclass 2309 select XGBOOST, accuracy: 1.696085)
76 | (Midclass 2310 select ZERO, accuracy: 0.617213)
77 | (Midclass 2311 select XGBOOST, accuracy: 1.108243)
78 | (Midclass 2312 select SARIMA, accuracy: 0.478464)
79 | (Midclass 2313 select ZERO, accuracy: 0.308607)
80 | (Midclass 2314 select XGBOOST, accuracy: 1.468442)
81 | (Midclass 2316 select XGBOOST, accuracy: 1.258186)
82 | (Midclass 2317 select XGBOOST, accuracy: 0.640845)
83 | (Midclass 3001 select ZERO, accuracy: 0.308607)
84 | (Midclass 3002 select SARIMA, accuracy: 1.270771)
85 | (Midclass 3003 select SARIMA, accuracy: 0.708163)
86 | (Midclass 3004 select XGBOOST, accuracy: 0.470117)
87 | (Midclass 3005 select XGBOOST, accuracy: 0.946125)
88 | (Midclass 3006 select SARIMA, accuracy: 2.675442)
89 | (Midclass 3007 select SARIMA, accuracy: 1.639240)
90 | (Midclass 3008 select SARIMA, accuracy: 1.766410)
91 | (Midclass 3010 select SARIMA, accuracy: 0.838993)
92 | (Midclass 3011 select SARIMA, accuracy: 0.640106)
93 | (Midclass 3013 select SARIMA, accuracy: 2.473312)
94 | (Midclass 3014 select ZERO, accuracy: 0.218218)
95 | (Midclass 3016 select SARIMA, accuracy: 3.851208)
96 | (Midclass 3017 select SARIMA, accuracy: 0.888357)
97 | (Midclass 3018 select SARIMA, accuracy: 3.428816)
98 | (Midclass 3102 select ZERO, accuracy: 0.218218)
99 | (Midclass 3105 select XGBOOST, accuracy: 0.214763)
100 | (Midclass 3107 select SARIMA, accuracy: 0.215656)
101 | (Midclass 3109 select ZERO, accuracy: 0.218218)
102 | (Midclass 3110 select XGBOOST, accuracy: 0.681508)
103 | (Midclass 3112 select SARIMA, accuracy: 0.673105)
104 | (Midclass 3113 select SARIMA, accuracy: 0.196265)
105 | (Midclass 3114 select SARIMA, accuracy: 0.820782)
106 | (Midclass 3116 select SARIMA, accuracy: 0.859559)
107 | (Midclass 3117 select ZERO, accuracy: 0.690066)
108 | (Midclass 3118 select XGBOOST, accuracy: 1.138621)
109 | (Midclass 3119 select ZERO, accuracy: 0.308607)
110 | (Midclass 3125 select ZERO, accuracy: 0.000000)
111 | (Midclass 3126 select SARIMA, accuracy: 0.710316)
112 | (Midclass 3208 select ZERO, accuracy: 0.000000)
113 | (Midclass 3217 select ZERO, accuracy: 0.000000)
114 | (Midclass 3227 select SARIMA, accuracy: 0.470535)
115 | (Midclass 3311 select ZERO, accuracy: 0.000000)
116 | (Midclass 3316 select ZERO, accuracy: 0.000000)
117 | (Midclass 3319 select SARIMA, accuracy: 1.418897)
118 | (Midclass 3320 select XGBOOST, accuracy: 0.681791)
119 | (Midclass 3321 select ZERO, accuracy: 0.845154)
120 | (Midclass 3322 select ZERO, accuracy: 0.218218)
121 | (Midclass 3323 select ZERO, accuracy: 0.308607)
122 | (Midclass 3325 select SARIMA, accuracy: 0.306011)
123 | (Midclass 3326 select ZERO, accuracy: 0.000000)
124 | (Midclass 3402 select SARIMA, accuracy: 0.505201)
125 | (Midclass 3403 select ZERO, accuracy: 0.218218)
126 | (Midclass 3407 select XGBOOST, accuracy: 1.583984)
127 | (Midclass 3408 select ZERO, accuracy: 0.218218)
128 | (Midclass 3413 select ZERO, accuracy: 0.000000)
129 | (Midclass 3415 select SARIMA, accuracy: 0.871680)
130 | (Midclass 3417 select XGBOOST, accuracy: 0.377431)
131 | (Midclass 3423 select SARIMA, accuracy: 0.438360)
132 | (Midclass 3424 select XGBOOST, accuracy: 1.109004)
133 | (Midclass 3426 select XGBOOST, accuracy: 0.215652)
134 | (Midclass 3431 select SARIMA, accuracy: 0.555214)
135 | (Larclass 10 select SUM, accuracy: 5.288813)
136 | (Larclass 11 select SUM, accuracy: 1.967995)
137 | (Larclass 12 select SARIMA, accuracy: 29.097950)
138 | (Larclass 13 select SARIMA, accuracy: 3.669651)
139 | (Larclass 15 select SARIMA, accuracy: 15.189662)
140 | (Larclass 20 select SARIMA, accuracy: 13.969971)
141 | (Larclass 21 select SUM, accuracy: 2.007923)
142 | (Larclass 22 select SUM, accuracy: 22.782286)
143 | (Larclass 23 select SARIMA, accuracy: 9.731009)
144 | (Larclass 30 select SARIMA, accuracy: 8.978236)
145 | (Larclass 31 select SUM, accuracy: 2.468272)
146 | (Larclass 32 select SARIMA, accuracy: 0.447503)
147 | (Larclass 33 select SARIMA, accuracy: 2.195191)
148 | (Larclass 34 select SARIMA, accuracy: 2.465107)
149 |
150 | 1502: failed to fit ARIMA; used xgboost instead
151 | 2302: failed to fit ARIMA; used xgboost instead
152 | 3017: failed to fit ARIMA; used xgboost instead
153 | 3018: failed to fit ARIMA; used xgboost instead
154 |
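Although the column is labelled "accuracy", the fact that ZERO rows scoring 0.000000 were selected suggests it is an error score where lower is better (a ZERO predictor on an all-zero series makes no error). Under that reading, the per-class selection reduces to keeping the candidate with the smallest score; a minimal sketch, with model names taken from the report and the lower-is-better convention assumed:

    def select_model(scores):
        # scores: e.g. {"SARIMA": 0.38, "XGBOOST": 0.53, "ZERO": 0.0}
        # Assumption: the reported "accuracy" is an error metric (lower wins).
        return min(scores, key=scores.get)

    print(select_model({"SARIMA": 0.38, "ZERO": 0.0}))   # -> ZERO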
--------------------------------------------------------------------------------
/doc/~$ompare.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/doc/~$ompare.docx
--------------------------------------------------------------------------------
/doc/特征选择.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/doc/特征选择.xlsx
--------------------------------------------------------------------------------
/plot_pic/1001_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1001_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1002_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1002_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1004_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1004_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1005_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1005_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1006_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1006_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1007_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1007_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1099_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1099_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/10_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/10_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1101_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1101_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1102_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1102_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1103_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1103_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1104_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1104_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/11_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/11_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1201_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1201_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1202_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1202_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1203_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1203_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1205_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1205_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/12_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/12_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1301_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1301_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1302_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1302_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1306_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1306_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1308_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1308_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1399_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1399_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/13_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/13_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1401_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1401_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1402_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1402_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1403_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1403_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1404_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1404_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/14_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/14_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1501_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1501_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1502_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1502_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1503_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1503_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1504_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1504_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1505_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1505_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1507_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1507_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1508_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1508_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1509_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1509_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1510_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1510_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1511_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1511_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1512_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1512_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1513_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1513_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1514_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1514_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1515_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1515_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1516_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1516_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1517_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1517_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1518_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1518_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1519_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1519_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1520_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1520_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/1521_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/1521_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/15_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/15_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2001_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2001_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2002_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2002_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2003_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2003_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2004_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2004_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2005_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2005_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2006_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2006_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2007_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2007_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2008_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2008_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2009_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2009_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2010_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2010_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2011_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2011_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2012_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2012_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2013_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2013_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2014_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2014_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2015_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2015_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/20_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/20_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2101_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2101_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2102_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2102_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2103_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2103_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2104_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2104_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2105_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2105_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2106_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2106_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2107_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2107_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2108_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2108_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/21_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/21_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2201_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2201_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2202_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2202_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2203_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2203_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2204_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2204_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2205_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2205_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2206_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2206_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2207_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2207_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2208_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2208_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2209_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2209_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2210_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2210_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2211_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2211_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2212_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2212_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/22_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/22_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2301_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2301_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2302_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2302_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2303_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2303_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2304_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2304_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2305_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2305_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2306_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2306_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2307_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2307_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2308_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2308_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2309_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2309_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2310_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2310_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2311_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2311_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2312_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2312_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2313_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2313_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2314_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2314_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2315_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2315_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2316_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2316_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/2317_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/2317_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/23_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/23_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3001_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3001_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3002_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3002_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3003_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3003_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3004_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3004_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3005_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3005_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3006_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3006_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3007_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3007_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3008_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3008_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3009_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3009_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3010_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3010_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3011_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3011_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3012_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3012_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3013_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3013_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3014_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3014_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3015_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3015_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3016_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3016_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3017_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3017_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3018_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3018_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/30_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/30_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3101_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3101_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3102_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3102_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3104_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3104_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3105_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3105_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3106_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3106_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3107_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3107_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3108_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3108_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3109_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3109_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3110_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3110_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3111_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3111_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3112_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3112_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3113_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3113_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3114_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3114_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3115_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3115_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3116_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3116_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3117_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3117_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3118_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3118_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3119_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3119_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3120_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3120_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3121_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3121_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3122_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3122_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3125_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3125_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3126_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3126_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3128_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3128_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/31_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/31_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3208_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3208_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3212_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3212_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3213_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3213_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3215_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3215_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3216_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3216_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3217_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3217_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3218_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3218_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3227_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3227_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/32_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/32_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3301_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3301_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3303_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3303_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3311_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3311_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3313_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3313_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3314_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3314_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3315_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3315_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3316_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3316_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3317_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3317_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3319_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3319_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3320_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3320_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3321_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3321_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3322_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3322_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3323_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3323_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3325_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3325_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3326_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3326_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3328_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3328_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3330_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3330_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/33_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/33_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3401_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3401_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3402_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3402_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3403_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3403_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3404_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3404_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3405_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3405_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3406_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3406_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3407_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3407_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3408_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3408_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3409_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3409_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3410_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3410_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3412_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3412_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3413_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3413_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3414_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3414_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3415_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3415_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3416_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3416_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3417_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3417_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3419_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3419_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3421_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3421_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3423_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3423_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3424_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3424_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3426_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3426_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3427_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3427_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3428_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3428_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3429_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3429_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3431_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3431_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3432_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3432_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/3436_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/3436_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/34_customers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/34_customers.jpg
--------------------------------------------------------------------------------
/plot_pic/异常日期.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/plot_pic/异常日期.txt
--------------------------------------------------------------------------------
/rnn/test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Thu Nov 30 22:01:26 2017
4 |
5 | @author: wangjun
6 | """
7 |
8 | # Naive LSTM to learn three-char window to one-char mapping
9 | import numpy
10 | from keras.models import Sequential
11 | from keras.layers import Dense
12 | from keras.layers import LSTM
13 | from keras.utils import np_utils
14 |
15 | # fix random seed for reproducibility
16 | numpy.random.seed(7)
17 | # define the raw dataset
18 | alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
19 | # create mapping of characters to integers (0-25) and the reverse
20 | char_to_int = dict((c, i) for i, c in enumerate(alphabet))
21 | int_to_char = dict((i, c) for i, c in enumerate(alphabet))
22 | # prepare the dataset of input to output pairs encoded as integers
23 | seq_length = 3
24 | dataX = []
25 | dataY = []
26 | for i in range(0, len(alphabet) - seq_length, 1):
27 | seq_in = alphabet[i:i + seq_length]
28 | seq_out = alphabet[i + seq_length]
29 | dataX.append([char_to_int[char] for char in seq_in])
30 | dataY.append(char_to_int[seq_out])
31 | print(seq_in, '->', seq_out)
32 | # reshape X to be [samples, time steps, features]
33 | X = numpy.reshape(dataX, (len(dataX), seq_length, 1))
34 | # normalize
35 | X = X / float(len(alphabet))
36 | # one hot encode the output variable
37 | y = np_utils.to_categorical(dataY)
38 | # create and fit the model
39 | model = Sequential()
40 | model.add(LSTM(32, input_shape=(X.shape[1], X.shape[2])))
41 | model.add(Dense(y.shape[1], activation='softmax'))
42 | model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
43 | model.fit(X, y, epochs=500, batch_size=1, verbose=2)  # 'nb_epoch' was renamed 'epochs' in Keras 2
44 | # summarize performance of the model
45 | scores = model.evaluate(X, y, verbose=0)
46 | print("Model Accuracy: %.2f%%" % (scores[1]*100))
47 | # demonstrate some model predictions
48 | for pattern in dataX:
49 |     x = numpy.reshape(pattern, (1, len(pattern), 1))  # match the (samples, time steps, features) training shape
50 | x = x / float(len(alphabet))
51 | prediction = model.predict(x, verbose=0)
52 | index = numpy.argmax(prediction)
53 | result = int_to_char[index]
54 | seq_in = [int_to_char[value] for value in pattern]
55 | print(seq_in, "->", result)
--------------------------------------------------------------------------------
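
Note: a minimal, Keras-free sketch of the window encoding /rnn/test.py relies on. Each three-character window becomes integer indices, scaled into [0, 1) and shaped as (samples, time steps, features); the helper name is illustrative, not part of the repository.

    import numpy as np

    alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    char_to_int = {c: i for i, c in enumerate(alphabet)}

    def encode_window(window):
        # characters -> integer indices -> the (1, time steps, 1) tensor
        # shape the LSTM above is trained on
        idx = [char_to_int[c] for c in window]
        x = np.reshape(idx, (1, len(window), 1))
        return x / float(len(alphabet))

    print(encode_window("ABC"))  # values 0/26, 1/26, 2/26
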
/rnn/test2.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Thu Nov 30 22:16:06 2017
4 |
5 | @author: wangjun
6 | """
7 |
8 | import numpy as np
9 | import math
10 |
11 | from keras.models import Sequential
12 | from keras.layers import Dense
13 | from keras.layers import LSTM
14 | import dataLoader
15 |
16 | import matplotlib.pyplot as plt
17 | import xgboostPredicter
18 |
19 | loader = dataLoader.loader("datam.csv")
20 | loader.setSize(200, 43, 0)
21 | midclass, trainData, trainLabel, testData, testLabel = loader.getNextMidClass()
22 | loader.closeFiles()
23 |
24 | seq_length = 0
25 | data_max = 35
26 | dataX = []
27 | dataY = []
28 |
29 | trainLabelN = []
30 | for i in range(0, len(trainLabel)):
31 | trainLabelN.append(trainLabel[i] / data_max)
32 |
33 | for i in range(0, len(trainLabelN) - seq_length):
34 | dataX.append(trainData[i+seq_length]+trainLabelN[i:i+seq_length])
35 | dataY.append(trainLabelN[i+seq_length])
36 |
37 | X = np.reshape(dataX, (len(dataX), 1, len(trainData[0])+seq_length))
38 | Y = np.reshape(dataY, (len(dataY), 1))
39 |
40 | model = Sequential()
41 | model.add(LSTM(6, input_shape=(X.shape[1], X.shape[2]), batch_size=1, stateful=True))
42 | model.add(Dense(1))
43 | model.compile(loss='mean_squared_error', optimizer='adam')
44 | model.fit(X, Y, epochs=300, batch_size=1, verbose=1, shuffle=False)  # stateful LSTM: keep sample order; 'nb_epoch' renamed 'epochs' in Keras 2
45 |
46 | #history = trainLabelN[-1*seq_length:]
47 | predLabel = []
48 | for i in range(0, len(testLabel)):
49 | #feature = np.array(testData[i]+history).reshape(1, 1, len(trainData[0])+seq_length)
50 | feature = np.array(testData[i]).reshape(1, 1, len(trainData[0]))
51 | predict = model.predict(feature)
52 | predLabel.append(predict[0][0]*data_max)
53 | #history.pop(0)
54 | #history.append(predict)
55 |
56 | predLabel = np.array(predLabel)
57 | testLabel = np.array(testLabel)
58 | bias = sum((predLabel-testLabel)*(predLabel-testLabel))
59 | bias = math.sqrt(bias/len(testLabel))
60 | print(bias)
61 | plt.plot(predLabel, color='blue',label='predict')
62 | plt.plot(testLabel, color='red', label='original')
63 | plt.show(block=False)
64 |
65 | def xgboostPredict(trainData, trainLabel, testData):
66 |
67 | xgp = xgboostPredicter.predicter()
68 | model = xgp.xgboostTrain(trainData, trainLabel)
69 | predLabel = xgp.xgboostPredict(model, testData)
70 | return predLabel
71 |
72 | predLabel = xgboostPredict(trainData, trainLabel, testData)
73 | predLabel = np.array(predLabel)
74 | testLabel = np.array(testLabel)
75 | bias = sum((predLabel-testLabel)*(predLabel-testLabel))
76 | bias = math.sqrt(bias/len(testLabel))
77 | print(bias)
78 | plt.plot(predLabel, color='blue',label='predict')
79 | plt.plot(testLabel, color='red', label='original')
80 | plt.show(block=False)
81 |
--------------------------------------------------------------------------------
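
Note: both evaluation blocks in /rnn/test2.py compute a root-mean-square error by hand; the same metric as a small numpy sketch (the helper name is an illustration, not code from the repository).

    import numpy as np

    def rmse(pred, label):
        # root-mean-square error between two equal-length sequences
        pred = np.asarray(pred, dtype=float)
        label = np.asarray(label, dtype=float)
        return float(np.sqrt(np.mean((pred - label) ** 2)))

    # e.g. print(rmse(predLabel, testLabel)) reproduces the printed bias
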
/rnn/test3.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | '''Example script showing how to use a stateful LSTM model
4 | and how its stateless counterpart performs.
5 |
6 | More documentation about the Keras LSTM model can be found at
7 | https://keras.io/layers/recurrent/#lstm
8 |
9 | The models are trained on an input/output pair, where
10 | the input is a generated uniformly distributed
11 | random sequence of length = "input_len",
12 | and the output is a moving average of the input with window length = "tsteps".
13 | Both "input_len" and "tsteps" are defined in the "editable parameters" section.
14 | A larger "tsteps" value means that the LSTM will need more memory
15 | to figure out the input-output relationship.
16 | This memory length is controlled by the "lahead" variable (more details below).
17 | The rest of the parameters are:
18 | - input_len: the length of the generated input sequence
19 | - lahead: the input sequence length that the LSTM
20 | is trained on for each output point
21 | - batch_size, epochs: same parameters as in the model.fit(...) function
22 | When lahead > 1, the model input is preprocessed to a "rolling window view"
23 | of the data, with the window length = "lahead".
24 | This is similar to sklearn's "view_as_windows"
25 | with "window_shape" being a single number
26 | Ref: http://scikit-image.org/docs/0.10.x/api/skimage.util.html#view-as-windows
27 | When lahead < tsteps, only the stateful LSTM converges because its
28 | statefulness allows it to see beyond the capability that lahead
29 | gave it to fit the n-point average. The stateless LSTM does not have
30 | this capability, and hence is limited by its "lahead" parameter,
31 | which is not sufficient to see the n-point average.
32 | When lahead >= tsteps, both the stateful and stateless LSTM converge.
33 | '''
34 | from __future__ import print_function
35 | import numpy as np
36 | import matplotlib.pyplot as plt
37 | import pandas as pd
38 | from keras.models import Sequential
39 | from keras.layers import Dense, LSTM
40 |
41 | # ----------------------------------------------------------
42 | # EDITABLE PARAMETERS
43 | # Read the documentation in the script head for more details
44 | # ----------------------------------------------------------
45 |
46 | # length of input
47 | input_len = 1000
48 |
49 | # The window length of the moving average used to generate
50 | # the output from the input in the input/output pair used
51 | # to train the LSTM
52 | # e.g. if tsteps=2 and input=[1, 2, 3, 4, 5],
53 | # then output=[1.5, 2.5, 3.5, 4.5]
54 | tsteps = 2
55 |
56 | # The input sequence length that the LSTM is trained on for each output point
57 | lahead = 5
58 |
59 | # training parameters passed to "model.fit(...)"
60 | batch_size = 1
61 | epochs = 10
62 |
63 | # ------------
64 | # MAIN PROGRAM
65 | # ------------
66 |
67 | print("*" * 33)
68 | if lahead >= tsteps:
69 | print("STATELESS LSTM WILL ALSO CONVERGE")
70 | else:
71 | print("STATELESS LSTM WILL NOT CONVERGE")
72 | print("*" * 33)
73 |
74 | np.random.seed(1986)
75 |
76 | print('Generating Data...')
77 |
78 |
79 | def gen_uniform_amp(amp=1, xn=10000):
80 | """Generates uniform random data between
81 | -amp and +amp
82 | and of length xn
83 | Arguments:
84 | amp: maximum/minimum range of uniform data
85 | xn: length of series
86 | """
87 | data_input = np.random.uniform(-1 * amp, +1 * amp, xn)
88 | data_input = pd.DataFrame(data_input)
89 | return data_input
90 |
91 | # Since the output is a moving average of the input,
92 | # the first few points of output will be NaN
93 | # and will be dropped from the generated data
94 | # before training the LSTM.
95 | # Also, when lahead > 1,
96 | # the preprocessing step later of "rolling window view"
97 | # will also cause some points to be lost.
98 | # For aesthetic reasons,
99 | # in order to maintain generated data length = input_len after pre-processing,
100 | # add a few points to account for the values that will be lost.
101 | to_drop = max(tsteps - 1, lahead - 1)
102 | data_input = gen_uniform_amp(amp=0.1, xn=input_len + to_drop)
103 |
104 | # set the target to be a N-point average of the input
105 | expected_output = data_input.rolling(window=tsteps, center=False).mean()
106 |
107 | # when lahead > 1, need to convert the input to "rolling window view"
108 | # https://docs.scipy.org/doc/numpy/reference/generated/numpy.repeat.html
109 | if lahead > 1:
110 | data_input = np.repeat(data_input.values, repeats=lahead, axis=1)
111 | data_input = pd.DataFrame(data_input)
112 | for i, c in enumerate(data_input.columns):
113 | data_input[c] = data_input[c].shift(i)
114 |
115 | # drop the nan
116 | expected_output = expected_output[to_drop:]
117 | data_input = data_input[to_drop:]
118 |
119 | print('Input shape:', data_input.shape)
120 | print('Output shape:', expected_output.shape)
121 | print('Input head: ')
122 | print(data_input.head())
123 | print('Output head: ')
124 | print(expected_output.head())
125 | print('Input tail: ')
126 | print(data_input.tail())
127 | print('Output tail: ')
128 | print(expected_output.tail())
129 |
130 | print('Plotting input and expected output')
131 | plt.plot(data_input[0][:10], '.')
132 | plt.plot(expected_output[0][:10], '-')
133 | plt.legend(['Input', 'Expected output'])
134 | plt.title('Input')
135 | plt.show()
136 |
137 |
138 | def create_model(stateful: bool):
139 | model = Sequential()
140 | model.add(LSTM(20,
141 | input_shape=(lahead, 1),
142 | batch_size=batch_size,
143 | stateful=stateful))
144 | model.add(Dense(1))
145 | model.compile(loss='mse', optimizer='adam')
146 | return model
147 |
148 | print('Creating Stateful Model...')
149 | model_stateful = create_model(stateful=True)
150 |
151 |
152 | # split train/test data
153 | def split_data(x, y, ratio: float = 0.8):
154 | to_train = int(input_len * ratio)
155 | # tweak to match with batch_size
156 | to_train -= to_train % batch_size
157 |
158 | x_train = x[:to_train]
159 | y_train = y[:to_train]
160 | x_test = x[to_train:]
161 | y_test = y[to_train:]
162 |
163 | # tweak to match with batch_size
164 | to_drop = x.shape[0] % batch_size
165 | if to_drop > 0:
166 | x_test = x_test[:-1 * to_drop]
167 | y_test = y_test[:-1 * to_drop]
168 |
169 | # some reshaping
170 | reshape_3 = lambda x: x.values.reshape((x.shape[0], x.shape[1], 1))
171 | x_train = reshape_3(x_train)
172 | x_test = reshape_3(x_test)
173 |
174 | reshape_2 = lambda x: x.values.reshape((x.shape[0], 1))
175 | y_train = reshape_2(y_train)
176 | y_test = reshape_2(y_test)
177 |
178 | return (x_train, y_train), (x_test, y_test)
179 |
180 |
181 | (x_train, y_train), (x_test, y_test) = split_data(data_input, expected_output)
182 | print('x_train.shape: ', x_train.shape)
183 | print('y_train.shape: ', y_train.shape)
184 | print('x_test.shape: ', x_test.shape)
185 | print('y_test.shape: ', y_test.shape)
186 |
187 | print('Training')
188 | for i in range(epochs):
189 | print('Epoch', i + 1, '/', epochs)
190 | # Note that the last state for sample i in a batch will
191 | # be used as initial state for sample i in the next batch.
192 | # Thus we are simultaneously training on batch_size series with
193 | # lower resolution than the original series contained in data_input.
194 | # Each of these series are offset by one step and can be
195 | # extracted with data_input[i::batch_size].
196 | model_stateful.fit(x_train,
197 | y_train,
198 | batch_size=batch_size,
199 | epochs=1,
200 | verbose=1,
201 | validation_data=(x_test, y_test),
202 | shuffle=False)
203 | model_stateful.reset_states()
204 |
205 | print('Predicting')
206 | predicted_stateful = model_stateful.predict(x_test, batch_size=batch_size)
207 |
208 | print('Creating Stateless Model...')
209 | model_stateless = create_model(stateful=False)
210 |
211 | print('Training')
212 | model_stateless.fit(x_train,
213 | y_train,
214 | batch_size=batch_size,
215 | epochs=epochs,
216 | verbose=1,
217 | validation_data=(x_test, y_test),
218 | shuffle=False)
219 |
220 | print('Predicting')
221 | predicted_stateless = model_stateless.predict(x_test, batch_size=batch_size)
222 |
223 | # ----------------------------
224 |
225 | print('Plotting Results')
226 | plt.subplot(3, 1, 1)
227 | plt.plot(y_test)
228 | plt.title('Expected')
229 | plt.subplot(3, 1, 2)
230 | # drop the first "tsteps-1" because it is not possible to predict them
231 | # since the "previous" timesteps to use do not exist
232 | plt.plot((y_test - predicted_stateful).flatten()[tsteps - 1:])
233 | plt.title('Stateful: Expected - Predicted')
234 | plt.subplot(3, 1, 3)
235 | plt.plot((y_test - predicted_stateless).flatten())
236 | plt.title('Stateless: Expected - Predicted')
237 | plt.show()
--------------------------------------------------------------------------------
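
Note: the "rolling window view" described in the docstring of /rnn/test3.py can also be built without pandas; a plain-numpy sketch of the same repeat-and-shift preprocessing (the function name is illustrative).

    import numpy as np

    def rolling_window(series, lahead):
        # row r holds series[r], series[r-1], ..., series[r-lahead+1],
        # NaN-padded at the start -- the layout the shift() loop produces
        n = len(series)
        out = np.full((n, lahead), np.nan)
        for k in range(lahead):
            out[k:, k] = series[:n - k]
        return out

    print(rolling_window(np.arange(5.0), 3))
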
/tools/backup.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Spyder Editor
4 |
5 | This is a temporary script file.
6 | """
7 |
8 | import xgboost as xgb
9 | from numpy import array
10 | from numpy import zeros
11 | import csv
12 | import math
13 |
14 | import pandas as pd
15 | from statsmodels.tsa.statespace.sarimax import SARIMAX
16 | import statsmodels.api as sm
17 | import datetime as dt
18 | import matplotlib.pylab as plt
19 | from statsmodels.tsa.stattools import adfuller
20 |
21 | larclasPred = {}
22 | larclasLabl = {}
23 | totalBias = 0
24 | totalCount = 0
25 |
26 | dtIndex = [dt.datetime(2015,1,x) for x in range(1, 32)]
27 | dtIndex = dtIndex + [dt.datetime(2015,2,x) for x in (range(1, 29))]
28 | dtIndex = dtIndex + [dt.datetime(2015,3,x) for x in range(1, 32)]
29 | dtIndex = dtIndex + [dt.datetime(2015,4,x) for x in (range(1, 31))]
30 |
31 | modelChoose = []
32 |
33 | def getData(csvReader, trainCount, testCount):
34 | trainData = []
35 | testData = []
36 | trainLabel = []
37 | testLabel = []
38 | try:
39 | for x in range(0, trainCount):
40 | row = csvReader.next()
41 | """
42 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]),
43 | float(row[7]), float(row[8]), float(row[9]), float(row[10]),
44 | float(row[11]), float(row[12])]
45 | """
46 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]),
47 | float(row[7]), float(row[8])]
48 | trainData.append(data)
49 | trainLabel.append(float(row[15]))
50 | for x in range(0, testCount):
51 | row = csvReader.next()
52 | """
53 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]),
54 | float(row[7]), float(row[8]), float(row[9]), float(row[10]),
55 | float(row[11]), float(row[12])]
56 | """
57 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]),
58 | float(row[7]), float(row[8])]
59 | testData.append(data)
60 | testLabel.append(float(row[15]))
61 | return int(row[0]), trainData, trainLabel, testData, testLabel
62 | except StopIteration:
63 | return 0, [], [], [], []
64 |
65 | def dataLog(midclass, accuracy, trainLabl, testPred, testLabl):
66 | with open('compare.csv', 'ab') as f:
67 | writer = csv.writer(f)
68 | count = 1
69 | writer.writerow([midclass, accuracy])
70 | for x in trainLabl:
71 | writer.writerow([count, x])
72 | count += 1
73 | for x in range(0, len(testPred)):
74 | writer.writerow([count, testLabl[x], testPred[x]])
75 | count += 1
76 |
77 | def xgboostPredict(trainData, trainLabel, dataToPredict):
78 | dtrain = xgb.DMatrix(trainData, trainLabel)
79 | params = {"objective": "reg:linear"}
80 | gbm = xgb.train(dtrain=dtrain, params=params)
81 | return gbm.predict(xgb.DMatrix(dataToPredict))
82 |
83 | def test_stationarity(timeseries):
84 |
85 |     # Determine rolling statistics
86 | rolmean = timeseries.rolling(window=12,center=False).mean()
87 | rolstd = timeseries.rolling(window=12,center=False).std()
88 |
89 | #Plot rolling statistics:
90 | plt.plot(timeseries, color='blue',label='Original')
91 | plt.plot(rolmean, color='red', label='Rolling Mean')
92 | plt.plot(rolstd, color='black', label = 'Rolling Std')
93 | plt.legend(loc='best')
94 | plt.title('Rolling Mean & Standard Deviation')
95 | plt.show(block=False)
96 |
97 | #Perform Dickey-Fuller test:
98 | print 'Results of Dickey-Fuller Test:'
99 | dftest = adfuller(timeseries, autolag='AIC')
100 | dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
101 | for key,value in dftest[4].items():
102 | dfoutput['Critical Value (%s)'%key] = value
103 | print dfoutput
104 |
105 | #Get AR and MA parameter
106 | fig = plt.figure(figsize=(12,8))
107 | ax1=fig.add_subplot(211)
108 | fig = sm.graphics.tsa.plot_acf(timeseries, lags=20, ax=ax1)
109 | ax2 = fig.add_subplot(212)
110 | fig = sm.graphics.tsa.plot_pacf(timeseries, lags=20, ax=ax2)
111 | plt.show(block=False)
112 |
113 | def sarimaTrain(trainLabel):
114 | dataLength = trainLabel.__len__()
115 | data = pd.Series(trainLabel)
116 | index = dtIndex[0:dataLength]
117 | data.index = pd.Index(index)
118 |
119 | model = SARIMAX(data, order=(1,1,1), seasonal_order=(0,1,1,7))
120 | return model.fit()
121 |
122 | def sarimaPredict(model, predictLength):
123 | output = model.forecast(predictLength)
124 | return array(output)
125 |
126 | def sarimaBias(model, trainLabel):
127 | dataLength = trainLabel.__len__()
128 | data = pd.Series(trainLabel)
129 | index = dtIndex[0:dataLength]
130 | data.index = pd.Index(index)
131 |
132 | pred = model.predict()
133 | """
134 | plt.plot(data, color='blue',label='Original')
135 | plt.plot(pred, color='red', label='Predicted')
136 | plt.show(block=False)
137 | """
138 | return list(data - pred)
139 |
140 | def modelselect(trainSize, testSize):
141 |     global larclasPred, larclasLabl, totalBias, totalCount, modelChoose  # modelChoose must be global, or submit() would read an empty list
142 | larclasPred = {}
143 | larclasLabl = {}
144 | totalBias = 0
145 | totalCount = 0
146 | modelChoose = []
147 | f = open("data.csv", "r")
148 | f_csv = csv.reader(f)
149 |
150 | teD = []
151 | for i in range(31-testSize, 31):
152 | x = [i, (i+2)%7, 0, 0, 0, 0]
153 | if (x[1] == 6 or x[1]==0):
154 | x[3] = 1
155 | elif (x[1] == 5):
156 | x[2] = 1
157 | teD.append(x)
158 |
159 | while (True):
160 | midclass, trD, trL, _, teL = getData(f_csv, trainSize, testSize)
161 | if (midclass == 0):
162 | break
163 | else:
164 |
165 | # sarima model
166 | try:
167 | model = sarimaTrain(trL)
168 | teP1 = sarimaPredict(model, testSize)
169 | except:
170 | teP1 = zeros(testSize)
171 |
172 | # xgboost model
173 | try:
174 | teP2 = xgboostPredict(array(trD), array(trL), array(teD))
175 | except:
176 | teP2 = zeros(testSize)
177 |
178 | # just zero
179 | teP3 = zeros(testSize)
180 |
181 | # count bias of midclass and update larclass
182 | label = array(teL)
183 | larclass = int(midclass/100)
184 | totalCount += testSize
185 |
186 | bias1 = sum((teP1-label)*(teP1-label))
187 | bias2 = sum((teP2-label)*(teP2-label))
188 | bias3 = sum((teP3-label)*(teP3-label))
189 | if (bias3 < bias1 and bias3 < bias2):
190 | totalBias += bias3
191 | bias3 = math.sqrt(bias3/testSize)
192 | print "(Midclass %d select ZERO, accuracy: %f)" % (midclass, bias3)
193 | modelChoose.append(3)
194 | if (larclass in larclasPred):
195 | larclasPred[larclass] += teP3
196 | else:
197 | larclasPred[larclass] = teP3
198 | elif (bias1 < bias2):
199 | totalBias += bias1
200 | bias1 = math.sqrt(bias1/testSize)
201 | print "(Midclass %d select SARIMA, accuracy: %f)" % (midclass, bias1)
202 | modelChoose.append(1)
203 | if (larclass in larclasPred):
204 | larclasPred[larclass] += teP1
205 | else:
206 | larclasPred[larclass] = teP1
207 | else:
208 | totalBias += bias2
209 | bias2 = math.sqrt(bias2/testSize)
210 | print "(Midclass %d select XGBOOST, accuracy: %f)" % (midclass, bias2)
211 | modelChoose.append(2)
212 | if (larclass in larclasPred):
213 | larclasPred[larclass] += teP2
214 | else:
215 | larclasPred[larclass] = teP2
216 |
217 | if (larclass in larclasLabl):
218 | larclasLabl[larclass] += label
219 | else:
220 | larclasLabl[larclass] = label
221 | #dataLog(midclass, bias, trL, teP, teL)
222 |
223 | # print bias of large class
224 | for larclass in larclasPred:
225 | bias = sum((larclasLabl[larclass] - larclasPred[larclass])*
226 | (larclasLabl[larclass] - larclasPred[larclass]))
227 | totalBias += bias
228 | totalCount += testSize
229 | bias = math.sqrt(bias/testSize)
230 | print "(Larclass %d predict finished, accuracy: %f)" % (larclass, bias)
231 |
232 | totalBias = math.sqrt(totalBias/totalCount)
233 | print "(Predict finished, accuracy: %f)" % (totalBias)
234 | f.close()
235 |
236 | def test(trainSize, testSize):
237 | global larclasPred, larclasLabl, totalBias, totalCount
238 | larclasPred = {}
239 | larclasLabl = {}
240 | totalBias = 0
241 | totalCount = 0
242 | f = open("data.csv", "r")
243 | f_csv = csv.reader(f)
244 |
245 | teD = []
246 | for i in range(31-testSize, 31):
247 | x = [i, (i+2)%7, 0, 0, 0, 0]
248 | if (x[1] == 6 or x[1]==0):
249 | x[3] = 1
250 | elif (x[1] == 5):
251 | x[2] = 1
252 | teD.append(x)
253 |
254 | while (True):
255 | midclass, trD, trL, _, teL = getData(f_csv, trainSize, testSize)
256 | if (midclass == 0):
257 | break
258 | else:
259 | try:
260 | model = sarimaTrain(trL)
261 | teP = sarimaPredict(model, testSize)
262 | except:
263 | teP = xgboostPredict(array(trD), array(trL), array(teD))
264 |
265 | # count bias of midclass
266 | bias = 0.0
267 | for i in range(0, testSize):
268 | bias += (teP[i]-teL[i])*(teP[i]-teL[i]);
269 | totalBias += bias
270 | totalCount += testSize
271 | bias = math.sqrt(bias/testSize)
272 | print "(Midclass %d predict finished, accuracy: %f)" % (midclass, bias)
273 | # update bias of large class
274 | larclass = int(midclass/100)
275 | if (larclass in larclasPred):
276 | for i in range(0, testSize):
277 | larclasPred[larclass][i] += teP[i]
278 | larclasLabl[larclass][i] += teL[i]
279 | else:
280 | larclasPred[larclass] = teP
281 | larclasLabl[larclass] = teL
282 | #dataLog(midclass, bias, trL, teP, teL)
283 | # print bias of large class
284 | for larclass in larclasPred:
285 | bias = 0.0
286 | for i in range(0, testSize):
287 | d = larclasLabl[larclass][i] - larclasPred[larclass][i]
288 | bias += d*d;
289 | totalBias += bias
290 | totalCount += testSize
291 | bias = math.sqrt(bias/testSize)
292 | print "(Larclass %d predict finished, accuracy: %f)" % (larclass, bias)
293 |
294 | totalBias = math.sqrt(totalBias/totalCount)
295 | print "(Predict finished, accuracy: %f)" % (totalBias)
296 | f.close()
297 |
298 | def submit(trainSize):
299 | global larclasPred
300 | larclasPred = {}
301 | f1 = open("data.csv", "r")
302 | data_csv = csv.reader(f1)
303 | f2 = open("submit.csv", "r")
304 | submit_csv = csv.reader(f2)
305 | submit_csv.next()
306 |
307 | # generate feature
308 | goal = []
309 | for i in range(1, 31):
310 | x = [i, (i+4)%7, 0, 0, 0, 0]
311 | if (x[1] == 6 or x[1]==0):
312 | x[3] = 1
313 | elif (x[1] == 5):
314 | x[2] = 1
315 | goal.append(x)
316 | goal[0][3] = 1
317 | goal[0][2] = 0
318 |
319 | current = 0
320 |
321 | while (True):
322 | midclass, trD, trL, teD, teL = getData(data_csv, trainSize, 0)
323 | if (midclass == 0):
324 | break
325 | else:
326 |
327 | if (modelChoose[current] == 1):
328 | try:
329 | model = sarimaTrain(trL)
330 | teP = sarimaPredict(model, 30)
331 | except:
332 | teP = xgboostPredict(array(trD), array(trL), array(goal))
333 | elif (modelChoose[current] == 2):
334 | teP = xgboostPredict(array(trD), array(trL), array(goal))
335 | else:
336 | teP = zeros(30)
337 | current += 1
338 |
339 | # write file - midclass
340 | for x in teP:
341 | if (x < 0):
342 | x = 0
343 | row = submit_csv.next()
344 | if (int(row[0]) != midclass):
345 | raise KeyError
346 | with open('submit1.csv', 'ab') as f:
347 | writer = csv.writer(f)
348 | writer.writerow([row[0], row[1], x])
349 |
350 | # count larclass
351 | larclass = int(midclass/100)
352 | if (larclass in larclasPred):
353 | for i in range(0, 30):
354 | larclasPred[larclass][i] += teP[i]
355 | else:
356 | larclasPred[larclass] = teP
357 |
358 |     # write file - larclass
359 | oldLC = 0
360 | for row in submit_csv:
361 | larclass = int(row[0])
362 | if larclass != oldLC:
363 | oldLC = larclass
364 | i = 0
365 | with open('submit1.csv', 'ab') as f:
366 | writer = csv.writer(f)
367 | writer.writerow([row[0], row[1], larclasPred[larclass][i]])
368 | i+=1
369 | f1.close()
370 | f2.close()
371 |
372 | test(106, 14)
373 | modelselect(106, 14)
374 | #submit(120)
--------------------------------------------------------------------------------
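
Note: the three-way branch in modelselect() of /tools/backup.py keeps whichever of the SARIMA, XGBoost, and all-zero candidates has the smallest squared error. The same rule condensed into a sketch (names are illustrative; ties are broken by dict order rather than the script's ZERO-first order).

    import math
    import numpy as np

    def select_model(label, candidates):
        # candidates: {name: predicted array}; return the candidate with the
        # minimum sum of squared errors, plus its RMSE
        label = np.asarray(label, dtype=float)
        name, pred = min(candidates.items(),
                         key=lambda kv: float(np.sum((np.asarray(kv[1]) - label) ** 2)))
        rmse = math.sqrt(np.mean((np.asarray(pred) - label) ** 2))
        return name, pred, rmse

    # e.g. select_model(teL, {"SARIMA": teP1, "XGBOOST": teP2, "ZERO": teP3})
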
/tools/csvloader.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Tue Oct 24 18:48:57 2017
4 |
5 | @author: wangjun
6 |
7 | Generates training data from the given dataset.
8 | Because the training program consumes rows in middle-class order (rather than
9 | date order), the generated file must be sorted by middle class in Excel :)
10 | """
11 |
12 | import csv
13 | import datetime
14 |
15 | DictHoilday = [1,2,3,49,50,51,52,53,54,55,96,121,173]
16 | DictBeforeHoilday = [46,47,48,120]
17 | DictWorkday = [46, 58, 59]
18 | midClasses = {}
19 |
20 | date = datetime.datetime(2015, 1, 1)
21 | dailyData = {}
22 | # index -0 -1
23 | # middle class-sales count-promotions
24 | # string -float -int
25 | promotions = []
26 | totalCount = 0
27 | totalPay = 0
28 | lineNum = 1
29 | dayCount = 1
30 |
31 | dataLog = [{}, {}, {}, {}, {}, {}, {}]
32 |
33 | def getHistory(midclass):
34 | total = 0
35 | log = []
36 | for i in range(0, 7):
37 | try:
38 | temp = dataLog[i][midclass][0]
39 | total += temp
40 | log.append(temp)
41 | except KeyError:
42 | log.append(0)
43 | return log[0], log[1], log[2], total/7
44 |
45 | def writeData():
46 | global dailyData, promotions, totalCount, totalPay, dayCount, dataLog
47 | day = date.day
48 | month = date.month
49 | week = (date.weekday() + 1) % 7
50 | if (dayCount in DictHoilday):
51 | holiday = 1
52 | beforeHoliday = 0
53 | elif (dayCount in DictBeforeHoilday):
54 | holiday = 0
55 | beforeHoliday = 1
56 | elif (dayCount in DictWorkday):
57 | holiday = 0
58 | if (week==6 or ((dayCount+1) in DictHoilday)):
59 | beforeHoliday = 1
60 | else:
61 | beforeHoliday = 0
62 | elif (week==0 or week==6):
63 | holiday = 1
64 | beforeHoliday = 0
65 | elif (week==5):
66 | holiday = 0
67 | beforeHoliday = 1
68 | else:
69 | holiday = 0
70 | beforeHoliday = 0
71 | promotionClass = {}
72 | for midclass in promotions:
73 | larclass = int(midclass)/100
74 | if larclass in promotionClass:
75 | promotionClass[larclass] = promotionClass[larclass] + 1;
76 | else:
77 | promotionClass[larclass] = 1;
78 | with open('output.csv', 'ab') as f:
79 | writer = csv.writer(f)
80 | for midclass in dailyData:
81 | l1, l2, l3, la = getHistory(midclass)
82 | if (midclass not in midClasses):
83 | continue
84 | else:
85 | midClasses[midclass] = 1
86 | try:
87 | larclass = int(midclass) / 100
88 | if (larclass in promotionClass):
89 | writer.writerow([midclass, dayCount, month,
90 | day, week, beforeHoliday, holiday,
91 | dailyData[midclass][1],
92 | promotionClass[larclass]-dailyData[midclass][1],
93 | l1, l2, l3, la,
94 | totalCount, totalPay, dailyData[midclass][0]])
95 | else:
96 | writer.writerow([midclass, dayCount, month,
97 | day, week, beforeHoliday, holiday,
98 | 0, 0, l1, l2, l3, la,
99 | totalCount, totalPay, dailyData[midclass][0]])
100 | except ZeroDivisionError:
101 | pass
102 | #just neglect it
103 | for midclass in midClasses:
104 | l1, l2, l3, la = getHistory(midclass)
105 | if (midClasses[midclass] == 0):
106 | larclass = int(midclass) / 100
107 | if (larclass in promotionClass):
108 | writer.writerow([midclass, dayCount, month,
109 | day, week, beforeHoliday, holiday,
110 | 0, promotionClass[larclass],
111 | l1, l2, l3, la,
112 | totalCount, totalPay, 0])
113 | else:
114 | writer.writerow([midclass, dayCount, month,
115 | day, week, beforeHoliday, holiday, 0, 0,
116 | l1, l2, l3, la,
117 | totalCount, totalPay, 0])
118 | dataLog.insert(0, dailyData)
119 | dataLog.pop()
120 | dailyData = {}
121 | promotions = []
122 | totalCount = 0
123 | totalPay = 0
124 | dayCount += 1
125 | for midclass in midClasses:
126 | midClasses[midclass] = 0
127 |
128 | with open('example.csv') as f:
129 | f_csv = csv.reader(f)
130 | f_csv.next()
131 | for row in f_csv:
132 | if (int(row[0]) > 100):
133 | midClasses[row[0]] = 0;
134 |
135 | with open('train.csv') as f:
136 | f_csv = csv.reader(f)
137 | f_csv.next()
138 | for row in f_csv:
139 | lineNum += 1
140 |
141 | # check date
142 | day = int(row[7]) % 100
143 | month = int(row[7]) / 100 % 100
144 | tempdate = datetime.datetime(2015, month, day)
145 | while (date != tempdate):
146 | writeData()
147 | date = date.__add__(datetime.timedelta(1))
148 |
149 | midclass = row[3]
150 | if (midclass in dailyData):
151 | #float(row[13]) or 1
152 | dailyData[midclass][0] = dailyData[midclass][0]+1
153 | totalCount=totalCount+1
154 | try:
155 | totalPay=totalPay+float(row[14])
156 | except:
157 | pass
158 | else:
159 | dailyData[midclass] = [1, 0]
160 | totalCount=totalCount+1
161 | try:
162 | totalPay=totalPay+float(row[14])
163 | except:
164 | pass
165 | if (row[16]!='\xb7\xf1'):
166 | dailyData[midclass][1] = 1
167 | if (midclass not in promotions):
168 | promotions.append(midclass)
169 | writeData();
170 |
171 |
172 |
173 |
--------------------------------------------------------------------------------
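
Note: the flag logic at the top of writeData() in /tools/csvloader.py is a small decision table; restated as a standalone sketch (function and argument names are illustrative; the day-number lists are the DictHoilday/DictBeforeHoilday/DictWorkday constants defined in the script).

    def holiday_flags(day_count, week, holidays, before_holidays, workdays):
        # explicit holiday calendars take precedence, then weekends count as
        # holidays and Fridays as pre-holiday days
        if day_count in holidays:
            return 1, 0
        if day_count in before_holidays:
            return 0, 1
        if day_count in workdays:  # weekend worked as a make-up workday
            return 0, 1 if (week == 6 or (day_count + 1) in holidays) else 0
        if week in (0, 6):  # Sunday or Saturday
            return 1, 0
        if week == 5:  # Friday
            return 0, 1
        return 0, 0
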
/tools/csvloader_largeClass.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Tue Oct 24 18:48:57 2017
4 |
5 | @author: wangjun
6 |
7 | Generates large-class training data from the given dataset.
8 | Because the training program consumes rows in class order (rather than date
9 | order), the generated file must be sorted by class in Excel :)
10 | """
11 |
12 | import csv
13 | import datetime
14 |
15 | DictHoilday = [1,2,3,49,50,51,52,53,54,55,96,121,173]
16 | DictBeforeHoilday = [46,47,48,120]
17 | DictWorkday = [46, 58, 59]
18 | larClasses = {}
19 |
20 | date = datetime.datetime(2015, 1, 1)
21 | dailyData = {}
22 | # index -0 -1
23 | # large class -sales count-promotions
24 | # string -float -int
25 | totalCount = 0
26 | totalPay = 0
27 | lineNum = 1
28 | dayCount = 1
29 |
30 | dataLog = [{}, {}, {}, {}, {}, {}, {}]
31 |
32 | def getHistory(larclass):
33 | total = 0
34 | log = []
35 | for i in range(0, 7):
36 | try:
37 | temp = dataLog[i][larclass][0]
38 | total += temp
39 | log.append(temp)
40 | except KeyError:
41 | log.append(0)
42 | return log[0], log[1], log[2], total/7
43 |
44 | def writeData():
45 | global dailyData, totalCount, totalPay, dayCount, dataLog
46 | day = date.day
47 | month = date.month
48 | week = (date.weekday() + 1) % 7
49 | if (dayCount in DictHoilday):
50 | holiday = 1
51 | beforeHoliday = 0
52 | elif (dayCount in DictBeforeHoilday):
53 | holiday = 0
54 | beforeHoliday = 1
55 | elif (dayCount in DictWorkday):
56 | holiday = 0
57 | if (week==6 or ((dayCount+1) in DictHoilday)):
58 | beforeHoliday = 1
59 | else:
60 | beforeHoliday = 0
61 | elif (week==0 or week==6):
62 | holiday = 1
63 | beforeHoliday = 0
64 | elif (week==5):
65 | holiday = 0
66 | beforeHoliday = 1
67 | else:
68 | holiday = 0
69 | beforeHoliday = 0
70 | with open('lcoutput.csv', 'ab') as f:
71 | writer = csv.writer(f)
72 | for larclass in dailyData:
73 | l1, l2, l3, la = getHistory(larclass)
74 | if (larclass not in larClasses):
75 | continue
76 | else:
77 | larClasses[larclass] = 1
78 | try:
79 | writer.writerow([larclass, dayCount, month,
80 | day, week, beforeHoliday, holiday,
81 | dailyData[larclass][1],
82 | l1, l2, l3, la,
83 | totalCount, totalPay, dailyData[larclass][0]])
84 | except ZeroDivisionError:
85 | pass
86 | #just neglect it
87 | for larclass in larClasses:
88 | l1, l2, l3, la = getHistory(larclass)
89 | if (larClasses[larclass] == 0):
90 | writer.writerow([larclass, dayCount, month,
91 | day, week, beforeHoliday, holiday,
92 | 0,
93 | l1, l2, l3, la,
94 | totalCount, totalPay, 0])
95 | dataLog.insert(0, dailyData)
96 | dataLog.pop()
97 | dailyData = {}
98 | totalCount = 0
99 | totalPay = 0
100 | dayCount += 1
101 | for larclass in larClasses:
102 | larClasses[larclass] = 0
103 |
104 | with open('example.csv') as f:
105 | f_csv = csv.reader(f)
106 | f_csv.next()
107 | for row in f_csv:
108 | if (int(row[0]) < 100):
109 | larClasses[row[0]] = 0;
110 |
111 | with open('train.csv') as f:
112 | f_csv = csv.reader(f)
113 | f_csv.next()
114 | for row in f_csv:
115 | lineNum += 1
116 |
117 | # check date
118 | day = int(row[7]) % 100
119 | month = int(row[7]) / 100 % 100
120 | tempdate = datetime.datetime(2015, month, day)
121 | while (date != tempdate):
122 | writeData()
123 | date = date.__add__(datetime.timedelta(1))
124 |
125 | larclass = row[1]
126 | if (larclass in dailyData):
127 | #float(row[13]) or 1
128 | dailyData[larclass][0] = dailyData[larclass][0]+1
129 | totalCount=totalCount+1
130 | try:
131 | totalPay=totalPay+float(row[14])
132 | except:
133 | pass
134 | else:
135 | dailyData[larclass] = [1, 0]
136 | totalCount=totalCount+1
137 | try:
138 | totalPay=totalPay+float(row[14])
139 | except:
140 | pass
141 | if (row[16]!='\xb7\xf1'):
142 | dailyData[larclass][1] = 1
143 | writeData();
144 |
145 |
146 |
147 |
--------------------------------------------------------------------------------
/tools/dataModify.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Fri Nov 03 20:30:09 2017
4 |
5 | @author: wangjun
6 | """
7 |
8 | import csv
9 | import pandas as pd
10 | from statsmodels.tsa.statespace.sarimax import SARIMAX
11 | import datetime as dt
12 | import arimaPredicter
13 |
14 | dateToModify = [34, 44, 89, 98, 105, 150, 211]
15 |
16 | ap = arimaPredicter.predicter()
17 | index = ap.createIndex(dt.datetime(2015,1,1), 243)
18 |
19 | def getData(csvReader, count):
20 | data = []
21 | label = []
22 | try:
23 | for x in range(0, count):
24 | row = csvReader.next()
25 | data.append(row[:-1])
26 | label.append(int(row[-1]))
27 | return int(row[0]), data, label
28 | except StopIteration:
29 | return 0, [], []
30 |
31 | def modifyFile(reader, writer, count):
32 | global dateToModify, index
33 | while (True):
34 | clas, data, label = getData(reader, count)
35 | if (clas == 0):
36 | break
37 | data0 = pd.Series(label)
38 | data0.index = pd.Index(index)
39 | try:
40 | model = SARIMAX(data0, order=(1,1,1), seasonal_order=(0,1,1,7))
41 | result = model.fit()
42 | except:
43 | print("%d: failed to train sarimax model, abort" % clas)
44 | for i in range(0, count):
45 | writer.writerow(data[i] + [label[i]])
46 | continue
47 | for i in dateToModify:
48 | label[i] = round(result.predict(i, i)[0])
49 | if (label[i] < 0):
50 | label[i] = 0
51 | for i in range(0, count):
52 | writer.writerow(data[i] + [label[i]])
53 |
54 | f1 = open("data.csv", "r")
55 | reader = csv.reader(f1)
56 | f2 = open('datam.csv', 'wb')
57 | writer = csv.writer(f2)
58 | modifyFile(reader, writer, 243)
59 | f1.close()
60 | f2.close()
61 |
62 | f1 = open("lcdata.csv", "r")
63 | reader = csv.reader(f1)
64 | f2 = open('lcdatam.csv', 'wb')
65 | writer = csv.writer(f2)
66 | modifyFile(reader, writer, 243)
67 | f1.close()
68 | f2.close()
69 |
70 |
--------------------------------------------------------------------------------
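
Note: /tools/dataModify.py overwrites a fixed list of anomalous days with in-sample SARIMAX predictions. The core step as a self-contained sketch (the function name is illustrative; same SARIMAX(1,1,1)x(0,1,1,7) order as the script).

    import pandas as pd
    from statsmodels.tsa.statespace.sarimax import SARIMAX

    def smooth_outliers(label, positions, index):
        # fit on the full series, then replace each flagged position with the
        # rounded, non-negative in-sample prediction for that day
        series = pd.Series(label, index=pd.Index(index))
        result = SARIMAX(series, order=(1, 1, 1),
                         seasonal_order=(0, 1, 1, 7)).fit(disp=False)
        for i in positions:
            label[i] = max(0, int(round(result.predict(i, i).iloc[0])))
        return label
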
/tools/fileChecker.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Fri Nov 10 18:55:56 2017
4 |
5 | @author: wangjun
6 | """
7 |
8 | import csv
9 |
10 | f1 = open("submit1.csv", "r")
11 | f1_csv = csv.reader(f1)
12 |
13 | f2 = open("submit3.csv", "r")
14 | f2_csv = csv.reader(f2)
15 |
16 | lineNo = 2
17 | row1 = f1_csv.next()
18 | row2 = f2_csv.next()
19 |
20 | while (True):
21 | try:
22 | row1 = f1_csv.next()
23 | row2 = f2_csv.next()
24 | except StopIteration:
25 | break
26 | if (int(row1[2])!=int(row2[2])):
27 | print lineNo
28 | i = input()
29 | lineNo += 1
--------------------------------------------------------------------------------
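
Note: /tools/fileChecker.py steps through two submissions and pauses on every mismatch; an equivalent non-interactive pass, sketched in Python 3 (same file names as the script).

    import csv

    with open("submit1.csv") as f1, open("submit3.csv") as f2:
        rows = zip(csv.reader(f1), csv.reader(f2))
        next(rows)  # skip the header row of each file
        for line_no, (row1, row2) in enumerate(rows, start=2):
            if int(row1[2]) != int(row2[2]):
                print(line_no)
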
/v3/Readme.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/v3/Readme.txt
--------------------------------------------------------------------------------
/v3/Version_3.py:
--------------------------------------------------------------------------------
1 | from sklearn.ensemble import RandomForestRegressor
2 | import numpy as np
3 | import csv
4 |
5 |
6 | mid_class_num = 134
7 | large_class_num = 14
8 | class_codes = []
9 | train_set_x = {}
10 | train_set_y = {}
11 | test_set_x = {}
12 | test_set_y = {}
13 | May_set_x = {}
14 | May_set_y = {}
15 | large_codes = ['10', '11', '12', '13', '15', '20', '21', '22', '23', '30', '31', '32', '33', '34']
16 | commit_codes = []
17 |
18 | accumulate_err = 0
19 |
20 |
21 | # Load the data for training and testing the models (excluding May)
22 | def load_data():
23 | with open('train.csv') as input_file:
24 | input_csv = csv.reader(input_file)
25 | day = 0
26 | for row in input_csv:
27 | code = row[0]
28 | if day == 0:
29 | class_codes.append(code)
30 | train_set_x[code] = []
31 | train_set_y[code] = []
32 | x = list(map(float, row[1:-1]))
33 |             # Add one extra feature to each large class: the predicted same-day sum of its middle-class customers, initialized to 0
34 | if code in large_codes:
35 | x.append(0)
36 | train_set_x[code].append(x)
37 | train_set_y[code].append(float(row[-1]))
38 | day = (day + 1) % 100
39 | with open('test.csv') as input_file:
40 | input_csv = csv.reader(input_file)
41 | day = 0
42 | for row in input_csv:
43 | code = row[0]
44 | if day == 0:
45 | test_set_x[code] = []
46 | test_set_y[code] = []
47 | x = list(map(float, row[1:-1]))
48 |             # For a large class, append one extra feature: the predicted same-day customer sum of its mid classes, initialised to 0
49 | if code in large_codes:
50 | x.append(0)
51 | test_set_x[code].append(x)
52 | test_set_y[code].append(float(row[-1]))
53 | day = (day + 1) % 20
54 |
55 |
56 | def load_May_data():
57 | with open('May_input.csv') as input_file:
58 | input_csv = csv.reader(input_file)
59 | day = 0
60 | for row in input_csv:
61 | code = row[0]
62 | if code in commit_codes:
63 | if day == 0:
64 | May_set_x[code] = []
65 | May_set_x[code].append(list(map(float, row[1:])))
66 | day = (day + 1) % 30
67 |
68 |
69 | # Update the last feature of a large class (the predicted sales sum of its mid classes)
70 | def modify_large_feature(type, class_code, pred):
71 | class_code = class_code[:2]
72 | if type == 'train':
73 | for day in range(len(train_set_x[class_code])):
74 | train_set_x[class_code][day][-1] += pred[day]
75 | if type == 'test':
76 | for day in range(len(test_set_x[class_code])):
77 | test_set_x[class_code][day][-1] += pred[day]
78 |
79 |
80 | def train_test_eval(train_x, train_y, test_x, test_y, params=None):
81 | # train
82 | if params is None:
83 | rf = RandomForestRegressor()
84 | else:
85 | rf = RandomForestRegressor(n_estimators=params['n_estimators'], oob_score=params['oob_score'])
86 | rf.fit(train_x, train_y)
87 |
88 | # test
89 | ypred = np.asarray(list(map(round, rf.predict(test_x))))
90 |
91 | # evaluation
92 | rmse = np.sqrt(((test_y - ypred) ** 2).mean())
93 | global accumulate_err
94 | accumulate_err += np.sum((test_y - ypred) ** 2)
95 |
96 | # this is used for modifying large class feature
97 | train_predict = rf.predict(train_x)
98 |
99 | return rf, ypred, rmse, train_predict
100 |
101 |
102 | # Train one model per class. If params is None, predict May sales; otherwise evaluate with params, skip the May prediction, and append the RMSEs to 调参.txt
103 | def run_for_classes(params=None):
104 | output = []
105 | for code in class_codes:
106 | if code not in commit_codes:
107 | continue
108 | model, ypred, rmse, train_predict = train_test_eval(train_set_x[code], train_set_y[code], test_set_x[code], test_set_y[code], params)
109 | if code in large_codes:
110 | modify_large_feature('train', code, train_predict)
111 | modify_large_feature('test', code, ypred)
112 | if params is None:
113 | print('class: ', code, ' RMSE: ', rmse)
114 |
115 | # prediction for May
116 | predict_May(model, code)
117 |
118 | else:
119 | output.append('class: ' + code + ' RMSE: ' + str(rmse) + '\n')
120 |
121 | if params is not None:
122 | global accumulate_err
123 | with open('调参.txt', 'a') as output_file:
124 | output_file.write('n_estimators=' + str(params['n_estimators']) + ' oob_score=' + str(params['oob_score']) + '\n')
125 | output_file.writelines(output)
126 |         output_file.write('total RMSE: ' + str(np.sqrt(accumulate_err / 2960)) + '\n')  # RMSE over 148 classes x 20 test days
127 | accumulate_err = 0
128 |
129 |
130 | # Parameter tuning
131 | def run_for_classes_params():
132 | for n_estimators in range(50, 160, 10):
133 | params = {'n_estimators': n_estimators, 'oob_score': False}
134 | run_for_classes(params)
135 | params = {'n_estimators': n_estimators, 'oob_score': True}
136 | run_for_classes(params)
137 |
138 |
139 | def predict_May(rfmodel, code):
140 | ypred = rfmodel.predict(May_set_x[code])
141 | ypred = list(map(round, ypred))
142 | May_set_y[code] = ypred
143 | large_code = code[:2]
144 | for day in range(30):
145 | May_set_x[large_code][day][-1] += ypred[day]
146 |
147 |
148 | # Collect the class codes required by the submission file into commit_codes
149 | def codes_list_out():
150 | global commit_codes
151 | codes = [0]
152 | with open('commit_empty.csv') as native_set_file:
153 | native_csv = csv.reader(native_set_file)
154 | next(native_csv)
155 | for row in native_csv:
156 | if row[0] != codes[-1]:
157 | codes.append(row[0])
158 | commit_codes = codes[1:]
159 |
160 | if __name__ == '__main__':
161 | load_data()
162 | codes_list_out()
163 | load_May_data()
164 |
165 |     # No tuning: predict May with the default parameters; results are stored in the dict May_set_y
166 | run_for_classes()
167 | # write the predicted results of May
168 | with open('submit.csv', 'w', newline='') as output_file:
169 | output_csv = csv.writer(output_file)
170 |         output_csv.writerow(['编码', '日期', '销量'])  # header columns: class code, date, sales
171 | for code in commit_codes:
172 | for day in range(30):
173 | output_csv.writerow([code, str(20150501 + day), str(int(May_set_y[code][day]))])
174 |
175 | '''
176 |     # Called when tuning parameters
177 | run_for_classes_params()'''
178 |
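
Note: the distinctive step in this script, per the comment in load_data, is the extra large-class feature that accumulates the predicted customer counts of the corresponding mid classes. A toy sketch of that accumulation (class codes, shapes, and values invented):

    import numpy as np

    large_x = {'12': np.zeros((20, 5))}        # 20 days x (4 base features + 1 sum slot)
    mid_preds = {'1201': np.arange(20.0),      # per-day predictions of two mid
                 '1202': np.ones(20)}          # classes under large class '12'

    for code, pred in mid_preds.items():
        large_x[code[:2]][:, -1] += pred       # the last column accumulates the sum

    print(large_x['12'][:3, -1])               # [1. 2. 3.]
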
--------------------------------------------------------------------------------
/v3/commit_empty.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/v3/commit_empty.csv
--------------------------------------------------------------------------------
/v3/five_fold.py:
--------------------------------------------------------------------------------
1 | # Five-fold cross validation, used for the v5 stacking
2 |
3 | import csv
4 | import numpy as np
5 | from sklearn.ensemble import RandomForestRegressor
6 |
7 |
8 | commit_codes = []
9 | all_x = {}
10 | all_y = {}
11 | all_pred = {}
12 |
13 |
14 | def load_all_data():
15 | global all_x, all_y
16 | with open('features.csv') as input_file:
17 | input_csv = csv.reader(input_file)
18 | next(input_csv)
19 | for row in input_csv:
20 | feature = list(map(float, row[1:-1]))
21 | if len(row[0]) == 2:
22 | feature.append(0)
23 | if row[0] not in all_x:
24 | all_x[row[0]] = [feature]
25 | all_y[row[0]] = [float(row[-1])]
26 | else:
27 | all_x[row[0]].append(feature)
28 | all_y[row[0]].append(float(row[-1]))
29 |
30 |
31 | def get_day(date):
32 | date = int(date)
33 | if date < 20150132:
34 | return date - 20150100
35 | elif date < 20150229:
36 | return date - 20150200 + 31
37 | elif date < 20150332:
38 | return date - 20150300 + 59
39 | else:
40 |         return date - 20150400 + 90  # 31 + 28 + 31 = 90 days precede April
41 |
42 |
43 | # Call five_fold_params_pred with different parameter settings
44 | def five_fold_pred():
45 | global all_pred
46 | for n_estimators in range(50, 160, 10):
47 | print('n_estimators= ', n_estimators)
48 | params = {'n_estimators': n_estimators, 'oob_score': False}
49 | five_fold_params_pred(params)
50 |         # write the predictions back to file
51 | output = []
52 | with open('five_fold_feature.csv') as input_file:
53 | input_csv = csv.reader(input_file)
54 | output.append(next(input_csv))
55 | for row in input_csv:
56 | output.append(row + [str(all_pred[row[0]][get_day(row[1])-1])])
57 | with open('five_fold_feature_v3.csv', 'w', newline='') as output_file:
58 | output_csv = csv.writer(output_file)
59 | for row in output:
60 | output_csv.writerow(row)
61 |
62 |         # reset all_pred
63 | all_pred = {}
64 |
65 |
66 | # Five-fold cross validation with the given parameters
67 | def five_fold_params_pred(params):
68 | global commit_codes, all_pred
69 | for code in commit_codes:
70 | if code not in all_pred:
71 | all_pred[code] = np.zeros(120)
72 |         if code not in all_x: # some classes are missing from the raw data
73 | continue
74 | for i in range(5):
75 | train_x, train_y, test_x = get_fold_set(code, i)
76 | rf = RandomForestRegressor(n_estimators=params['n_estimators'], oob_score=params['oob_score'])
77 | rf.fit(train_x, train_y)
78 | ypred = rf.predict(test_x)
79 |             # store into all_pred
80 | for index in range(24):
81 | all_pred[code][i*24+index] = ypred[index]
82 |
83 |         # update the last feature of the corresponding large class
84 | large_code = code[:2]
85 | for day in range(120):
86 | all_x[large_code][day][-1] += all_pred[code][day]
87 |
88 |
89 | def get_fold_set(code, fold_index):
90 | train_x, train_y, test_x = [], [], []
91 | for i in range(120):
92 | if (i >= fold_index * 24) and (i < (fold_index + 1) * 24):
93 | test_x.append(all_x[code][i])
94 | else:
95 | train_x.append(all_x[code][i])
96 | train_y.append(all_y[code][i])
97 | return train_x, train_y, test_x
98 |
99 |
100 | # Collect the class codes required by the submission file into commit_codes
101 | def codes_list_out():
102 | global commit_codes
103 | codes = [0]
104 | with open('commit_empty.csv') as native_set_file:
105 | native_csv = csv.reader(native_set_file)
106 | next(native_csv)
107 | for row in native_csv:
108 | if row[0] != codes[-1]:
109 | codes.append(row[0])
110 | commit_codes = codes[1:]
111 |
112 |
113 | # Initialize the output file
114 | def initialize_file():
115 | global commit_codes
116 | with open('five_fold_feature.csv', 'w', newline='') as output_file:
117 | output_csv = csv.writer(output_file)
118 | output_csv.writerow(['code', 'date', 'models'])
119 | for code in commit_codes:
120 | for date in range(20150101, 20150132):
121 | output_csv.writerow([code, str(date)])
122 | for date in range(20150201, 20150229):
123 | output_csv.writerow([code, str(date)])
124 | for date in range(20150301, 20150332):
125 | output_csv.writerow([code, str(date)])
126 | for date in range(20150401, 20150431):
127 | output_csv.writerow([code, str(date)])
128 |
129 |
130 | if __name__ == '__main__':
131 | codes_list_out()
132 | initialize_file()
133 | load_all_data()
134 | five_fold_pred()
135 |
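
Note: the hand-rolled folds above produce out-of-fold predictions, which is the same idea scikit-learn packages as cross_val_predict. A minimal sketch on synthetic data (the unshuffled KFold matches the contiguous 24-day blocks of get_fold_set; everything else is illustrative):

    import numpy as np
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.model_selection import KFold, cross_val_predict

    # Toy regression data standing in for one class's 120 daily rows.
    rng = np.random.default_rng(0)
    X = rng.normal(size=(120, 5))
    y = 3 * X[:, 0] + rng.normal(size=120)

    # Contiguous, unshuffled 5-fold split: five 24-day test blocks.
    cv = KFold(n_splits=5, shuffle=False)
    oof = cross_val_predict(RandomForestRegressor(n_estimators=100), X, y, cv=cv)

    # oof[i] is the prediction for row i made by a model that never saw it,
    # which is what gets written to five_fold_feature_v3.csv above.
    print(oof.shape)  # (120,)
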
--------------------------------------------------------------------------------
/v3/preparedata.py:
--------------------------------------------------------------------------------
1 | import csv
2 |
3 |
4 | codes = []
5 |
6 |
7 | # Compute the January-April features and save them to features.csv
8 | def get_features():
9 | holidays = [0, 1, 2, 41, 44, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 93, 94, 95]
10 |
11 | def get_date_in_month(day):
12 | if day <= 31:
13 | return day
14 | elif day <= 59:
15 | return day - 31
16 | elif day <= 90:
17 | return day - 59
18 | else:
19 | return day - 90
20 |
21 | with open('timeseries_customers.csv') as customers_file,\
22 | open('timeseries_discounts.csv') as discounts_file,\
23 | open('features.csv', 'w', newline='') as output_file:
24 | input_customers = csv.reader(customers_file)
25 | input_discounts = csv.reader(discounts_file)
26 | output_csv = csv.writer(output_file)
27 | next(input_customers)
28 | next(input_discounts)
29 |         output_csv.writerow(['class', 'day_in_week', 'day_in_month', 'holiday', 'discount', 'label']) # mid-class feature columns
30 | for row in input_customers:
31 | class_code = row[0]
32 | discount_row = next(input_discounts)
33 | for day in range(1, 121):
34 | feature_row = []
35 | feature_row.append(class_code)
36 |                 day_in_week = day % 7 + 4  # weekday encoding: overall day n -> n % 7 + 4
37 | feature_row.append(str(day_in_week))
38 | feature_row.append(str(get_date_in_month(day)))
39 | if day in holidays:
40 | feature_row.append('1')
41 | else:
42 | feature_row.append('0')
43 | feature_row.append(discount_row[day])
44 | feature_row.append(row[day])
45 | output_csv.writerow(feature_row)
46 |
47 |
48 | def divide_train_test_set():
49 | with open('features.csv') as input_file,\
50 | open('train.csv', 'w', newline='') as train_file,\
51 | open('test.csv', 'w', newline='') as test_file:
52 | input_csv = csv.reader(input_file)
53 | train_csv = csv.writer(train_file)
54 | test_csv = csv.writer(test_file)
55 | next(input_csv)
56 | day = 0
57 | for row in input_csv:
58 | if day < 100:
59 | train_csv.writerow(row)
60 | day += 1
61 | else:
62 | test_csv.writerow(row)
63 | day = (day + 1) % 120
64 |
65 |
66 | # Compute the May features and save them to May_input.csv; the last large-class feature (the predicted sales sum of its mid classes) is filled in on the fly while predicting
67 | def compute_May_features():
68 | def codes_list_out():
69 | global codes
70 | codes = [0]
71 | with open('commit_empty.csv') as native_set_file:
72 | native_csv = csv.reader(native_set_file)
73 | next(native_csv)
74 | for row in native_csv:
75 | if row[0] != codes[-1]:
76 | codes.append(row[0])
77 | codes = codes[1:]
78 | print(codes)
79 |
80 | codes_list_out()
81 | with open('May_input.csv', 'w', newline='') as output_file:
82 | output_csv = csv.writer(output_file)
83 | for code in codes:
84 | for day in range(1, 31):
85 |             feature = [code, str((day + 120) % 7 + 4), str(day), '0', '0']  # +120 days of Jan-Apr keep the weekday code aligned with training
86 |             if len(code) == 2: # large class
87 | feature.append('0')
88 | output_csv.writerow(feature)
89 |
90 |
91 | if __name__ == '__main__':
92 | get_features()
93 | divide_train_test_set()
94 | compute_May_features()
95 |
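
Note: a quick sanity check of the weekday encoding (and of the +120 shift noted in compute_May_features): the encoding maps overall day n, counted from 1 at 2015-01-01, to n % 7 + 4, and January-April 2015 contain 120 days, so May day d is overall day 120 + d:

    import datetime as dt

    def encode(n):              # training encoding: overall day n -> n % 7 + 4
        return n % 7 + 4

    # 2015-01-05 is a Monday; so is 2015-05-04, which is overall day 124 = 120 + 4.
    assert dt.date(2015, 1, 5).weekday() == dt.date(2015, 5, 4).weekday() == 0
    assert encode(5) == encode(120 + 4) == 9
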
--------------------------------------------------------------------------------
/v3/submit.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/v3/submit.csv
--------------------------------------------------------------------------------
/v3/timeseries_customers.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/v3/timeseries_customers.csv
--------------------------------------------------------------------------------
/v3/timeseries_discounts.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/v3/timeseries_discounts.csv
--------------------------------------------------------------------------------
/v5/KNN_interface.py:
--------------------------------------------------------------------------------
1 | # version 5: calling interface for the KNN algorithm
2 |
3 | from sklearn.neighbors import KNeighborsRegressor
4 | import numpy as np
5 | import csv
6 |
7 |
8 | # data: array of float, the sales series
9 | # pred_length: int, the number of days to predict
10 | # D_window (sliding-window length) and max_k (largest candidate k)
11 | def knn(data, pred_length, D_window=14, max_k=7):
12 | if pred_length + D_window >= len(data):
13 | print('ERROR: pred_length or D_window too long')
14 | return None
15 |
16 | ret_ypred = []
17 | for h in range(4):
18 | train_feature, train_label = get_train_set(data, h, D_window, pred_length)
19 |
20 | e_LOO_arr = np.zeros(max_k)
21 | for k in range(2, max_k + 1):
22 | model = KNeighborsRegressor(n_neighbors=k, weights='uniform', algorithm='auto')
23 | model.fit(train_feature, train_label)
24 |
25 |             # fetch the k nearest neighbours
26 | dist_list, index_list = model.kneighbors([data[0 - D_window:]])
27 | k_neighbor_label = []
28 | for i in index_list[0]:
29 | k_neighbor_label.append(train_label[i])
30 |
31 |             # prediction based on the k nearest neighbours
32 | ypred = model.predict([data[0-D_window:]])
33 | ypred = np.asarray(list(map(round, ypred[0])))
34 |
35 |             # compute e_LOO
36 | e_LOO_arr[k-1] = LOO(k_neighbor_label, ypred, k)
37 |
38 |         # pick the k with the smallest e_LOO
39 | k_min = np.argmin(e_LOO_arr[1:]) + 2
40 | model = KNeighborsRegressor(n_neighbors=k_min, weights='uniform', algorithm='auto')
41 | model.fit(train_feature, train_label)
42 | ypred = model.predict([data[0 - D_window:]])
43 | ret_ypred += list(map(round, ypred[0]))
44 |
45 | return np.asarray(ret_ypred)
46 |
47 |
48 | def get_train_set(train_data, h, D, pred_length):
49 | feature, label = [], []
50 | block_len = int(pred_length / 4)
51 | if h != 3:
52 | for i in range(len(train_data) - D - block_len * (h + 1) + 1):
53 | feature.append(train_data[i:i + D])
54 | label.append(train_data[i + D + block_len * h:i + D + block_len * h + block_len])
55 | else:
56 | for i in range(len(train_data) - D - pred_length + 1):
57 | feature.append(train_data[i:i + D])
58 | label.append(train_data[i + D + 3 * block_len:i + D + pred_length])
59 | return np.array(feature), np.array(label)
60 |
61 |
62 | # Compute the leave-one-out error, used to choose k (the number of neighbours)
63 | def LOO(k_neighbor_label, ypred, k):
64 | ret = 0
65 | for neighbor in k_neighbor_label:
66 | ret = ret + ((neighbor - ypred) ** 2).sum()
67 | ret = ret * k / (k - 1)**2
68 | # ret = ret / (k)**2
69 | return ret
70 |
71 |
72 | def test():
73 | with open('timeseries_customers_processed.csv') as input_file:
74 | input_csv = csv.reader(input_file)
75 | next(input_csv)
76 | row = next(input_csv)
77 | data = list(map(float, row[1:]))
78 | print(knn(data, 30))
79 |
80 |
81 | if __name__ == '__main__':
82 | test()
83 |
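
Note: the k/(k-1)^2 factor in LOO is the leave-one-out shortcut for a k-NN mean (which is what KNeighborsRegressor with weights='uniform' computes). Removing neighbour y_i from the mean \hat{y} gives \hat{y}_{-i} = (k\hat{y} - y_i)/(k - 1), hence y_i - \hat{y}_{-i} = k(y_i - \hat{y})/(k - 1), and averaging the squared errors over the k neighbours yields

    e_{LOO}(k) = \frac{1}{k}\sum_{i=1}^{k}\left(y_i - \hat{y}_{-i}\right)^2
               = \frac{k}{(k-1)^2}\sum_{i=1}^{k}\left(y_i - \hat{y}\right)^2,

which is exactly the quantity LOO returns, with the vector-valued labels summed component-wise.
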
--------------------------------------------------------------------------------
/v5/Readme.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/v5/Readme.txt
--------------------------------------------------------------------------------
/v5/Version_5.py:
--------------------------------------------------------------------------------
1 | import csv
2 | from sklearn.neighbors import KNeighborsRegressor
3 | import numpy as np
4 |
5 | from modify_submit import change_pred
6 |
7 |
8 | def main_fun():
9 | class_codes = ['1201', '2011', '12', '15', '20', '22', '23', '30']
10 | with open('timeseries_customers_processed.csv') as input_file:
11 | input_csv = csv.reader(input_file)
12 | next(input_csv)
13 | for row in input_csv:
14 | if row[0] in class_codes:
15 | # MIMO_KNN_test(row)
16 | # MIMO_KNN_LOO_test(row)
17 | MIMO_KNN_LOO_May(row)
18 |
19 |
20 | # Split off a test set to try different parameters (D_window, k); does not predict May sales
21 | def MIMO_KNN_test(data):
22 | code = data[0]
23 | data = list(map(float, data[1:]))
24 | train_data = data[:90]
25 | test_data = data[90:]
26 |
27 |     # Train one model per horizon; the 4 horizons cover 7, 7, 7 and 9 days
28 | D_window = 14
29 | for h in range(4):
30 | train_feature, train_label = get_train_set(train_data, h, D_window)
31 | y_label = get_test_label(test_data, h)
32 |
33 | for k in range(1, 8):
34 | model = KNeighborsRegressor(n_neighbors=k, weights='uniform', algorithm='auto')
35 | model.fit(train_feature, train_label)
36 |
37 | ypred = model.predict([train_data[0-D_window:]])
38 | ypred = np.array(list(map(round, ypred[0])))
39 |
40 | rmse = np.sqrt(((ypred - y_label) ** 2).mean())
41 | print(code, ' h=', h, ' k=', k, ' rmse=', rmse)
42 |
43 |
44 | # Split off a test set and apply the method from the paper; does not predict May sales
45 | def MIMO_KNN_LOO_test(data):
46 | code = data[0]
47 | data = list(map(float, data[1:]))
48 | train_data = data[:90]
49 | test_data = data[90:]
50 |
51 |     # Train one model per horizon; the 4 horizons cover 7, 7, 7 and 9 days
52 | D_window = 14
53 | max_k = 7
54 | for h in range(4):
55 | train_feature, train_label = get_train_set(train_data, h, D_window)
56 | y_label = get_test_label(test_data, h)
57 |
58 | e_LOO_arr = np.zeros(max_k)
59 | for k in range(2, max_k + 1):
60 | model = KNeighborsRegressor(n_neighbors=k, weights='uniform', algorithm='auto')
61 | model.fit(train_feature, train_label)
62 |
63 |             # fetch the k nearest neighbours
64 | dist_list, index_list = model.kneighbors([train_data[0 - D_window:]])
65 | k_neighbor_label = []
66 | for i in index_list[0]:
67 | k_neighbor_label.append(train_label[i])
68 |
69 |             # prediction based on the k nearest neighbours
70 | ypred = model.predict([train_data[0-D_window:]])
71 | ypred = np.asarray(list(map(round, ypred[0])))
72 | rmse = np.sqrt(((ypred - y_label) ** 2).mean())
73 | print(code, ' h=', h, ' k=', k, ' rmse=', rmse)
74 |
75 |             # compute e_LOO
76 | e_LOO_arr[k-1] = LOO(k_neighbor_label, ypred, k)
77 |
78 |         # pick the k with the smallest e_LOO
79 | k_min = np.argmin(e_LOO_arr[1:]) + 2
80 | print('k_min=', k_min)
81 |
82 |
83 | # Use the full data set, apply the method from the paper, and predict May sales
84 | def MIMO_KNN_LOO_May(data):
85 | code = data[0]
86 | data = list(map(float, data[1:]))
87 |
88 | D_window = 14
89 | max_k = 7
90 | pred_May = []
91 | for h in range(4):
92 | train_feature, train_label = get_train_set(data, h, D_window)
93 | e_LOO_arr = np.zeros(max_k)
94 | for k in range(2, max_k + 1):
95 | model = KNeighborsRegressor(n_neighbors=k, weights='uniform', algorithm='auto')
96 | model.fit(train_feature, train_label)
97 |
98 |             # fetch the k nearest neighbours
99 | dist_list, index_list = model.kneighbors([data[0 - D_window:]])
100 | k_neighbor_label = []
101 | for i in index_list[0]:
102 | k_neighbor_label.append(train_label[i])
103 |
104 |             # prediction based on the k nearest neighbours
105 | ypred = model.predict([data[0 - D_window:]])
106 | ypred = np.asarray(list(map(round, ypred[0])))
107 |
108 |             # compute e_LOO
109 | e_LOO_arr[k - 1] = LOO(k_neighbor_label, ypred, k)
110 |
111 |         # pick the k with the smallest e_LOO
112 | k_min = np.argmin(e_LOO_arr[1:]) + 2
113 |
114 |         # set k = k_min and predict
115 | model = KNeighborsRegressor(n_neighbors=k_min, weights='uniform', algorithm='auto')
116 | model.fit(train_feature, train_label)
117 | ypred = model.predict([data[0 - D_window:]])
118 | ypred = list(map(round, ypred[0]))
119 | pred_May = pred_May + ypred
120 |
121 | print(pred_May)
122 |     # replace the predictions for this class code in the submission file
123 | change_pred(code, pred_May)
124 |
125 |
126 | # Compute the leave-one-out error, used to choose k (the number of neighbours)
127 | def LOO(k_neighbor_label, ypred, k):
128 | ret = 0
129 | for neighbor in k_neighbor_label:
130 | ret = ret + ((neighbor - ypred) ** 2).sum()
131 | ret = ret * k / (k - 1)**2
132 | # ret = ret / (k)**2
133 | return ret
134 |
135 |
136 | def get_train_set(train_data, h, D):
137 | feature, label = [], []
138 | if h != 3:
139 | for i in range(len(train_data) - D - 7 * (h+1) + 1):
140 | feature.append(train_data[i:i+D])
141 | label.append(train_data[i+D+7*h:i+D+7*h+7])
142 | else:
143 | for i in range(len(train_data) - D - 30 + 1):
144 | feature.append(train_data[i:i+D])
145 | label.append(train_data[i+D+21:i+D+30])
146 | return np.array(feature), np.array(label)
147 |
148 |
149 | def get_test_label(test_data, h):
150 | if h != 3:
151 | return test_data[7*h:7*h+7]
152 | else:
153 | return test_data[21:]
154 |
155 |
156 | if __name__ == '__main__':
157 | main_fun()
158 |
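
Note: to make the windowing in get_train_set concrete, a small sketch with an invented 120-day series: horizon h = 0 pairs each 14-day window with the following 7 days, while h = 3 pairs it with days 22-30 after the window (9 days):

    import numpy as np

    data = np.arange(120)  # stand-in daily series: value == day index
    D = 14

    # h = 0: label is the 7 days immediately after each window.
    feat0 = [data[i:i + D] for i in range(120 - D - 7 + 1)]
    lab0 = [data[i + D:i + D + 7] for i in range(120 - D - 7 + 1)]

    # h = 3: label is days 22..30 after each window.
    feat3 = [data[i:i + D] for i in range(120 - D - 30 + 1)]
    lab3 = [data[i + D + 21:i + D + 30] for i in range(120 - D - 30 + 1)]

    print(len(feat0), lab0[0])  # 100 windows; first label covers days 14..20
    print(len(feat3), lab3[0])  # 77 windows;  first label covers days 35..43
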
--------------------------------------------------------------------------------
/v5/commit_empty.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/v5/commit_empty.csv
--------------------------------------------------------------------------------
/v5/modify_submit.py:
--------------------------------------------------------------------------------
1 | import csv
2 |
3 |
4 | # Read the prediction file, round every prediction, and write the file back
5 | def get_round():
6 | rows = []
7 | with open('submit.csv') as input_file:
8 | input_csv = csv.reader(input_file)
9 | rows.append(next(input_csv))
10 | for row in input_csv:
11 | row[2] = str(int(round(float(row[2]))))
12 | rows.append(row)
13 | with open('submit.csv', 'w', newline='') as output_file:
14 | output_csv = csv.writer(output_file)
15 | for row in rows:
16 | output_csv.writerow(row)
17 |
18 |
19 | # Replace the predictions for the class with this code by the values in pred
20 | def change_pred(code, pred):
21 | rows = []
22 | file_name = 'submit_WJ_2.csv'
23 | with open(file_name) as input_file:
24 | input_csv = csv.reader(input_file)
25 | rows.append(next(input_csv))
26 | i = 0
27 | for row in input_csv:
28 | if row[0] == code:
29 | rows.append([code, row[1], str(pred[i])])
30 | i += 1
31 | else:
32 | rows.append(row)
33 | with open(file_name, 'w', newline='') as output_file:
34 | output_csv = csv.writer(output_file)
35 | for row in rows:
36 | output_csv.writerow(row)
37 |
38 |
39 | if __name__ == '__main__':
40 | get_round()
41 |
--------------------------------------------------------------------------------
/v5/submit.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/v5/submit.csv
--------------------------------------------------------------------------------
/v5/timeseries_customers_processed.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/v5/timeseries_customers_processed.csv
--------------------------------------------------------------------------------
/v5/调参.txt:
--------------------------------------------------------------------------------
1 | 1201 h= 0 k= 1 rmse= 42.6430366113
2 | 1201 h= 0 k= 2 rmse= 38.3723718188
3 | 1201 h= 0 k= 3 rmse= 32.4761047772
4 | 1201 h= 0 k= 4 rmse= 24.5028359411
5 | 1201 h= 0 k= 5 rmse= 24.2706714272
6 | 1201 h= 0 k= 6 rmse= 23.3642368486
7 | 1201 h= 0 k= 7 rmse= 25.0468866937
8 | 1201 h= 1 k= 1 rmse= 31.5300310362
9 | 1201 h= 1 k= 2 rmse= 28.4413534227
10 | 1201 h= 1 k= 3 rmse= 23.3969834544
11 | 1201 h= 1 k= 4 rmse= 21.5311928775
12 | 1201 h= 1 k= 5 rmse= 17.0535644291
13 | 1201 h= 1 k= 6 rmse= 15.5524493595
14 | 1201 h= 1 k= 7 rmse= 17.4633370662
15 | 1201 h= 2 k= 1 rmse= 38.0844925321
16 | 1201 h= 2 k= 2 rmse= 28.5215343758
17 | 1201 h= 2 k= 3 rmse= 23.8729502535
18 | 1201 h= 2 k= 4 rmse= 24.2934772971
19 | 1201 h= 2 k= 5 rmse= 24.4475628222
20 | 1201 h= 2 k= 6 rmse= 26.420499459
21 | 1201 h= 2 k= 7 rmse= 24.835233271
22 | 1201 h= 3 k= 1 rmse= 36.0986303218
23 | 1201 h= 3 k= 2 rmse= 29.8369045237
24 | 1201 h= 3 k= 3 rmse= 29.5872383746
25 | 1201 h= 3 k= 4 rmse= 33.2802005146
26 | 1201 h= 3 k= 5 rmse= 31.1755055639
27 | 1201 h= 3 k= 6 rmse= 31.1048216768
28 | 1201 h= 3 k= 7 rmse= 29.6407753314
29 | 12 h= 0 k= 1 rmse= 53.8582796394
30 | 12 h= 0 k= 2 rmse= 44.5673280603
31 | 12 h= 0 k= 3 rmse= 34.0183858095
32 | 12 h= 0 k= 4 rmse= 32.9145546344
33 | 12 h= 0 k= 5 rmse= 29.392663649
34 | 12 h= 0 k= 6 rmse= 26.9584608416
35 | 12 h= 0 k= 7 rmse= 28.3394613257
36 | 12 h= 1 k= 1 rmse= 69.6593753305
37 | 12 h= 1 k= 2 rmse= 40.9992685927
38 | 12 h= 1 k= 3 rmse= 40.0368671351
39 | 12 h= 1 k= 4 rmse= 41.3502184792
40 | 12 h= 1 k= 5 rmse= 36.9253007747
41 | 12 h= 1 k= 6 rmse= 34.0657271442
42 | 12 h= 1 k= 7 rmse= 33.3902866927
43 | 12 h= 2 k= 1 rmse= 44.7931755006
44 | 12 h= 2 k= 2 rmse= 52.5043373322
45 | 12 h= 2 k= 3 rmse= 47.9634527977
46 | 12 h= 2 k= 4 rmse= 45.7079428355
47 | 12 h= 2 k= 5 rmse= 41.747281894
48 | 12 h= 2 k= 6 rmse= 40.4535372974
49 | 12 h= 2 k= 7 rmse= 39.1421180701
50 | 12 h= 3 k= 1 rmse= 43.1470354331
51 | 12 h= 3 k= 2 rmse= 44.093202437
52 | 12 h= 3 k= 3 rmse= 36.2472381216
53 | 12 h= 3 k= 4 rmse= 34.4079060788
54 | 12 h= 3 k= 5 rmse= 38.8412886812
55 | 12 h= 3 k= 6 rmse= 38.0493486071
56 | 12 h= 3 k= 7 rmse= 37.641989556
57 | 15 h= 0 k= 1 rmse= 14.0813960347
58 | 15 h= 0 k= 2 rmse= 12.2485595361
59 | 15 h= 0 k= 3 rmse= 13.2287257726
60 | 15 h= 0 k= 4 rmse= 12.7065671652
61 | 15 h= 0 k= 5 rmse= 11.87624956
62 | 15 h= 0 k= 6 rmse= 13.4246432771
63 | 15 h= 0 k= 7 rmse= 13.2146747442
64 | 15 h= 1 k= 1 rmse= 9.57675757834
65 | 15 h= 1 k= 2 rmse= 10.9716280299
66 | 15 h= 1 k= 3 rmse= 11.3157687678
67 | 15 h= 1 k= 4 rmse= 10.8028703988
68 | 15 h= 1 k= 5 rmse= 12.8554179205
69 | 15 h= 1 k= 6 rmse= 12.5323010033
70 | 15 h= 1 k= 7 rmse= 12.388370937
71 | 15 h= 2 k= 1 rmse= 20.4904158781
72 | 15 h= 2 k= 2 rmse= 12.0169917472
73 | 15 h= 2 k= 3 rmse= 11.8937080614
74 | 15 h= 2 k= 4 rmse= 11.1624704725
75 | 15 h= 2 k= 5 rmse= 11.7760027097
76 | 15 h= 2 k= 6 rmse= 12.1629264637
77 | 15 h= 2 k= 7 rmse= 13.0868435863
78 | 15 h= 3 k= 1 rmse= 31.3581462037
79 | 15 h= 3 k= 2 rmse= 20.8123719358
80 | 15 h= 3 k= 3 rmse= 18.6966951072
81 | 15 h= 3 k= 4 rmse= 13.589438938
82 | 15 h= 3 k= 5 rmse= 12.2614526733
83 | 15 h= 3 k= 6 rmse= 13.2781398948
84 | 15 h= 3 k= 7 rmse= 12.7837448865
85 | 20 h= 0 k= 1 rmse= 19.6650523954
86 | 20 h= 0 k= 2 rmse= 17.5970236087
87 | 20 h= 0 k= 3 rmse= 16.9153042486
88 | 20 h= 0 k= 4 rmse= 16.7590587119
89 | 20 h= 0 k= 5 rmse= 16.8402285481
90 | 20 h= 0 k= 6 rmse= 16.5004886991
91 | 20 h= 0 k= 7 rmse= 15.6761515174
92 | 20 h= 1 k= 1 rmse= 11.5201686496
93 | 20 h= 1 k= 2 rmse= 9.63450635631
94 | 20 h= 1 k= 3 rmse= 7.58729930368
95 | 20 h= 1 k= 4 rmse= 8.10614695274
96 | 20 h= 1 k= 5 rmse= 9.25628043697
97 | 20 h= 1 k= 6 rmse= 7.5033965945
98 | 20 h= 1 k= 7 rmse= 7.24532229897
99 | 20 h= 2 k= 1 rmse= 22.5134880207
100 | 20 h= 2 k= 2 rmse= 19.3281403697
101 | 20 h= 2 k= 3 rmse= 17.5829934327
102 | 20 h= 2 k= 4 rmse= 16.5980608769
103 | 20 h= 2 k= 5 rmse= 17.2165487439
104 | 20 h= 2 k= 6 rmse= 16.4409280022
105 | 20 h= 2 k= 7 rmse= 15.524009627
106 | 20 h= 3 k= 1 rmse= 26.1023626006
107 | 20 h= 3 k= 2 rmse= 33.697276393
108 | 20 h= 3 k= 3 rmse= 29.951316307
109 | 20 h= 3 k= 4 rmse= 32.3881422814
110 | 20 h= 3 k= 5 rmse= 25.7046785895
111 | 20 h= 3 k= 6 rmse= 25.2773090489
112 | 20 h= 3 k= 7 rmse= 22.2029257966
113 | 22 h= 0 k= 1 rmse= 43.8438788691
114 | 22 h= 0 k= 2 rmse= 41.6797254107
115 | 22 h= 0 k= 3 rmse= 38.884937818
116 | 22 h= 0 k= 4 rmse= 32.1492490424
117 | 22 h= 0 k= 5 rmse= 31.6650243057
118 | 22 h= 0 k= 6 rmse= 33.4756367623
119 | 22 h= 0 k= 7 rmse= 33.0505710863
120 | 22 h= 1 k= 1 rmse= 29.3841551083
121 | 22 h= 1 k= 2 rmse= 23.6434437635
122 | 22 h= 1 k= 3 rmse= 19.4591808033
123 | 22 h= 1 k= 4 rmse= 19.5137028576
124 | 22 h= 1 k= 5 rmse= 16.9122752479
125 | 22 h= 1 k= 6 rmse= 17.1294598104
126 | 22 h= 1 k= 7 rmse= 16.8492357209
127 | 22 h= 2 k= 1 rmse= 38.7243002335
128 | 22 h= 2 k= 2 rmse= 32.7553576964
129 | 22 h= 2 k= 3 rmse= 33.8979605757
130 | 22 h= 2 k= 4 rmse= 30.356791558
131 | 22 h= 2 k= 5 rmse= 28.7923710234
132 | 22 h= 2 k= 6 rmse= 25.7167889433
133 | 22 h= 2 k= 7 rmse= 31.4764977499
134 | 22 h= 3 k= 1 rmse= 127.657092765
135 | 22 h= 3 k= 2 rmse= 109.449161924
136 | 22 h= 3 k= 3 rmse= 86.6052707902
137 | 22 h= 3 k= 4 rmse= 75.037769277
138 | 22 h= 3 k= 5 rmse= 69.8723081655
139 | 22 h= 3 k= 6 rmse= 62.136255331
140 | 22 h= 3 k= 7 rmse= 57.9065491146
141 | 23 h= 0 k= 1 rmse= 5.63154381269
142 | 23 h= 0 k= 2 rmse= 4.63173518114
143 | 23 h= 0 k= 3 rmse= 4.83828724199
144 | 23 h= 0 k= 4 rmse= 5.80938123219
145 | 23 h= 0 k= 5 rmse= 5.68562994596
146 | 23 h= 0 k= 6 rmse= 4.35150609445
147 | 23 h= 0 k= 7 rmse= 3.76632918272
148 | 23 h= 1 k= 1 rmse= 6.4142698059
149 | 23 h= 1 k= 2 rmse= 6.7005057888
150 | 23 h= 1 k= 3 rmse= 7.15141892803
151 | 23 h= 1 k= 4 rmse= 6.56498281641
152 | 23 h= 1 k= 5 rmse= 9.63436179487
153 | 23 h= 1 k= 6 rmse= 10.8737237449
154 | 23 h= 1 k= 7 rmse= 10.0319859996
155 | 23 h= 2 k= 1 rmse= 5.8064004094
156 | 23 h= 2 k= 2 rmse= 5.92811066778
157 | 23 h= 2 k= 3 rmse= 7.75757150579
158 | 23 h= 2 k= 4 rmse= 7.67441476216
159 | 23 h= 2 k= 5 rmse= 7.50348547005
160 | 23 h= 2 k= 6 rmse= 6.80538086404
161 | 23 h= 2 k= 7 rmse= 7.34530943247
162 | 23 h= 3 k= 1 rmse= 31.3209195267
163 | 23 h= 3 k= 2 rmse= 26.1043465949
164 | 23 h= 3 k= 3 rmse= 26.5938520968
165 | 23 h= 3 k= 4 rmse= 20.6796134379
166 | 23 h= 3 k= 5 rmse= 18.5126222927
167 | 23 h= 3 k= 6 rmse= 17.3251045002
168 | 23 h= 3 k= 7 rmse= 15.8480509754
169 | 30 h= 0 k= 1 rmse= 17.4396920025
170 | 30 h= 0 k= 2 rmse= 15.286577467
171 | 30 h= 0 k= 3 rmse= 14.799299084
172 | 30 h= 0 k= 4 rmse= 13.4640063851
173 | 30 h= 0 k= 5 rmse= 12.5409692762
174 | 30 h= 0 k= 6 rmse= 12.2409109388
175 | 30 h= 0 k= 7 rmse= 11.5748669357
176 | 30 h= 1 k= 1 rmse= 12.5470542929
177 | 30 h= 1 k= 2 rmse= 11.1238979074
178 | 30 h= 1 k= 3 rmse= 10.0442549022
179 | 30 h= 1 k= 4 rmse= 10.4489658474
180 | 30 h= 1 k= 5 rmse= 9.76706560335
181 | 30 h= 1 k= 6 rmse= 9.5492204658
182 | 30 h= 1 k= 7 rmse= 8.02405103911
183 | 30 h= 2 k= 1 rmse= 13.7995859151
184 | 30 h= 2 k= 2 rmse= 11.9178406627
185 | 30 h= 2 k= 3 rmse= 10.9421559178
186 | 30 h= 2 k= 4 rmse= 11.4155579741
187 | 30 h= 2 k= 5 rmse= 11.1698815423
188 | 30 h= 2 k= 6 rmse= 10.3885622264
189 | 30 h= 2 k= 7 rmse= 9.75646178754
190 | 30 h= 3 k= 1 rmse= 18.5082564159
191 | 30 h= 3 k= 2 rmse= 16.269989379
192 | 30 h= 3 k= 3 rmse= 13.5020710817
193 | 30 h= 3 k= 4 rmse= 13.4553609332
194 | 30 h= 3 k= 5 rmse= 12.1959416121
195 | 30 h= 3 k= 6 rmse= 12.5498267393
196 | 30 h= 3 k= 7 rmse= 12.7345557171
--------------------------------------------------------------------------------
/v6_stacking/Version6_stacking.py:
--------------------------------------------------------------------------------
1 | import csv
2 | from sklearn import linear_model
3 | from sklearn.ensemble import RandomForestRegressor
4 | import xgboost as xgb
5 | import numpy as np
6 |
7 |
8 | train_set_x = {}
9 | train_set_y = {}
10 | test_set_x = {}
11 | test_set_y = {}
12 | commit_codes = []
13 |
14 |
15 | # Load the data for training and testing the models (May excluded)
16 | def load_data():
17 | with open('train.csv') as input_file:
18 | input_csv = csv.reader(input_file)
19 | day = 0
20 | for row in input_csv:
21 | code = row[0]
22 | if day == 0:
23 | train_set_x[code] = []
24 | train_set_y[code] = []
25 | x = list(map(float, row[2:-1]))
26 | train_set_x[code].append(x)
27 | train_set_y[code].append(float(row[-1]))
28 | day = (day + 1) % 100
29 | with open('test.csv') as input_file:
30 | input_csv = csv.reader(input_file)
31 | day = 0
32 | for row in input_csv:
33 | code = row[0]
34 | if day == 0:
35 | test_set_x[code] = []
36 | test_set_y[code] = []
37 | x = list(map(float, row[2:-1]))
38 | test_set_x[code].append(x)
39 | test_set_y[code].append(float(row[-1]))
40 | day = (day + 1) % 20
41 |
42 |
43 | # Collect the class codes required by the submission file into commit_codes
44 | def codes_list_out():
45 | global commit_codes
46 | codes = [0]
47 | with open('commit_empty.csv') as native_set_file:
48 | native_csv = csv.reader(native_set_file)
49 | next(native_csv)
50 | for row in native_csv:
51 | if row[0] != codes[-1]:
52 | codes.append(row[0])
53 | commit_codes = codes[1:]
54 |
55 |
56 | def train_test_eval():
57 | for code in commit_codes:
58 | # model = linear_model.LinearRegression()
59 | model = RandomForestRegressor()
60 | model.fit(train_set_x[code], train_set_y[code])
61 | ypred = model.predict(test_set_x[code])
62 | ypred = np.array(list(map(round, ypred)))
63 | rmse = np.sqrt(((test_set_y[code] - ypred) ** 2).mean())
64 | print(code, ' rmse=', rmse)
65 |
66 |
67 | if __name__ == '__main__':
68 | codes_list_out()
69 | load_data()
70 | train_test_eval()
71 |
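
Note: for orientation, a self-contained sketch of the stacking step this script performs: the level-2 features are the base models' out-of-fold predictions (two synthetic columns here), and a random forest learns to blend them. All names, shapes, and noise levels are invented:

    import numpy as np
    from sklearn.ensemble import RandomForestRegressor

    rng = np.random.default_rng(0)
    y = rng.normal(100, 20, size=120)           # true daily sales
    base_a = y + rng.normal(0, 10, size=120)    # out-of-fold predictions, model A
    base_b = y + rng.normal(0, 15, size=120)    # out-of-fold predictions, model B

    X = np.column_stack([base_a, base_b])       # level-2 feature matrix
    X_train, y_train = X[:100], y[:100]         # 100-day train split and
    X_test, y_test = X[100:], y[100:]           # 20-day test split, as in load_data

    meta = RandomForestRegressor(n_estimators=100)
    meta.fit(X_train, y_train)
    pred = np.round(meta.predict(X_test))
    print(np.sqrt(((y_test - pred) ** 2).mean()))  # RMSE of the blended prediction
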
--------------------------------------------------------------------------------
/v6_stacking/commit_empty.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/v6_stacking/commit_empty.csv
--------------------------------------------------------------------------------
/v6_stacking/cv/arima_cv.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Spyder Editor
4 |
5 | This is a temporary script file.
6 | """
7 |
8 | import datetime as dt
9 | import numpy as np
10 |
11 | import csv
12 | import arimaPredicter
13 | import dataLoader
14 |
15 | index = [dt.datetime(2015,1,x) for x in range(1, 32)]
16 | index = index + [dt.datetime(2015,2,x) for x in range(1, 29)]
17 | index = index + [dt.datetime(2015,3,x) for x in range(1, 32)]
18 | index = index + [dt.datetime(2015,4,x) for x in range(1, 31)]
19 |
20 | def sarimaOutput():  # one SARIMA fit per class and parameter setting; 30-day May predictions go to result01/11/12.csv
21 | loader = dataLoader.loader("datam.csv", "lcdatam.csv")
22 | loader.setSize(120, 0, 0)
23 |
24 | f1 = open("result01.csv", "wb")
25 | writer1 = csv.writer(f1)
26 | f2 = open("result11.csv", "wb")
27 | writer2 = csv.writer(f2)
28 | f3 = open("result12.csv", "wb")
29 | writer3 = csv.writer(f3)
30 |
31 |     ap = arimaPredicter.predicter()
32 | ap.setIndex(index)
33 |
34 | while (True):
35 | midclass, _, trainData, _, _ = loader.getNextMidClass()
36 | if (midclass == 0):
37 | break
38 |
39 | ap.setPara(midclass, (0, 1))
40 | try:
41 | model = ap.sarimaTrain(midclass, trainData)
42 | result = ap.sarimaPredict(model, 30)
43 | except:
44 | result = np.zeros(30)
45 | for i in range(0, 30):
46 | writer1.writerow([midclass, "201505%02d" % (i+1), result[i]])
47 |
48 |
49 | ap.setPara(midclass, (1, 1))
50 | try:
51 | model = ap.sarimaTrain(midclass, trainData)
52 | result = ap.sarimaPredict(model, 30)
53 | except:
54 | result = np.zeros(30)
55 | for i in range(0, 30):
56 | writer2.writerow([midclass, "201505%02d" % (i+1), result[i]])
57 |
58 | ap.setPara(midclass, (1, 2))
59 | try:
60 | model = ap.sarimaTrain(midclass, trainData)
61 | result = ap.sarimaPredict(model, 30)
62 | except:
63 | result = np.zeros(30)
64 | for i in range(0, 30):
65 | writer3.writerow([midclass, "201505%02d" % (i+1), result[i]])
66 |
67 |
68 | while (True):
69 | larclass, _, trainData, _, _ = loader.getNextLarClass()
70 | if (larclass == 0):
71 | break
72 |
73 | ap.setPara(larclass, (0, 1))
74 | try:
75 | model = ap.sarimaTrain(larclass, trainData)
76 | result = ap.sarimaPredict(model, 30)
77 | except:
78 | result = np.zeros(30)
79 | for i in range(0, 30):
80 | writer1.writerow([larclass, "201505%02d" % (i+1), result[i]])
81 |
82 |
83 | ap.setPara(larclass, (1, 1))
84 | try:
85 | model = ap.sarimaTrain(larclass, trainData)
86 | result = ap.sarimaPredict(model, 30)
87 | except:
88 | result = np.zeros(30)
89 | for i in range(0, 30):
90 | writer2.writerow([larclass, "201505%02d" % (i+1), result[i]])
91 |
92 | ap.setPara(larclass, (1, 2))
93 | try:
94 | model = ap.sarimaTrain(larclass, trainData)
95 | result = ap.sarimaPredict(model, 30)
96 | except:
97 | result = np.zeros(30)
98 | for i in range(0, 30):
99 | writer3.writerow([larclass, "201505%02d" % (i+1), result[i]])
100 |
101 | f1.close()
102 | f2.close()
103 | f3.close()
104 | loader.closeFiles()
105 |
106 | sarimaOutput()
--------------------------------------------------------------------------------
/v6_stacking/cv/xgboost_cv.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Spyder Editor
4 |
5 | This is a temporary script file.
6 | """
7 |
8 | import xgboost as xgb
9 | from numpy import array
10 | import csv
11 | import datetime as dt
12 |
13 | larclasPred = {}
14 | larclasLabl = {}
15 | totalBias = 0
16 | totalCount = 0
17 |
18 | dtIndex = [dt.datetime(2015,1,x) for x in range(1, 32)]
19 | dtIndex = dtIndex + [dt.datetime(2015,2,x) for x in (range(1, 29))]
20 | dtIndex = dtIndex + [dt.datetime(2015,3,x) for x in range(1, 32)]
21 | dtIndex = dtIndex + [dt.datetime(2015,4,x) for x in (range(1, 31))]
22 |
23 | modelChoose = []
24 | lcModelChoose = []
25 | arimaParaChoose = {}
26 |
27 | def getData(csvReader, trainCount, testCount):
28 | trainData = []
29 | testData = []
30 | trainLabel = []
31 | testLabel = []
32 | try:
33 | for x in range(0, trainCount):
34 | row = csvReader.next()
35 | """
36 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]),
37 | float(row[7]), float(row[8]), float(row[9]), float(row[10]),
38 | float(row[11]), float(row[12])]
39 | """
40 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]),
41 | float(row[7]), float(row[8])]
42 | trainData.append(data)
43 | trainLabel.append(float(row[15]))
44 | for x in range(0, testCount):
45 | row = csvReader.next()
46 | """
47 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]),
48 | float(row[7]), float(row[8]), float(row[9]), float(row[10]),
49 | float(row[11]), float(row[12])]
50 | """
51 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]),
52 | float(row[7]), float(row[8])]
53 | testData.append(data)
54 | testLabel.append(float(row[15]))
55 | return int(row[0]), trainData, trainLabel, testData, testLabel
56 | except StopIteration:
57 | return 0, [], [], [], []
58 |
59 | def getLCData(csvReader, trainCount, testCount):
60 | trainData = []
61 | testData = []
62 | trainLabel = []
63 | testLabel = []
64 | try:
65 | for x in range(0, trainCount):
66 | row = csvReader.next()
67 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]),
68 | float(row[7])]
69 | trainData.append(data)
70 | trainLabel.append(float(row[14]))
71 | for x in range(0, testCount):
72 | row = csvReader.next()
73 | data = [float(row[3]), float(row[4]), float(row[5]), float(row[6]),
74 | float(row[7])]
75 | testData.append(data)
76 | testLabel.append(float(row[14]))
77 | return int(row[0]), trainData, trainLabel, testData, testLabel
78 | except StopIteration:
79 | return 0, [], [], [], []
80 |
81 | def xgboostPredict(trainData, trainLabel, dataToPredict):
82 | dtrain = xgb.DMatrix(trainData, trainLabel)
83 | params = {"objective": "reg:linear"}
84 | gbm = xgb.train(dtrain=dtrain, params=params)
85 | return gbm.predict(xgb.DMatrix(dataToPredict))
86 |
87 | def simData(data):
88 |     ret = [row[:] for row in data]  # copy each row; a shallow data[:] would share rows and zeroing would corrupt the training folds
89 | for i in range(0, len(ret)):
90 | for j in range(4, len(ret[i])):
91 | ret[i][j] = 0
92 | return ret
93 |
94 | def xgboostCV(trainSize):
95 | global larclasPred
96 | larclasPred = {}
97 | f1 = open("datam.csv", "r")
98 | data_csv = csv.reader(f1)
99 | f3 = open("lcdatam.csv", "r")
100 | lc_data_csv = csv.reader(f3)
101 | f4 = open('xgboost_cv.csv', 'wb')
102 | writer = csv.writer(f4)
103 |
104 | split = [int(trainSize/5), int(2*trainSize/5),
105 | int(3*trainSize/5), int(4*trainSize/5)]
106 |
107 | while (True):
108 | midclass, trD, trL, teD, teL = getData(data_csv, trainSize, 0)
109 | if (midclass == 0):
110 | break
111 | else:
112 | trd1 = trD[split[0]:]
113 | trl1 = trL[split[0]:]
114 | ted1 = simData(trD[:split[0]])
115 | tep1 = xgboostPredict(array(trd1), array(trl1), array(ted1))
116 |
117 | trd2 = trD[:split[0]]+trD[split[1]:]
118 | trl2 = trL[:split[0]]+trL[split[1]:]
119 | ted2 = simData(trD[split[0]:split[1]])
120 | tep2 = xgboostPredict(array(trd2), array(trl2), array(ted2))
121 |
122 | trd3 = trD[:split[1]]+trD[split[2]:]
123 | trl3 = trL[:split[1]]+trL[split[2]:]
124 | ted3 = simData(trD[split[1]:split[2]])
125 | tep3 = xgboostPredict(array(trd3), array(trl3), array(ted3))
126 |
127 | trd4 = trD[:split[2]]+trD[split[3]:]
128 | trl4 = trL[:split[2]]+trL[split[3]:]
129 | ted4 = simData(trD[split[2]:split[3]])
130 | tep4 = xgboostPredict(array(trd4), array(trl4), array(ted4))
131 |
132 | trd5 = trD[:split[3]]
133 | trl5 = trL[:split[3]]
134 | ted5 = simData(trD[split[3]:])
135 | tep5 = xgboostPredict(array(trd5), array(trl5), array(ted5))
136 |
137 | ans = list(tep1) + list(tep2) + list(tep3) + list(tep4) + list(tep5)
138 |
139 | for i in range(0, trainSize):
140 | writer.writerow([midclass, dtIndex[i].strftime("%Y%m%d"),
141 | ans[i]])
142 |
143 | while (True):
144 | larclass, trD, trL, teD, teL = getLCData(lc_data_csv, trainSize, 0)
145 | if (larclass == 0):
146 | break
147 | else:
148 | trd1 = trD[split[0]:]
149 | trl1 = trL[split[0]:]
150 | ted1 = simData(trD[:split[0]])
151 | tep1 = xgboostPredict(array(trd1), array(trl1), array(ted1))
152 |
153 | trd2 = trD[:split[0]]+trD[split[1]:]
154 | trl2 = trL[:split[0]]+trL[split[1]:]
155 | ted2 = simData(trD[split[0]:split[1]])
156 | tep2 = xgboostPredict(array(trd2), array(trl2), array(ted2))
157 |
158 | trd3 = trD[:split[1]]+trD[split[2]:]
159 | trl3 = trL[:split[1]]+trL[split[2]:]
160 | ted3 = simData(trD[split[1]:split[2]])
161 | tep3 = xgboostPredict(array(trd3), array(trl3), array(ted3))
162 |
163 | trd4 = trD[:split[2]]+trD[split[3]:]
164 | trl4 = trL[:split[2]]+trL[split[3]:]
165 | ted4 = simData(trD[split[2]:split[3]])
166 | tep4 = xgboostPredict(array(trd4), array(trl4), array(ted4))
167 |
168 | trd5 = trD[:split[3]]
169 | trl5 = trL[:split[3]]
170 | ted5 = simData(trD[split[3]:])
171 | tep5 = xgboostPredict(array(trd5), array(trl5), array(ted5))
172 |
173 | ans = list(tep1) + list(tep2) + list(tep3) + list(tep4) + list(tep5)
174 |
175 | for i in range(0, trainSize):
176 | writer.writerow([larclass, dtIndex[i].strftime("%Y%m%d"),
177 | ans[i]])
178 |
179 | f1.close()
180 | f3.close()
181 | f4.close()
182 |
183 | xgboostCV(120)
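
Note: the row-wise copy in simData matters because a plain ret = data[:] copies only the outer list; the nested feature rows stay shared, so zeroing them would silently corrupt the training folds built from the same rows. A short illustration of the aliasing:

    rows = [[1, 2], [3, 4]]
    shallow = rows[:]                # new outer list, same inner row objects
    shallow[0][0] = 0
    print(rows[0][0])                # 0 -- the "copy" mutated the original

    deep = [row[:] for row in rows]  # copy each row as well
    deep[1][0] = 0
    print(rows[1][0])                # 3 -- the original rows are untouched
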
--------------------------------------------------------------------------------
/v6_stacking/prepare_data.py:
--------------------------------------------------------------------------------
1 | # Merge the results of the different models into one file
2 |
3 | import csv
4 |
5 | commit_codes = []
6 |
7 |
8 | # Collect the class codes required by the submission file into commit_codes
9 | def codes_list_out():
10 | global commit_codes
11 | codes = [0]
12 | with open('commit_empty.csv') as native_set_file:
13 | native_csv = csv.reader(native_set_file)
14 | next(native_csv)
15 | for row in native_csv:
16 | if row[0] != codes[-1]:
17 | codes.append(row[0])
18 | commit_codes = codes[1:]
19 |
20 |
21 | def get_day(date):
22 | date = int(date)
23 | if date < 20150132:
24 | return date - 20150100
25 | elif date < 20150229:
26 | return date - 20150200 + 31
27 | elif date < 20150332:
28 | return date - 20150300 + 59
29 | else:
30 |         return date - 20150400 + 90  # 31 + 28 + 31 = 90 days precede April
31 |
32 |
33 | def merge_file():
34 | features = {}
35 | with open('five_fold_feature_v3.csv') as input_file:
36 | input_csv = csv.reader(input_file)
37 | next(input_csv)
38 | for row in input_csv:
39 | if row[0] not in features:
40 | features[row[0]] = [row]
41 | else:
42 | features[row[0]].append(row)
43 | with open('five_fold_feature_xgboost.csv') as input_file:
44 | input_csv = csv.reader(input_file)
45 | for row in input_csv:
46 | if row[0] in features:
47 | features[row[0]][get_day(row[1])-1] = features[row[0]][get_day(row[1])-1] + row[2:]
48 |     # the last column is the label
49 | with open('timeseries_customers.csv') as input_file:
50 | input_csv = csv.reader(input_file)
51 | for row in input_csv:
52 | if row[0] in features:
53 | for day in range(120):
54 | features[row[0]][day].append(row[day+1])
55 | with open('merged_feature.csv', 'w', newline='') as output_file:
56 | output_csv = csv.writer(output_file)
57 | for code in commit_codes:
58 | for row in features[code]:
59 | output_csv.writerow(row)
60 |
61 |
62 | def divide_train_test_set():
63 | with open('merged_feature.csv') as input_file,\
64 | open('train.csv', 'w', newline='') as train_file,\
65 | open('test.csv', 'w', newline='') as test_file:
66 | input_csv = csv.reader(input_file)
67 | train_csv = csv.writer(train_file)
68 | test_csv = csv.writer(test_file)
69 | day = 0
70 | for row in input_csv:
71 | if day < 100:
72 | train_csv.writerow(row)
73 | day += 1
74 | else:
75 | test_csv.writerow(row)
76 | day = (day + 1) % 120
77 |
78 |
79 | if __name__ == '__main__':
80 | codes_list_out()
81 | merge_file()
82 | divide_train_test_set()
83 |
--------------------------------------------------------------------------------
/v6_stacking/timeseries_customers.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IngramWang/BDCI2017/dd2ced21731bcf6ee8a225619efbb0758551be61/v6_stacking/timeseries_customers.csv
--------------------------------------------------------------------------------
/xgboostPredicter.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Mon Nov 27 21:53:16 2017
4 |
5 | @author: wangjun
6 | """
7 |
8 | import xgboost as xgb
9 | from numpy import array
10 | import datetime as dt
11 | import numpy
12 |
13 | class predicter:
14 | def __init__(self, params = {"objective":"reg:linear", "max_depth":1, "gamma":2}):
15 | self.params = params
16 |
17 | def setDefaultParams(self, params):
18 | self.params = params
19 |
20 | def xgboostTrain(self, trainData, trainLabel, params = {}):
21 | if (type(trainData)!=numpy.ndarray):
22 | trainData = array(trainData)
23 | if (type(trainLabel)!=numpy.ndarray):
24 | trainLabel = array(trainLabel)
25 | dTrain = xgb.DMatrix(trainData, trainLabel)
26 | if (len(params)==0):
27 | params = self.params
28 | model = xgb.train(dtrain=dTrain, params=params)
29 | return model
30 |
31 | @staticmethod
32 | def xgboostPredict(model, dataToPredict):
33 | if (type(dataToPredict)!=numpy.ndarray):
34 | dataToPredict = array(dataToPredict)
35 | return model.predict(xgb.DMatrix(dataToPredict))
36 |
37 | @staticmethod
38 |     def simulateFeature(trainData, mask):  # zero the feature columns listed in mask, in place
39 |         for feature in trainData:
40 |             for i in mask:
41 |                 feature[i] = 0
42 |
43 | @staticmethod
44 |     def createFeature(date_from, length, zeros, DictHoliday, DictBeforeHoliday,
45 |                       DictWorkday):
46 | delta = dt.timedelta(days=1)
47 | now = date_from
48 | index = []
49 | for i in range(0, length):
50 | index.append(now)
51 | now = now + delta
52 | feature = []
53 | empty = [0 for x in range(0, zeros+4)]
54 | for i in range(0, length):
55 | x = empty[:]
56 |             x[0] = index[i].day                  # day of month
57 |             x[1] = (index[i].weekday() + 1) % 7  # weekday code, Sunday = 0
58 |             dayCount = i + 1
59 |             if (dayCount in DictHoliday):
60 |                 x[3] = 1                         # holiday flag
61 |             elif (dayCount in DictBeforeHoliday):
62 |                 x[2] = 1                         # day-before-holiday flag
63 |             elif (dayCount in DictWorkday):
64 |                 if (x[1]==6 or ((dayCount+1) in DictHoliday)):
65 |                     x[2] = 1
66 |             elif (x[1]==0 or x[1]==6):
67 |                 x[3] = 1                         # ordinary weekend day
68 |             elif (x[1]==5):
69 |                 x[2] = 1                         # Friday before a rest day
70 | feature.append(x)
71 | return feature
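
Note: a hypothetical usage sketch for createFeature (it assumes xgboost is installed, since importing the module pulls it in; the holiday day-counts are invented, and in the repo they would come from the competition calendar). Each dict keys on the 1-based day count within the generated range:

    import datetime as dt
    from xgboostPredicter import predicter

    holidays = {1, 2, 3}      # e.g. a holiday on the first three days
    before_holidays = set()   # days explicitly marked as pre-holiday
    workdays = set()          # weekend days declared working days

    # 30 rows starting 2015-05-01, each padded with 2 extra zero columns:
    # [day_of_month, weekday, before_holiday_flag, holiday_flag, 0, 0]
    features = predicter.createFeature(dt.datetime(2015, 5, 1), 30, 2,
                                       holidays, before_holidays, workdays)
    print(features[0])        # [1, 5, 0, 1, 0, 0] -- 1 May is day 1, a holiday
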
--------------------------------------------------------------------------------