├── README.md ├── deep_learning_feature.R ├── heat_map_nanjing.py ├── month_barplot.py ├── nanjing_week_month.png ├── predict_plot.py ├── quantile_pre.R ├── taibao ├── .RData ├── EX-04-01.R ├── EX-04-02.R ├── IPAD.xlsx ├── a.txt ├── prog-00.R ├── prog-01.R ├── prog-01old.R ├── prog-02.R ├── sub-01.R └── sub-02.R ├── xingqi.py └── 最新预测.py /README.md: -------------------------------------------------------------------------------- 1 | # This is the code for my third paper 2 | -------------------------------------------------------------------------------- /deep_learning_feature.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | setwd("D:/Rdata/Third_paper/third_paper_data") 4 | 5 | library(readxl) 6 | library(h2o) 7 | library(dplyr) 8 | 9 | h2o.init(nthreads = -1) 10 | 11 | rm(list = ls()) 12 | 13 | set.seed(12345) 14 | 15 | dat<-read_excel("suzhou_total_feature.xlsx") 16 | 17 | dim(dat) 18 | 19 | #dat<-h2o.importFile(path = "total_feature.xlsx",destination_frame = "dat") 20 | #dat1<-as.data.frame(dat) 21 | dat1<-na.omit(dat) 22 | 23 | dat2<-dat1[,2:58] 24 | 25 | names(dat2) 26 | 27 | month<-as.factor(dat1$month) 28 | 29 | day_of_w<-as.factor(dat1$day_of_w) 30 | 31 | air<-as.factor(dat1$air) 32 | 33 | rain<-as.factor(dat1$rain) 34 | 35 | 36 | levels(air)[levels(air)=="优"]<-"6" 37 | levels(air)[levels(air)=="良"]<-"5" 38 | levels(air)[levels(air)=="轻度污染"]<-"4" 39 | levels(air)[levels(air)=="中度污染"]<-"3" 40 | levels(air)[levels(air)=="重度污染"]<-"2" 41 | levels(air)[levels(air)=="严重污染"]<-"1" 42 | 43 | air<-as.numeric(air) 44 | 45 | levels(month)[levels(month)=="January"]<-"1" 46 | levels(month)[levels(month)=="February"]<-"2" 47 | levels(month)[levels(month)=="March"]<-"3" 48 | 49 | levels(month)[levels(month)=="April"]<-"4" 50 | levels(month)[levels(month)=="May"]<-"5" 51 | 52 | levels(month)[levels(month)=="June"]<-"6" 53 | levels(month)[levels(month)=="July"]<-"7" 54 | levels(month)[levels(month)=="August"]<-"8" 55 | levels(month)[levels(month)=="September"]<-"9" 56 | levels(month)[levels(month)=="October"]<-"10" 57 | levels(month)[levels(month)=="November"]<-"11" 58 | levels(month)[levels(month)=="December"]<-"12" 59 | 60 | levels(month) 61 | 62 | levels(day_of_w)[levels(day_of_w)=="Monday"]<-"1" 63 | levels(day_of_w)[levels(day_of_w)=="Tuesday"]<-"2" 64 | levels(day_of_w)[levels(day_of_w)=="Wednesday"]<-"3" 65 | levels(day_of_w)[levels(day_of_w)=="Thursday"]<-"4" 66 | levels(day_of_w)[levels(day_of_w)=="Friday"]<-"5" 67 | levels(day_of_w)[levels(day_of_w)=="Saturday"]<-"6" 68 | levels(day_of_w)[levels(day_of_w)=="Sunday"]<-"7" 69 | 70 | levels(day_of_w) 71 | 72 | dat2$month<-month 73 | dat2$air<-air 74 | dat2$rain<-rain 75 | dat2$day_of_w<-day_of_w 76 | 77 | 78 | 79 | 80 | y <- "E_demand" #response column: digits 0-9 81 | x <- setdiff(names(dat2), y) #vector of predictor column names 82 | 83 | dat3<-as.h2o(dat2,destination_frame = "dat3") 84 | 85 | dat4<-dat3[1:348,] 86 | 87 | splits <- h2o.splitFrame(dat4, ratios = 0.8, seed = 12345) 88 | 89 | 90 | 91 | 92 | # first part of the data, without labels for unsupervised learning 93 | train <- splits[[1]] 94 | 95 | # second part of the data, with labels for supervised learning 96 | valid <- splits[[2]] 97 | 98 | test<-dat3[349:358,] 99 | 100 | #dim(train_supervised) 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | DL_pre_time_1<-Sys.time() 113 | dl <- h2o.deeplearning( 114 | model_id="dl", 115 | training_frame=train, 116 | validation_frame=valid, ## validation dataset: used for 
scoring and early stopping 117 | x=x, 118 | y=y, 119 | hidden = c(200,200,200), 120 | #hidden=c(200,200,200), ## default: 2 hidden layers with 200 neurons each 121 | epochs=1000, 122 | variable_importances=T , 123 | seed = 12345, 124 | l1=0.001, 125 | l2=0.001 126 | 127 | ## not enabled by default 128 | ) 129 | DL_pre_time_2<-Sys.time() 130 | DL_run_time<-(DL_pre_time_2-DL_pre_time_1) 131 | 132 | h2o.varimp_plot(dl,num_of_features = 15) 133 | 134 | 135 | 136 | 137 | 138 | #plot(dl) 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | #####random forset 148 | ## run our first predictive model 149 | RF_pre_time_1<-Sys.time() 150 | rf <- h2o.randomForest( ## h2o.randomForest function 151 | training_frame = train, ## the H2O frame for training 152 | validation_frame = valid, ## the H2O frame for validation (not required) 153 | x=x, ## the predictor columns, by column index 154 | y=y, ## the target index (what we are predicting) 155 | model_id = "rf_covType_v1", ## name the model in H2O 156 | ## not required, but helps use Flow 157 | ntrees = 200, ## use a maximum of 200 trees to create the 158 | ## random forest model. The default is 50. 159 | ## I have increased it because I will let 160 | ## the early stopping criteria decide when 161 | ## the random forest is sufficiently accurate 162 | stopping_rounds = 2, ## Stop fitting new trees when the 2-tree 163 | ## average is within 0.001 (default) of 164 | ## the prior two 2-tree averages. 165 | ## Can be thought of as a convergence setting 166 | score_each_iteration = T, ## Predict against training and validation for 167 | ## each tree. Default will skip several. 168 | seed = 12345) ## Set the random seed so that this can be 169 | 170 | RF_pre_time_2<-Sys.time() 171 | RF_run_time<-(RF_pre_time_2-RF_pre_time_1) 172 | 173 | 174 | GBM_pre_time_1<-Sys.time() 175 | gbm <- h2o.gbm( 176 | training_frame = train, ## 177 | validation_frame = valid, ## 178 | x=x, ## 179 | y=y, ## 180 | ntrees = 200, ## decrease the trees, mostly to allow for run time 181 | ## (from 50) 182 | learn_rate = 0.2, 183 | ## increase the learning rate (from 0.1) 184 | max_depth = 10, ## increase the depth (from 5) 185 | stopping_rounds = 2, ## 186 | stopping_tolerance = 0.01, ## 187 | score_each_iteration = T, ## 188 | model_id = "gbm_covType3", ## 189 | seed = 12345) ## 190 | 191 | GBM_pre_time_2<-Sys.time() 192 | GBM_run_time<-(GBM_pre_time_2-GBM_pre_time_1) 193 | 194 | 195 | plot(gbm) 196 | 197 | pre1<-h2o.predict(object = gbm,newdata = test) 198 | pre2<-h2o.predict(object = dl,newdata = test) 199 | pre3<-h2o.predict(object = rf,newdata = test) 200 | 201 | pre1_d<-as.data.frame(pre1$predict) 202 | pre2_d<-as.data.frame(pre2$predict) 203 | pre3_d<-as.data.frame(pre3$predict) 204 | 205 | 206 | pre1_3<-cbind(pre1_d,pre2_d,pre3_d,as.data.frame(test$E_demand)) 207 | 208 | names(pre1_3)<-c("GBM","DL","RF","E_demand") 209 | 210 | GBM_MAE<-mean(abs(pre1_3$GBM-pre1_3$E_demand)) 211 | DL_MAE<-mean(abs(pre1_3$DL-pre1_3$E_demand)) 212 | RF_MAE<-mean(abs(pre1_3$RF-pre1_3$E_demand)) 213 | 214 | GBM_MAE 215 | DL_MAE 216 | RF_MAE 217 | 218 | 219 | 220 | GBM_MAPE<-mean(abs(pre1_3$GBM-pre1_3$E_demand)/pre1_3$E_demand) 221 | DL_MAPE<-mean(abs(pre1_3$DL-pre1_3$E_demand)/pre1_3$E_demand) 222 | RF_MAPE<-mean(abs(pre1_3$RF-pre1_3$E_demand)/pre1_3$E_demand) 223 | 224 | GBM_MAPE 225 | DL_MAPE 226 | RF_MAPE 227 | 228 | GBM_MRPE<-max(abs(pre1_3$GBM-pre1_3$E_demand)/pre1_3$E_demand) 229 | DL_MRPE<-max(abs(pre1_3$DL-pre1_3$E_demand)/pre1_3$E_demand) 230 | 
RF_MRPE<-max(abs(pre1_3$RF-pre1_3$E_demand)/pre1_3$E_demand) 231 | 232 | GBM_MRPE 233 | DL_MRPE 234 | RF_MRPE 235 | 236 | 237 | 238 | 239 | 240 | 241 | lianyungang_var_im_dl<-as.data.frame(h2o.varimp(dl)) 242 | 243 | lianyungang_var_im_rf<-as.data.frame(h2o.varimp(rf)) 244 | 245 | lianyungang_var_im_gbm<-as.data.frame(h2o.varimp(gbm)) 246 | 247 | write.csv(lianyungang_var_im_dl,file='SZ_var_im_dl.csv') 248 | write.csv(lianyungang_var_im_rf,file='SZ_var_im_rf.csv') 249 | write.csv(lianyungang_var_im_gbm,file='SZ_var_im_gbm.csv') 250 | 251 | 252 | 253 | predict_lianyungang_result<-data.frame(GBM_MAE,DL_MAE,RF_MAE, 254 | GBM_MAPE,DL_MAPE,RF_MAPE, 255 | GBM_MRPE,DL_MRPE,RF_MRPE, 256 | DL_run_time, 257 | RF_run_time, 258 | GBM_run_time) 259 | 260 | 261 | 262 | write.csv(predict_lianyungang_result,file="predict25_lianyungang_result.csv") 263 | write.csv(pre1_3,file="predict25_lianyungang.csv") 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | ################################################################## 273 | 274 | 275 | 276 | pred_all<-list() 277 | taus<-seq(from=0.01,to=0.99,length=99) 278 | quantile_pre_time_1<-Sys.time() 279 | for(i in 1:10) 280 | { 281 | m1 <- h2o.deeplearning( 282 | model_id="dl_model_first", 283 | training_frame=train, 284 | validation_frame=valid, ## validation dataset: used for scoring and early stopping 285 | x=x, 286 | y=y, 287 | #activation="Rectifier", ## default 288 | hidden=c(200,200,200), ## default: 2 hidden layers with 200 neurons each 289 | epochs=500, 290 | variable_importances=T , 291 | distribution = 'quantile', 292 | quantile_alpha = taus[i], 293 | seed = 12345 294 | ## not enabled by default 295 | ) 296 | pred <- h2o.predict(m1, newdata = test) 297 | pred <-as.data.frame(pred$predict) 298 | names(pred)<-paste("quantile",taus,sep = "_")[i] 299 | pred_all[[i]]<-pred 300 | } 301 | quantile_pre_time_2<-Sys.time() 302 | 303 | quantile_run_time_2<-(quantile_pre_time_2-quantile_pre_time_1) 304 | 305 | 306 | 307 | 308 | data<-as.data.frame(pred_all) 309 | 310 | write.csv(data,file="quantile_m.csv") 311 | 312 | data1<-as.matrix(data[11,]) 313 | d<-test$E_demand[11] 314 | data_pdf <- akj(data1, data1) 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | -------------------------------------------------------------------------------- /heat_map_nanjing.py: -------------------------------------------------------------------------------- 1 | 2 | import matplotlib.pyplot as plt 3 | import pandas as pd 4 | import numpy as np 5 | %pylab inline 6 | 7 | week = pd.read_excel('dat_month1_12_T.xlsx', index_col=0) 8 | 9 | # Normalize data columns 10 | #nba_norm = (nba - nba.mean()) / (nba.max() - nba.min()) 11 | 12 | # Sort data according to Points, lowest to highest 13 | # This was just a design choice made by Yau 14 | # inplace=False (default) ->thanks SO user d1337 15 | #nba_sort = nba_norm.sort('PTS', ascending=True) 16 | 17 | #nba_sort['PTS'].head(10) 18 | 19 | # Plot it out 20 | fig, ax = plt.subplots() 21 | heatmap = ax.pcolor(week, cmap=plt.cm.Blues) 22 | 23 | # Format 24 | fig = plt.gcf() 25 | fig.set_size_inches(8, 4) 26 | 27 | # turn off the frame 28 | ax.set_frame_on(False) 29 | 30 | # put the major ticks at the middle of each cell 31 | ax.set_yticks(np.arange(week.shape[0]) + 0.5, minor=False) 32 | ax.set_xticks(np.arange(week.shape[1]) + 0.5, minor=False) 33 | 34 | # want a more natural, table-like display 35 | ax.invert_yaxis() 36 | ax.xaxis.tick_top() 37 | 38 | # Set the labels 39 | 40 | # label 
source:https://en.wikipedia.org/wiki/Basketball_statistics 41 | labels = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'] 42 | # note I could have used nba_sort.columns but made "labels" instead 43 | ax.set_xticklabels(labels, minor=False) 44 | ax.set_yticklabels(week.index, minor=False) 45 | 46 | # rotate the 47 | #plt.xticks(rotation=90) 48 | 49 | ax.grid(False) 50 | 51 | # Turn off all the ticks 52 | ax = plt.gca() 53 | 54 | for t in ax.xaxis.get_major_ticks(): 55 | t.tick1On = False 56 | t.tick2On = False 57 | for t in ax.yaxis.get_major_ticks(): 58 | t.tick1On = False 59 | t.tick2On = False 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /month_barplot.py: -------------------------------------------------------------------------------- 1 | 2 | import matplotlib.pyplot as plt 3 | import pandas as pd 4 | import numpy as np 5 | import matplotlib.ticker as ticker 6 | 7 | import os 8 | 9 | os.chdir("D:/Rdata/Third_paper/third_paper_data/") 10 | dat_nanjing=pd.read_excel("nanjing_month_week.xlsx",index_col=[0]) 11 | 12 | month1=dat_nanjing['20140101':'20140131']['E_demand'].sum() 13 | month2=dat_nanjing['20140201':'20140228']['E_demand'].sum() 14 | month3=dat_nanjing['20140301':'20140331']['E_demand'].sum() 15 | month4=dat_nanjing['20140401':'20140430']['E_demand'].sum() 16 | month5=dat_nanjing['20140501':'20140531']['E_demand'].sum() 17 | month6=dat_nanjing['20140601':'20140630']['E_demand'].sum() 18 | month7=dat_nanjing['20140701':'20140731']['E_demand'].sum() 19 | month8=dat_nanjing['20140801':'20140831']['E_demand'].sum() 20 | month9=dat_nanjing['20140901':'20140930']['E_demand'].sum() 21 | month10=dat_nanjing['20141001':'20141031']['E_demand'].sum() 22 | month11=dat_nanjing['20141101':'20141130']['E_demand'].sum() 23 | month12=dat_nanjing['20141201':'20141231']['E_demand'].sum() 24 | 25 | y=[month1,month2,month3,month4,month5,month6,month7,month8,month9,month10,month11,month12] 26 | 27 | N=len(y) 28 | 29 | x=range(N) 30 | #name_list = ('January', 'February', 'March', 'April', 31 | # 'May','June','July','August','September', 32 | #'October','November','December') 33 | 34 | 35 | name_list = ('Jan', 'Feb', 'Mar', 'Apr', 36 | 'May','Jun','Jul','Aug','Sep', 37 | 'Oct','Nov','Dec') 38 | 39 | pos_list = np.arange(len(name_list)) 40 | ax = plt.axes() 41 | ax.xaxis.set_major_locator(ticker.FixedLocator((pos_list))) 42 | ax.xaxis.set_major_formatter(ticker.FixedFormatter((name_list))) 43 | ax.spines['right'].set_visible(False) 44 | ax.spines['top'].set_visible(False) 45 | ax.yaxis.set_ticks_position('left') 46 | ax.xaxis.set_ticks_position('bottom') 47 | plt.bar(x,y,width=0.7,align='center',color='darkorange',edgecolor='darkorange') 48 | plt.ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 49 | plt.xlabel('Month') 50 | plt.ylabel('Electricity consumption(KWh)') 51 | #plt.xticks([0.5,1.5,2.5,3.5,4.5,5.5,6.5,7.5,8.5,9.5,10.5,11.5]) 52 | plt.title('Monthly electricity consumption in Nanjing') 53 | plt.bar 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /nanjing_week_month.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaoguozhi/paper_code/eec3042cc81e9706f5a64787f6c869f9bd1ee13c/nanjing_week_month.png -------------------------------------------------------------------------------- 
/predict_plot.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import pandas as pd 3 | import numpy as np 4 | import matplotlib.dates as mdates 5 | from sklearn.neighbors import KernelDensity 6 | from scipy.stats import norm 7 | from scipy import stats 8 | import os 9 | 10 | os.chdir("D:/Rdata/Third_paper/third_paper_data/") 11 | 12 | pre_dat=pd.read_excel('nanjing_pre.xlsx',index_col=[0]) 13 | 14 | pre_dat_1=pre_dat[356:365] 15 | #pre_dat_1=pre_dat 16 | #pre_dat_1.plot(kind='line',ylim=(10000,30000),) 17 | 18 | 19 | fig=plt.figure(figsize=(10,5)) 20 | ax1=fig.add_subplot(111) 21 | #plt.figure(figsize=(10,5)) 22 | ax1.plot(pre_dat_1['E_demand'],label='True value',linewidth=3) 23 | ax1.plot(pre_dat_1['DL'],label='Deep learning',linewidth=2,linestyle='dashed',marker='o') 24 | ax1.plot(pre_dat_1['GBM'],label='Gradient boosting',linewidth=2,linestyle='dashed',marker='d') 25 | ax1.plot(pre_dat_1['RF'],label='Random forest',linewidth=2,linestyle='dashed',marker='p') 26 | ax1.set_ylim(10000,30000) 27 | plt.xlabel('Date(day)') 28 | plt.ylabel('Electricity consumption(KWh)') 29 | plt.ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 30 | ax1.xaxis.set_major_locator(mdates.DayLocator(interval=1)) 31 | ax1.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d')) 32 | #plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45) 33 | ax1.legend(loc='upper left') 34 | 35 | 36 | 37 | 38 | ############################################################ 39 | #####plot for predict 40 | fig=plt.figure(figsize=(10,5)) 41 | ax1=fig.add_subplot(111) 42 | #plt.figure(figsize=(10,5)) 43 | ax1.plot(pre_dat_1['E_demand'],label='True value',linewidth=3) 44 | ax1.plot(pre_dat_1['DL'],label='Deep learning',linewidth=2,linestyle='dashed') 45 | ax1.plot(pre_dat_1['GBM'],label='Gradient boosting',linewidth=2,linestyle='dashed') 46 | ax1.plot(pre_dat_1['RF'],label='Random forest',linewidth=2,linestyle='dashed') 47 | ax1.axvline(pre_dat_1.index[335],color='r',linewidth=3) 48 | ax1.set_ylim(5000,40000) 49 | plt.xlabel('Date(day)') 50 | plt.ylabel('Electricity consumption(KWh)') 51 | plt.ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 52 | ax1.xaxis.set_major_locator(mdates.DayLocator(interval=16)) 53 | ax1.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d')) 54 | plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45) 55 | ax1.legend(loc='upper left') 56 | 57 | 58 | 59 | ################################################################ 60 | #######plot for real value for Lianyungang 61 | 62 | real_dat=pd.read_excel('lianyungang_total.xlsx',index_col=[0]) 63 | 64 | fig=plt.figure(figsize=(10,5)) 65 | ax1=fig.add_subplot(111) 66 | #plt.figure(figsize=(10,5)) 67 | ax1.plot(real_dat['value'],label='True value',linewidth=3) 68 | ax1.set_ylim(5000,40000) 69 | plt.xlabel('Date(day)') 70 | plt.ylabel('Electricity consumption(KWh)') 71 | plt.title('Electricity consumption in Lianyungang in 2014') 72 | plt.ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 73 | ax1.xaxis.set_major_locator(mdates.DayLocator(interval=16)) 74 | ax1.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d')) 75 | plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45) 76 | ax1.legend(loc='upper left') 77 | 78 | 79 | ################################################################ 80 | #######plot for real value for Suzhou 81 | 82 | real_dat=pd.read_excel('suzhou_total.xlsx',index_col=[0]) 83 | 84 | fig=plt.figure(figsize=(10,5)) 85 | ax1=fig.add_subplot(111) 86 | 
#plt.figure(figsize=(10,5)) 87 | ax1.plot(real_dat['value'],label='True value',linewidth=3) 88 | ax1.set_ylim(5000,40000) 89 | plt.xlabel('Date(day)') 90 | plt.ylabel('Electricity consumption(KWh)') 91 | plt.title('Electricity consumption in Suzhou in 2014') 92 | plt.ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 93 | ax1.xaxis.set_major_locator(mdates.DayLocator(interval=16)) 94 | ax1.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d')) 95 | plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45) 96 | ax1.legend(loc='upper left') 97 | 98 | ################################################################ 99 | #######plot for real value for Nanjing 100 | 101 | real_dat=pd.read_excel('nanjing_total.xlsx',index_col=[0]) 102 | 103 | fig=plt.figure(figsize=(10,5)) 104 | ax1=fig.add_subplot(111) 105 | #plt.figure(figsize=(10,5)) 106 | ax1.plot(real_dat['value'],label='True value',linewidth=3) 107 | ax1.set_ylim(5000,40000) 108 | plt.xlabel('Date(day)') 109 | plt.ylabel('Electricity consumption(KWh)') 110 | plt.title('Electricity consumption in Nanjing in 2014') 111 | plt.ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 112 | ax1.xaxis.set_major_locator(mdates.DayLocator(interval=16)) 113 | ax1.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d')) 114 | plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45) 115 | ax1.legend(loc='upper left') 116 | 117 | 118 | 119 | 120 | ########################################################################### 121 | #############wendu and power demand in Nanjing 122 | 123 | total_dat=pd.read_excel('nanjing_total_feature.xlsx',index_col=[0]) 124 | total_dat['average_person']=total_dat['E_demand']/3000 125 | 126 | total_dat1=total_dat['20140601':'20140831'] 127 | 128 | fig=plt.figure(figsize=(10,5)) 129 | ax1=fig.add_subplot(111) 130 | 131 | ax1.plot(total_dat1['average_person'],label='Electricity consumption',linewidth=3) 132 | ax1.plot(total_dat1['wdh_var1(t)'],label='Temperature',linewidth=3,color='m') 133 | ax1.axvline(x=total_dat1.index[64],color='black',linewidth=1,linestyle='dashed',label='Local high temperature') 134 | ax1.axvline(x=total_dat1.index[39],color='black',linewidth=1,linestyle='dashed') 135 | ax1.axvline(x=total_dat1.index[50],color='black',linewidth=1,linestyle='dashed') 136 | ax1.axvline(x=total_dat1.index[59],color='black',linewidth=1,linestyle='dashed') 137 | 138 | ax1.set_ylim(0,60) 139 | plt.xlabel('Date(day)') 140 | plt.ylabel('Electricity consumption(KWh) and temperature') 141 | plt.title('Electricity consumption and temperature in Nanjing') 142 | #plt.ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 143 | ax1.xaxis.set_major_locator(mdates.DayLocator(interval=4)) 144 | ax1.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d')) 145 | plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45) 146 | ax1.legend(loc='upper left') 147 | 148 | 149 | ########################################################################### 150 | #############wendu and power demand in Suzhou 151 | 152 | total_dat=pd.read_excel('suzhou_total_feature.xlsx',index_col=[0]) 153 | total_dat['average_person']=total_dat['E_demand']/3000 154 | 155 | total_dat1=total_dat['20140601':'20140831'] 156 | 157 | fig=plt.figure(figsize=(10,5)) 158 | ax1=fig.add_subplot(111) 159 | 160 | ax1.plot(total_dat1['average_person'],label='Electricity consumption',linewidth=3) 161 | ax1.plot(total_dat1['wdh_var1(t)'],label='Temperature',linewidth=3,color='m') 162 | ax1.axvline(x=total_dat1.index[64],color='black',linewidth=1,linestyle='dashed',label='Local 
high temperature') 163 | ax1.axvline(x=total_dat1.index[39],color='black',linewidth=1,linestyle='dashed') 164 | ax1.axvline(x=total_dat1.index[49],color='black',linewidth=1,linestyle='dashed') 165 | ax1.axvline(x=total_dat1.index[59],color='black',linewidth=1,linestyle='dashed') 166 | ax1.axvline(x=total_dat1.index[84],color='black',linewidth=1,linestyle='dashed') 167 | 168 | 169 | ax1.set_ylim(0,60) 170 | plt.xlabel('Date(day)') 171 | plt.ylabel('Electricity consumption(KWh) and temperature') 172 | plt.title('Electricity consumption and temperature in Suzhou') 173 | #plt.ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 174 | ax1.xaxis.set_major_locator(mdates.DayLocator(interval=4)) 175 | ax1.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d')) 176 | plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45) 177 | ax1.legend(loc='upper left') 178 | 179 | 180 | 181 | ########################################################################### 182 | #############wendu and power demand in Lianyungang 183 | 184 | total_dat=pd.read_excel('lianyungang_total_feature.xlsx',index_col=[0]) 185 | total_dat['average_person']=total_dat['E_demand']/3000 186 | 187 | total_dat1=total_dat['20140601':'20140831'] 188 | 189 | fig=plt.figure(figsize=(10,5)) 190 | ax1=fig.add_subplot(111) 191 | 192 | ax1.plot(total_dat1['average_person'],label='Electricity consumption',linewidth=3) 193 | ax1.plot(total_dat1['wdh_var1(t)'],label='Temperature',linewidth=3,color='m') 194 | ax1.axvline(x=total_dat1.index[64],color='black',linewidth=1,linestyle='dashed',label='Local high temperature') 195 | ax1.axvline(x=total_dat1.index[29],color='black',linewidth=1,linestyle='dashed') 196 | ax1.axvline(x=total_dat1.index[50],color='black',linewidth=1,linestyle='dashed') 197 | ax1.axvline(x=total_dat1.index[82],color='black',linewidth=1,linestyle='dashed') 198 | 199 | 200 | ax1.set_ylim(0,60) 201 | plt.xlabel('Date(day)') 202 | plt.ylabel('Electricity consumption(KWh) and temperature') 203 | plt.title('Electricity consumption and temperature in Lianyungang') 204 | #plt.ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 205 | ax1.xaxis.set_major_locator(mdates.DayLocator(interval=4)) 206 | ax1.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d')) 207 | plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45) 208 | ax1.legend(loc='upper left') 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | ################################################################## 223 | ####南京 224 | 225 | dat_density=pd.read_csv('nanjing_quantile_m.csv',index_col=[0]) 226 | 227 | dat_density=dat_density.T 228 | 229 | density1 = stats.kde.gaussian_kde(dat_density.iloc[:,0].values) 230 | density2 = stats.kde.gaussian_kde(dat_density.iloc[:,1].values) 231 | density3 = stats.kde.gaussian_kde(dat_density.iloc[:,2].values) 232 | density4 = stats.kde.gaussian_kde(dat_density.iloc[:,3].values) 233 | density5 = stats.kde.gaussian_kde(dat_density.iloc[:,4].values) 234 | density6 = stats.kde.gaussian_kde(dat_density.iloc[:,5].values) 235 | density7 = stats.kde.gaussian_kde(dat_density.iloc[:,6].values) 236 | density8 = stats.kde.gaussian_kde(dat_density.iloc[:,7].values) 237 | density9 = stats.kde.gaussian_kde(dat_density.iloc[:,8].values) 238 | 239 | fig, ax = plt.subplots(nrows=3, ncols=3) 240 | fig.set_size_inches(20,15) 241 | 242 | x=np.linspace(10000, 30000, 99) 243 | 244 | ax[0,0].plot(x, density1(x),linewidth=3,label='Gaussian kernel') 245 | ax[0,0].axvline(x=18404,color='r',linewidth=3) 246 | 
ax[0,0].set_title('2014-12-22') 247 | 248 | ax[0,1].plot(x, density2(x),linewidth=3,label='Gaussian kernel') 249 | ax[0,1].axvline(x=18188,color='r',linewidth=3) 250 | ax[0,1].set_title('2014-12-23') 251 | 252 | ax[0,2].plot(x, density3(x),linewidth=3,label='Gaussian kernel') 253 | ax[0,2].axvline(x=17003,color='r',linewidth=3) 254 | ax[0,2].set_title('2014-12-24') 255 | 256 | ax[1,0].plot(x, density4(x),linewidth=3,label='Gaussian kernel') 257 | ax[1,0].axvline(x=17997,color='r',linewidth=3) 258 | ax[1,0].set_title('2014-12-25') 259 | 260 | ax[1,1].plot(x, density5(x),linewidth=3,label='Gaussian kernel') 261 | ax[1,1].axvline(x=17714,color='r',linewidth=3) 262 | ax[1,1].set_title('2014-12-26') 263 | 264 | ax[1,2].plot(x, density6(x),linewidth=3,label='Gaussian kernel') 265 | ax[1,2].axvline(x=18576,color='r',linewidth=3) 266 | ax[1,2].set_title('2014-12-27') 267 | 268 | ax[2,0].plot(x, density7(x),linewidth=3,label='Gaussian kernel') 269 | ax[2,0].axvline(x=18963,color='r',linewidth=3) 270 | ax[2,0].set_title('2014-12-28') 271 | 272 | ax[2,1].plot(x, density8(x),linewidth=3,label='Gaussian kernel') 273 | ax[2,1].axvline(x=16182,color='r',linewidth=3) 274 | ax[2,1].set_title('2014-12-29') 275 | 276 | ax[2,2].plot(x, density9(x),linewidth=3,label='Gaussian kernel') 277 | ax[2,2].axvline(x=15920,color='r',linewidth=3) 278 | ax[2,2].set_title('2014-12-30') 279 | 280 | ax[0,0].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 281 | ax[0,1].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 282 | ax[0,2].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 283 | ax[1,0].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 284 | ax[1,1].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 285 | ax[1,2].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 286 | ax[2,0].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 287 | ax[2,1].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 288 | ax[2,2].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 289 | 290 | ax[0,0].set_xlabel('Electricity consumption(KWh)') 291 | ax[0,0].set_ylabel('Probability density') 292 | ax[0,0].legend(loc='upper right') 293 | 294 | ax[0,1].set_xlabel('Electricity consumption(KWh)') 295 | ax[0,1].set_ylabel('Probability density') 296 | ax[0,1].legend(loc='upper right') 297 | 298 | ax[0,2].set_xlabel('Electricity consumption(KWh)') 299 | ax[0,2].set_ylabel('Probability density') 300 | ax[0,2].legend(loc='upper right') 301 | 302 | ax[1,0].set_xlabel('Electricity consumption(KWh)') 303 | ax[1,0].set_ylabel('Probability density') 304 | ax[1,0].legend(loc='upper right') 305 | 306 | ax[1,1].set_xlabel('Electricity consumption(KWh)') 307 | ax[1,1].set_ylabel('Probability density') 308 | ax[1,1].legend(loc='upper right') 309 | 310 | ax[1,2].set_xlabel('Electricity consumption(KWh)') 311 | ax[1,2].set_ylabel('Probability density') 312 | ax[1,2].legend(loc='upper right') 313 | 314 | ax[2,0].set_xlabel('Electricity consumption(KWh)') 315 | ax[2,0].set_ylabel('Probability density') 316 | ax[2,0].legend(loc='upper right') 317 | 318 | ax[2,1].set_xlabel('Electricity consumption(KWh)') 319 | ax[2,1].set_ylabel('Probability density') 320 | ax[2,1].legend(loc='upper right') 321 | 322 | ax[2,2].set_xlabel('Electricity consumption(KWh)') 323 | ax[2,2].set_ylabel('Probability density') 324 | ax[2,2].legend(loc='upper right') 325 | 326 | ################################################################################ 327 | ##############苏州########################## 328 | 
dat_density=pd.read_csv('suzhou_quantile_m.csv',index_col=[0]) 329 | 330 | dat_density=dat_density.T 331 | 332 | density1 = stats.kde.gaussian_kde(dat_density.iloc[:,0].values) 333 | density2 = stats.kde.gaussian_kde(dat_density.iloc[:,1].values) 334 | density3 = stats.kde.gaussian_kde(dat_density.iloc[:,2].values) 335 | density4 = stats.kde.gaussian_kde(dat_density.iloc[:,3].values) 336 | density5 = stats.kde.gaussian_kde(dat_density.iloc[:,4].values) 337 | density6 = stats.kde.gaussian_kde(dat_density.iloc[:,5].values) 338 | density7 = stats.kde.gaussian_kde(dat_density.iloc[:,6].values) 339 | density8 = stats.kde.gaussian_kde(dat_density.iloc[:,7].values) 340 | density9 = stats.kde.gaussian_kde(dat_density.iloc[:,8].values) 341 | 342 | fig, ax = plt.subplots(nrows=3, ncols=3) 343 | fig.set_size_inches(20,15) 344 | 345 | x=np.linspace(10000, 30000, 99) 346 | 347 | ax[0,0].plot(x, density1(x),linewidth=3,label='Gaussian kernel') 348 | ax[0,0].axvline(x=20071,color='r',linewidth=3) 349 | ax[0,0].set_title('2014-12-22') 350 | 351 | ax[0,1].plot(x, density2(x),linewidth=3,label='Gaussian kernel') 352 | ax[0,1].axvline(x=19069,color='r',linewidth=3) 353 | ax[0,1].set_title('2014-12-23') 354 | 355 | ax[0,2].plot(x, density3(x),linewidth=3,label='Gaussian kernel') 356 | ax[0,2].axvline(x=18218,color='r',linewidth=3) 357 | ax[0,2].set_title('2014-12-24') 358 | 359 | ax[1,0].plot(x, density4(x),linewidth=3,label='Gaussian kernel') 360 | ax[1,0].axvline(x=18874,color='r',linewidth=3) 361 | ax[1,0].set_title('2014-12-25') 362 | 363 | ax[1,1].plot(x, density5(x),linewidth=3,label='Gaussian kernel') 364 | ax[1,1].axvline(x=18487,color='r',linewidth=3) 365 | ax[1,1].set_title('2014-12-26') 366 | 367 | ax[1,2].plot(x, density6(x),linewidth=3,label='Gaussian kernel') 368 | ax[1,2].axvline(x=19158,color='r',linewidth=3) 369 | ax[1,2].set_title('2014-12-27') 370 | 371 | ax[2,0].plot(x, density7(x),linewidth=3,label='Gaussian kernel') 372 | ax[2,0].axvline(x=20061,color='r',linewidth=3) 373 | ax[2,0].set_title('2014-12-28') 374 | 375 | ax[2,1].plot(x, density8(x),linewidth=3,label='Gaussian kernel') 376 | ax[2,1].axvline(x=17648,color='r',linewidth=3) 377 | ax[2,1].set_title('2014-12-29') 378 | 379 | ax[2,2].plot(x, density9(x),linewidth=3,label='Gaussian kernel') 380 | ax[2,2].axvline(x=17474,color='r',linewidth=3) 381 | ax[2,2].set_title('2014-12-30') 382 | 383 | ax[0,0].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 384 | ax[0,1].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 385 | ax[0,2].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 386 | ax[1,0].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 387 | ax[1,1].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 388 | ax[1,2].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 389 | ax[2,0].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 390 | ax[2,1].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 391 | ax[2,2].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 392 | 393 | ax[0,0].set_xlabel('Electricity consumption(KWh)') 394 | ax[0,0].set_ylabel('Probability density') 395 | ax[0,0].legend(loc='upper right') 396 | 397 | ax[0,1].set_xlabel('Electricity consumption(KWh)') 398 | ax[0,1].set_ylabel('Probability density') 399 | ax[0,1].legend(loc='upper right') 400 | 401 | ax[0,2].set_xlabel('Electricity consumption(KWh)') 402 | ax[0,2].set_ylabel('Probability density') 403 | ax[0,2].legend(loc='upper right') 404 | 405 | ax[1,0].set_xlabel('Electricity consumption(KWh)') 406 | 
ax[1,0].set_ylabel('Probability density') 407 | ax[1,0].legend(loc='upper right') 408 | 409 | ax[1,1].set_xlabel('Electricity consumption(KWh)') 410 | ax[1,1].set_ylabel('Probability density') 411 | ax[1,1].legend(loc='upper right') 412 | 413 | ax[1,2].set_xlabel('Electricity consumption(KWh)') 414 | ax[1,2].set_ylabel('Probability density') 415 | ax[1,2].legend(loc='upper right') 416 | 417 | ax[2,0].set_xlabel('Electricity consumption(KWh)') 418 | ax[2,0].set_ylabel('Probability density') 419 | ax[2,0].legend(loc='upper right') 420 | 421 | ax[2,1].set_xlabel('Electricity consumption(KWh)') 422 | ax[2,1].set_ylabel('Probability density') 423 | ax[2,1].legend(loc='upper right') 424 | 425 | ax[2,2].set_xlabel('Electricity consumption(KWh)') 426 | ax[2,2].set_ylabel('Probability density') 427 | ax[2,2].legend(loc='upper right') 428 | 429 | 430 | 431 | ################################################################################ 432 | ##############连云港########################## 433 | dat_density=pd.read_csv('lianyungang_quantile_m.csv',index_col=[0]) 434 | 435 | dat_density=dat_density.T 436 | 437 | density1 = stats.kde.gaussian_kde(dat_density.iloc[:,0].values) 438 | density2 = stats.kde.gaussian_kde(dat_density.iloc[:,1].values) 439 | density3 = stats.kde.gaussian_kde(dat_density.iloc[:,2].values) 440 | density4 = stats.kde.gaussian_kde(dat_density.iloc[:,3].values) 441 | density5 = stats.kde.gaussian_kde(dat_density.iloc[:,4].values) 442 | density6 = stats.kde.gaussian_kde(dat_density.iloc[:,5].values) 443 | density7 = stats.kde.gaussian_kde(dat_density.iloc[:,6].values) 444 | density8 = stats.kde.gaussian_kde(dat_density.iloc[:,7].values) 445 | density9 = stats.kde.gaussian_kde(dat_density.iloc[:,8].values) 446 | 447 | fig, ax = plt.subplots(nrows=3, ncols=3) 448 | fig.set_size_inches(20,15) 449 | 450 | x=np.linspace(10000, 30000, 99) 451 | 452 | ax[0,0].plot(x, density1(x),linewidth=3,label='Gaussian kernel') 453 | ax[0,0].axvline(x=15172,color='r',linewidth=3) 454 | ax[0,0].set_title('2014-12-22') 455 | 456 | ax[0,1].plot(x, density2(x),linewidth=3,label='Gaussian kernel') 457 | ax[0,1].axvline(x=14331,color='r',linewidth=3) 458 | ax[0,1].set_title('2014-12-23') 459 | 460 | ax[0,2].plot(x, density3(x),linewidth=3,label='Gaussian kernel') 461 | ax[0,2].axvline(x=13838,color='r',linewidth=3) 462 | ax[0,2].set_title('2014-12-24') 463 | 464 | ax[1,0].plot(x, density4(x),linewidth=3,label='Gaussian kernel') 465 | ax[1,0].axvline(x=14283,color='r',linewidth=3) 466 | ax[1,0].set_title('2014-12-25') 467 | 468 | ax[1,1].plot(x, density5(x),linewidth=3,label='Gaussian kernel') 469 | ax[1,1].axvline(x=14585,color='r',linewidth=3) 470 | ax[1,1].set_title('2014-12-26') 471 | 472 | ax[1,2].plot(x, density6(x),linewidth=3,label='Gaussian kernel') 473 | ax[1,2].axvline(x=14723,color='r',linewidth=3) 474 | ax[1,2].set_title('2014-12-27') 475 | 476 | ax[2,0].plot(x, density7(x),linewidth=3,label='Gaussian kernel') 477 | ax[2,0].axvline(x=14979,color='r',linewidth=3) 478 | ax[2,0].set_title('2014-12-28') 479 | 480 | ax[2,1].plot(x, density8(x),linewidth=3,label='Gaussian kernel') 481 | ax[2,1].axvline(x=13440,color='r',linewidth=3) 482 | ax[2,1].set_title('2014-12-29') 483 | 484 | ax[2,2].plot(x, density9(x),linewidth=3,label='Gaussian kernel') 485 | ax[2,2].axvline(x=13224,color='r',linewidth=3) 486 | ax[2,2].set_title('2014-12-30') 487 | 488 | ax[0,0].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 489 | ax[0,1].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 490 | 
ax[0,2].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 491 | ax[1,0].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 492 | ax[1,1].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 493 | ax[1,2].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 494 | ax[2,0].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 495 | ax[2,1].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 496 | ax[2,2].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 497 | 498 | ax[0,0].set_xlabel('Electricity consumption(KWh)') 499 | ax[0,0].set_ylabel('Probability density') 500 | ax[0,0].legend(loc='upper right') 501 | 502 | ax[0,1].set_xlabel('Electricity consumption(KWh)') 503 | ax[0,1].set_ylabel('Probability density') 504 | ax[0,1].legend(loc='upper right') 505 | 506 | ax[0,2].set_xlabel('Electricity consumption(KWh)') 507 | ax[0,2].set_ylabel('Probability density') 508 | ax[0,2].legend(loc='upper right') 509 | 510 | ax[1,0].set_xlabel('Electricity consumption(KWh)') 511 | ax[1,0].set_ylabel('Probability density') 512 | ax[1,0].legend(loc='upper right') 513 | 514 | ax[1,1].set_xlabel('Electricity consumption(KWh)') 515 | ax[1,1].set_ylabel('Probability density') 516 | ax[1,1].legend(loc='upper right') 517 | 518 | ax[1,2].set_xlabel('Electricity consumption(KWh)') 519 | ax[1,2].set_ylabel('Probability density') 520 | ax[1,2].legend(loc='upper right') 521 | 522 | ax[2,0].set_xlabel('Electricity consumption(KWh)') 523 | ax[2,0].set_ylabel('Probability density') 524 | ax[2,0].legend(loc='upper right') 525 | 526 | ax[2,1].set_xlabel('Electricity consumption(KWh)') 527 | ax[2,1].set_ylabel('Probability density') 528 | ax[2,1].legend(loc='upper right') 529 | 530 | ax[2,2].set_xlabel('Electricity consumption(KWh)') 531 | ax[2,2].set_ylabel('Probability density') 532 | ax[2,2].legend(loc='upper right') 533 | 534 | 535 | 536 | 537 | 538 | 539 | 540 | -------------------------------------------------------------------------------- /quantile_pre.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | rm(list = ls()) 4 | setwd("D:/Rdata/Third_paper/third_paper_data") 5 | 6 | library(readxl) 7 | library(h2o) 8 | library(dplyr) 9 | 10 | h2o.init(nthreads = -1) 11 | 12 | set.seed(12345) 13 | 14 | dat<-read_excel("lianyungang_total_feature.xlsx") 15 | 16 | dim(dat) 17 | 18 | 19 | dat1<-na.omit(dat) 20 | 21 | dat2<-dat1[,2:58] 22 | 23 | names(dat2) 24 | 25 | month<-as.factor(dat1$month) 26 | 27 | day_of_w<-as.factor(dat1$day_of_w) 28 | 29 | air<-as.factor(dat1$air) 30 | 31 | rain<-as.factor(dat1$rain) 32 | 33 | 34 | levels(air)[levels(air)=="优"]<-"6" 35 | levels(air)[levels(air)=="良"]<-"5" 36 | levels(air)[levels(air)=="轻度污染"]<-"4" 37 | levels(air)[levels(air)=="中度污染"]<-"3" 38 | levels(air)[levels(air)=="重度污染"]<-"2" 39 | levels(air)[levels(air)=="严重污染"]<-"1" 40 | 41 | air<-as.numeric(air) 42 | 43 | levels(month)[levels(month)=="January"]<-"1" 44 | levels(month)[levels(month)=="February"]<-"2" 45 | levels(month)[levels(month)=="March"]<-"3" 46 | 47 | levels(month)[levels(month)=="April"]<-"4" 48 | levels(month)[levels(month)=="May"]<-"5" 49 | 50 | levels(month)[levels(month)=="June"]<-"6" 51 | levels(month)[levels(month)=="July"]<-"7" 52 | levels(month)[levels(month)=="August"]<-"8" 53 | levels(month)[levels(month)=="September"]<-"9" 54 | levels(month)[levels(month)=="October"]<-"10" 55 | levels(month)[levels(month)=="November"]<-"11" 56 | levels(month)[levels(month)=="December"]<-"12" 57 | 58 | levels(month) 59 | 60 | 
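# Aside -- a minimal sketch, assuming dat1$month holds English month names; this is
# not the code the script uses, just an equivalent one-step version of the twelve
# levels()<- assignments above (month_map and month_alt are illustrative names):
month_map <- setNames(as.character(1:12), month.name)     # "January" -> "1", ..., "December" -> "12"
month_alt <- factor(month_map[as.character(dat1$month)],
                    levels = as.character(1:12))
# month_alt carries the same recoded values as the `month` factor built above;
# the same idea would apply to the weekday recode that follows.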
levels(day_of_w)[levels(day_of_w)=="Monday"]<-"1" 61 | levels(day_of_w)[levels(day_of_w)=="Tuesday"]<-"2" 62 | levels(day_of_w)[levels(day_of_w)=="Wednesday"]<-"3" 63 | levels(day_of_w)[levels(day_of_w)=="Thursday"]<-"4" 64 | levels(day_of_w)[levels(day_of_w)=="Friday"]<-"5" 65 | levels(day_of_w)[levels(day_of_w)=="Saturday"]<-"6" 66 | levels(day_of_w)[levels(day_of_w)=="Sunday"]<-"7" 67 | 68 | levels(day_of_w) 69 | 70 | dat2$month<-month 71 | dat2$air<-air 72 | dat2$rain<-rain 73 | dat2$day_of_w<-day_of_w 74 | 75 | 76 | 77 | 78 | y <- "E_demand" #response column: digits 0-9 79 | x <- setdiff(names(dat2), y) #vector of predictor column names 80 | 81 | dat3<-as.h2o(dat2,destination_frame = "dat3") 82 | 83 | dat4<-dat3[1:333,] 84 | 85 | splits <- h2o.splitFrame(dat4, ratios = 0.8, seed = 12345) 86 | 87 | 88 | 89 | 90 | # first part of the data, without labels for unsupervised learning 91 | train <- splits[[1]] 92 | 93 | # second part of the data, with labels for supervised learning 94 | valid <- splits[[2]] 95 | 96 | test<-dat3[334:358,] 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | pred_all<-list() 111 | taus<-seq(from=0.01,to=0.99,length=99) 112 | quantile_pre_time_1<-Sys.time() 113 | for(i in 1:99) 114 | { 115 | m1 <- h2o.deeplearning( 116 | model_id="dl_model_first", 117 | training_frame=train, 118 | validation_frame=valid, ## validation dataset: used for scoring and early stopping 119 | x=x, 120 | y=y, 121 | #activation="Rectifier", ## default 122 | hidden=c(200,200,200), ## default: 2 hidden layers with 200 neurons each 123 | epochs=500, 124 | variable_importances=T , 125 | distribution = 'quantile', 126 | quantile_alpha = taus[i], 127 | seed = 12345 128 | ## not enabled by default 129 | ) 130 | pred <- h2o.predict(m1, newdata = test) 131 | pred <-as.data.frame(pred$predict) 132 | names(pred)<-paste("quantile",taus,sep = "_")[i] 133 | pred_all[[i]]<-pred 134 | } 135 | quantile_pre_time_2<-Sys.time() 136 | 137 | quantile_run_time_2<-(quantile_pre_time_2-quantile_pre_time_1) 138 | 139 | 140 | 141 | 142 | data<-as.data.frame(pred_all) 143 | 144 | write.csv(data,file="lianyungang_quantile_25m.csv") 145 | 146 | rm(list = ls()) 147 | 148 | 149 | dat<-read_excel("suzhou_total_feature.xlsx") 150 | 151 | dim(dat) 152 | 153 | #dat<-h2o.importFile(path = "total_feature.xlsx",destination_frame = "dat") 154 | #dat1<-as.data.frame(dat) 155 | dat1<-na.omit(dat) 156 | 157 | dat2<-dat1[,2:58] 158 | 159 | names(dat2) 160 | 161 | month<-as.factor(dat1$month) 162 | 163 | day_of_w<-as.factor(dat1$day_of_w) 164 | 165 | air<-as.factor(dat1$air) 166 | 167 | rain<-as.factor(dat1$rain) 168 | 169 | 170 | levels(air)[levels(air)=="优"]<-"6" 171 | levels(air)[levels(air)=="良"]<-"5" 172 | levels(air)[levels(air)=="轻度污染"]<-"4" 173 | levels(air)[levels(air)=="中度污染"]<-"3" 174 | levels(air)[levels(air)=="重度污染"]<-"2" 175 | levels(air)[levels(air)=="严重污染"]<-"1" 176 | 177 | air<-as.numeric(air) 178 | 179 | levels(month)[levels(month)=="January"]<-"1" 180 | levels(month)[levels(month)=="February"]<-"2" 181 | levels(month)[levels(month)=="March"]<-"3" 182 | 183 | levels(month)[levels(month)=="April"]<-"4" 184 | levels(month)[levels(month)=="May"]<-"5" 185 | 186 | levels(month)[levels(month)=="June"]<-"6" 187 | levels(month)[levels(month)=="July"]<-"7" 188 | levels(month)[levels(month)=="August"]<-"8" 189 | levels(month)[levels(month)=="September"]<-"9" 190 | levels(month)[levels(month)=="October"]<-"10" 191 | levels(month)[levels(month)=="November"]<-"11" 192 | 
levels(month)[levels(month)=="December"]<-"12" 193 | 194 | levels(month) 195 | 196 | levels(day_of_w)[levels(day_of_w)=="Monday"]<-"1" 197 | levels(day_of_w)[levels(day_of_w)=="Tuesday"]<-"2" 198 | levels(day_of_w)[levels(day_of_w)=="Wednesday"]<-"3" 199 | levels(day_of_w)[levels(day_of_w)=="Thursday"]<-"4" 200 | levels(day_of_w)[levels(day_of_w)=="Friday"]<-"5" 201 | levels(day_of_w)[levels(day_of_w)=="Saturday"]<-"6" 202 | levels(day_of_w)[levels(day_of_w)=="Sunday"]<-"7" 203 | 204 | levels(day_of_w) 205 | 206 | dat2$month<-month 207 | dat2$air<-air 208 | dat2$rain<-rain 209 | dat2$day_of_w<-day_of_w 210 | 211 | 212 | 213 | 214 | y <- "E_demand" #response column: digits 0-9 215 | x <- setdiff(names(dat2), y) #vector of predictor column names 216 | 217 | dat3<-as.h2o(dat2,destination_frame = "dat3") 218 | 219 | dat4<-dat3[1:333,] 220 | 221 | splits <- h2o.splitFrame(dat4, ratios = 0.8, seed = 12345) 222 | 223 | 224 | 225 | 226 | # first part of the data, without labels for unsupervised learning 227 | train <- splits[[1]] 228 | 229 | # second part of the data, with labels for supervised learning 230 | valid <- splits[[2]] 231 | 232 | test<-dat3[334:358,] 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | pred_all<-list() 247 | taus<-seq(from=0.01,to=0.99,length=99) 248 | quantile_pre_time_1<-Sys.time() 249 | for(i in 1:99) 250 | { 251 | m1 <- h2o.deeplearning( 252 | model_id="dl_model_first", 253 | training_frame=train, 254 | validation_frame=valid, ## validation dataset: used for scoring and early stopping 255 | x=x, 256 | y=y, 257 | #activation="Rectifier", ## default 258 | hidden=c(200,200,200), ## default: 2 hidden layers with 200 neurons each 259 | epochs=500, 260 | variable_importances=T , 261 | distribution = 'quantile', 262 | quantile_alpha = taus[i], 263 | seed = 12345 264 | ## not enabled by default 265 | ) 266 | pred <- h2o.predict(m1, newdata = test) 267 | pred <-as.data.frame(pred$predict) 268 | names(pred)<-paste("quantile",taus,sep = "_")[i] 269 | pred_all[[i]]<-pred 270 | } 271 | quantile_pre_time_2<-Sys.time() 272 | 273 | quantile_run_time_2<-(quantile_pre_time_2-quantile_pre_time_1) 274 | 275 | 276 | 277 | 278 | data<-as.data.frame(pred_all) 279 | 280 | write.csv(data,file="suzhou_quantile_25m.csv") 281 | 282 | rm(list = ls()) 283 | 284 | 285 | 286 | dat<-read_excel("nanjing_total_feature.xlsx") 287 | 288 | dim(dat) 289 | 290 | #dat<-h2o.importFile(path = "total_feature.xlsx",destination_frame = "dat") 291 | #dat1<-as.data.frame(dat) 292 | dat1<-na.omit(dat) 293 | 294 | dat2<-dat1[,2:58] 295 | 296 | names(dat2) 297 | 298 | month<-as.factor(dat1$month) 299 | 300 | day_of_w<-as.factor(dat1$day_of_w) 301 | 302 | air<-as.factor(dat1$air) 303 | 304 | rain<-as.factor(dat1$rain) 305 | 306 | 307 | levels(air)[levels(air)=="优"]<-"6" 308 | levels(air)[levels(air)=="良"]<-"5" 309 | levels(air)[levels(air)=="轻度污染"]<-"4" 310 | levels(air)[levels(air)=="中度污染"]<-"3" 311 | levels(air)[levels(air)=="重度污染"]<-"2" 312 | levels(air)[levels(air)=="严重污染"]<-"1" 313 | 314 | air<-as.numeric(air) 315 | 316 | levels(month)[levels(month)=="January"]<-"1" 317 | levels(month)[levels(month)=="February"]<-"2" 318 | levels(month)[levels(month)=="March"]<-"3" 319 | 320 | levels(month)[levels(month)=="April"]<-"4" 321 | levels(month)[levels(month)=="May"]<-"5" 322 | 323 | levels(month)[levels(month)=="June"]<-"6" 324 | levels(month)[levels(month)=="July"]<-"7" 325 | levels(month)[levels(month)=="August"]<-"8" 326 | levels(month)[levels(month)=="September"]<-"9" 327 | 
levels(month)[levels(month)=="October"]<-"10" 328 | levels(month)[levels(month)=="November"]<-"11" 329 | levels(month)[levels(month)=="December"]<-"12" 330 | 331 | levels(month) 332 | 333 | levels(day_of_w)[levels(day_of_w)=="Monday"]<-"1" 334 | levels(day_of_w)[levels(day_of_w)=="Tuesday"]<-"2" 335 | levels(day_of_w)[levels(day_of_w)=="Wednesday"]<-"3" 336 | levels(day_of_w)[levels(day_of_w)=="Thursday"]<-"4" 337 | levels(day_of_w)[levels(day_of_w)=="Friday"]<-"5" 338 | levels(day_of_w)[levels(day_of_w)=="Saturday"]<-"6" 339 | levels(day_of_w)[levels(day_of_w)=="Sunday"]<-"7" 340 | 341 | levels(day_of_w) 342 | 343 | dat2$month<-month 344 | dat2$air<-air 345 | dat2$rain<-rain 346 | dat2$day_of_w<-day_of_w 347 | 348 | 349 | 350 | 351 | y <- "E_demand" #response column: digits 0-9 352 | x <- setdiff(names(dat2), y) #vector of predictor column names 353 | 354 | dat3<-as.h2o(dat2,destination_frame = "dat3") 355 | 356 | dat4<-dat3[1:333,] 357 | 358 | splits <- h2o.splitFrame(dat4, ratios = 0.8, seed = 12345) 359 | 360 | 361 | 362 | 363 | # first part of the data, without labels for unsupervised learning 364 | train <- splits[[1]] 365 | 366 | # second part of the data, with labels for supervised learning 367 | valid <- splits[[2]] 368 | 369 | test<-dat3[334:358,] 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | pred_all<-list() 384 | taus<-seq(from=0.01,to=0.99,length=99) 385 | quantile_pre_time_1<-Sys.time() 386 | for(i in 1:99) 387 | { 388 | m1 <- h2o.deeplearning( 389 | model_id="dl_model_first", 390 | training_frame=train, 391 | validation_frame=valid, ## validation dataset: used for scoring and early stopping 392 | x=x, 393 | y=y, 394 | #activation="Rectifier", ## default 395 | hidden=c(200,200,200), ## default: 2 hidden layers with 200 neurons each 396 | epochs=500, 397 | variable_importances=T , 398 | distribution = 'quantile', 399 | quantile_alpha = taus[i], 400 | seed = 12345 401 | ## not enabled by default 402 | ) 403 | pred <- h2o.predict(m1, newdata = test) 404 | pred <-as.data.frame(pred$predict) 405 | names(pred)<-paste("quantile",taus,sep = "_")[i] 406 | pred_all[[i]]<-pred 407 | } 408 | quantile_pre_time_2<-Sys.time() 409 | 410 | quantile_run_time_2<-(quantile_pre_time_2-quantile_pre_time_1) 411 | 412 | 413 | 414 | data<-as.data.frame(pred_all) 415 | 416 | write.csv(data,file="nanjing_quantile_25m.csv") 417 | -------------------------------------------------------------------------------- /taibao/.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaoguozhi/paper_code/eec3042cc81e9706f5a64787f6c869f9bd1ee13c/taibao/.RData -------------------------------------------------------------------------------- /taibao/EX-04-01.R: -------------------------------------------------------------------------------- 1 | ############################################################# 2 | # Description: 3 | # 1.for lecture 'my introduction to R' 4 | # 2.No.: CH-04, EX-01: 5 | # 3.Purpose: threshold mean regression 6 | # 4.Author: Qifa Xu 7 | # 5.Founded: Apr 09, 2015 8 | # 6.Revised: Apr 09, 2015 9 | # 7.Reference: 10 | # ########################################################### 11 | # Contents: 12 | # 1. generate data 13 | # 2. do regression with real data 14 | # 3. define functions 15 | # 4. find the optimal threshold 16 | # 5. estimate threshold regression model 17 | ######################################################### 18 | 19 | # 0. 
Initialize 20 | setwd("F:/programe/lecture/my introduction to R") 21 | rm(list = ls()) 22 | 23 | # 1. generate data 24 | beta <- c(3,2,5) 25 | threshold <- 0.3 26 | n <- 200 27 | x <- matrix(runif(n), nrow=n, ncol=1) 28 | eps <- rnorm(n=n) 29 | y <- beta[1] + beta[2]*x[,1] + beta[3]*(x[,1]*(x[,1] > threshold)) + eps/10 30 | dat <- data.frame(y=y, x1=x[,1]) 31 | plot(dat$x1, y) 32 | 33 | # 2. do regression with real data 34 | lm(y~x1+I(x1*(x1>threshold)), data=dat) 35 | 36 | # 3. define functions 37 | source('sub-01.R') 38 | 39 | # 4. find the optimal threshold 40 | (gamopt <- gamsearch.mr(var=dat$x1, dat)) 41 | 42 | # 5. estimate threshold regression model 43 | # (1) make model 44 | (thrmodel.mr <- lm(y~x1+I(x1*(x1>gamopt)), data=dat)) 45 | summary(thrmodel.mr) 46 | 47 | # (2) show results 48 | xs <- seq(min(dat$x1), max(dat$x1), length=500) 49 | ys.hat <- predict(thrmodel.mr, newdata=data.frame(x1=xs)) 50 | 51 | plot(dat$x1, y, xlab='x', ylab='y') 52 | lines(xs[xs=gamopt], ys.hat[xs>=gamopt], lwd=2, col='red') 54 | 55 | 56 | -------------------------------------------------------------------------------- /taibao/EX-04-02.R: -------------------------------------------------------------------------------- 1 | ############################################################# 2 | # Description: 3 | # 1.for lecture 'my introduction to R' 4 | # 2.No.: CH-04, EX-02: 5 | # 3.Purpose: threshold quantile regression 6 | # 4.Author: Qifa Xu 7 | # 5.Founded: Apr 09, 2015 8 | # 6.Revised: Apr 09, 2015 9 | # 7.Reference: 10 | # ########################################################### 11 | # Contents: 12 | # 1. generate data 13 | # 2. do regression with real data 14 | # 3. define functions 15 | # 4. find the optimal threshold 16 | # 5. estimate threshold regression model 17 | ######################################################### 18 | 19 | # 0. Initialize 20 | setwd("F:/programe/lecture/my introduction to R") 21 | rm(list = ls()) 22 | 23 | 24 | # 1. generate data 25 | beta <- c(3, 2, 5) 26 | threshold <- 0.3 27 | n <- 200 28 | x <- matrix(runif(n), nrow=n, ncol=1) 29 | eps <- rchisq(n=n, df=3) 30 | y <- beta[1] + beta[2]*x[,1] + beta[3]*(x[,1]*(x[,1] > threshold)) + x[,1]*eps 31 | dat <- data.frame(y=y, x1=x[,1]) 32 | plot(dat$x1, y) 33 | 34 | # 2. do regression with real data 35 | library(quantreg) 36 | taus <- seq(0.1, 0.9, by=0.2) 37 | rq(y~x1+I(x1*(x1>threshold)), tau=taus, data=dat) 38 | 39 | # 3. define functions 40 | source('sub-01.R') 41 | 42 | # 4. find the optimal threshold 43 | gamopt <- rep(NA, length=length(taus)) 44 | for (i in seq_along(taus)){ 45 | gamopt[i] <- gamsearch.qr(var=dat$x1, tau=taus[i], dat) 46 | } 47 | names(gamopt) <- paste('tau=', taus, sep='') 48 | print(gamopt) 49 | 50 | 51 | # 5. 
estimate threshold regression model 52 | # (1) make model 53 | (thrmodel.qr <- rq(y~x1+I(x1*(x1>gamopt[1])), tau=taus, data=dat)) 54 | summary(thrmodel.qr) 55 | coef(thrmodel.qr) # compare with those true values 56 | # eps <- rchisq(n=n, df=3) 57 | # y <- beta[1] + beta[2]*x[,1] + beta[3]*(x[,1]*(x[,1] > threshold)) + x[,1]*eps 58 | # Q.y <- beta[1] + beta[2]*x[,1] + beta[3]*(x[,1]*(x[,1] > threshold)) + x[,1]*F.inv(eps) 59 | (F.inv <-qchisq(p=taus, df=3)) 60 | beta[2] + F.inv # slopes in the lower interval 61 | beta[2] + beta[3] + F.inv # slopes in the upper interval 62 | 63 | # (2) show results 64 | xs <- seq(min(dat$x1), max(dat$x1), length=500) 65 | ys.hat <- predict(thrmodel.qr, newdata=data.frame(x1=xs)) 66 | 67 | plot(dat$x1, y, xlab='x', ylab='y') 68 | for (i in 1:length(taus)){ 69 | lines(xs[xs=gamopt], ys.hat[xs>=gamopt,i], lty=i, lwd=2) 71 | } 72 | 73 | 74 | -------------------------------------------------------------------------------- /taibao/IPAD.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaoguozhi/paper_code/eec3042cc81e9706f5a64787f6c869f9bd1ee13c/taibao/IPAD.xlsx -------------------------------------------------------------------------------- /taibao/a.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /taibao/prog-00.R: -------------------------------------------------------------------------------- 1 | ######################################################## 2 | # Description: 3 | # 1.for threshold regression simulation 4 | # 2.No.: 01 5 | # 3.Purpose: threshold mean regression 6 | # 4.Reference: non 7 | # 5.Author: Qifa Xu 8 | # 6.Founded: Mar 17, 2015. 9 | # 7.Revised: Mar 18, 2015. 10 | ######################################################## 11 | # Contents: 12 | # 1. generate data 13 | # 2. do regression with real data 14 | ######################################################### 15 | 16 | # 0. initialize 17 | setwd('F:/programe/paper/QR+goods') 18 | rm(list=ls()) 19 | 20 | # 1. generate data 21 | beta <- c(3,2,10) 22 | threshold <- 0.3 23 | n <- 200 24 | x <- matrix(runif(n), nrow=n, ncol=1) 25 | eps <- rnorm(n=n) 26 | y <- beta[1] + beta[2]*x[,1] + beta[3]*(x[,1]*(x[,1] > threshold)) + eps 27 | dat <- data.frame(y=y, x1=x[,1]) 28 | plot(dat$x1, y) 29 | 30 | # 2. do regression with real data 31 | lm(y~x1+I(x1*(x1>threshold)), data=dat) 32 | 33 | # 3. define functions 34 | source('sub-01.R') 35 | 36 | # 4. find the optimal threshold 37 | (gamopt <- gamsearch.mr(var=dat$x1, dat)) 38 | 39 | # 5. estimate threshold regression model 40 | # (1) make model 41 | (thrmodel.mr <- lm(y~x1+I(x1*(x1>gamopt)), data=dat)) 42 | summary(thrmodel.mr) 43 | 44 | # (2) show results 45 | xs <- seq(min(dat$x1), max(dat$x1), length=500) 46 | ys.hat <- predict(thrmodel.mr, newdata=data.frame(x1=xs)) 47 | 48 | plot(dat$x1, y, xlab='x', ylab='y') 49 | lines(xs[xs=gamopt], ys.hat[xs>=gamopt], lwd=2, col='red') 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /taibao/prog-01.R: -------------------------------------------------------------------------------- 1 | ######################################################## 2 | # Description: 3 | # 1.for threshold regression simulation 4 | # 2.No.: 02 5 | # 3.Purpose: threshold quantile regression 6 | # 4.Reference: non 7 | # 5.Author: Qifa Xu 8 | # 6.Founded: Mar 17, 2015. 
9 | # 7.Revised: Mar 18, 2015. 10 | ######################################################## 11 | # Contents: 12 | # 1. generate data 13 | # 2. do regression with real data 14 | ######################################################### 15 | 16 | # 0. initialize 17 | setwd('E:/QR+goods') 18 | rm(list=ls()) 19 | 20 | # 1. generate data 21 | beta <- c(3, 2, 5) 22 | threshold <- 0.3 23 | n <- 200 24 | x <- matrix(runif(n), nrow=n, ncol=1) 25 | eps <- rchisq(n=n, df=3) 26 | y <- beta[1] + beta[2]*x[,1] + beta[3]*(x[,1]*(x[,1] > threshold)) + x[,1]*eps/10 27 | dat <- data.frame(y=y, x1=x[,1]) 28 | plot(dat$x1, y) 29 | 30 | # 2. do regression with real data 31 | library(quantreg) 32 | taus <- seq(0.1, 0.9, by=0.2) 33 | rq(y~x1+I(x1*(x1>threshold)), tau=taus, data=dat) 34 | 35 | # 3. define functions 36 | source('sub-01.R') 37 | 38 | # 4. find the optimal threshold 39 | gamopt <- rep(NA, length=length(taus)) 40 | for (i in seq_along(taus)){ 41 | gamopt[i] <- gamsearch.qr(var=dat$x1, tau=taus[i], dat) 42 | } 43 | names(gamopt) <- paste('tau=', taus, sep='') 44 | print(gamopt) 45 | 46 | 47 | # 5. estimate threshold regression model 48 | # (1) make model 49 | (thrmodel.qr <- rq(y~x1+I(x1*(x1>gamopt[1])), tau=taus, data=dat)) 50 | summary(thrmodel.qr) 51 | coef(thrmodel.qr) # compare with those true values 52 | # eps <- rchisq(n=n, df=3) 53 | # y <- beta[1] + beta[2]*x[,1] + beta[3]*(x[,1]*(x[,1] > threshold)) + x[,1]*eps 54 | # Q.y <- beta[1] + beta[2]*x[,1] + beta[3]*(x[,1]*(x[,1] > threshold)) + x[,1]*F.inv(eps) 55 | (F.inv <-qchisq(p=taus, df=3)) 56 | beta[2] + F.inv # slopes in the lower interval 57 | beta[2] + beta[3] + F.inv # slopes in the upper interval 58 | 59 | # (2) show results 60 | xs <- seq(min(dat$x1), max(dat$x1), length=500) 61 | ys.hat <- predict(thrmodel.qr, newdata=data.frame(x1=xs)) 62 | 63 | plot(dat$x1, y, xlab='x', ylab='y') 64 | for (i in 1:length(taus)){ 65 | lines(xs[xs=gamopt], ys.hat[xs>=gamopt,i], lty=i, lwd=2) 67 | } 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /taibao/prog-01old.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | setwd('E:/QR+goods') 4 | rm(list=ls()) 5 | # 1. read data 6 | library(xlsx) 7 | dat <- read.xlsx(file='IPAD.xlsx', sheetName='all', startRow=1, endRow=436, colIndex=2:9) 8 | head(dat) 9 | class(dat) 10 | names(dat) <- c('credit', 'grade', 'popular', 'price', 'sale', 'RevAmou', 'RevGrad', 'No') 11 | summary(dat) 12 | 13 | # 2. data process 14 | dat <- na.omit(dat) 15 | 16 | # data <- data.frame() 17 | # for (j in 1:ncol(dat)){ 18 | # data[,j] <- as.data.frame(dat[,j]) 19 | # # data <- cbind(data, as.data.frame(dat[,j])) 20 | # } 21 | 22 | credit <- dat$credit 23 | grade <- dat$grade 24 | popular <- dat$popular 25 | price <- as.numeric(matrix(dat$price)) 26 | sale <- as.numeric(matrix(dat$sale)) 27 | RevAmou <- as.numeric(matrix(dat$RevAmou)) 28 | RevGrad <- dat$RevGrad 29 | No <- dat$No 30 | 31 | dat <- data.frame(sale, credit, grade, popular, price, RevAmou, RevGrad, No) 32 | 33 | plot(dat$sale~dat$price) 34 | 35 | 36 | cor(dat) 37 | 38 | # 3. make model in mean regression 39 | model.lm <- lm(sale~., data=dat) 40 | summary(model.lm) 41 | 42 | # 4. make model in quantile regression 43 | library(quantreg) 44 | taus <- seq(0.1, 0.9, length=5) 45 | model.rq <- rq(sale~., tau=taus, data=dat) 46 | plot(summary(model.rq)) 47 | summary(model.rq) 48 | 49 | 50 | # 5. 
make threshold model in mean regression 51 | # (1) define functions 52 | source('sub.R') 53 | 54 | # (2) find the optimal threshold 55 | (gamopt <- gamsearch(dat=dat)) 56 | 57 | # (3) estimate threshold model 58 | thrmodel.lm <- lm(sale~price+I(price*(price>gamopt))+grade+credit 59 | +popular+RevAmou+RevGrad+No, data=dat) 60 | summary(thrmodel.lm) 61 | round(coef(thrmodel.lm), digits=4) 62 | 63 | # 6. make threshold model in quantile regression 64 | # (1) define functions 65 | 66 | 67 | # (2) find the optimal threshold 68 | tau <- 0.7 69 | (gamopt.rq <- gamsearch.rq(dat=dat, tau=tau, var=price)) 70 | 71 | 72 | # (3) estimate threshold model 73 | thrmodel.rq <- rq(sale~price+I(price*(price>gamopt.rq))+grade+credit 74 | +popular+RevAmou+RevGrad+No, tau=tau, data=dat) 75 | summary(thrmodel.rq) 76 | -------------------------------------------------------------------------------- /taibao/prog-02.R: -------------------------------------------------------------------------------- 1 | ######################################################## 2 | # Description: 3 | # 1.for threshold regression simulation 4 | # 2.No.: 02 5 | # 3.Purpose: threshold quantile regression 6 | # 4.Reference: non 7 | # 5.Author: Qifa Xu 8 | # 6.Founded: Mar 17, 2015. 9 | # 7.Revised: Mar 18, 2015. 10 | ######################################################## 11 | # Contents: 12 | # 1. read data 13 | # 2. data process 14 | # 3. make model in mean regression 15 | # 4. make model in quantile regression 16 | # 5. make threshold model in mean regression 17 | # 6. make threshold model in quantile regression 18 | ######################################################### 19 | 20 | # 0. initialize 21 | setwd('E:/QR+goods') 22 | rm(list=ls()) 23 | 24 | # 1. read data 25 | library(xlsx) 26 | dat <- read.xlsx(file='IPAD.xlsx', sheetName='all', startRow=1, endRow=436, colIndex=2:9) 27 | head(dat) 28 | class(dat) 29 | names(dat) <- c('credit', 'grade', 'popular', 'price', 'sale', 'RevAmou', 'RevGrad', 'No') 30 | summary(dat) 31 | 32 | # 2. data process 33 | 34 | # data <- data.frame() 35 | # for (j in 1:ncol(dat)){ 36 | # data[,j] <- as.data.frame(dat[,j]) 37 | # # data <- cbind(data, as.data.frame(dat[,j])) 38 | # } 39 | 40 | credit <- dat$credit 41 | grade <- dat$grade 42 | popular <- dat$popular 43 | price <- as.numeric(matrix(dat$price)) 44 | sale <- as.numeric(matrix(dat$sale)) 45 | RevAmou <- as.numeric(matrix(dat$RevAmou)) 46 | RevGrad <- dat$RevGrad 47 | No <- dat$No 48 | 49 | dat <- data.frame(sale, credit, grade, popular, price, RevAmou, RevGrad, No) 50 | dat <- na.omit(dat) 51 | 52 | plot(dat$sale~dat$price) 53 | 54 | round(cor(dat), digits=4) 55 | 56 | # 3. make model in mean regression 57 | model.lm <- lm(sale~., data=dat) 58 | summary(model.lm) 59 | 60 | # 4. make model in quantile regression 61 | library(quantreg) 62 | taus <- seq(0.1, 0.9, length=5) 63 | model.rq <- rq(sale~., tau=taus, data=dat) 64 | plot(summary(model.rq)) 65 | summary(model.rq) 66 | round(coef(model.rq), digits=4) 67 | 68 | # 5. 
make threshold model in mean regression 69 | # (1) define functions 70 | source('sub-02.R') 71 | 72 | # (2) find the optimal threshold 73 | (gamopt <- gamsearch.mr(var=price, dat=dat)) 74 | 75 | # (3) estimate threshold model 76 | thrmodel.mr <- lm(sale~price+I(price*(price>gamopt))+grade+credit 77 | +popular+RevAmou+RevGrad+No, data=dat) 78 | summary(thrmodel.mr) 79 | 80 | # (1) make model 81 | (thrmodel.mr <- lm(sale~price+I(price*(price>gamopt)), data=dat)) 82 | summary(thrmodel.mr) 83 | 84 | # (2) show results 85 | prices <- seq(min(dat$price), max(dat$price), length=6838) 86 | sales.hat <- predict(thrmodel.mr, newdata=data.frame(price=prices)) 87 | 88 | plot(dat$price, dat$sale, xlab='price', ylab='sale') 89 | lines(prices[prices<gamopt], sales.hat[prices<gamopt], lwd=2, col='red') 90 | lines(prices[prices>=gamopt], sales.hat[prices>=gamopt], lwd=2, col='red') 91 | # sale~price+I(price*(price>gamma))+grade+credit+popular+RevAmou+RevGrad+No 92 | 93 | # 6. make threshold model in quantile regression 94 | # (1) define functions 95 | 96 | 97 | # (2) find the optimal threshold 98 | tau <- 0.1 99 | (gamopt.qr <- gamsearch.qr(dat=dat, tau=tau, var=price)) 100 | 101 | 102 | # (3) estimate threshold model 103 | thrmodel.qr <- rq(sale~price+I(price*(price>gamopt.qr))+grade+credit 104 | +popular+RevAmou+RevGrad+No, tau=tau, data=dat) 105 | summary(thrmodel.qr) 106 | # 5. estimate threshold regression model 107 | # (1) make model 108 | (thrmodel.qr.1 <- rq(sale~price+I(price*(price>gamopt)), tau=taus[1], data=dat)) 109 | (thrmodel.qr.2 <- rq(sale~price+I(price*(price>gamopt)), tau=taus[2], data=dat)) 110 | (thrmodel.qr.3 <- rq(sale~price+I(price*(price>gamopt)), tau=taus[3], data=dat)) 111 | (thrmodel.qr.4 <- rq(sale~price+I(price*(price>gamopt)), tau=taus[4], data=dat)) 112 | (thrmodel.qr.5 <- rq(sale~price+I(price*(price>gamopt)), tau=taus[5], data=dat)) 113 | summary(thrmodel.qr) 114 | coef(thrmodel.qr) # compare with those true values 115 | # eps <- rchisq(n=n, df=3) 116 | # y <- beta[1] + beta[2]*x[,1] + beta[3]*(x[,1]*(x[,1] > threshold)) + x[,1]*eps 117 | # Q.y <- beta[1] + beta[2]*x[,1] + beta[3]*(x[,1]*(x[,1] > threshold)) + x[,1]*F.inv(eps) 118 | (F.inv <- qchisq(p=taus, df=3)) 119 | beta[2] + F.inv # slopes in the lower interval 120 | beta[2] + beta[3] + F.inv # slopes in the upper interval 121 | 122 | # (2) show results 123 | xs <- seq(min(dat$price), max(dat$price), length=6838) 124 | ys.hat.1 <- predict(thrmodel.qr.1, newdata=data.frame(price=xs)) 125 | ys.hat.2 <- predict(thrmodel.qr.2, newdata=data.frame(price=xs)) 126 | ys.hat.3 <- predict(thrmodel.qr.3, newdata=data.frame(price=xs)) 127 | ys.hat.4 <- predict(thrmodel.qr.4, newdata=data.frame(price=xs)) 128 | ys.hat.5 <- predict(thrmodel.qr.5, newdata=data.frame(price=xs)) 129 | ys.hat <- cbind(ys.hat.1,ys.hat.2,ys.hat.3,ys.hat.4,ys.hat.5) 130 | plot(dat$price, dat$sale, xlab='price', ylab='sale') 131 | for (i in 1:length(taus)){ 132 | lines(xs[xs<gamopt], ys.hat[xs<gamopt,i], lty=i, lwd=2) 133 | lines(xs[xs>=gamopt], ys.hat[xs>=gamopt,i], lty=i, lwd=2) 134 | } 135 | 136 |
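A more compact alternative to the five separate per-quantile fits in prog-02.R above, offered only as a sketch and not as part of the repository: quantreg's rq() accepts a vector of quantiles, and predict() on the resulting fit returns one column of fitted sales per tau. It assumes dat, taus, and gamopt from prog-02.R are already in the workspace.

library(quantreg)
# fit all five quantiles of the price-only threshold model in one call
thrmodel.qr.all <- rq(sale~price+I(price*(price>gamopt)), tau=taus, data=dat)
coef(thrmodel.qr.all)                                    # one column of coefficients per tau
prices <- seq(min(dat$price), max(dat$price), length=500)
sales.hat <- predict(thrmodel.qr.all, newdata=data.frame(price=prices))  # matrix: one column per tau
plot(dat$price, dat$sale, xlab='price', ylab='sale')
for (i in 1:length(taus)){
  lines(prices[prices<gamopt], sales.hat[prices<gamopt, i], lty=i, lwd=2)
  lines(prices[prices>=gamopt], sales.hat[prices>=gamopt, i], lty=i, lwd=2)
}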
-------------------------------------------------------------------------------- /taibao/sub-01.R: -------------------------------------------------------------------------------- 1 | # sub functions for goods pricing 2 | # 1. define loss function for mean regression 3 | loss.mr <- function(gam, dat){ 4 | fmla <- y ~ x1 + I(x1 * (x1 > gam)) 5 | model <- lm(formula=fmla, data=dat) 6 | sse <- sum(model$residuals^2) 7 | sse 8 | } 9 | 10 | # 2. define gamma search function 11 | gamsearch.mr <- function(var=x, dat){ 12 | # browser() 13 | min <- min(var) 14 | max <- max(var) 15 | gams <- seq(min, max, length=500) 16 | los <- rep(NA, length(gams)) 17 | for (i in 1:length(gams)){ 18 | los[i] <- loss.mr(gam=gams[i], dat=dat) 19 | } 20 | plot(gams, los, type='l') 21 | optgam <- gams[which.min(los)] 22 | optgam 23 | } 24 | 25 | # 3. define loss function for quantile regression 26 | loss.qr <- function(gam, tau, dat){ 27 | fmla <- y ~ x1 + I(x1 * (x1 > gam)) 28 | model <- rq(fmla, tau=tau, data=dat) 29 | rho <- model$rho 30 | rho 31 | } 32 | 33 | 34 | # 4. define gamma search function for quantile regression 35 | gamsearch.qr <- function(var=x, tau, dat){ 36 | min <- min(var)*1.2 37 | max <- max(var)/1.2 38 | gams <- seq(min, max, length=100) 39 | los <- rep(NA, length(gams)) 40 | for (i in 1:length(gams)){ 41 | los[i] <- loss.qr(gam=gams[i], tau=tau, dat=dat) 42 | } 43 | plot(gams, los, type='l') 44 | optgam <- gams[which.min(los)] 45 | optgam 46 | } 47 | 48 | -------------------------------------------------------------------------------- /taibao/sub-02.R: -------------------------------------------------------------------------------- 1 | # sub functions for goods pricing 2 | # 1. define loss function for mean regression 3 | loss.mr <- function(gam, dat){ 4 | # fmla <- y ~ x1 + I(x1 * (x1 > gam)) 5 | fmla <- sale~price+I(price*(price>gam))+grade+credit+ 6 | popular+RevAmou+RevGrad+No 7 | model <- lm(formula=fmla, data=dat) 8 | sse <- sum(model$residuals^2) 9 | sse 10 | } 11 | 12 | # 2. define gamma search function 13 | gamsearch.mr <- function(var=x, dat){ 14 | # browser() 15 | min <- min(var) 16 | max <- max(var) 17 | gams <- seq(min, max, length=100) 18 | los <- rep(NA, length(gams)) 19 | for (i in 1:length(gams)){ 20 | los[i] <- loss.mr(gam=gams[i], dat=dat) 21 | } 22 | plot(gams, los, type='l') 23 | optgam <- gams[which.min(los)] 24 | optgam 25 | } 26 | 27 | # 3. define loss function for quantile regression 28 | loss.qr <- function(gam, tau, dat){ 29 | fmla <- sale~price+I(price*(price>gam))+grade+credit+ 30 | popular+RevAmou+RevGrad+No 31 | model <- rq(fmla, tau=tau, data=dat) 32 | rho <- model$rho 33 | rho 34 | } 35 | 36 | 37 | # 4.
define gamma serch function for quantile regression 38 | gamsearch.qr <- function(var=x, tau, dat){ 39 | min <- min(var)*1.2 40 | max <- max(var)/1.2 41 | gams <- seq(min, max, length=100) 42 | los <- rep(NA, length(gams)) 43 | for (i in 1:length(gams)){ 44 | los[i] <- loss.qr(gam=gams[i], tau=tau, dat=dat) 45 | } 46 | plot(gams, los, type='l') 47 | optgam <- gams[which.min(los)] 48 | optgam 49 | } 50 | 51 | -------------------------------------------------------------------------------- /xingqi.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import pandas as pd 3 | import numpy as np 4 | import matplotlib.dates as mdates 5 | from sklearn.neighbors import KernelDensity 6 | from scipy.stats import norm 7 | from scipy import stats 8 | import os 9 | 10 | os.chdir("D:/Rdata/Third_paper/third_paper_data/") 11 | 12 | total_dat=pd.read_excel('nanjing_total_feature.xlsx',index_col=[0]) 13 | 14 | 15 | total_dat_1=total_dat['20140106':'20141228'] 16 | xingqi_data=total_dat_1[['E_demand','month']] 17 | xingqi=['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday'] 18 | 19 | fig=plt.figure(figsize=(10,5)) 20 | ax1=fig.add_subplot(111) 21 | x=range(len(xingqi)) 22 | for i in range(1,51): 23 | ax1.plot(x,xingqi_data[7*i:7*i+7]['E_demand'].values,'r-',linewidth=3,color='m',alpha=0.5) 24 | 25 | 26 | ax1.plot(x,xingqi_data[0:7]['E_demand'].values,'r-',linewidth=3,color='m',alpha=0.5,label="Electricity consumption") 27 | ax1.axvline(x=0,color='black',linewidth=1,linestyle='dashed') 28 | ax1.axvline(x=1,color='black',linewidth=1,linestyle='dashed') 29 | ax1.axvline(x=2,color='black',linewidth=1,linestyle='dashed') 30 | ax1.axvline(x=3,color='black',linewidth=1,linestyle='dashed') 31 | ax1.axvline(x=4,color='black',linewidth=1,linestyle='dashed') 32 | ax1.axvline(x=5,color='black',linewidth=1,linestyle='dashed') 33 | ax1.axvline(x=6,color='black',linewidth=1,linestyle='dashed',label="Date") 34 | 35 | ax1.set_ylim(8000,35000) 36 | plt.ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 37 | plt.xticks(x, xingqi) 38 | plt.margins(0.001) 39 | plt.title("Electricity consumption from the weekly perspective") 40 | plt.xlabel('Date(day)') 41 | plt.ylabel('Electricity consumption(KWh)') 42 | ax1.legend(loc="upper right") 43 | 44 | 45 | -------------------------------------------------------------------------------- /最新预测.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import pandas as pd 3 | import numpy as np 4 | import matplotlib.dates as mdates 5 | from sklearn.neighbors import KernelDensity 6 | from scipy.stats import norm 7 | from scipy import stats 8 | import os 9 | 10 | os.chdir("D:/Rdata/Third_paper/third_paper_data/") 11 | 12 | ################################################################## 13 | ####南京 14 | 15 | dat_density=pd.read_csv('nanjing_quantile_25m.csv',index_col=[0]) 16 | dat_real_value=pd.read_excel('nanjing_total.xlsx',index_col=[0]) 17 | 18 | dat_density=dat_density.T 19 | n_pre=dat_density.shape[1] 20 | n_real_value=dat_real_value[(365-n_pre):365] 21 | 22 | 23 | density1 = stats.kde.gaussian_kde(dat_density.iloc[:,0].values) 24 | density2 = stats.kde.gaussian_kde(dat_density.iloc[:,1].values) 25 | density3 = stats.kde.gaussian_kde(dat_density.iloc[:,2].values) 26 | density4 = stats.kde.gaussian_kde(dat_density.iloc[:,3].values) 27 | density5 = stats.kde.gaussian_kde(dat_density.iloc[:,4].values) 28 | density6 = 
stats.kde.gaussian_kde(dat_density.iloc[:,5].values) 29 | density7 = stats.kde.gaussian_kde(dat_density.iloc[:,6].values) 30 | density8 = stats.kde.gaussian_kde(dat_density.iloc[:,7].values) 31 | density9 = stats.kde.gaussian_kde(dat_density.iloc[:,8].values) 32 | 33 | 34 | 35 | fig, ax = plt.subplots(nrows=3, ncols=3) 36 | fig.set_size_inches(20,15) 37 | 38 | x=np.linspace(10000, 30000, 99) 39 | 40 | y_max=np.max([np.max(density1(x)),np.max(density2(x)), 41 | np.max(density3(x)),np.max(density4(x)), 42 | np.max(density5(x)),np.max(density6(x)), 43 | np.max(density7(x)),np.max(density8(x)), 44 | np.max(density9(x))]) 45 | 46 | 47 | 48 | ax[0,0].plot(x, density1(x),linewidth=3,label='Gaussian kernel') 49 | ax[0,0].axvline(x=n_real_value.iloc[0,0],color='r',linewidth=3) 50 | ax[0,0].set_ylim(0,y_max) 51 | ax[0,0].set_title('2014-12-22') 52 | 53 | ax[0,1].plot(x, density2(x),linewidth=3,label='Gaussian kernel') 54 | ax[0,1].axvline(x=n_real_value.iloc[1,0],color='r',linewidth=3) 55 | ax[0,1].set_ylim(0,y_max) 56 | ax[0,1].set_title('2014-12-23') 57 | 58 | ax[0,2].plot(x, density3(x),linewidth=3,label='Gaussian kernel') 59 | ax[0,2].axvline(x=n_real_value.iloc[2,0],color='r',linewidth=3) 60 | ax[0,2].set_ylim(0,y_max) 61 | ax[0,2].set_title('2014-12-24') 62 | 63 | ax[1,0].plot(x, density4(x),linewidth=3,label='Gaussian kernel') 64 | ax[1,0].axvline(x=n_real_value.iloc[3,0],color='r',linewidth=3) 65 | ax[1,0].set_ylim(0,y_max) 66 | ax[1,0].set_title('2014-12-25') 67 | 68 | ax[1,1].plot(x, density5(x),linewidth=3,label='Gaussian kernel') 69 | ax[1,1].axvline(x=n_real_value.iloc[4,0],color='r',linewidth=3) 70 | ax[1,1].set_ylim(0,y_max) 71 | ax[1,1].set_title('2014-12-26') 72 | 73 | ax[1,2].plot(x, density6(x),linewidth=3,label='Gaussian kernel') 74 | ax[1,2].axvline(x=n_real_value.iloc[5,0],color='r',linewidth=3) 75 | ax[1,2].set_ylim(0,y_max) 76 | ax[1,2].set_title('2014-12-27') 77 | 78 | ax[2,0].plot(x, density7(x),linewidth=3,label='Gaussian kernel') 79 | ax[2,0].axvline(x=n_real_value.iloc[6,0],color='r',linewidth=3) 80 | ax[2,0].set_ylim(0,y_max) 81 | ax[2,0].set_title('2014-12-28') 82 | 83 | ax[2,1].plot(x, density8(x),linewidth=3,label='Gaussian kernel') 84 | ax[2,1].axvline(x=n_real_value.iloc[7,0],color='r',linewidth=3) 85 | ax[2,1].set_ylim(0,y_max) 86 | ax[2,1].set_title('2014-12-29') 87 | 88 | ax[2,2].plot(x, density9(x),linewidth=3,label='Gaussian kernel') 89 | ax[2,2].axvline(x=n_real_value.iloc[8,0],color='r',linewidth=3) 90 | ax[2,2].set_ylim(0,y_max) 91 | ax[2,2].set_title('2014-12-30') 92 | 93 | ax[0,0].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 94 | ax[0,1].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 95 | ax[0,2].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 96 | ax[1,0].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 97 | ax[1,1].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 98 | ax[1,2].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 99 | ax[2,0].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 100 | ax[2,1].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 101 | ax[2,2].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 102 | 103 | ax[0,0].set_xlabel('Electricity consumption(KWh)') 104 | ax[0,0].set_ylabel('Probability density') 105 | ax[0,0].legend(loc='upper right') 106 | 107 | ax[0,1].set_xlabel('Electricity consumption(KWh)') 108 | ax[0,1].set_ylabel('Probability density') 109 | ax[0,1].legend(loc='upper right') 110 | 111 | ax[0,2].set_xlabel('Electricity consumption(KWh)') 112 | 
ax[0,2].set_ylabel('Probability density') 113 | ax[0,2].legend(loc='upper right') 114 | 115 | ax[1,0].set_xlabel('Electricity consumption(KWh)') 116 | ax[1,0].set_ylabel('Probability density') 117 | ax[1,0].legend(loc='upper right') 118 | 119 | ax[1,1].set_xlabel('Electricity consumption(KWh)') 120 | ax[1,1].set_ylabel('Probability density') 121 | ax[1,1].legend(loc='upper right') 122 | 123 | ax[1,2].set_xlabel('Electricity consumption(KWh)') 124 | ax[1,2].set_ylabel('Probability density') 125 | ax[1,2].legend(loc='upper right') 126 | 127 | ax[2,0].set_xlabel('Electricity consumption(KWh)') 128 | ax[2,0].set_ylabel('Probability density') 129 | ax[2,0].legend(loc='upper right') 130 | 131 | ax[2,1].set_xlabel('Electricity consumption(KWh)') 132 | ax[2,1].set_ylabel('Probability density') 133 | ax[2,1].legend(loc='upper right') 134 | 135 | ax[2,2].set_xlabel('Electricity consumption(KWh)') 136 | ax[2,2].set_ylabel('Probability density') 137 | ax[2,2].legend(loc='upper right') 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | ################################################################################ 156 | ##############苏州########################## 157 | dat_density=pd.read_csv('suzhou_quantile_m.csv',index_col=[0]) 158 | dat_real_value=pd.read_excel('suzhou_total.xlsx',index_col=[0]) 159 | dat_density=dat_density.T 160 | n_pre=dat_density.shape[1] 161 | n_real_value=dat_real_value[(365-n_pre):365] 162 | 163 | 164 | density1 = stats.kde.gaussian_kde(dat_density.iloc[:,0].values) 165 | density2 = stats.kde.gaussian_kde(dat_density.iloc[:,1].values) 166 | density3 = stats.kde.gaussian_kde(dat_density.iloc[:,2].values) 167 | density4 = stats.kde.gaussian_kde(dat_density.iloc[:,3].values) 168 | density5 = stats.kde.gaussian_kde(dat_density.iloc[:,4].values) 169 | density6 = stats.kde.gaussian_kde(dat_density.iloc[:,5].values) 170 | density7 = stats.kde.gaussian_kde(dat_density.iloc[:,6].values) 171 | density8 = stats.kde.gaussian_kde(dat_density.iloc[:,7].values) 172 | density9 = stats.kde.gaussian_kde(dat_density.iloc[:,8].values) 173 | 174 | fig, ax = plt.subplots(nrows=3, ncols=3) 175 | fig.set_size_inches(20,15) 176 | 177 | x=np.linspace(10000, 30000, 99) 178 | 179 | ax[0,0].plot(x, density1(x),linewidth=3,label='Gaussian kernel') 180 | ax[0,0].axvline(x=n_real_value.iloc[0,0],color='r',linewidth=3) 181 | ax[0,0].set_title('2014-12-22') 182 | 183 | ax[0,1].plot(x, density2(x),linewidth=3,label='Gaussian kernel') 184 | ax[0,1].axvline(x=n_real_value.iloc[1,0],color='r',linewidth=3) 185 | ax[0,1].set_title('2014-12-23') 186 | 187 | ax[0,2].plot(x, density3(x),linewidth=3,label='Gaussian kernel') 188 | ax[0,2].axvline(x=n_real_value.iloc[2,0],color='r',linewidth=3) 189 | ax[0,2].set_title('2014-12-24') 190 | 191 | ax[1,0].plot(x, density4(x),linewidth=3,label='Gaussian kernel') 192 | ax[1,0].axvline(x=n_real_value.iloc[3,0],color='r',linewidth=3) 193 | ax[1,0].set_title('2014-12-25') 194 | 195 | ax[1,1].plot(x, density5(x),linewidth=3,label='Gaussian kernel') 196 | ax[1,1].axvline(x=n_real_value.iloc[4,0],color='r',linewidth=3) 197 | ax[1,1].set_title('2014-12-26') 198 | 199 | ax[1,2].plot(x, density6(x),linewidth=3,label='Gaussian kernel') 200 | ax[1,2].axvline(x=n_real_value.iloc[5,0],color='r',linewidth=3) 201 | ax[1,2].set_title('2014-12-27') 202 | 203 | ax[2,0].plot(x, density7(x),linewidth=3,label='Gaussian kernel') 204 | ax[2,0].axvline(x=n_real_value.iloc[6,0],color='r',linewidth=3) 205 | ax[2,0].set_title('2014-12-28') 206 | 
207 | ax[2,1].plot(x, density8(x),linewidth=3,label='Gaussian kernel') 208 | ax[2,1].axvline(x=n_real_value.iloc[7,0],color='r',linewidth=3) 209 | ax[2,1].set_title('2014-12-29') 210 | 211 | ax[2,2].plot(x, density9(x),linewidth=3,label='Gaussian kernel') 212 | ax[2,2].axvline(x=n_real_value.iloc[8,0],color='r',linewidth=3) 213 | ax[2,2].set_title('2014-12-30') 214 | 215 | ax[0,0].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 216 | ax[0,1].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 217 | ax[0,2].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 218 | ax[1,0].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 219 | ax[1,1].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 220 | ax[1,2].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 221 | ax[2,0].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 222 | ax[2,1].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 223 | ax[2,2].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 224 | 225 | ax[0,0].set_xlabel('Electricity consumption(KWh)') 226 | ax[0,0].set_ylabel('Probability density') 227 | ax[0,0].legend(loc='upper right') 228 | 229 | ax[0,1].set_xlabel('Electricity consumption(KWh)') 230 | ax[0,1].set_ylabel('Probability density') 231 | ax[0,1].legend(loc='upper right') 232 | 233 | ax[0,2].set_xlabel('Electricity consumption(KWh)') 234 | ax[0,2].set_ylabel('Probability density') 235 | ax[0,2].legend(loc='upper right') 236 | 237 | ax[1,0].set_xlabel('Electricity consumption(KWh)') 238 | ax[1,0].set_ylabel('Probability density') 239 | ax[1,0].legend(loc='upper right') 240 | 241 | ax[1,1].set_xlabel('Electricity consumption(KWh)') 242 | ax[1,1].set_ylabel('Probability density') 243 | ax[1,1].legend(loc='upper right') 244 | 245 | ax[1,2].set_xlabel('Electricity consumption(KWh)') 246 | ax[1,2].set_ylabel('Probability density') 247 | ax[1,2].legend(loc='upper right') 248 | 249 | ax[2,0].set_xlabel('Electricity consumption(KWh)') 250 | ax[2,0].set_ylabel('Probability density') 251 | ax[2,0].legend(loc='upper right') 252 | 253 | ax[2,1].set_xlabel('Electricity consumption(KWh)') 254 | ax[2,1].set_ylabel('Probability density') 255 | ax[2,1].legend(loc='upper right') 256 | 257 | ax[2,2].set_xlabel('Electricity consumption(KWh)') 258 | ax[2,2].set_ylabel('Probability density') 259 | ax[2,2].legend(loc='upper right') 260 | 261 | 262 | 263 | ################################################################################ 264 | ##############连云港########################## 265 | 266 | dat_density=pd.read_csv('lianyungang_quantile_m.csv',index_col=[0]) 267 | dat_real_value=pd.read_excel('lianyungang_total.xlsx',index_col=[0]) 268 | dat_density=dat_density.T 269 | n_pre=dat_density.shape[1] 270 | n_real_value=dat_real_value[(365-n_pre):365] 271 | 272 | 273 | density1 = stats.kde.gaussian_kde(dat_density.iloc[:,0].values) 274 | density2 = stats.kde.gaussian_kde(dat_density.iloc[:,1].values) 275 | density3 = stats.kde.gaussian_kde(dat_density.iloc[:,2].values) 276 | density4 = stats.kde.gaussian_kde(dat_density.iloc[:,3].values) 277 | density5 = stats.kde.gaussian_kde(dat_density.iloc[:,4].values) 278 | density6 = stats.kde.gaussian_kde(dat_density.iloc[:,5].values) 279 | density7 = stats.kde.gaussian_kde(dat_density.iloc[:,6].values) 280 | density8 = stats.kde.gaussian_kde(dat_density.iloc[:,7].values) 281 | density9 = stats.kde.gaussian_kde(dat_density.iloc[:,8].values) 282 | 283 | fig, ax = plt.subplots(nrows=3, ncols=3) 284 | fig.set_size_inches(20,15) 285 | 286 | x=np.linspace(10000, 
30000, 99) 287 | 288 | ax[0,0].plot(x, density1(x),linewidth=3,label='Gaussian kernel') 289 | ax[0,0].axvline(x=n_real_value.iloc[0,0],color='r',linewidth=3) 290 | ax[0,0].set_title('2014-12-22') 291 | 292 | ax[0,1].plot(x, density2(x),linewidth=3,label='Gaussian kernel') 293 | ax[0,1].axvline(x=n_real_value.iloc[1,0],color='r',linewidth=3) 294 | ax[0,1].set_title('2014-12-23') 295 | 296 | ax[0,2].plot(x, density3(x),linewidth=3,label='Gaussian kernel') 297 | ax[0,2].axvline(x=n_real_value.iloc[2,0],color='r',linewidth=3) 298 | ax[0,2].set_title('2014-12-24') 299 | 300 | ax[1,0].plot(x, density4(x),linewidth=3,label='Gaussian kernel') 301 | ax[1,0].axvline(x=n_real_value.iloc[3,0],color='r',linewidth=3) 302 | ax[1,0].set_title('2014-12-25') 303 | 304 | ax[1,1].plot(x, density5(x),linewidth=3,label='Gaussian kernel') 305 | ax[1,1].axvline(x=n_real_value.iloc[4,0],color='r',linewidth=3) 306 | ax[1,1].set_title('2014-12-26') 307 | 308 | ax[1,2].plot(x, density6(x),linewidth=3,label='Gaussian kernel') 309 | ax[1,2].axvline(x=n_real_value.iloc[5,0],color='r',linewidth=3) 310 | ax[1,2].set_title('2014-12-27') 311 | 312 | ax[2,0].plot(x, density7(x),linewidth=3,label='Gaussian kernel') 313 | ax[2,0].axvline(x=n_real_value.iloc[6,0],color='r',linewidth=3) 314 | ax[2,0].set_title('2014-12-28') 315 | 316 | ax[2,1].plot(x, density8(x),linewidth=3,label='Gaussian kernel') 317 | ax[2,1].axvline(x=n_real_value.iloc[7,0],color='r',linewidth=3) 318 | ax[2,1].set_title('2014-12-29') 319 | 320 | ax[2,2].plot(x, density9(x),linewidth=3,label='Gaussian kernel') 321 | ax[2,2].axvline(x=n_real_value.iloc[8,0],color='r',linewidth=3) 322 | ax[2,2].set_title('2014-12-30') 323 | 324 | ax[0,0].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 325 | ax[0,1].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 326 | ax[0,2].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 327 | ax[1,0].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 328 | ax[1,1].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 329 | ax[1,2].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 330 | ax[2,0].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 331 | ax[2,1].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 332 | ax[2,2].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 333 | 334 | ax[0,0].set_xlabel('Electricity consumption(KWh)') 335 | ax[0,0].set_ylabel('Probability density') 336 | ax[0,0].legend(loc='upper right') 337 | 338 | ax[0,1].set_xlabel('Electricity consumption(KWh)') 339 | ax[0,1].set_ylabel('Probability density') 340 | ax[0,1].legend(loc='upper right') 341 | 342 | ax[0,2].set_xlabel('Electricity consumption(KWh)') 343 | ax[0,2].set_ylabel('Probability density') 344 | ax[0,2].legend(loc='upper right') 345 | 346 | ax[1,0].set_xlabel('Electricity consumption(KWh)') 347 | ax[1,0].set_ylabel('Probability density') 348 | ax[1,0].legend(loc='upper right') 349 | 350 | ax[1,1].set_xlabel('Electricity consumption(KWh)') 351 | ax[1,1].set_ylabel('Probability density') 352 | ax[1,1].legend(loc='upper right') 353 | 354 | ax[1,2].set_xlabel('Electricity consumption(KWh)') 355 | ax[1,2].set_ylabel('Probability density') 356 | ax[1,2].legend(loc='upper right') 357 | 358 | ax[2,0].set_xlabel('Electricity consumption(KWh)') 359 | ax[2,0].set_ylabel('Probability density') 360 | ax[2,0].legend(loc='upper right') 361 | 362 | ax[2,1].set_xlabel('Electricity consumption(KWh)') 363 | ax[2,1].set_ylabel('Probability density') 364 | ax[2,1].legend(loc='upper right') 365 | 366 | 
ax[2,2].set_xlabel('Electricity consumption(KWh)') 367 | ax[2,2].set_ylabel('Probability density') 368 | ax[2,2].legend(loc='upper right') 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | --------------------------------------------------------------------------------