├── README.md ├── deep_learning_feature.R ├── heat_map_nanjing.py ├── month_barplot.py ├── nanjing_week_month.png ├── predict_plot.py ├── quantile_pre.R ├── taibao ├── .RData ├── EX-04-01.R ├── EX-04-02.R ├── IPAD.xlsx ├── a.txt ├── prog-00.R ├── prog-01.R ├── prog-01old.R ├── prog-02.R ├── sub-01.R └── sub-02.R ├── xingqi.py └── 最新预测.py /README.md: -------------------------------------------------------------------------------- 1 | # This is the code for my third paper 2 | -------------------------------------------------------------------------------- /deep_learning_feature.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | setwd("D:/Rdata/Third_paper/third_paper_data") 4 | 5 | library(readxl) 6 | library(h2o) 7 | library(dplyr) 8 | 9 | h2o.init(nthreads = -1) 10 | 11 | rm(list = ls()) 12 | 13 | set.seed(12345) 14 | 15 | dat<-read_excel("suzhou_total_feature.xlsx") 16 | 17 | dim(dat) 18 | 19 | #dat<-h2o.importFile(path = "total_feature.xlsx",destination_frame = "dat") 20 | #dat1<-as.data.frame(dat) 21 | dat1<-na.omit(dat) 22 | 23 | dat2<-dat1[,2:58] 24 | 25 | names(dat2) 26 | 27 | month<-as.factor(dat1$month) 28 | 29 | day_of_w<-as.factor(dat1$day_of_w) 30 | 31 | air<-as.factor(dat1$air) 32 | 33 | rain<-as.factor(dat1$rain) 34 | 35 | 36 | levels(air)[levels(air)=="优"]<-"6" 37 | levels(air)[levels(air)=="良"]<-"5" 38 | levels(air)[levels(air)=="轻度污染"]<-"4" 39 | levels(air)[levels(air)=="中度污染"]<-"3" 40 | levels(air)[levels(air)=="重度污染"]<-"2" 41 | levels(air)[levels(air)=="严重污染"]<-"1" 42 | 43 | air<-as.numeric(air) 44 | 45 | levels(month)[levels(month)=="January"]<-"1" 46 | levels(month)[levels(month)=="February"]<-"2" 47 | levels(month)[levels(month)=="March"]<-"3" 48 | 49 | levels(month)[levels(month)=="April"]<-"4" 50 | levels(month)[levels(month)=="May"]<-"5" 51 | 52 | levels(month)[levels(month)=="June"]<-"6" 53 | levels(month)[levels(month)=="July"]<-"7" 54 | levels(month)[levels(month)=="August"]<-"8" 55 | levels(month)[levels(month)=="September"]<-"9" 56 | levels(month)[levels(month)=="October"]<-"10" 57 | levels(month)[levels(month)=="November"]<-"11" 58 | levels(month)[levels(month)=="December"]<-"12" 59 | 60 | levels(month) 61 | 62 | levels(day_of_w)[levels(day_of_w)=="Monday"]<-"1" 63 | levels(day_of_w)[levels(day_of_w)=="Tuesday"]<-"2" 64 | levels(day_of_w)[levels(day_of_w)=="Wednesday"]<-"3" 65 | levels(day_of_w)[levels(day_of_w)=="Thursday"]<-"4" 66 | levels(day_of_w)[levels(day_of_w)=="Friday"]<-"5" 67 | levels(day_of_w)[levels(day_of_w)=="Saturday"]<-"6" 68 | levels(day_of_w)[levels(day_of_w)=="Sunday"]<-"7" 69 | 70 | levels(day_of_w) 71 | 72 | dat2$month<-month 73 | dat2$air<-air 74 | dat2$rain<-rain 75 | dat2$day_of_w<-day_of_w 76 | 77 | 78 | 79 | 80 | y <- "E_demand" #response column: digits 0-9 81 | x <- setdiff(names(dat2), y) #vector of predictor column names 82 | 83 | dat3<-as.h2o(dat2,destination_frame = "dat3") 84 | 85 | dat4<-dat3[1:348,] 86 | 87 | splits <- h2o.splitFrame(dat4, ratios = 0.8, seed = 12345) 88 | 89 | 90 | 91 | 92 | # first part of the data, without labels for unsupervised learning 93 | train <- splits[[1]] 94 | 95 | # second part of the data, with labels for supervised learning 96 | valid <- splits[[2]] 97 | 98 | test<-dat3[349:358,] 99 | 100 | #dim(train_supervised) 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | DL_pre_time_1<-Sys.time() 113 | dl <- h2o.deeplearning( 114 | model_id="dl", 115 | training_frame=train, 116 | validation_frame=valid, ## validation dataset: used for 
scoring and early stopping 117 | x=x, 118 | y=y, 119 | hidden = c(200,200,200), 120 | #hidden=c(200,200,200), ## default: 2 hidden layers with 200 neurons each 121 | epochs=1000, 122 | variable_importances=T , 123 | seed = 12345, 124 | l1=0.001, 125 | l2=0.001 126 | 127 | ## not enabled by default 128 | ) 129 | DL_pre_time_2<-Sys.time() 130 | DL_run_time<-(DL_pre_time_2-DL_pre_time_1) 131 | 132 | h2o.varimp_plot(dl,num_of_features = 15) 133 | 134 | 135 | 136 | 137 | 138 | #plot(dl) 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | #####random forset 148 | ## run our first predictive model 149 | RF_pre_time_1<-Sys.time() 150 | rf <- h2o.randomForest( ## h2o.randomForest function 151 | training_frame = train, ## the H2O frame for training 152 | validation_frame = valid, ## the H2O frame for validation (not required) 153 | x=x, ## the predictor columns, by column index 154 | y=y, ## the target index (what we are predicting) 155 | model_id = "rf_covType_v1", ## name the model in H2O 156 | ## not required, but helps use Flow 157 | ntrees = 200, ## use a maximum of 200 trees to create the 158 | ## random forest model. The default is 50. 159 | ## I have increased it because I will let 160 | ## the early stopping criteria decide when 161 | ## the random forest is sufficiently accurate 162 | stopping_rounds = 2, ## Stop fitting new trees when the 2-tree 163 | ## average is within 0.001 (default) of 164 | ## the prior two 2-tree averages. 165 | ## Can be thought of as a convergence setting 166 | score_each_iteration = T, ## Predict against training and validation for 167 | ## each tree. Default will skip several. 168 | seed = 12345) ## Set the random seed so that this can be 169 | 170 | RF_pre_time_2<-Sys.time() 171 | RF_run_time<-(RF_pre_time_2-RF_pre_time_1) 172 | 173 | 174 | GBM_pre_time_1<-Sys.time() 175 | gbm <- h2o.gbm( 176 | training_frame = train, ## 177 | validation_frame = valid, ## 178 | x=x, ## 179 | y=y, ## 180 | ntrees = 200, ## decrease the trees, mostly to allow for run time 181 | ## (from 50) 182 | learn_rate = 0.2, 183 | ## increase the learning rate (from 0.1) 184 | max_depth = 10, ## increase the depth (from 5) 185 | stopping_rounds = 2, ## 186 | stopping_tolerance = 0.01, ## 187 | score_each_iteration = T, ## 188 | model_id = "gbm_covType3", ## 189 | seed = 12345) ## 190 | 191 | GBM_pre_time_2<-Sys.time() 192 | GBM_run_time<-(GBM_pre_time_2-GBM_pre_time_1) 193 | 194 | 195 | plot(gbm) 196 | 197 | pre1<-h2o.predict(object = gbm,newdata = test) 198 | pre2<-h2o.predict(object = dl,newdata = test) 199 | pre3<-h2o.predict(object = rf,newdata = test) 200 | 201 | pre1_d<-as.data.frame(pre1$predict) 202 | pre2_d<-as.data.frame(pre2$predict) 203 | pre3_d<-as.data.frame(pre3$predict) 204 | 205 | 206 | pre1_3<-cbind(pre1_d,pre2_d,pre3_d,as.data.frame(test$E_demand)) 207 | 208 | names(pre1_3)<-c("GBM","DL","RF","E_demand") 209 | 210 | GBM_MAE<-mean(abs(pre1_3$GBM-pre1_3$E_demand)) 211 | DL_MAE<-mean(abs(pre1_3$DL-pre1_3$E_demand)) 212 | RF_MAE<-mean(abs(pre1_3$RF-pre1_3$E_demand)) 213 | 214 | GBM_MAE 215 | DL_MAE 216 | RF_MAE 217 | 218 | 219 | 220 | GBM_MAPE<-mean(abs(pre1_3$GBM-pre1_3$E_demand)/pre1_3$E_demand) 221 | DL_MAPE<-mean(abs(pre1_3$DL-pre1_3$E_demand)/pre1_3$E_demand) 222 | RF_MAPE<-mean(abs(pre1_3$RF-pre1_3$E_demand)/pre1_3$E_demand) 223 | 224 | GBM_MAPE 225 | DL_MAPE 226 | RF_MAPE 227 | 228 | GBM_MRPE<-max(abs(pre1_3$GBM-pre1_3$E_demand)/pre1_3$E_demand) 229 | DL_MRPE<-max(abs(pre1_3$DL-pre1_3$E_demand)/pre1_3$E_demand) 230 | 
RF_MRPE<-max(abs(pre1_3$RF-pre1_3$E_demand)/pre1_3$E_demand) 231 | 232 | GBM_MRPE 233 | DL_MRPE 234 | RF_MRPE 235 | 236 | 237 | 238 | 239 | 240 | 241 | lianyungang_var_im_dl<-as.data.frame(h2o.varimp(dl)) 242 | 243 | lianyungang_var_im_rf<-as.data.frame(h2o.varimp(rf)) 244 | 245 | lianyungang_var_im_gbm<-as.data.frame(h2o.varimp(gbm)) 246 | 247 | write.csv(lianyungang_var_im_dl,file='SZ_var_im_dl.csv') 248 | write.csv(lianyungang_var_im_rf,file='SZ_var_im_rf.csv') 249 | write.csv(lianyungang_var_im_gbm,file='SZ_var_im_gbm.csv') 250 | 251 | 252 | 253 | predict_lianyungang_result<-data.frame(GBM_MAE,DL_MAE,RF_MAE, 254 | GBM_MAPE,DL_MAPE,RF_MAPE, 255 | GBM_MRPE,DL_MRPE,RF_MRPE, 256 | DL_run_time, 257 | RF_run_time, 258 | GBM_run_time) 259 | 260 | 261 | 262 | write.csv(predict_lianyungang_result,file="predict25_lianyungang_result.csv") 263 | write.csv(pre1_3,file="predict25_lianyungang.csv") 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | ################################################################## 273 | 274 | 275 | 276 | pred_all<-list() 277 | taus<-seq(from=0.01,to=0.99,length=99) 278 | quantile_pre_time_1<-Sys.time() 279 | for(i in 1:10) 280 | { 281 | m1 <- h2o.deeplearning( 282 | model_id="dl_model_first", 283 | training_frame=train, 284 | validation_frame=valid, ## validation dataset: used for scoring and early stopping 285 | x=x, 286 | y=y, 287 | #activation="Rectifier", ## default 288 | hidden=c(200,200,200), ## default: 2 hidden layers with 200 neurons each 289 | epochs=500, 290 | variable_importances=T , 291 | distribution = 'quantile', 292 | quantile_alpha = taus[i], 293 | seed = 12345 294 | ## not enabled by default 295 | ) 296 | pred <- h2o.predict(m1, newdata = test) 297 | pred <-as.data.frame(pred$predict) 298 | names(pred)<-paste("quantile",taus,sep = "_")[i] 299 | pred_all[[i]]<-pred 300 | } 301 | quantile_pre_time_2<-Sys.time() 302 | 303 | quantile_run_time_2<-(quantile_pre_time_2-quantile_pre_time_1) 304 | 305 | 306 | 307 | 308 | data<-as.data.frame(pred_all) 309 | 310 | write.csv(data,file="quantile_m.csv") 311 | 312 | data1<-as.matrix(data[11,]) 313 | d<-test$E_demand[11] 314 | data_pdf <- akj(data1, data1) 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | -------------------------------------------------------------------------------- /heat_map_nanjing.py: -------------------------------------------------------------------------------- 1 | 2 | import matplotlib.pyplot as plt 3 | import pandas as pd 4 | import numpy as np 5 | %pylab inline 6 | 7 | week = pd.read_excel('dat_month1_12_T.xlsx', index_col=0) 8 | 9 | # Normalize data columns 10 | #nba_norm = (nba - nba.mean()) / (nba.max() - nba.min()) 11 | 12 | # Sort data according to Points, lowest to highest 13 | # This was just a design choice made by Yau 14 | # inplace=False (default) ->thanks SO user d1337 15 | #nba_sort = nba_norm.sort('PTS', ascending=True) 16 | 17 | #nba_sort['PTS'].head(10) 18 | 19 | # Plot it out 20 | fig, ax = plt.subplots() 21 | heatmap = ax.pcolor(week, cmap=plt.cm.Blues) 22 | 23 | # Format 24 | fig = plt.gcf() 25 | fig.set_size_inches(8, 4) 26 | 27 | # turn off the frame 28 | ax.set_frame_on(False) 29 | 30 | # put the major ticks at the middle of each cell 31 | ax.set_yticks(np.arange(week.shape[0]) + 0.5, minor=False) 32 | ax.set_xticks(np.arange(week.shape[1]) + 0.5, minor=False) 33 | 34 | # want a more natural, table-like display 35 | ax.invert_yaxis() 36 | ax.xaxis.tick_top() 37 | 38 | # Set the labels 39 | 40 | # label 
source:https://en.wikipedia.org/wiki/Basketball_statistics 41 | labels = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'] 42 | # note I could have used nba_sort.columns but made "labels" instead 43 | ax.set_xticklabels(labels, minor=False) 44 | ax.set_yticklabels(week.index, minor=False) 45 | 46 | # rotate the 47 | #plt.xticks(rotation=90) 48 | 49 | ax.grid(False) 50 | 51 | # Turn off all the ticks 52 | ax = plt.gca() 53 | 54 | for t in ax.xaxis.get_major_ticks(): 55 | t.tick1On = False 56 | t.tick2On = False 57 | for t in ax.yaxis.get_major_ticks(): 58 | t.tick1On = False 59 | t.tick2On = False 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /month_barplot.py: -------------------------------------------------------------------------------- 1 | 2 | import matplotlib.pyplot as plt 3 | import pandas as pd 4 | import numpy as np 5 | import matplotlib.ticker as ticker 6 | 7 | import os 8 | 9 | os.chdir("D:/Rdata/Third_paper/third_paper_data/") 10 | dat_nanjing=pd.read_excel("nanjing_month_week.xlsx",index_col=[0]) 11 | 12 | month1=dat_nanjing['20140101':'20140131']['E_demand'].sum() 13 | month2=dat_nanjing['20140201':'20140228']['E_demand'].sum() 14 | month3=dat_nanjing['20140301':'20140331']['E_demand'].sum() 15 | month4=dat_nanjing['20140401':'20140430']['E_demand'].sum() 16 | month5=dat_nanjing['20140501':'20140531']['E_demand'].sum() 17 | month6=dat_nanjing['20140601':'20140630']['E_demand'].sum() 18 | month7=dat_nanjing['20140701':'20140731']['E_demand'].sum() 19 | month8=dat_nanjing['20140801':'20140831']['E_demand'].sum() 20 | month9=dat_nanjing['20140901':'20140930']['E_demand'].sum() 21 | month10=dat_nanjing['20141001':'20141031']['E_demand'].sum() 22 | month11=dat_nanjing['20141101':'20141130']['E_demand'].sum() 23 | month12=dat_nanjing['20141201':'20141231']['E_demand'].sum() 24 | 25 | y=[month1,month2,month3,month4,month5,month6,month7,month8,month9,month10,month11,month12] 26 | 27 | N=len(y) 28 | 29 | x=range(N) 30 | #name_list = ('January', 'February', 'March', 'April', 31 | # 'May','June','July','August','September', 32 | #'October','November','December') 33 | 34 | 35 | name_list = ('Jan', 'Feb', 'Mar', 'Apr', 36 | 'May','Jun','Jul','Aug','Sep', 37 | 'Oct','Nov','Dec') 38 | 39 | pos_list = np.arange(len(name_list)) 40 | ax = plt.axes() 41 | ax.xaxis.set_major_locator(ticker.FixedLocator((pos_list))) 42 | ax.xaxis.set_major_formatter(ticker.FixedFormatter((name_list))) 43 | ax.spines['right'].set_visible(False) 44 | ax.spines['top'].set_visible(False) 45 | ax.yaxis.set_ticks_position('left') 46 | ax.xaxis.set_ticks_position('bottom') 47 | plt.bar(x,y,width=0.7,align='center',color='darkorange',edgecolor='darkorange') 48 | plt.ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 49 | plt.xlabel('Month') 50 | plt.ylabel('Electricity consumption(KWh)') 51 | #plt.xticks([0.5,1.5,2.5,3.5,4.5,5.5,6.5,7.5,8.5,9.5,10.5,11.5]) 52 | plt.title('Monthly electricity consumption in Nanjing') 53 | plt.bar 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /nanjing_week_month.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaoguozhi/paper_code/eec3042cc81e9706f5a64787f6c869f9bd1ee13c/nanjing_week_month.png -------------------------------------------------------------------------------- 
/predict_plot.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import pandas as pd 3 | import numpy as np 4 | import matplotlib.dates as mdates 5 | from sklearn.neighbors import KernelDensity 6 | from scipy.stats import norm 7 | from scipy import stats 8 | import os 9 | 10 | os.chdir("D:/Rdata/Third_paper/third_paper_data/") 11 | 12 | pre_dat=pd.read_excel('nanjing_pre.xlsx',index_col=[0]) 13 | 14 | pre_dat_1=pre_dat[356:365] 15 | #pre_dat_1=pre_dat 16 | #pre_dat_1.plot(kind='line',ylim=(10000,30000),) 17 | 18 | 19 | fig=plt.figure(figsize=(10,5)) 20 | ax1=fig.add_subplot(111) 21 | #plt.figure(figsize=(10,5)) 22 | ax1.plot(pre_dat_1['E_demand'],label='True value',linewidth=3) 23 | ax1.plot(pre_dat_1['DL'],label='Deep learning',linewidth=2,linestyle='dashed',marker='o') 24 | ax1.plot(pre_dat_1['GBM'],label='Gradient boosting',linewidth=2,linestyle='dashed',marker='d') 25 | ax1.plot(pre_dat_1['RF'],label='Random forest',linewidth=2,linestyle='dashed',marker='p') 26 | ax1.set_ylim(10000,30000) 27 | plt.xlabel('Date(day)') 28 | plt.ylabel('Electricity consumption(KWh)') 29 | plt.ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 30 | ax1.xaxis.set_major_locator(mdates.DayLocator(interval=1)) 31 | ax1.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d')) 32 | #plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45) 33 | ax1.legend(loc='upper left') 34 | 35 | 36 | 37 | 38 | ############################################################ 39 | #####plot for predict 40 | fig=plt.figure(figsize=(10,5)) 41 | ax1=fig.add_subplot(111) 42 | #plt.figure(figsize=(10,5)) 43 | ax1.plot(pre_dat_1['E_demand'],label='True value',linewidth=3) 44 | ax1.plot(pre_dat_1['DL'],label='Deep learning',linewidth=2,linestyle='dashed') 45 | ax1.plot(pre_dat_1['GBM'],label='Gradient boosting',linewidth=2,linestyle='dashed') 46 | ax1.plot(pre_dat_1['RF'],label='Random forest',linewidth=2,linestyle='dashed') 47 | ax1.axvline(pre_dat_1.index[335],color='r',linewidth=3) 48 | ax1.set_ylim(5000,40000) 49 | plt.xlabel('Date(day)') 50 | plt.ylabel('Electricity consumption(KWh)') 51 | plt.ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 52 | ax1.xaxis.set_major_locator(mdates.DayLocator(interval=16)) 53 | ax1.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d')) 54 | plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45) 55 | ax1.legend(loc='upper left') 56 | 57 | 58 | 59 | ################################################################ 60 | #######plot for real value for Lianyungang 61 | 62 | real_dat=pd.read_excel('lianyungang_total.xlsx',index_col=[0]) 63 | 64 | fig=plt.figure(figsize=(10,5)) 65 | ax1=fig.add_subplot(111) 66 | #plt.figure(figsize=(10,5)) 67 | ax1.plot(real_dat['value'],label='True value',linewidth=3) 68 | ax1.set_ylim(5000,40000) 69 | plt.xlabel('Date(day)') 70 | plt.ylabel('Electricity consumption(KWh)') 71 | plt.title('Electricity consumption in Lianyungang in 2014') 72 | plt.ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 73 | ax1.xaxis.set_major_locator(mdates.DayLocator(interval=16)) 74 | ax1.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d')) 75 | plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45) 76 | ax1.legend(loc='upper left') 77 | 78 | 79 | ################################################################ 80 | #######plot for real value for Suzhou 81 | 82 | real_dat=pd.read_excel('suzhou_total.xlsx',index_col=[0]) 83 | 84 | fig=plt.figure(figsize=(10,5)) 85 | ax1=fig.add_subplot(111) 86 | 
#plt.figure(figsize=(10,5)) 87 | ax1.plot(real_dat['value'],label='True value',linewidth=3) 88 | ax1.set_ylim(5000,40000) 89 | plt.xlabel('Date(day)') 90 | plt.ylabel('Electricity consumption(KWh)') 91 | plt.title('Electricity consumption in Suzhou in 2014') 92 | plt.ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 93 | ax1.xaxis.set_major_locator(mdates.DayLocator(interval=16)) 94 | ax1.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d')) 95 | plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45) 96 | ax1.legend(loc='upper left') 97 | 98 | ################################################################ 99 | #######plot for real value for Nanjing 100 | 101 | real_dat=pd.read_excel('nanjing_total.xlsx',index_col=[0]) 102 | 103 | fig=plt.figure(figsize=(10,5)) 104 | ax1=fig.add_subplot(111) 105 | #plt.figure(figsize=(10,5)) 106 | ax1.plot(real_dat['value'],label='True value',linewidth=3) 107 | ax1.set_ylim(5000,40000) 108 | plt.xlabel('Date(day)') 109 | plt.ylabel('Electricity consumption(KWh)') 110 | plt.title('Electricity consumption in Nanjing in 2014') 111 | plt.ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 112 | ax1.xaxis.set_major_locator(mdates.DayLocator(interval=16)) 113 | ax1.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d')) 114 | plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45) 115 | ax1.legend(loc='upper left') 116 | 117 | 118 | 119 | 120 | ########################################################################### 121 | #############wendu and power demand in Nanjing 122 | 123 | total_dat=pd.read_excel('nanjing_total_feature.xlsx',index_col=[0]) 124 | total_dat['average_person']=total_dat['E_demand']/3000 125 | 126 | total_dat1=total_dat['20140601':'20140831'] 127 | 128 | fig=plt.figure(figsize=(10,5)) 129 | ax1=fig.add_subplot(111) 130 | 131 | ax1.plot(total_dat1['average_person'],label='Electricity consumption',linewidth=3) 132 | ax1.plot(total_dat1['wdh_var1(t)'],label='Temperature',linewidth=3,color='m') 133 | ax1.axvline(x=total_dat1.index[64],color='black',linewidth=1,linestyle='dashed',label='Local high temperature') 134 | ax1.axvline(x=total_dat1.index[39],color='black',linewidth=1,linestyle='dashed') 135 | ax1.axvline(x=total_dat1.index[50],color='black',linewidth=1,linestyle='dashed') 136 | ax1.axvline(x=total_dat1.index[59],color='black',linewidth=1,linestyle='dashed') 137 | 138 | ax1.set_ylim(0,60) 139 | plt.xlabel('Date(day)') 140 | plt.ylabel('Electricity consumption(KWh) and temperature') 141 | plt.title('Electricity consumption and temperature in Nanjing') 142 | #plt.ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 143 | ax1.xaxis.set_major_locator(mdates.DayLocator(interval=4)) 144 | ax1.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d')) 145 | plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45) 146 | ax1.legend(loc='upper left') 147 | 148 | 149 | ########################################################################### 150 | #############wendu and power demand in Suzhou 151 | 152 | total_dat=pd.read_excel('suzhou_total_feature.xlsx',index_col=[0]) 153 | total_dat['average_person']=total_dat['E_demand']/3000 154 | 155 | total_dat1=total_dat['20140601':'20140831'] 156 | 157 | fig=plt.figure(figsize=(10,5)) 158 | ax1=fig.add_subplot(111) 159 | 160 | ax1.plot(total_dat1['average_person'],label='Electricity consumption',linewidth=3) 161 | ax1.plot(total_dat1['wdh_var1(t)'],label='Temperature',linewidth=3,color='m') 162 | ax1.axvline(x=total_dat1.index[64],color='black',linewidth=1,linestyle='dashed',label='Local 
high temperature') 163 | ax1.axvline(x=total_dat1.index[39],color='black',linewidth=1,linestyle='dashed') 164 | ax1.axvline(x=total_dat1.index[49],color='black',linewidth=1,linestyle='dashed') 165 | ax1.axvline(x=total_dat1.index[59],color='black',linewidth=1,linestyle='dashed') 166 | ax1.axvline(x=total_dat1.index[84],color='black',linewidth=1,linestyle='dashed') 167 | 168 | 169 | ax1.set_ylim(0,60) 170 | plt.xlabel('Date(day)') 171 | plt.ylabel('Electricity consumption(KWh) and temperature') 172 | plt.title('Electricity consumption and temperature in Suzhou') 173 | #plt.ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 174 | ax1.xaxis.set_major_locator(mdates.DayLocator(interval=4)) 175 | ax1.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d')) 176 | plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45) 177 | ax1.legend(loc='upper left') 178 | 179 | 180 | 181 | ########################################################################### 182 | #############wendu and power demand in Lianyungang 183 | 184 | total_dat=pd.read_excel('lianyungang_total_feature.xlsx',index_col=[0]) 185 | total_dat['average_person']=total_dat['E_demand']/3000 186 | 187 | total_dat1=total_dat['20140601':'20140831'] 188 | 189 | fig=plt.figure(figsize=(10,5)) 190 | ax1=fig.add_subplot(111) 191 | 192 | ax1.plot(total_dat1['average_person'],label='Electricity consumption',linewidth=3) 193 | ax1.plot(total_dat1['wdh_var1(t)'],label='Temperature',linewidth=3,color='m') 194 | ax1.axvline(x=total_dat1.index[64],color='black',linewidth=1,linestyle='dashed',label='Local high temperature') 195 | ax1.axvline(x=total_dat1.index[29],color='black',linewidth=1,linestyle='dashed') 196 | ax1.axvline(x=total_dat1.index[50],color='black',linewidth=1,linestyle='dashed') 197 | ax1.axvline(x=total_dat1.index[82],color='black',linewidth=1,linestyle='dashed') 198 | 199 | 200 | ax1.set_ylim(0,60) 201 | plt.xlabel('Date(day)') 202 | plt.ylabel('Electricity consumption(KWh) and temperature') 203 | plt.title('Electricity consumption and temperature in Lianyungang') 204 | #plt.ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 205 | ax1.xaxis.set_major_locator(mdates.DayLocator(interval=4)) 206 | ax1.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d')) 207 | plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45) 208 | ax1.legend(loc='upper left') 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | ################################################################## 223 | ####南京 224 | 225 | dat_density=pd.read_csv('nanjing_quantile_m.csv',index_col=[0]) 226 | 227 | dat_density=dat_density.T 228 | 229 | density1 = stats.kde.gaussian_kde(dat_density.iloc[:,0].values) 230 | density2 = stats.kde.gaussian_kde(dat_density.iloc[:,1].values) 231 | density3 = stats.kde.gaussian_kde(dat_density.iloc[:,2].values) 232 | density4 = stats.kde.gaussian_kde(dat_density.iloc[:,3].values) 233 | density5 = stats.kde.gaussian_kde(dat_density.iloc[:,4].values) 234 | density6 = stats.kde.gaussian_kde(dat_density.iloc[:,5].values) 235 | density7 = stats.kde.gaussian_kde(dat_density.iloc[:,6].values) 236 | density8 = stats.kde.gaussian_kde(dat_density.iloc[:,7].values) 237 | density9 = stats.kde.gaussian_kde(dat_density.iloc[:,8].values) 238 | 239 | fig, ax = plt.subplots(nrows=3, ncols=3) 240 | fig.set_size_inches(20,15) 241 | 242 | x=np.linspace(10000, 30000, 99) 243 | 244 | ax[0,0].plot(x, density1(x),linewidth=3,label='Gaussian kernel') 245 | ax[0,0].axvline(x=18404,color='r',linewidth=3) 246 | 
ax[0,0].set_title('2014-12-22') 247 | 248 | ax[0,1].plot(x, density2(x),linewidth=3,label='Gaussian kernel') 249 | ax[0,1].axvline(x=18188,color='r',linewidth=3) 250 | ax[0,1].set_title('2014-12-23') 251 | 252 | ax[0,2].plot(x, density3(x),linewidth=3,label='Gaussian kernel') 253 | ax[0,2].axvline(x=17003,color='r',linewidth=3) 254 | ax[0,2].set_title('2014-12-24') 255 | 256 | ax[1,0].plot(x, density4(x),linewidth=3,label='Gaussian kernel') 257 | ax[1,0].axvline(x=17997,color='r',linewidth=3) 258 | ax[1,0].set_title('2014-12-25') 259 | 260 | ax[1,1].plot(x, density5(x),linewidth=3,label='Gaussian kernel') 261 | ax[1,1].axvline(x=17714,color='r',linewidth=3) 262 | ax[1,1].set_title('2014-12-26') 263 | 264 | ax[1,2].plot(x, density6(x),linewidth=3,label='Gaussian kernel') 265 | ax[1,2].axvline(x=18576,color='r',linewidth=3) 266 | ax[1,2].set_title('2014-12-27') 267 | 268 | ax[2,0].plot(x, density7(x),linewidth=3,label='Gaussian kernel') 269 | ax[2,0].axvline(x=18963,color='r',linewidth=3) 270 | ax[2,0].set_title('2014-12-28') 271 | 272 | ax[2,1].plot(x, density8(x),linewidth=3,label='Gaussian kernel') 273 | ax[2,1].axvline(x=16182,color='r',linewidth=3) 274 | ax[2,1].set_title('2014-12-29') 275 | 276 | ax[2,2].plot(x, density9(x),linewidth=3,label='Gaussian kernel') 277 | ax[2,2].axvline(x=15920,color='r',linewidth=3) 278 | ax[2,2].set_title('2014-12-30') 279 | 280 | ax[0,0].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 281 | ax[0,1].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 282 | ax[0,2].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 283 | ax[1,0].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 284 | ax[1,1].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 285 | ax[1,2].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 286 | ax[2,0].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 287 | ax[2,1].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 288 | ax[2,2].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 289 | 290 | ax[0,0].set_xlabel('Electricity consumption(KWh)') 291 | ax[0,0].set_ylabel('Probability density') 292 | ax[0,0].legend(loc='upper right') 293 | 294 | ax[0,1].set_xlabel('Electricity consumption(KWh)') 295 | ax[0,1].set_ylabel('Probability density') 296 | ax[0,1].legend(loc='upper right') 297 | 298 | ax[0,2].set_xlabel('Electricity consumption(KWh)') 299 | ax[0,2].set_ylabel('Probability density') 300 | ax[0,2].legend(loc='upper right') 301 | 302 | ax[1,0].set_xlabel('Electricity consumption(KWh)') 303 | ax[1,0].set_ylabel('Probability density') 304 | ax[1,0].legend(loc='upper right') 305 | 306 | ax[1,1].set_xlabel('Electricity consumption(KWh)') 307 | ax[1,1].set_ylabel('Probability density') 308 | ax[1,1].legend(loc='upper right') 309 | 310 | ax[1,2].set_xlabel('Electricity consumption(KWh)') 311 | ax[1,2].set_ylabel('Probability density') 312 | ax[1,2].legend(loc='upper right') 313 | 314 | ax[2,0].set_xlabel('Electricity consumption(KWh)') 315 | ax[2,0].set_ylabel('Probability density') 316 | ax[2,0].legend(loc='upper right') 317 | 318 | ax[2,1].set_xlabel('Electricity consumption(KWh)') 319 | ax[2,1].set_ylabel('Probability density') 320 | ax[2,1].legend(loc='upper right') 321 | 322 | ax[2,2].set_xlabel('Electricity consumption(KWh)') 323 | ax[2,2].set_ylabel('Probability density') 324 | ax[2,2].legend(loc='upper right') 325 | 326 | ################################################################################ 327 | ##############苏州########################## 328 | 
dat_density=pd.read_csv('suzhou_quantile_m.csv',index_col=[0]) 329 | 330 | dat_density=dat_density.T 331 | 332 | density1 = stats.kde.gaussian_kde(dat_density.iloc[:,0].values) 333 | density2 = stats.kde.gaussian_kde(dat_density.iloc[:,1].values) 334 | density3 = stats.kde.gaussian_kde(dat_density.iloc[:,2].values) 335 | density4 = stats.kde.gaussian_kde(dat_density.iloc[:,3].values) 336 | density5 = stats.kde.gaussian_kde(dat_density.iloc[:,4].values) 337 | density6 = stats.kde.gaussian_kde(dat_density.iloc[:,5].values) 338 | density7 = stats.kde.gaussian_kde(dat_density.iloc[:,6].values) 339 | density8 = stats.kde.gaussian_kde(dat_density.iloc[:,7].values) 340 | density9 = stats.kde.gaussian_kde(dat_density.iloc[:,8].values) 341 | 342 | fig, ax = plt.subplots(nrows=3, ncols=3) 343 | fig.set_size_inches(20,15) 344 | 345 | x=np.linspace(10000, 30000, 99) 346 | 347 | ax[0,0].plot(x, density1(x),linewidth=3,label='Gaussian kernel') 348 | ax[0,0].axvline(x=20071,color='r',linewidth=3) 349 | ax[0,0].set_title('2014-12-22') 350 | 351 | ax[0,1].plot(x, density2(x),linewidth=3,label='Gaussian kernel') 352 | ax[0,1].axvline(x=19069,color='r',linewidth=3) 353 | ax[0,1].set_title('2014-12-23') 354 | 355 | ax[0,2].plot(x, density3(x),linewidth=3,label='Gaussian kernel') 356 | ax[0,2].axvline(x=18218,color='r',linewidth=3) 357 | ax[0,2].set_title('2014-12-24') 358 | 359 | ax[1,0].plot(x, density4(x),linewidth=3,label='Gaussian kernel') 360 | ax[1,0].axvline(x=18874,color='r',linewidth=3) 361 | ax[1,0].set_title('2014-12-25') 362 | 363 | ax[1,1].plot(x, density5(x),linewidth=3,label='Gaussian kernel') 364 | ax[1,1].axvline(x=18487,color='r',linewidth=3) 365 | ax[1,1].set_title('2014-12-26') 366 | 367 | ax[1,2].plot(x, density6(x),linewidth=3,label='Gaussian kernel') 368 | ax[1,2].axvline(x=19158,color='r',linewidth=3) 369 | ax[1,2].set_title('2014-12-27') 370 | 371 | ax[2,0].plot(x, density7(x),linewidth=3,label='Gaussian kernel') 372 | ax[2,0].axvline(x=20061,color='r',linewidth=3) 373 | ax[2,0].set_title('2014-12-28') 374 | 375 | ax[2,1].plot(x, density8(x),linewidth=3,label='Gaussian kernel') 376 | ax[2,1].axvline(x=17648,color='r',linewidth=3) 377 | ax[2,1].set_title('2014-12-29') 378 | 379 | ax[2,2].plot(x, density9(x),linewidth=3,label='Gaussian kernel') 380 | ax[2,2].axvline(x=17474,color='r',linewidth=3) 381 | ax[2,2].set_title('2014-12-30') 382 | 383 | ax[0,0].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 384 | ax[0,1].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 385 | ax[0,2].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 386 | ax[1,0].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 387 | ax[1,1].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 388 | ax[1,2].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 389 | ax[2,0].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 390 | ax[2,1].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 391 | ax[2,2].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 392 | 393 | ax[0,0].set_xlabel('Electricity consumption(KWh)') 394 | ax[0,0].set_ylabel('Probability density') 395 | ax[0,0].legend(loc='upper right') 396 | 397 | ax[0,1].set_xlabel('Electricity consumption(KWh)') 398 | ax[0,1].set_ylabel('Probability density') 399 | ax[0,1].legend(loc='upper right') 400 | 401 | ax[0,2].set_xlabel('Electricity consumption(KWh)') 402 | ax[0,2].set_ylabel('Probability density') 403 | ax[0,2].legend(loc='upper right') 404 | 405 | ax[1,0].set_xlabel('Electricity consumption(KWh)') 406 | 
ax[1,0].set_ylabel('Probability density') 407 | ax[1,0].legend(loc='upper right') 408 | 409 | ax[1,1].set_xlabel('Electricity consumption(KWh)') 410 | ax[1,1].set_ylabel('Probability density') 411 | ax[1,1].legend(loc='upper right') 412 | 413 | ax[1,2].set_xlabel('Electricity consumption(KWh)') 414 | ax[1,2].set_ylabel('Probability density') 415 | ax[1,2].legend(loc='upper right') 416 | 417 | ax[2,0].set_xlabel('Electricity consumption(KWh)') 418 | ax[2,0].set_ylabel('Probability density') 419 | ax[2,0].legend(loc='upper right') 420 | 421 | ax[2,1].set_xlabel('Electricity consumption(KWh)') 422 | ax[2,1].set_ylabel('Probability density') 423 | ax[2,1].legend(loc='upper right') 424 | 425 | ax[2,2].set_xlabel('Electricity consumption(KWh)') 426 | ax[2,2].set_ylabel('Probability density') 427 | ax[2,2].legend(loc='upper right') 428 | 429 | 430 | 431 | ################################################################################ 432 | ##############连云港########################## 433 | dat_density=pd.read_csv('lianyungang_quantile_m.csv',index_col=[0]) 434 | 435 | dat_density=dat_density.T 436 | 437 | density1 = stats.kde.gaussian_kde(dat_density.iloc[:,0].values) 438 | density2 = stats.kde.gaussian_kde(dat_density.iloc[:,1].values) 439 | density3 = stats.kde.gaussian_kde(dat_density.iloc[:,2].values) 440 | density4 = stats.kde.gaussian_kde(dat_density.iloc[:,3].values) 441 | density5 = stats.kde.gaussian_kde(dat_density.iloc[:,4].values) 442 | density6 = stats.kde.gaussian_kde(dat_density.iloc[:,5].values) 443 | density7 = stats.kde.gaussian_kde(dat_density.iloc[:,6].values) 444 | density8 = stats.kde.gaussian_kde(dat_density.iloc[:,7].values) 445 | density9 = stats.kde.gaussian_kde(dat_density.iloc[:,8].values) 446 | 447 | fig, ax = plt.subplots(nrows=3, ncols=3) 448 | fig.set_size_inches(20,15) 449 | 450 | x=np.linspace(10000, 30000, 99) 451 | 452 | ax[0,0].plot(x, density1(x),linewidth=3,label='Gaussian kernel') 453 | ax[0,0].axvline(x=15172,color='r',linewidth=3) 454 | ax[0,0].set_title('2014-12-22') 455 | 456 | ax[0,1].plot(x, density2(x),linewidth=3,label='Gaussian kernel') 457 | ax[0,1].axvline(x=14331,color='r',linewidth=3) 458 | ax[0,1].set_title('2014-12-23') 459 | 460 | ax[0,2].plot(x, density3(x),linewidth=3,label='Gaussian kernel') 461 | ax[0,2].axvline(x=13838,color='r',linewidth=3) 462 | ax[0,2].set_title('2014-12-24') 463 | 464 | ax[1,0].plot(x, density4(x),linewidth=3,label='Gaussian kernel') 465 | ax[1,0].axvline(x=14283,color='r',linewidth=3) 466 | ax[1,0].set_title('2014-12-25') 467 | 468 | ax[1,1].plot(x, density5(x),linewidth=3,label='Gaussian kernel') 469 | ax[1,1].axvline(x=14585,color='r',linewidth=3) 470 | ax[1,1].set_title('2014-12-26') 471 | 472 | ax[1,2].plot(x, density6(x),linewidth=3,label='Gaussian kernel') 473 | ax[1,2].axvline(x=14723,color='r',linewidth=3) 474 | ax[1,2].set_title('2014-12-27') 475 | 476 | ax[2,0].plot(x, density7(x),linewidth=3,label='Gaussian kernel') 477 | ax[2,0].axvline(x=14979,color='r',linewidth=3) 478 | ax[2,0].set_title('2014-12-28') 479 | 480 | ax[2,1].plot(x, density8(x),linewidth=3,label='Gaussian kernel') 481 | ax[2,1].axvline(x=13440,color='r',linewidth=3) 482 | ax[2,1].set_title('2014-12-29') 483 | 484 | ax[2,2].plot(x, density9(x),linewidth=3,label='Gaussian kernel') 485 | ax[2,2].axvline(x=13224,color='r',linewidth=3) 486 | ax[2,2].set_title('2014-12-30') 487 | 488 | ax[0,0].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 489 | ax[0,1].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 490 | 
ax[0,2].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 491 | ax[1,0].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 492 | ax[1,1].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 493 | ax[1,2].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 494 | ax[2,0].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 495 | ax[2,1].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 496 | ax[2,2].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 497 | 498 | ax[0,0].set_xlabel('Electricity consumption(KWh)') 499 | ax[0,0].set_ylabel('Probability density') 500 | ax[0,0].legend(loc='upper right') 501 | 502 | ax[0,1].set_xlabel('Electricity consumption(KWh)') 503 | ax[0,1].set_ylabel('Probability density') 504 | ax[0,1].legend(loc='upper right') 505 | 506 | ax[0,2].set_xlabel('Electricity consumption(KWh)') 507 | ax[0,2].set_ylabel('Probability density') 508 | ax[0,2].legend(loc='upper right') 509 | 510 | ax[1,0].set_xlabel('Electricity consumption(KWh)') 511 | ax[1,0].set_ylabel('Probability density') 512 | ax[1,0].legend(loc='upper right') 513 | 514 | ax[1,1].set_xlabel('Electricity consumption(KWh)') 515 | ax[1,1].set_ylabel('Probability density') 516 | ax[1,1].legend(loc='upper right') 517 | 518 | ax[1,2].set_xlabel('Electricity consumption(KWh)') 519 | ax[1,2].set_ylabel('Probability density') 520 | ax[1,2].legend(loc='upper right') 521 | 522 | ax[2,0].set_xlabel('Electricity consumption(KWh)') 523 | ax[2,0].set_ylabel('Probability density') 524 | ax[2,0].legend(loc='upper right') 525 | 526 | ax[2,1].set_xlabel('Electricity consumption(KWh)') 527 | ax[2,1].set_ylabel('Probability density') 528 | ax[2,1].legend(loc='upper right') 529 | 530 | ax[2,2].set_xlabel('Electricity consumption(KWh)') 531 | ax[2,2].set_ylabel('Probability density') 532 | ax[2,2].legend(loc='upper right') 533 | 534 | 535 | 536 | 537 | 538 | 539 | 540 | -------------------------------------------------------------------------------- /quantile_pre.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | rm(list = ls()) 4 | setwd("D:/Rdata/Third_paper/third_paper_data") 5 | 6 | library(readxl) 7 | library(h2o) 8 | library(dplyr) 9 | 10 | h2o.init(nthreads = -1) 11 | 12 | set.seed(12345) 13 | 14 | dat<-read_excel("lianyungang_total_feature.xlsx") 15 | 16 | dim(dat) 17 | 18 | 19 | dat1<-na.omit(dat) 20 | 21 | dat2<-dat1[,2:58] 22 | 23 | names(dat2) 24 | 25 | month<-as.factor(dat1$month) 26 | 27 | day_of_w<-as.factor(dat1$day_of_w) 28 | 29 | air<-as.factor(dat1$air) 30 | 31 | rain<-as.factor(dat1$rain) 32 | 33 | 34 | levels(air)[levels(air)=="优"]<-"6" 35 | levels(air)[levels(air)=="良"]<-"5" 36 | levels(air)[levels(air)=="轻度污染"]<-"4" 37 | levels(air)[levels(air)=="中度污染"]<-"3" 38 | levels(air)[levels(air)=="重度污染"]<-"2" 39 | levels(air)[levels(air)=="严重污染"]<-"1" 40 | 41 | air<-as.numeric(air) 42 | 43 | levels(month)[levels(month)=="January"]<-"1" 44 | levels(month)[levels(month)=="February"]<-"2" 45 | levels(month)[levels(month)=="March"]<-"3" 46 | 47 | levels(month)[levels(month)=="April"]<-"4" 48 | levels(month)[levels(month)=="May"]<-"5" 49 | 50 | levels(month)[levels(month)=="June"]<-"6" 51 | levels(month)[levels(month)=="July"]<-"7" 52 | levels(month)[levels(month)=="August"]<-"8" 53 | levels(month)[levels(month)=="September"]<-"9" 54 | levels(month)[levels(month)=="October"]<-"10" 55 | levels(month)[levels(month)=="November"]<-"11" 56 | levels(month)[levels(month)=="December"]<-"12" 57 | 58 | levels(month) 59 | 60 | 
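# Aside -- a minimal sketch, assuming dat1$month holds English month names; this is
# not the code the script uses, just an equivalent one-step version of the twelve
# levels()<- assignments above (month_map and month_alt are illustrative names):
month_map <- setNames(as.character(1:12), month.name)     # "January" -> "1", ..., "December" -> "12"
month_alt <- factor(month_map[as.character(dat1$month)],
                    levels = as.character(1:12))
# month_alt carries the same recoded values as the `month` factor built above;
# the same idea would apply to the weekday recode that follows.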
levels(day_of_w)[levels(day_of_w)=="Monday"]<-"1" 61 | levels(day_of_w)[levels(day_of_w)=="Tuesday"]<-"2" 62 | levels(day_of_w)[levels(day_of_w)=="Wednesday"]<-"3" 63 | levels(day_of_w)[levels(day_of_w)=="Thursday"]<-"4" 64 | levels(day_of_w)[levels(day_of_w)=="Friday"]<-"5" 65 | levels(day_of_w)[levels(day_of_w)=="Saturday"]<-"6" 66 | levels(day_of_w)[levels(day_of_w)=="Sunday"]<-"7" 67 | 68 | levels(day_of_w) 69 | 70 | dat2$month<-month 71 | dat2$air<-air 72 | dat2$rain<-rain 73 | dat2$day_of_w<-day_of_w 74 | 75 | 76 | 77 | 78 | y <- "E_demand" #response column: digits 0-9 79 | x <- setdiff(names(dat2), y) #vector of predictor column names 80 | 81 | dat3<-as.h2o(dat2,destination_frame = "dat3") 82 | 83 | dat4<-dat3[1:333,] 84 | 85 | splits <- h2o.splitFrame(dat4, ratios = 0.8, seed = 12345) 86 | 87 | 88 | 89 | 90 | # first part of the data, without labels for unsupervised learning 91 | train <- splits[[1]] 92 | 93 | # second part of the data, with labels for supervised learning 94 | valid <- splits[[2]] 95 | 96 | test<-dat3[334:358,] 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | pred_all<-list() 111 | taus<-seq(from=0.01,to=0.99,length=99) 112 | quantile_pre_time_1<-Sys.time() 113 | for(i in 1:99) 114 | { 115 | m1 <- h2o.deeplearning( 116 | model_id="dl_model_first", 117 | training_frame=train, 118 | validation_frame=valid, ## validation dataset: used for scoring and early stopping 119 | x=x, 120 | y=y, 121 | #activation="Rectifier", ## default 122 | hidden=c(200,200,200), ## default: 2 hidden layers with 200 neurons each 123 | epochs=500, 124 | variable_importances=T , 125 | distribution = 'quantile', 126 | quantile_alpha = taus[i], 127 | seed = 12345 128 | ## not enabled by default 129 | ) 130 | pred <- h2o.predict(m1, newdata = test) 131 | pred <-as.data.frame(pred$predict) 132 | names(pred)<-paste("quantile",taus,sep = "_")[i] 133 | pred_all[[i]]<-pred 134 | } 135 | quantile_pre_time_2<-Sys.time() 136 | 137 | quantile_run_time_2<-(quantile_pre_time_2-quantile_pre_time_1) 138 | 139 | 140 | 141 | 142 | data<-as.data.frame(pred_all) 143 | 144 | write.csv(data,file="lianyungang_quantile_25m.csv") 145 | 146 | rm(list = ls()) 147 | 148 | 149 | dat<-read_excel("suzhou_total_feature.xlsx") 150 | 151 | dim(dat) 152 | 153 | #dat<-h2o.importFile(path = "total_feature.xlsx",destination_frame = "dat") 154 | #dat1<-as.data.frame(dat) 155 | dat1<-na.omit(dat) 156 | 157 | dat2<-dat1[,2:58] 158 | 159 | names(dat2) 160 | 161 | month<-as.factor(dat1$month) 162 | 163 | day_of_w<-as.factor(dat1$day_of_w) 164 | 165 | air<-as.factor(dat1$air) 166 | 167 | rain<-as.factor(dat1$rain) 168 | 169 | 170 | levels(air)[levels(air)=="优"]<-"6" 171 | levels(air)[levels(air)=="良"]<-"5" 172 | levels(air)[levels(air)=="轻度污染"]<-"4" 173 | levels(air)[levels(air)=="中度污染"]<-"3" 174 | levels(air)[levels(air)=="重度污染"]<-"2" 175 | levels(air)[levels(air)=="严重污染"]<-"1" 176 | 177 | air<-as.numeric(air) 178 | 179 | levels(month)[levels(month)=="January"]<-"1" 180 | levels(month)[levels(month)=="February"]<-"2" 181 | levels(month)[levels(month)=="March"]<-"3" 182 | 183 | levels(month)[levels(month)=="April"]<-"4" 184 | levels(month)[levels(month)=="May"]<-"5" 185 | 186 | levels(month)[levels(month)=="June"]<-"6" 187 | levels(month)[levels(month)=="July"]<-"7" 188 | levels(month)[levels(month)=="August"]<-"8" 189 | levels(month)[levels(month)=="September"]<-"9" 190 | levels(month)[levels(month)=="October"]<-"10" 191 | levels(month)[levels(month)=="November"]<-"11" 192 | 
levels(month)[levels(month)=="December"]<-"12" 193 | 194 | levels(month) 195 | 196 | levels(day_of_w)[levels(day_of_w)=="Monday"]<-"1" 197 | levels(day_of_w)[levels(day_of_w)=="Tuesday"]<-"2" 198 | levels(day_of_w)[levels(day_of_w)=="Wednesday"]<-"3" 199 | levels(day_of_w)[levels(day_of_w)=="Thursday"]<-"4" 200 | levels(day_of_w)[levels(day_of_w)=="Friday"]<-"5" 201 | levels(day_of_w)[levels(day_of_w)=="Saturday"]<-"6" 202 | levels(day_of_w)[levels(day_of_w)=="Sunday"]<-"7" 203 | 204 | levels(day_of_w) 205 | 206 | dat2$month<-month 207 | dat2$air<-air 208 | dat2$rain<-rain 209 | dat2$day_of_w<-day_of_w 210 | 211 | 212 | 213 | 214 | y <- "E_demand" #response column: digits 0-9 215 | x <- setdiff(names(dat2), y) #vector of predictor column names 216 | 217 | dat3<-as.h2o(dat2,destination_frame = "dat3") 218 | 219 | dat4<-dat3[1:333,] 220 | 221 | splits <- h2o.splitFrame(dat4, ratios = 0.8, seed = 12345) 222 | 223 | 224 | 225 | 226 | # first part of the data, without labels for unsupervised learning 227 | train <- splits[[1]] 228 | 229 | # second part of the data, with labels for supervised learning 230 | valid <- splits[[2]] 231 | 232 | test<-dat3[334:358,] 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | pred_all<-list() 247 | taus<-seq(from=0.01,to=0.99,length=99) 248 | quantile_pre_time_1<-Sys.time() 249 | for(i in 1:99) 250 | { 251 | m1 <- h2o.deeplearning( 252 | model_id="dl_model_first", 253 | training_frame=train, 254 | validation_frame=valid, ## validation dataset: used for scoring and early stopping 255 | x=x, 256 | y=y, 257 | #activation="Rectifier", ## default 258 | hidden=c(200,200,200), ## default: 2 hidden layers with 200 neurons each 259 | epochs=500, 260 | variable_importances=T , 261 | distribution = 'quantile', 262 | quantile_alpha = taus[i], 263 | seed = 12345 264 | ## not enabled by default 265 | ) 266 | pred <- h2o.predict(m1, newdata = test) 267 | pred <-as.data.frame(pred$predict) 268 | names(pred)<-paste("quantile",taus,sep = "_")[i] 269 | pred_all[[i]]<-pred 270 | } 271 | quantile_pre_time_2<-Sys.time() 272 | 273 | quantile_run_time_2<-(quantile_pre_time_2-quantile_pre_time_1) 274 | 275 | 276 | 277 | 278 | data<-as.data.frame(pred_all) 279 | 280 | write.csv(data,file="suzhou_quantile_25m.csv") 281 | 282 | rm(list = ls()) 283 | 284 | 285 | 286 | dat<-read_excel("nanjing_total_feature.xlsx") 287 | 288 | dim(dat) 289 | 290 | #dat<-h2o.importFile(path = "total_feature.xlsx",destination_frame = "dat") 291 | #dat1<-as.data.frame(dat) 292 | dat1<-na.omit(dat) 293 | 294 | dat2<-dat1[,2:58] 295 | 296 | names(dat2) 297 | 298 | month<-as.factor(dat1$month) 299 | 300 | day_of_w<-as.factor(dat1$day_of_w) 301 | 302 | air<-as.factor(dat1$air) 303 | 304 | rain<-as.factor(dat1$rain) 305 | 306 | 307 | levels(air)[levels(air)=="优"]<-"6" 308 | levels(air)[levels(air)=="良"]<-"5" 309 | levels(air)[levels(air)=="轻度污染"]<-"4" 310 | levels(air)[levels(air)=="中度污染"]<-"3" 311 | levels(air)[levels(air)=="重度污染"]<-"2" 312 | levels(air)[levels(air)=="严重污染"]<-"1" 313 | 314 | air<-as.numeric(air) 315 | 316 | levels(month)[levels(month)=="January"]<-"1" 317 | levels(month)[levels(month)=="February"]<-"2" 318 | levels(month)[levels(month)=="March"]<-"3" 319 | 320 | levels(month)[levels(month)=="April"]<-"4" 321 | levels(month)[levels(month)=="May"]<-"5" 322 | 323 | levels(month)[levels(month)=="June"]<-"6" 324 | levels(month)[levels(month)=="July"]<-"7" 325 | levels(month)[levels(month)=="August"]<-"8" 326 | levels(month)[levels(month)=="September"]<-"9" 327 | 
levels(month)[levels(month)=="October"]<-"10" 328 | levels(month)[levels(month)=="November"]<-"11" 329 | levels(month)[levels(month)=="December"]<-"12" 330 | 331 | levels(month) 332 | 333 | levels(day_of_w)[levels(day_of_w)=="Monday"]<-"1" 334 | levels(day_of_w)[levels(day_of_w)=="Tuesday"]<-"2" 335 | levels(day_of_w)[levels(day_of_w)=="Wednesday"]<-"3" 336 | levels(day_of_w)[levels(day_of_w)=="Thursday"]<-"4" 337 | levels(day_of_w)[levels(day_of_w)=="Friday"]<-"5" 338 | levels(day_of_w)[levels(day_of_w)=="Saturday"]<-"6" 339 | levels(day_of_w)[levels(day_of_w)=="Sunday"]<-"7" 340 | 341 | levels(day_of_w) 342 | 343 | dat2$month<-month 344 | dat2$air<-air 345 | dat2$rain<-rain 346 | dat2$day_of_w<-day_of_w 347 | 348 | 349 | 350 | 351 | y <- "E_demand" #response column: digits 0-9 352 | x <- setdiff(names(dat2), y) #vector of predictor column names 353 | 354 | dat3<-as.h2o(dat2,destination_frame = "dat3") 355 | 356 | dat4<-dat3[1:333,] 357 | 358 | splits <- h2o.splitFrame(dat4, ratios = 0.8, seed = 12345) 359 | 360 | 361 | 362 | 363 | # first part of the data, without labels for unsupervised learning 364 | train <- splits[[1]] 365 | 366 | # second part of the data, with labels for supervised learning 367 | valid <- splits[[2]] 368 | 369 | test<-dat3[334:358,] 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | pred_all<-list() 384 | taus<-seq(from=0.01,to=0.99,length=99) 385 | quantile_pre_time_1<-Sys.time() 386 | for(i in 1:99) 387 | { 388 | m1 <- h2o.deeplearning( 389 | model_id="dl_model_first", 390 | training_frame=train, 391 | validation_frame=valid, ## validation dataset: used for scoring and early stopping 392 | x=x, 393 | y=y, 394 | #activation="Rectifier", ## default 395 | hidden=c(200,200,200), ## default: 2 hidden layers with 200 neurons each 396 | epochs=500, 397 | variable_importances=T , 398 | distribution = 'quantile', 399 | quantile_alpha = taus[i], 400 | seed = 12345 401 | ## not enabled by default 402 | ) 403 | pred <- h2o.predict(m1, newdata = test) 404 | pred <-as.data.frame(pred$predict) 405 | names(pred)<-paste("quantile",taus,sep = "_")[i] 406 | pred_all[[i]]<-pred 407 | } 408 | quantile_pre_time_2<-Sys.time() 409 | 410 | quantile_run_time_2<-(quantile_pre_time_2-quantile_pre_time_1) 411 | 412 | 413 | 414 | data<-as.data.frame(pred_all) 415 | 416 | write.csv(data,file="nanjing_quantile_25m.csv") 417 | -------------------------------------------------------------------------------- /taibao/.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaoguozhi/paper_code/eec3042cc81e9706f5a64787f6c869f9bd1ee13c/taibao/.RData -------------------------------------------------------------------------------- /taibao/EX-04-01.R: -------------------------------------------------------------------------------- 1 | ############################################################# 2 | # Description: 3 | # 1.for lecture 'my introduction to R' 4 | # 2.No.: CH-04, EX-01: 5 | # 3.Purpose: threshold mean regression 6 | # 4.Author: Qifa Xu 7 | # 5.Founded: Apr 09, 2015 8 | # 6.Revised: Apr 09, 2015 9 | # 7.Reference: 10 | # ########################################################### 11 | # Contents: 12 | # 1. generate data 13 | # 2. do regression with real data 14 | # 3. define functions 15 | # 4. find the optimal threshold 16 | # 5. estimate threshold regression model 17 | ######################################################### 18 | 19 | # 0. 
Initialize 20 | setwd("F:/programe/lecture/my introduction to R") 21 | rm(list = ls()) 22 | 23 | # 1. generate data 24 | beta <- c(3,2,5) 25 | threshold <- 0.3 26 | n <- 200 27 | x <- matrix(runif(n), nrow=n, ncol=1) 28 | eps <- rnorm(n=n) 29 | y <- beta[1] + beta[2]*x[,1] + beta[3]*(x[,1]*(x[,1] > threshold)) + eps/10 30 | dat <- data.frame(y=y, x1=x[,1]) 31 | plot(dat$x1, y) 32 | 33 | # 2. do regression with real data 34 | lm(y~x1+I(x1*(x1>threshold)), data=dat) 35 | 36 | # 3. define functions 37 | source('sub-01.R') 38 | 39 | # 4. find the optimal threshold 40 | (gamopt <- gamsearch.mr(var=dat$x1, dat)) 41 | 42 | # 5. estimate threshold regression model 43 | # (1) make model 44 | (thrmodel.mr <- lm(y~x1+I(x1*(x1>gamopt)), data=dat)) 45 | summary(thrmodel.mr) 46 | 47 | # (2) show results 48 | xs <- seq(min(dat$x1), max(dat$x1), length=500) 49 | ys.hat <- predict(thrmodel.mr, newdata=data.frame(x1=xs)) 50 | 51 | plot(dat$x1, y, xlab='x', ylab='y') 52 | lines(xs[xs=gamopt], ys.hat[xs>=gamopt], lwd=2, col='red') 54 | 55 | 56 | -------------------------------------------------------------------------------- /taibao/EX-04-02.R: -------------------------------------------------------------------------------- 1 | ############################################################# 2 | # Description: 3 | # 1.for lecture 'my introduction to R' 4 | # 2.No.: CH-04, EX-02: 5 | # 3.Purpose: threshold quantile regression 6 | # 4.Author: Qifa Xu 7 | # 5.Founded: Apr 09, 2015 8 | # 6.Revised: Apr 09, 2015 9 | # 7.Reference: 10 | # ########################################################### 11 | # Contents: 12 | # 1. generate data 13 | # 2. do regression with real data 14 | # 3. define functions 15 | # 4. find the optimal threshold 16 | # 5. estimate threshold regression model 17 | ######################################################### 18 | 19 | # 0. Initialize 20 | setwd("F:/programe/lecture/my introduction to R") 21 | rm(list = ls()) 22 | 23 | 24 | # 1. generate data 25 | beta <- c(3, 2, 5) 26 | threshold <- 0.3 27 | n <- 200 28 | x <- matrix(runif(n), nrow=n, ncol=1) 29 | eps <- rchisq(n=n, df=3) 30 | y <- beta[1] + beta[2]*x[,1] + beta[3]*(x[,1]*(x[,1] > threshold)) + x[,1]*eps 31 | dat <- data.frame(y=y, x1=x[,1]) 32 | plot(dat$x1, y) 33 | 34 | # 2. do regression with real data 35 | library(quantreg) 36 | taus <- seq(0.1, 0.9, by=0.2) 37 | rq(y~x1+I(x1*(x1>threshold)), tau=taus, data=dat) 38 | 39 | # 3. define functions 40 | source('sub-01.R') 41 | 42 | # 4. find the optimal threshold 43 | gamopt <- rep(NA, length=length(taus)) 44 | for (i in seq_along(taus)){ 45 | gamopt[i] <- gamsearch.qr(var=dat$x1, tau=taus[i], dat) 46 | } 47 | names(gamopt) <- paste('tau=', taus, sep='') 48 | print(gamopt) 49 | 50 | 51 | # 5. 
estimate threshold regression model 52 | # (1) make model 53 | (thrmodel.qr <- rq(y~x1+I(x1*(x1>gamopt[1])), tau=taus, data=dat)) 54 | summary(thrmodel.qr) 55 | coef(thrmodel.qr) # compare with those true values 56 | # eps <- rchisq(n=n, df=3) 57 | # y <- beta[1] + beta[2]*x[,1] + beta[3]*(x[,1]*(x[,1] > threshold)) + x[,1]*eps 58 | # Q.y <- beta[1] + beta[2]*x[,1] + beta[3]*(x[,1]*(x[,1] > threshold)) + x[,1]*F.inv(eps) 59 | (F.inv <-qchisq(p=taus, df=3)) 60 | beta[2] + F.inv # slopes in the lower interval 61 | beta[2] + beta[3] + F.inv # slopes in the upper interval 62 | 63 | # (2) show results 64 | xs <- seq(min(dat$x1), max(dat$x1), length=500) 65 | ys.hat <- predict(thrmodel.qr, newdata=data.frame(x1=xs)) 66 | 67 | plot(dat$x1, y, xlab='x', ylab='y') 68 | for (i in 1:length(taus)){ 69 | lines(xs[xs=gamopt], ys.hat[xs>=gamopt,i], lty=i, lwd=2) 71 | } 72 | 73 | 74 | -------------------------------------------------------------------------------- /taibao/IPAD.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaoguozhi/paper_code/eec3042cc81e9706f5a64787f6c869f9bd1ee13c/taibao/IPAD.xlsx -------------------------------------------------------------------------------- /taibao/a.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /taibao/prog-00.R: -------------------------------------------------------------------------------- 1 | ######################################################## 2 | # Description: 3 | # 1.for threshold regression simulation 4 | # 2.No.: 01 5 | # 3.Purpose: threshold mean regression 6 | # 4.Reference: non 7 | # 5.Author: Qifa Xu 8 | # 6.Founded: Mar 17, 2015. 9 | # 7.Revised: Mar 18, 2015. 10 | ######################################################## 11 | # Contents: 12 | # 1. generate data 13 | # 2. do regression with real data 14 | ######################################################### 15 | 16 | # 0. initialize 17 | setwd('F:/programe/paper/QR+goods') 18 | rm(list=ls()) 19 | 20 | # 1. generate data 21 | beta <- c(3,2,10) 22 | threshold <- 0.3 23 | n <- 200 24 | x <- matrix(runif(n), nrow=n, ncol=1) 25 | eps <- rnorm(n=n) 26 | y <- beta[1] + beta[2]*x[,1] + beta[3]*(x[,1]*(x[,1] > threshold)) + eps 27 | dat <- data.frame(y=y, x1=x[,1]) 28 | plot(dat$x1, y) 29 | 30 | # 2. do regression with real data 31 | lm(y~x1+I(x1*(x1>threshold)), data=dat) 32 | 33 | # 3. define functions 34 | source('sub-01.R') 35 | 36 | # 4. find the optimal threshold 37 | (gamopt <- gamsearch.mr(var=dat$x1, dat)) 38 | 39 | # 5. estimate threshold regression model 40 | # (1) make model 41 | (thrmodel.mr <- lm(y~x1+I(x1*(x1>gamopt)), data=dat)) 42 | summary(thrmodel.mr) 43 | 44 | # (2) show results 45 | xs <- seq(min(dat$x1), max(dat$x1), length=500) 46 | ys.hat <- predict(thrmodel.mr, newdata=data.frame(x1=xs)) 47 | 48 | plot(dat$x1, y, xlab='x', ylab='y') 49 | lines(xs[xs=gamopt], ys.hat[xs>=gamopt], lwd=2, col='red') 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /taibao/prog-01.R: -------------------------------------------------------------------------------- 1 | ######################################################## 2 | # Description: 3 | # 1.for threshold regression simulation 4 | # 2.No.: 02 5 | # 3.Purpose: threshold quantile regression 6 | # 4.Reference: non 7 | # 5.Author: Qifa Xu 8 | # 6.Founded: Mar 17, 2015. 
9 | # 7.Revised: Mar 18, 2015. 10 | ######################################################## 11 | # Contents: 12 | # 1. generate data 13 | # 2. do regression with real data 14 | ######################################################### 15 | 16 | # 0. initialize 17 | setwd('E:/QR+goods') 18 | rm(list=ls()) 19 | 20 | # 1. generate data 21 | beta <- c(3, 2, 5) 22 | threshold <- 0.3 23 | n <- 200 24 | x <- matrix(runif(n), nrow=n, ncol=1) 25 | eps <- rchisq(n=n, df=3) 26 | y <- beta[1] + beta[2]*x[,1] + beta[3]*(x[,1]*(x[,1] > threshold)) + x[,1]*eps/10 27 | dat <- data.frame(y=y, x1=x[,1]) 28 | plot(dat$x1, y) 29 | 30 | # 2. do regression with real data 31 | library(quantreg) 32 | taus <- seq(0.1, 0.9, by=0.2) 33 | rq(y~x1+I(x1*(x1>threshold)), tau=taus, data=dat) 34 | 35 | # 3. define functions 36 | source('sub-01.R') 37 | 38 | # 4. find the optimal threshold 39 | gamopt <- rep(NA, length=length(taus)) 40 | for (i in seq_along(taus)){ 41 | gamopt[i] <- gamsearch.qr(var=dat$x1, tau=taus[i], dat) 42 | } 43 | names(gamopt) <- paste('tau=', taus, sep='') 44 | print(gamopt) 45 | 46 | 47 | # 5. estimate threshold regression model 48 | # (1) make model 49 | (thrmodel.qr <- rq(y~x1+I(x1*(x1>gamopt[1])), tau=taus, data=dat)) 50 | summary(thrmodel.qr) 51 | coef(thrmodel.qr) # compare with those true values 52 | # eps <- rchisq(n=n, df=3) 53 | # y <- beta[1] + beta[2]*x[,1] + beta[3]*(x[,1]*(x[,1] > threshold)) + x[,1]*eps 54 | # Q.y <- beta[1] + beta[2]*x[,1] + beta[3]*(x[,1]*(x[,1] > threshold)) + x[,1]*F.inv(eps) 55 | (F.inv <-qchisq(p=taus, df=3)) 56 | beta[2] + F.inv # slopes in the lower interval 57 | beta[2] + beta[3] + F.inv # slopes in the upper interval 58 | 59 | # (2) show results 60 | xs <- seq(min(dat$x1), max(dat$x1), length=500) 61 | ys.hat <- predict(thrmodel.qr, newdata=data.frame(x1=xs)) 62 | 63 | plot(dat$x1, y, xlab='x', ylab='y') 64 | for (i in 1:length(taus)){ 65 | lines(xs[xs=gamopt], ys.hat[xs>=gamopt,i], lty=i, lwd=2) 67 | } 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /taibao/prog-01old.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | setwd('E:/QR+goods') 4 | rm(list=ls()) 5 | # 1. read data 6 | library(xlsx) 7 | dat <- read.xlsx(file='IPAD.xlsx', sheetName='all', startRow=1, endRow=436, colIndex=2:9) 8 | head(dat) 9 | class(dat) 10 | names(dat) <- c('credit', 'grade', 'popular', 'price', 'sale', 'RevAmou', 'RevGrad', 'No') 11 | summary(dat) 12 | 13 | # 2. data process 14 | dat <- na.omit(dat) 15 | 16 | # data <- data.frame() 17 | # for (j in 1:ncol(dat)){ 18 | # data[,j] <- as.data.frame(dat[,j]) 19 | # # data <- cbind(data, as.data.frame(dat[,j])) 20 | # } 21 | 22 | credit <- dat$credit 23 | grade <- dat$grade 24 | popular <- dat$popular 25 | price <- as.numeric(matrix(dat$price)) 26 | sale <- as.numeric(matrix(dat$sale)) 27 | RevAmou <- as.numeric(matrix(dat$RevAmou)) 28 | RevGrad <- dat$RevGrad 29 | No <- dat$No 30 | 31 | dat <- data.frame(sale, credit, grade, popular, price, RevAmou, RevGrad, No) 32 | 33 | plot(dat$sale~dat$price) 34 | 35 | 36 | cor(dat) 37 | 38 | # 3. make model in mean regression 39 | model.lm <- lm(sale~., data=dat) 40 | summary(model.lm) 41 | 42 | # 4. make model in quantile regression 43 | library(quantreg) 44 | taus <- seq(0.1, 0.9, length=5) 45 | model.rq <- rq(sale~., tau=taus, data=dat) 46 | plot(summary(model.rq)) 47 | summary(model.rq) 48 | 49 | 50 | # 5. 
make threshold model in mean regression 51 | # (1) define functions 52 | source('sub.R') 53 | 54 | # (2) find the optimal threshold 55 | (gamopt <- gamsearch(dat=dat)) 56 | 57 | # (3) estimate threshold model 58 | thrmodel.lm <- lm(sale~price+I(price*(price>gamopt))+grade+credit 59 | +popular+RevAmou+RevGrad+No, data=dat) 60 | summary(thrmodel.lm) 61 | round(coef(thrmodel.lm), digits=4) 62 | 63 | # 6. make threshold model in quantile regression 64 | # (1) define functions 65 | 66 | 67 | # (2) find the optimal threshold 68 | tau <- 0.7 69 | (gamopt.rq <- gamsearch.rq(dat=dat, tau=tau, var=price)) 70 | 71 | 72 | # (3) estimate threshold model 73 | thrmodel.rq <- rq(sale~price+I(price*(price>gamopt.rq))+grade+credit 74 | +popular+RevAmou+RevGrad+No, tau=tau, data=dat) 75 | summary(thrmodel.rq) 76 | -------------------------------------------------------------------------------- /taibao/prog-02.R: -------------------------------------------------------------------------------- 1 | ######################################################## 2 | # Description: 3 | # 1.for threshold regression simulation 4 | # 2.No.: 02 5 | # 3.Purpose: threshold quantile regression 6 | # 4.Reference: non 7 | # 5.Author: Qifa Xu 8 | # 6.Founded: Mar 17, 2015. 9 | # 7.Revised: Mar 18, 2015. 10 | ######################################################## 11 | # Contents: 12 | # 1. read data 13 | # 2. data process 14 | # 3. make model in mean regression 15 | # 4. make model in quantile regression 16 | # 5. make threshold model in mean regression 17 | # 6. make threshold model in quantile regression 18 | ######################################################### 19 | 20 | # 0. initialize 21 | setwd('E:/QR+goods') 22 | rm(list=ls()) 23 | 24 | # 1. read data 25 | library(xlsx) 26 | dat <- read.xlsx(file='IPAD.xlsx', sheetName='all', startRow=1, endRow=436, colIndex=2:9) 27 | head(dat) 28 | class(dat) 29 | names(dat) <- c('credit', 'grade', 'popular', 'price', 'sale', 'RevAmou', 'RevGrad', 'No') 30 | summary(dat) 31 | 32 | # 2. data process 33 | 34 | # data <- data.frame() 35 | # for (j in 1:ncol(dat)){ 36 | # data[,j] <- as.data.frame(dat[,j]) 37 | # # data <- cbind(data, as.data.frame(dat[,j])) 38 | # } 39 | 40 | credit <- dat$credit 41 | grade <- dat$grade 42 | popular <- dat$popular 43 | price <- as.numeric(matrix(dat$price)) 44 | sale <- as.numeric(matrix(dat$sale)) 45 | RevAmou <- as.numeric(matrix(dat$RevAmou)) 46 | RevGrad <- dat$RevGrad 47 | No <- dat$No 48 | 49 | dat <- data.frame(sale, credit, grade, popular, price, RevAmou, RevGrad, No) 50 | dat <- na.omit(dat) 51 | 52 | plot(dat$sale~dat$price) 53 | 54 | round(cor(dat), digits=4) 55 | 56 | # 3. make model in mean regression 57 | model.lm <- lm(sale~., data=dat) 58 | summary(model.lm) 59 | 60 | # 4. make model in quantile regression 61 | library(quantreg) 62 | taus <- seq(0.1, 0.9, length=5) 63 | model.rq <- rq(sale~., tau=taus, data=dat) 64 | plot(summary(model.rq)) 65 | summary(model.rq) 66 | round(coef(model.rq), digits=4) 67 | 68 | # 5. 
make threshold model in mean regression 69 | # (1) define functions 70 | source('sub-02.R') 71 | 72 | # (2) find the optimal threshold 73 | (gamopt <- gamsearch.mr(var=price, dat=dat)) 74 | 75 | # (3) estimate threshold model 76 | thrmodel.mr <- lm(sale~price+I(price*(price>gamopt))+grade+credit 77 | +popular+RevAmou+RevGrad+No, data=dat) 78 | summary(thrmodel.mr) 79 | 80 | # (1) make model 81 | (thrmodel.mr <- lm(sale~price+I(price*(price>gamopt)), data=dat)) 82 | summary(thrmodel.mr) 83 | 84 | # (2) show results 85 | prices <- seq(min(dat$price), max(dat$price), length=6838) 86 | sales.hat <- predict(thrmodel.mr, newdata=data.frame(price=prices)) 87 | 88 | plot(dat$price, dat$sale, xlab='price', ylab='sale') 89 | lines(prices[prices<gamopt], sales.hat[prices<gamopt], lwd=2, col='red') 90 | lines(prices[prices>=gamopt], sales.hat[prices>=gamopt], lwd=2, col='red') 91 | # sale~price+I(price*(price>gamma))+grade+credit+popular+RevAmou+RevGrad+No 92 | 93 | # 6. make threshold model in quantile regression 94 | # (1) define functions 95 | 96 | 97 | # (2) find the optimal threshold 98 | tau <- 0.1 99 | (gamopt.qr <- gamsearch.qr(dat=dat, tau=tau, var=price)) 100 | 101 | 102 | # (3) estimate threshold model 103 | thrmodel.qr <- rq(sale~price+I(price*(price>gamopt.qr))+grade+credit 104 | +popular+RevAmou+RevGrad+No, tau=tau, data=dat) 105 | summary(thrmodel.qr) 106 | # 5. estimate threshold regression model 107 | # (1) make model 108 | (thrmodel.qr.1 <- rq(sale~price+I(price*(price>gamopt)), tau=taus[1], data=dat)) 109 | (thrmodel.qr.2 <- rq(sale~price+I(price*(price>gamopt)), tau=taus[2], data=dat)) 110 | (thrmodel.qr.3 <- rq(sale~price+I(price*(price>gamopt)), tau=taus[3], data=dat)) 111 | (thrmodel.qr.4 <- rq(sale~price+I(price*(price>gamopt)), tau=taus[4], data=dat)) 112 | (thrmodel.qr.5 <- rq(sale~price+I(price*(price>gamopt)), tau=taus[5], data=dat)) 113 | summary(thrmodel.qr) 114 | coef(thrmodel.qr) # compare with those true values 115 | # eps <- rchisq(n=n, df=3) 116 | # y <- beta[1] + beta[2]*x[,1] + beta[3]*(x[,1]*(x[,1] > threshold)) + x[,1]*eps 117 | # Q.y <- beta[1] + beta[2]*x[,1] + beta[3]*(x[,1]*(x[,1] > threshold)) + x[,1]*F.inv(eps) 118 | (F.inv <- qchisq(p=taus, df=3)) 119 | beta[2] + F.inv # slopes in the lower interval 120 | beta[2] + beta[3] + F.inv # slopes in the upper interval 121 | 122 | # (2) show results 123 | xs <- seq(min(dat$price), max(dat$price), length=6838) 124 | ys.hat.1 <- predict(thrmodel.qr.1, newdata=data.frame(price=xs)) 125 | ys.hat.2 <- predict(thrmodel.qr.2, newdata=data.frame(price=xs)) 126 | ys.hat.3 <- predict(thrmodel.qr.3, newdata=data.frame(price=xs)) 127 | ys.hat.4 <- predict(thrmodel.qr.4, newdata=data.frame(price=xs)) 128 | ys.hat.5 <- predict(thrmodel.qr.5, newdata=data.frame(price=xs)) 129 | ys.hat <- cbind(ys.hat.1,ys.hat.2,ys.hat.3,ys.hat.4,ys.hat.5) 130 | plot(dat$price, dat$sale, xlab='price', ylab='sale') 131 | for (i in 1:length(taus)){ 132 | lines(xs[xs<gamopt], ys.hat[xs<gamopt,i], lty=i, lwd=2) 133 | lines(xs[xs>=gamopt], ys.hat[xs>=gamopt,i], lty=i, lwd=2) 134 | } 135 | 136 |
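A more compact alternative to the five separate per-quantile fits in prog-02.R above, offered only as a sketch and not as part of the repository: quantreg's rq() accepts a vector of quantiles, and predict() on the resulting fit returns one column of fitted sales per tau. It assumes dat, taus, and gamopt from prog-02.R are already in the workspace.

library(quantreg)
# fit all five quantiles of the price-only threshold model in one call
thrmodel.qr.all <- rq(sale~price+I(price*(price>gamopt)), tau=taus, data=dat)
coef(thrmodel.qr.all)                                    # one column of coefficients per tau
prices <- seq(min(dat$price), max(dat$price), length=500)
sales.hat <- predict(thrmodel.qr.all, newdata=data.frame(price=prices))  # matrix: one column per tau
plot(dat$price, dat$sale, xlab='price', ylab='sale')
for (i in 1:length(taus)){
  lines(prices[prices<gamopt], sales.hat[prices<gamopt, i], lty=i, lwd=2)
  lines(prices[prices>=gamopt], sales.hat[prices>=gamopt, i], lty=i, lwd=2)
}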
-------------------------------------------------------------------------------- /taibao/sub-01.R: -------------------------------------------------------------------------------- 1 | # sub functions for goods pricing 2 | # 1. define loss function for mean regression 3 | loss.mr <- function(gam, dat){ 4 | fmla <- y ~ x1 + I(x1 * (x1 > gam)) 5 | model <- lm(formula=fmla, data=dat) 6 | sse <- sum(model$residuals^2) 7 | sse 8 | } 9 | 10 | # 2. define gamma search function 11 | gamsearch.mr <- function(var=x, dat){ 12 | # browser() 13 | min <- min(var) 14 | max <- max(var) 15 | gams <- seq(min, max, length=500) 16 | los <- rep(NA, length(gams)) 17 | for (i in 1:length(gams)){ 18 | los[i] <- loss.mr(gam=gams[i], dat=dat) 19 | } 20 | plot(gams, los, type='l') 21 | optgam <- gams[which.min(los)] 22 | optgam 23 | } 24 | 25 | # 3. define loss function for quantile regression 26 | loss.qr <- function(gam, tau, dat){ 27 | fmla <- y ~ x1 + I(x1 * (x1 > gam)) 28 | model <- rq(fmla, tau=tau, data=dat) 29 | rho <- model$rho 30 | rho 31 | } 32 | 33 | 34 | # 4. define gamma search function for quantile regression 35 | gamsearch.qr <- function(var=x, tau, dat){ 36 | min <- min(var)*1.2 37 | max <- max(var)/1.2 38 | gams <- seq(min, max, length=100) 39 | los <- rep(NA, length(gams)) 40 | for (i in 1:length(gams)){ 41 | los[i] <- loss.qr(gam=gams[i], tau=tau, dat=dat) 42 | } 43 | plot(gams, los, type='l') 44 | optgam <- gams[which.min(los)] 45 | optgam 46 | } 47 | 48 | -------------------------------------------------------------------------------- /taibao/sub-02.R: -------------------------------------------------------------------------------- 1 | # sub functions for goods pricing 2 | # 1. define loss function for mean regression 3 | loss.mr <- function(gam, dat){ 4 | # fmla <- y ~ x1 + I(x1 * (x1 > gam)) 5 | fmla <- sale~price+I(price*(price>gam))+grade+credit+ 6 | popular+RevAmou+RevGrad+No 7 | model <- lm(formula=fmla, data=dat) 8 | sse <- sum(model$residuals^2) 9 | sse 10 | } 11 | 12 | # 2. define gamma search function 13 | gamsearch.mr <- function(var=x, dat){ 14 | # browser() 15 | min <- min(var) 16 | max <- max(var) 17 | gams <- seq(min, max, length=100) 18 | los <- rep(NA, length(gams)) 19 | for (i in 1:length(gams)){ 20 | los[i] <- loss.mr(gam=gams[i], dat=dat) 21 | } 22 | plot(gams, los, type='l') 23 | optgam <- gams[which.min(los)] 24 | optgam 25 | } 26 | 27 | # 3. define loss function for quantile regression 28 | loss.qr <- function(gam, tau, dat){ 29 | fmla <- sale~price+I(price*(price>gam))+grade+credit+ 30 | popular+RevAmou+RevGrad+No 31 | model <- rq(fmla, tau=tau, data=dat) 32 | rho <- model$rho 33 | rho 34 | } 35 | 36 | 37 | # 4.
define gamma serch function for quantile regression 38 | gamsearch.qr <- function(var=x, tau, dat){ 39 | min <- min(var)*1.2 40 | max <- max(var)/1.2 41 | gams <- seq(min, max, length=100) 42 | los <- rep(NA, length(gams)) 43 | for (i in 1:length(gams)){ 44 | los[i] <- loss.qr(gam=gams[i], tau=tau, dat=dat) 45 | } 46 | plot(gams, los, type='l') 47 | optgam <- gams[which.min(los)] 48 | optgam 49 | } 50 | 51 | -------------------------------------------------------------------------------- /xingqi.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import pandas as pd 3 | import numpy as np 4 | import matplotlib.dates as mdates 5 | from sklearn.neighbors import KernelDensity 6 | from scipy.stats import norm 7 | from scipy import stats 8 | import os 9 | 10 | os.chdir("D:/Rdata/Third_paper/third_paper_data/") 11 | 12 | total_dat=pd.read_excel('nanjing_total_feature.xlsx',index_col=[0]) 13 | 14 | 15 | total_dat_1=total_dat['20140106':'20141228'] 16 | xingqi_data=total_dat_1[['E_demand','month']] 17 | xingqi=['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday'] 18 | 19 | fig=plt.figure(figsize=(10,5)) 20 | ax1=fig.add_subplot(111) 21 | x=range(len(xingqi)) 22 | for i in range(1,51): 23 | ax1.plot(x,xingqi_data[7*i:7*i+7]['E_demand'].values,'r-',linewidth=3,color='m',alpha=0.5) 24 | 25 | 26 | ax1.plot(x,xingqi_data[0:7]['E_demand'].values,'r-',linewidth=3,color='m',alpha=0.5,label="Electricity consumption") 27 | ax1.axvline(x=0,color='black',linewidth=1,linestyle='dashed') 28 | ax1.axvline(x=1,color='black',linewidth=1,linestyle='dashed') 29 | ax1.axvline(x=2,color='black',linewidth=1,linestyle='dashed') 30 | ax1.axvline(x=3,color='black',linewidth=1,linestyle='dashed') 31 | ax1.axvline(x=4,color='black',linewidth=1,linestyle='dashed') 32 | ax1.axvline(x=5,color='black',linewidth=1,linestyle='dashed') 33 | ax1.axvline(x=6,color='black',linewidth=1,linestyle='dashed',label="Date") 34 | 35 | ax1.set_ylim(8000,35000) 36 | plt.ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 37 | plt.xticks(x, xingqi) 38 | plt.margins(0.001) 39 | plt.title("Electricity consumption from the weekly perspective") 40 | plt.xlabel('Date(day)') 41 | plt.ylabel('Electricity consumption(KWh)') 42 | ax1.legend(loc="upper right") 43 | 44 | 45 | -------------------------------------------------------------------------------- /最新预测.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import pandas as pd 3 | import numpy as np 4 | import matplotlib.dates as mdates 5 | from sklearn.neighbors import KernelDensity 6 | from scipy.stats import norm 7 | from scipy import stats 8 | import os 9 | 10 | os.chdir("D:/Rdata/Third_paper/third_paper_data/") 11 | 12 | ################################################################## 13 | ####南京 14 | 15 | dat_density=pd.read_csv('nanjing_quantile_25m.csv',index_col=[0]) 16 | dat_real_value=pd.read_excel('nanjing_total.xlsx',index_col=[0]) 17 | 18 | dat_density=dat_density.T 19 | n_pre=dat_density.shape[1] 20 | n_real_value=dat_real_value[(365-n_pre):365] 21 | 22 | 23 | density1 = stats.kde.gaussian_kde(dat_density.iloc[:,0].values) 24 | density2 = stats.kde.gaussian_kde(dat_density.iloc[:,1].values) 25 | density3 = stats.kde.gaussian_kde(dat_density.iloc[:,2].values) 26 | density4 = stats.kde.gaussian_kde(dat_density.iloc[:,3].values) 27 | density5 = stats.kde.gaussian_kde(dat_density.iloc[:,4].values) 28 | density6 = 
stats.kde.gaussian_kde(dat_density.iloc[:,5].values) 29 | density7 = stats.kde.gaussian_kde(dat_density.iloc[:,6].values) 30 | density8 = stats.kde.gaussian_kde(dat_density.iloc[:,7].values) 31 | density9 = stats.kde.gaussian_kde(dat_density.iloc[:,8].values) 32 | 33 | 34 | 35 | fig, ax = plt.subplots(nrows=3, ncols=3) 36 | fig.set_size_inches(20,15) 37 | 38 | x=np.linspace(10000, 30000, 99) 39 | 40 | y_max=np.max([np.max(density1(x)),np.max(density2(x)), 41 | np.max(density3(x)),np.max(density4(x)), 42 | np.max(density5(x)),np.max(density6(x)), 43 | np.max(density7(x)),np.max(density8(x)), 44 | np.max(density9(x))]) 45 | 46 | 47 | 48 | ax[0,0].plot(x, density1(x),linewidth=3,label='Gaussian kernel') 49 | ax[0,0].axvline(x=n_real_value.iloc[0,0],color='r',linewidth=3) 50 | ax[0,0].set_ylim(0,y_max) 51 | ax[0,0].set_title('2014-12-22') 52 | 53 | ax[0,1].plot(x, density2(x),linewidth=3,label='Gaussian kernel') 54 | ax[0,1].axvline(x=n_real_value.iloc[1,0],color='r',linewidth=3) 55 | ax[0,1].set_ylim(0,y_max) 56 | ax[0,1].set_title('2014-12-23') 57 | 58 | ax[0,2].plot(x, density3(x),linewidth=3,label='Gaussian kernel') 59 | ax[0,2].axvline(x=n_real_value.iloc[2,0],color='r',linewidth=3) 60 | ax[0,2].set_ylim(0,y_max) 61 | ax[0,2].set_title('2014-12-24') 62 | 63 | ax[1,0].plot(x, density4(x),linewidth=3,label='Gaussian kernel') 64 | ax[1,0].axvline(x=n_real_value.iloc[3,0],color='r',linewidth=3) 65 | ax[1,0].set_ylim(0,y_max) 66 | ax[1,0].set_title('2014-12-25') 67 | 68 | ax[1,1].plot(x, density5(x),linewidth=3,label='Gaussian kernel') 69 | ax[1,1].axvline(x=n_real_value.iloc[4,0],color='r',linewidth=3) 70 | ax[1,1].set_ylim(0,y_max) 71 | ax[1,1].set_title('2014-12-26') 72 | 73 | ax[1,2].plot(x, density6(x),linewidth=3,label='Gaussian kernel') 74 | ax[1,2].axvline(x=n_real_value.iloc[5,0],color='r',linewidth=3) 75 | ax[1,2].set_ylim(0,y_max) 76 | ax[1,2].set_title('2014-12-27') 77 | 78 | ax[2,0].plot(x, density7(x),linewidth=3,label='Gaussian kernel') 79 | ax[2,0].axvline(x=n_real_value.iloc[6,0],color='r',linewidth=3) 80 | ax[2,0].set_ylim(0,y_max) 81 | ax[2,0].set_title('2014-12-28') 82 | 83 | ax[2,1].plot(x, density8(x),linewidth=3,label='Gaussian kernel') 84 | ax[2,1].axvline(x=n_real_value.iloc[7,0],color='r',linewidth=3) 85 | ax[2,1].set_ylim(0,y_max) 86 | ax[2,1].set_title('2014-12-29') 87 | 88 | ax[2,2].plot(x, density9(x),linewidth=3,label='Gaussian kernel') 89 | ax[2,2].axvline(x=n_real_value.iloc[8,0],color='r',linewidth=3) 90 | ax[2,2].set_ylim(0,y_max) 91 | ax[2,2].set_title('2014-12-30') 92 | 93 | ax[0,0].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 94 | ax[0,1].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 95 | ax[0,2].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 96 | ax[1,0].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 97 | ax[1,1].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 98 | ax[1,2].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 99 | ax[2,0].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 100 | ax[2,1].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 101 | ax[2,2].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 102 | 103 | ax[0,0].set_xlabel('Electricity consumption(KWh)') 104 | ax[0,0].set_ylabel('Probability density') 105 | ax[0,0].legend(loc='upper right') 106 | 107 | ax[0,1].set_xlabel('Electricity consumption(KWh)') 108 | ax[0,1].set_ylabel('Probability density') 109 | ax[0,1].legend(loc='upper right') 110 | 111 | ax[0,2].set_xlabel('Electricity consumption(KWh)') 112 | 
ax[0,2].set_ylabel('Probability density') 113 | ax[0,2].legend(loc='upper right') 114 | 115 | ax[1,0].set_xlabel('Electricity consumption(KWh)') 116 | ax[1,0].set_ylabel('Probability density') 117 | ax[1,0].legend(loc='upper right') 118 | 119 | ax[1,1].set_xlabel('Electricity consumption(KWh)') 120 | ax[1,1].set_ylabel('Probability density') 121 | ax[1,1].legend(loc='upper right') 122 | 123 | ax[1,2].set_xlabel('Electricity consumption(KWh)') 124 | ax[1,2].set_ylabel('Probability density') 125 | ax[1,2].legend(loc='upper right') 126 | 127 | ax[2,0].set_xlabel('Electricity consumption(KWh)') 128 | ax[2,0].set_ylabel('Probability density') 129 | ax[2,0].legend(loc='upper right') 130 | 131 | ax[2,1].set_xlabel('Electricity consumption(KWh)') 132 | ax[2,1].set_ylabel('Probability density') 133 | ax[2,1].legend(loc='upper right') 134 | 135 | ax[2,2].set_xlabel('Electricity consumption(KWh)') 136 | ax[2,2].set_ylabel('Probability density') 137 | ax[2,2].legend(loc='upper right') 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | ################################################################################ 156 | ##############苏州########################## 157 | dat_density=pd.read_csv('suzhou_quantile_m.csv',index_col=[0]) 158 | dat_real_value=pd.read_excel('suzhou_total.xlsx',index_col=[0]) 159 | dat_density=dat_density.T 160 | n_pre=dat_density.shape[1] 161 | n_real_value=dat_real_value[(365-n_pre):365] 162 | 163 | 164 | density1 = stats.kde.gaussian_kde(dat_density.iloc[:,0].values) 165 | density2 = stats.kde.gaussian_kde(dat_density.iloc[:,1].values) 166 | density3 = stats.kde.gaussian_kde(dat_density.iloc[:,2].values) 167 | density4 = stats.kde.gaussian_kde(dat_density.iloc[:,3].values) 168 | density5 = stats.kde.gaussian_kde(dat_density.iloc[:,4].values) 169 | density6 = stats.kde.gaussian_kde(dat_density.iloc[:,5].values) 170 | density7 = stats.kde.gaussian_kde(dat_density.iloc[:,6].values) 171 | density8 = stats.kde.gaussian_kde(dat_density.iloc[:,7].values) 172 | density9 = stats.kde.gaussian_kde(dat_density.iloc[:,8].values) 173 | 174 | fig, ax = plt.subplots(nrows=3, ncols=3) 175 | fig.set_size_inches(20,15) 176 | 177 | x=np.linspace(10000, 30000, 99) 178 | 179 | ax[0,0].plot(x, density1(x),linewidth=3,label='Gaussian kernel') 180 | ax[0,0].axvline(x=n_real_value.iloc[0,0],color='r',linewidth=3) 181 | ax[0,0].set_title('2014-12-22') 182 | 183 | ax[0,1].plot(x, density2(x),linewidth=3,label='Gaussian kernel') 184 | ax[0,1].axvline(x=n_real_value.iloc[1,0],color='r',linewidth=3) 185 | ax[0,1].set_title('2014-12-23') 186 | 187 | ax[0,2].plot(x, density3(x),linewidth=3,label='Gaussian kernel') 188 | ax[0,2].axvline(x=n_real_value.iloc[2,0],color='r',linewidth=3) 189 | ax[0,2].set_title('2014-12-24') 190 | 191 | ax[1,0].plot(x, density4(x),linewidth=3,label='Gaussian kernel') 192 | ax[1,0].axvline(x=n_real_value.iloc[3,0],color='r',linewidth=3) 193 | ax[1,0].set_title('2014-12-25') 194 | 195 | ax[1,1].plot(x, density5(x),linewidth=3,label='Gaussian kernel') 196 | ax[1,1].axvline(x=n_real_value.iloc[4,0],color='r',linewidth=3) 197 | ax[1,1].set_title('2014-12-26') 198 | 199 | ax[1,2].plot(x, density6(x),linewidth=3,label='Gaussian kernel') 200 | ax[1,2].axvline(x=n_real_value.iloc[5,0],color='r',linewidth=3) 201 | ax[1,2].set_title('2014-12-27') 202 | 203 | ax[2,0].plot(x, density7(x),linewidth=3,label='Gaussian kernel') 204 | ax[2,0].axvline(x=n_real_value.iloc[6,0],color='r',linewidth=3) 205 | ax[2,0].set_title('2014-12-28') 206 | 
207 | ax[2,1].plot(x, density8(x),linewidth=3,label='Gaussian kernel') 208 | ax[2,1].axvline(x=n_real_value.iloc[7,0],color='r',linewidth=3) 209 | ax[2,1].set_title('2014-12-29') 210 | 211 | ax[2,2].plot(x, density9(x),linewidth=3,label='Gaussian kernel') 212 | ax[2,2].axvline(x=n_real_value.iloc[8,0],color='r',linewidth=3) 213 | ax[2,2].set_title('2014-12-30') 214 | 215 | ax[0,0].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 216 | ax[0,1].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 217 | ax[0,2].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 218 | ax[1,0].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 219 | ax[1,1].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 220 | ax[1,2].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 221 | ax[2,0].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 222 | ax[2,1].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 223 | ax[2,2].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 224 | 225 | ax[0,0].set_xlabel('Electricity consumption(KWh)') 226 | ax[0,0].set_ylabel('Probability density') 227 | ax[0,0].legend(loc='upper right') 228 | 229 | ax[0,1].set_xlabel('Electricity consumption(KWh)') 230 | ax[0,1].set_ylabel('Probability density') 231 | ax[0,1].legend(loc='upper right') 232 | 233 | ax[0,2].set_xlabel('Electricity consumption(KWh)') 234 | ax[0,2].set_ylabel('Probability density') 235 | ax[0,2].legend(loc='upper right') 236 | 237 | ax[1,0].set_xlabel('Electricity consumption(KWh)') 238 | ax[1,0].set_ylabel('Probability density') 239 | ax[1,0].legend(loc='upper right') 240 | 241 | ax[1,1].set_xlabel('Electricity consumption(KWh)') 242 | ax[1,1].set_ylabel('Probability density') 243 | ax[1,1].legend(loc='upper right') 244 | 245 | ax[1,2].set_xlabel('Electricity consumption(KWh)') 246 | ax[1,2].set_ylabel('Probability density') 247 | ax[1,2].legend(loc='upper right') 248 | 249 | ax[2,0].set_xlabel('Electricity consumption(KWh)') 250 | ax[2,0].set_ylabel('Probability density') 251 | ax[2,0].legend(loc='upper right') 252 | 253 | ax[2,1].set_xlabel('Electricity consumption(KWh)') 254 | ax[2,1].set_ylabel('Probability density') 255 | ax[2,1].legend(loc='upper right') 256 | 257 | ax[2,2].set_xlabel('Electricity consumption(KWh)') 258 | ax[2,2].set_ylabel('Probability density') 259 | ax[2,2].legend(loc='upper right') 260 | 261 | 262 | 263 | ################################################################################ 264 | ##############连云港########################## 265 | 266 | dat_density=pd.read_csv('lianyungang_quantile_m.csv',index_col=[0]) 267 | dat_real_value=pd.read_excel('lianyungang_total.xlsx',index_col=[0]) 268 | dat_density=dat_density.T 269 | n_pre=dat_density.shape[1] 270 | n_real_value=dat_real_value[(365-n_pre):365] 271 | 272 | 273 | density1 = stats.kde.gaussian_kde(dat_density.iloc[:,0].values) 274 | density2 = stats.kde.gaussian_kde(dat_density.iloc[:,1].values) 275 | density3 = stats.kde.gaussian_kde(dat_density.iloc[:,2].values) 276 | density4 = stats.kde.gaussian_kde(dat_density.iloc[:,3].values) 277 | density5 = stats.kde.gaussian_kde(dat_density.iloc[:,4].values) 278 | density6 = stats.kde.gaussian_kde(dat_density.iloc[:,5].values) 279 | density7 = stats.kde.gaussian_kde(dat_density.iloc[:,6].values) 280 | density8 = stats.kde.gaussian_kde(dat_density.iloc[:,7].values) 281 | density9 = stats.kde.gaussian_kde(dat_density.iloc[:,8].values) 282 | 283 | fig, ax = plt.subplots(nrows=3, ncols=3) 284 | fig.set_size_inches(20,15) 285 | 286 | x=np.linspace(10000, 
30000, 99) 287 | 288 | ax[0,0].plot(x, density1(x),linewidth=3,label='Gaussian kernel') 289 | ax[0,0].axvline(x=n_real_value.iloc[0,0],color='r',linewidth=3) 290 | ax[0,0].set_title('2014-12-22') 291 | 292 | ax[0,1].plot(x, density2(x),linewidth=3,label='Gaussian kernel') 293 | ax[0,1].axvline(x=n_real_value.iloc[1,0],color='r',linewidth=3) 294 | ax[0,1].set_title('2014-12-23') 295 | 296 | ax[0,2].plot(x, density3(x),linewidth=3,label='Gaussian kernel') 297 | ax[0,2].axvline(x=n_real_value.iloc[2,0],color='r',linewidth=3) 298 | ax[0,2].set_title('2014-12-24') 299 | 300 | ax[1,0].plot(x, density4(x),linewidth=3,label='Gaussian kernel') 301 | ax[1,0].axvline(x=n_real_value.iloc[3,0],color='r',linewidth=3) 302 | ax[1,0].set_title('2014-12-25') 303 | 304 | ax[1,1].plot(x, density5(x),linewidth=3,label='Gaussian kernel') 305 | ax[1,1].axvline(x=n_real_value.iloc[4,0],color='r',linewidth=3) 306 | ax[1,1].set_title('2014-12-26') 307 | 308 | ax[1,2].plot(x, density6(x),linewidth=3,label='Gaussian kernel') 309 | ax[1,2].axvline(x=n_real_value.iloc[5,0],color='r',linewidth=3) 310 | ax[1,2].set_title('2014-12-27') 311 | 312 | ax[2,0].plot(x, density7(x),linewidth=3,label='Gaussian kernel') 313 | ax[2,0].axvline(x=n_real_value.iloc[6,0],color='r',linewidth=3) 314 | ax[2,0].set_title('2014-12-28') 315 | 316 | ax[2,1].plot(x, density8(x),linewidth=3,label='Gaussian kernel') 317 | ax[2,1].axvline(x=n_real_value.iloc[7,0],color='r',linewidth=3) 318 | ax[2,1].set_title('2014-12-29') 319 | 320 | ax[2,2].plot(x, density9(x),linewidth=3,label='Gaussian kernel') 321 | ax[2,2].axvline(x=n_real_value.iloc[8,0],color='r',linewidth=3) 322 | ax[2,2].set_title('2014-12-30') 323 | 324 | ax[0,0].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 325 | ax[0,1].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 326 | ax[0,2].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 327 | ax[1,0].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 328 | ax[1,1].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 329 | ax[1,2].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 330 | ax[2,0].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 331 | ax[2,1].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 332 | ax[2,2].ticklabel_format(style='sci', axis='y', scilimits=(0,0)) 333 | 334 | ax[0,0].set_xlabel('Electricity consumption(KWh)') 335 | ax[0,0].set_ylabel('Probability density') 336 | ax[0,0].legend(loc='upper right') 337 | 338 | ax[0,1].set_xlabel('Electricity consumption(KWh)') 339 | ax[0,1].set_ylabel('Probability density') 340 | ax[0,1].legend(loc='upper right') 341 | 342 | ax[0,2].set_xlabel('Electricity consumption(KWh)') 343 | ax[0,2].set_ylabel('Probability density') 344 | ax[0,2].legend(loc='upper right') 345 | 346 | ax[1,0].set_xlabel('Electricity consumption(KWh)') 347 | ax[1,0].set_ylabel('Probability density') 348 | ax[1,0].legend(loc='upper right') 349 | 350 | ax[1,1].set_xlabel('Electricity consumption(KWh)') 351 | ax[1,1].set_ylabel('Probability density') 352 | ax[1,1].legend(loc='upper right') 353 | 354 | ax[1,2].set_xlabel('Electricity consumption(KWh)') 355 | ax[1,2].set_ylabel('Probability density') 356 | ax[1,2].legend(loc='upper right') 357 | 358 | ax[2,0].set_xlabel('Electricity consumption(KWh)') 359 | ax[2,0].set_ylabel('Probability density') 360 | ax[2,0].legend(loc='upper right') 361 | 362 | ax[2,1].set_xlabel('Electricity consumption(KWh)') 363 | ax[2,1].set_ylabel('Probability density') 364 | ax[2,1].legend(loc='upper right') 365 | 366 | 
ax[2,2].set_xlabel('Electricity consumption(KWh)') 367 | ax[2,2].set_ylabel('Probability density') 368 | ax[2,2].legend(loc='upper right') 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | --------------------------------------------------------------------------------