├── .DS_Store
├── .gitattributes
├── Data
    └── electricity-normalized.csv
├── PI_class_EnbPI.py
├── README.md
├── SPCI_class.py
├── data.py
├── requirements.txt
├── tutorial_electric_EnbPI_SPCI.ipynb
├── utils_EnbPI.py
├── utils_SPCI.py
└── visualize.py


/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hamrel-cxu/SPCI-code/d598dd32099402bc1b6d636f471a155001c0e54e/.DS_Store


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 


--------------------------------------------------------------------------------
/PI_class_EnbPI.py:
--------------------------------------------------------------------------------
  1 | # from tensorflow.keras.optimizers import Adam
  2 | # import keras
  3 | import pandas as pd
  4 | import numpy as np
  5 | # from keras.models import clone_model
  6 | from sklearn.linear_model import LogisticRegression
  7 | from statsmodels.tsa.statespace.dynamic_factor_mq import DynamicFactorMQ
  8 | from statsmodels.tsa.statespace.exponential_smoothing import ExponentialSmoothing
  9 | from statsmodels.tsa.statespace.sarimax import SARIMAX
 10 | import math
 11 | import time as time
 12 | import utils_EnbPI as util
 13 | import warnings
 14 | warnings.filterwarnings("ignore")
 15 | 
 16 | 
 17 | class prediction_interval():
 18 |     '''
 19 |         Create prediction intervals using different methods (i.e., EnbPI, J+aB ICP, Weighted, Time-series)
 20 |     '''
 21 | 
 22 |     def __init__(self, fit_func, X_train, X_predict, Y_train, Y_predict):
 23 |         self.regressor = fit_func
 24 |         self.X_train = X_train
 25 |         self.X_predict = X_predict
 26 |         self.Y_train = Y_train
 27 |         self.Y_predict = Y_predict
 28 |         # Predicted training data centers by EnbPI
 29 |         self.Ensemble_train_interval_centers = []
 30 |         # Predicted test data centers by EnbPI
 31 |         self.Ensemble_pred_interval_centers = []
 32 |         self.Ensemble_online_resid = np.array([])  # LOO scores
 33 |         self.Ensemble_pred_interval_ends = []  # Upper and lower end
 34 |         self.beta_hat_bins = []
 35 |         self.ICP_fitted_func = []  # it only store 1 fitted ICP func.
 36 |         self.ICP_resid = np.array([])
 37 |         self.WeightCP_online_resid = np.array([])
 38 |         self.JaB_boot_samples_idx = 0
 39 |         self.JaB_boot_predictions = 0
 40 | 
 41 |     def fit_bootstrap_models_online(self, B, miss_test_idx):
 42 |         '''
 43 |           Train B bootstrap estimators from subsets of (X_train, Y_train), compute aggregated predictors, and compute the residuals
 44 |         '''
 45 |         n = len(self.X_train)
 46 |         n1 = len(self.X_predict)
 47 |         # hold indices of training data for each f^b
 48 |         boot_samples_idx = util.generate_bootstrap_samples(n, n, B)
 49 |         # hold predictions from each f^b
 50 |         boot_predictions = np.zeros((B, (n+n1)), dtype=float)
 51 |         # for i^th column, it shows which f^b uses i in training (so exclude in aggregation)
 52 |         in_boot_sample = np.zeros((B, n), dtype=bool)
 53 |         out_sample_predict = np.zeros((n, n1))
 54 |         start = time.time()
 55 |         for b in range(B):
 56 |             model = self.regressor
 57 |             # NOTE: it is CRITICAL to clone the model, as o/w it will OVERFIT to the model across different iterations of bootstrap S_b.
 58 |             # I originally did not understand that doing so is necessary but now know it
 59 |             if self.regressor.__class__.__name__ == 'Sequential':
 60 |                 start1 = time.time()
 61 |                 model = clone_model(self.regressor)
 62 |                 opt = Adam(5e-4)
 63 |                 model.compile(loss='mean_squared_error', optimizer=opt)
 64 |                 callback = keras.callbacks.EarlyStopping(
 65 |                     monitor='loss', patience=10)
 66 |                 bsize = int(0.1*len(np.unique(boot_samples_idx[b])))  # Was 0.1
 67 |                 if self.regressor.name == 'NeuralNet':
 68 |                     # verbose definition here: https://keras.io/api/models/model_training_apis/#fit-method. 0 means silent
 69 |                     # NOTE: I do NOT want epoches to be too large, as we then tend to be too close to the actual Y_t, NOT f(X_t).
 70 |                     # Epoch was 250
 71 |                     model.fit(self.X_train[boot_samples_idx[b], :], self.Y_train[boot_samples_idx[b], ],
 72 |                               epochs=250, batch_size=bsize, callbacks=[callback], verbose=0)
 73 |                 else:
 74 |                     # This is RNN, mainly have different shape and decrease epochs for faster computation
 75 |                     model.fit(self.X_train[boot_samples_idx[b], :], self.Y_train[boot_samples_idx[b], ],
 76 |                               epochs=10, batch_size=bsize, callbacks=[callback], verbose=0)
 77 |                 # NOTE, this multiplied by B tells us total estimation time
 78 |                 print(
 79 |                     f'Took {time.time()-start1} secs to fit the {b}th boostrap model')
 80 |             else:
 81 |                 model = model.fit(self.X_train[boot_samples_idx[b], :],
 82 |                                   self.Y_train[boot_samples_idx[b], ])
 83 |             boot_predictions[b] = model.predict(
 84 |                 np.r_[self.X_train, self.X_predict]).flatten()
 85 |             in_boot_sample[b, boot_samples_idx[b]] = True
 86 |         print(
 87 |             f'Finish Fitting B Bootstrap models, took {time.time()-start} secs.')
 88 |         start = time.time()
 89 |         keep = []
 90 |         for i in range(n):
 91 |             b_keep = np.argwhere(~(in_boot_sample[:, i])).reshape(-1)
 92 |             if(len(b_keep) > 0):
 93 |                 # NOTE: Append these training centers too see their magnitude
 94 |                 # The reason is sometimes they are TOO close to actual Y.
 95 |                 self.Ensemble_train_interval_centers.append(
 96 |                     boot_predictions[b_keep, i].mean())
 97 |                 resid_LOO = self.Y_train[i] - \
 98 |                     boot_predictions[b_keep, i].mean()
 99 |                 out_sample_predict[i] = boot_predictions[b_keep, n:].mean(0)
100 |                 keep = keep+[b_keep]
101 |             else:  # if aggregating an empty set of models, predict zero everywhere
102 |                 resid_LOO = self.Y_train[i]
103 |                 out_sample_predict[i] = np.zeros(n1)
104 |             self.Ensemble_online_resid = np.append(
105 |                 self.Ensemble_online_resid, resid_LOO)
106 |             keep = keep+[]
107 |         # print(f'Max LOO training residual is {np.max(self.Ensemble_online_resid)}')
108 |         # print(f'Min LOO training residual is {np.min(self.Ensemble_online_resid)}')
109 |         sorted_out_sample_predict = out_sample_predict.mean(
110 |             axis=0)  # length n1
111 |         resid_out_sample = self.Y_predict-sorted_out_sample_predict
112 |         if len(miss_test_idx) > 0:
113 |             # Replace missing residuals with that from the immediate predecessor that is not missing, as
114 |             # o/w we are not assuming prediction data are missing
115 |             for l in range(len(miss_test_idx)):
116 |                 i = miss_test_idx[l]
117 |                 if i > 0:
118 |                     j = i-1
119 |                     while j in miss_test_idx[:l]:
120 |                         j -= 1
121 |                     resid_out_sample[i] = resid_out_sample[j]
122 | 
123 |                 else:
124 |                     # The first Y during testing is missing, let it be the last of the training residuals
125 |                     # note, training data already takes out missing values, so doing is is fine
126 |                     resid_out_sample[0] = self.Ensemble_online_resid[-1]
127 |         self.Ensemble_online_resid = np.append(
128 |             self.Ensemble_online_resid, resid_out_sample)
129 |         # print(f'Finish Computing LOO residuals, took {time.time()-start} secs.')
130 |         # print(f'Max LOO test residual is {np.max(self.Ensemble_online_resid[n:])}')
131 |         # print(f'Min LOO test residual is {np.min(self.Ensemble_online_resid[n:])}')
132 |         self.Ensemble_pred_interval_centers = sorted_out_sample_predict
133 | 
134 |     def compute_PIs_Ensemble_online(self, alpha, stride, smallT=False):
135 |         # If smallT, we would only start with the last n number of LOO residuals, rather than use the full length T ones. Used in change detection
136 |         ''' NOTE: smallT can be important if time-series is very dynamic, in which case training MORE data may actaully be worse (because quantile longer)'''
137 |         n = len(self.X_train)
138 |         if smallT:
139 |             past_window = 1000 if len(self.X_train) > 2000 else 100
140 |             past_window = 500
141 |             n = min(past_window, len(self.X_train))
142 |         # Now f^b and LOO residuals have been constructed from earlier
143 |         out_sample_predict = self.Ensemble_pred_interval_centers
144 |         start = time.time()
145 |         # Matrix, where each row is a UNIQUE slice of residuals with length stride.
146 |         resid_strided = util.strided_app(
147 |             self.Ensemble_online_resid[len(self.X_train)-n:-1], n, stride)
148 |         print(f'Shape of slided residual lists is {resid_strided.shape}')
149 |         num_unique_resid = resid_strided.shape[0]
150 |         width_left = np.zeros(num_unique_resid)
151 |         width_right = np.zeros(num_unique_resid)
152 |         # # NEW, alpha becomes alpha_t. Uncomment things below if we decide to use this upgraded EnbPI
153 |         # alpha_t = alpha
154 |         # errs = []
155 |         # gamma = 0.005
156 |         # method = 'simple'  # 'simple' or 'complex'
157 |         # self.alphas = []
158 |         for i in range(num_unique_resid):
159 |             # for p in range(stride):  # NEW for adaptive alpha
160 |             past_resid = resid_strided[i, :]
161 |             # The number of bins will be determined INSIDE binning
162 |             beta_hat_bin = util.binning(past_resid, alpha)
163 |             # beta_hat_bin = util.binning(past_resid, alpha_t)
164 |             self.beta_hat_bins.append(beta_hat_bin)
165 |             width_left[i] = np.percentile(
166 |                 past_resid, math.ceil(100*beta_hat_bin))
167 |             width_right[i] = np.percentile(
168 |                 past_resid, math.ceil(100*(1-alpha+beta_hat_bin)))
169 |             # width_right[i] = np.percentile(past_resid, math.ceil(100*(1-alpha_t+beta_hat_bin)))
170 |             # Y_t = self.Y_predict[i*stride+p]
171 |             # Y_t_hat = out_sample_predict[i*stride+p]
172 |             # lower_t, upper_t = Y_t_hat+width_left[i], Y_t_hat+width_right[i]
173 |             # err = 1 if Y_t < lower_t or Y_t > upper_t else 0
174 |             # errs.append(err)
175 |             # alpha_t = util.adjust_alpha_t(alpha_t, alpha, errs, gamma, method)
176 |             # if alpha_t > 1:
177 |             #     alpha_t = 1
178 |             # if alpha_t < 0:
179 |             #     alpha_t = 0
180 |             # self.alphas.append(alpha_t)
181 |         print(
182 |             f'Finish Computing {num_unique_resid} UNIQUE Prediction Intervals, took {time.time()-start} secs.')
183 |         # This is because |width|=T1/stride.
184 |         width_left = np.repeat(width_left, stride)
185 |         # This is because |width|=T1/stride.
186 |         width_right = np.repeat(width_right, stride)
187 |         PIs_Ensemble = pd.DataFrame(np.c_[out_sample_predict+width_left,
188 |                                           out_sample_predict+width_right], columns=['lower', 'upper'])
189 |         self.Ensemble_pred_interval_ends = PIs_Ensemble
190 |         # print(time.time()-start)
191 |         return PIs_Ensemble
192 | 
193 |     '''
194 |         Jackknife+-after-bootstrap (used in Figure 8)
195 |     '''
196 | 
197 |     def fit_bootstrap_models(self, B):
198 |         '''
199 |           Train B bootstrap estimators and calculate LOO predictions on X_train and X_predict
200 |         '''
201 |         n = len(self.X_train)
202 |         boot_samples_idx = util.generate_bootstrap_samples(n, n, B)
203 |         n1 = len(np.r_[self.X_train, self.X_predict])
204 |         # P holds the predictions from individual bootstrap estimators
205 |         predictions = np.zeros((B, n1), dtype=float)
206 |         for b in range(B):
207 |             model = self.regressor
208 |             if self.regressor.__class__.__name__ == 'Sequential':
209 |                 model = clone_model(self.regressor)
210 |                 opt = Adam(5e-4)
211 |                 model.compile(loss='mean_squared_error', optimizer=opt)
212 |                 callback = keras.callbacks.EarlyStopping(
213 |                     monitor='loss', patience=10)
214 |                 if self.regressor.name == 'NeuralNet':
215 |                     model.fit(self.X_train[boot_samples_idx[b], :], self.Y_train[boot_samples_idx[b], ],
216 |                               epochs=1000, batch_size=100, callbacks=[callback], verbose=0)
217 |                 else:
218 |                     # This is RNN, mainly have different shape and decrease epochs
219 |                     model.fit(self.X_train[boot_samples_idx[b], :], self.Y_train[boot_samples_idx[b], ],
220 |                               epochs=100, batch_size=100, callbacks=[callback], verbose=0)
221 |             else:
222 |                 model = model.fit(self.X_train[boot_samples_idx[b], :],
223 |                                   self.Y_train[boot_samples_idx[b], ])
224 |             predictions[b] = model.predict(
225 |                 np.r_[self.X_train, self.X_predict]).flatten()
226 |         self.JaB_boot_samples_idx = boot_samples_idx
227 |         self.JaB_boot_predictions = predictions
228 | 
229 |     def compute_PIs_JaB(self, alpha):
230 |         '''
231 |         Using mean aggregation
232 |         '''
233 |         n = len(self.X_train)
234 |         n1 = len(self.X_predict)
235 |         boot_samples_idx = self.JaB_boot_samples_idx
236 |         boot_predictions = self.JaB_boot_predictions
237 |         B = len(boot_predictions)
238 |         in_boot_sample = np.zeros((B, n), dtype=bool)
239 |         for b in range(len(in_boot_sample)):
240 |             in_boot_sample[b, boot_samples_idx[b]] = True
241 |         resids_LOO = np.zeros(n)
242 |         muh_LOO_vals_testpoint = np.zeros((n, n1))
243 |         for i in range(n):
244 |             b_keep = np.argwhere(~(in_boot_sample[:, i])).reshape(-1)
245 |             if(len(b_keep) > 0):
246 |                 resids_LOO[i] = np.abs(
247 |                     self.Y_train[i] - boot_predictions[b_keep, i].mean())
248 |                 muh_LOO_vals_testpoint[i] = boot_predictions[b_keep, n:].mean(
249 |                     0)
250 |             else:  # if aggregating an empty set of models, predict zero everywhere
251 |                 resids_LOO[i] = np.abs(self.Y_train[i])
252 |                 muh_LOO_vals_testpoint[i] = np.zeros(n1)
253 |         ind_q = (np.ceil((1-alpha)*(n+1))).astype(int)
254 |         return pd.DataFrame(
255 |             np.c_[np.sort(muh_LOO_vals_testpoint.T - resids_LOO, axis=1).T[-ind_q],
256 |                   np.sort(muh_LOO_vals_testpoint.T + resids_LOO, axis=1).T[ind_q-1]],
257 |             columns=['lower', 'upper'])
258 | 
259 |     '''
260 |         Inductive Conformal Prediction
261 |     '''
262 | 
263 |     def compute_PIs_ICP(self, alpha, l):
264 |         n = len(self.X_train)
265 |         proper_train = np.random.choice(n, l, replace=False)
266 |         X_train = self.X_train[proper_train, :]
267 |         Y_train = self.Y_train[proper_train]
268 |         X_calibrate = np.delete(self.X_train, proper_train, axis=0)
269 |         Y_calibrate = np.delete(self.Y_train, proper_train)
270 |         model = self.regressor
271 |         if self.regressor.__class__.__name__ == 'Sequential':
272 |             callback = keras.callbacks.EarlyStopping(
273 |                 monitor='loss', patience=10)
274 |             bsize = int(0.1*X_train.shape[0])  # Was 0.1
275 |             if self.regressor.name == 'NeuralNet':
276 |                 model.fit(self.X_train, self.Y_train,
277 |                           epochs=250, batch_size=bsize, callbacks=[callback], verbose=0)
278 |             else:
279 |                 # This is RNN, mainly have different epochs
280 |                 model.fit(X_train, Y_train,
281 |                           epochs=10, batch_size=bsize, callbacks=[callback], verbose=0)
282 |             self.ICP_fitted_func.append(model)
283 |         else:
284 |             self.ICP_fitted_func.append(self.regressor.fit(X_train, Y_train))
285 |         predictions_calibrate = self.ICP_fitted_func[0].predict(
286 |             X_calibrate).flatten()
287 |         calibrate_residuals = np.abs(Y_calibrate-predictions_calibrate)
288 |         out_sample_predict = self.ICP_fitted_func[0].predict(
289 |             self.X_predict).flatten()
290 |         self.ICP_resid = np.append(
291 |             self.ICP_resid, calibrate_residuals)  # length n-l
292 |         ind_q = math.ceil(100*(1-alpha))  # 1-alpha%
293 |         width = np.abs(np.percentile(self.ICP_resid, ind_q, axis=-1).T)
294 |         PIs_ICP = pd.DataFrame(np.c_[out_sample_predict-width,
295 |                                      out_sample_predict+width], columns=['lower', 'upper'])
296 |         # print(time.time()-start)
297 |         return PIs_ICP
298 | 
299 |     def compute_PIs_ICP_online(self, alpha, l, stride=1):
300 |         n = len(self.X_train)
301 |         proper_train = np.random.choice(n, l, replace=False)
302 |         X_train = self.X_train[proper_train, :]
303 |         Y_train = self.Y_train[proper_train]
304 |         X_calibrate = np.delete(self.X_train, proper_train, axis=0)
305 |         Y_calibrate = np.delete(self.Y_train, proper_train)
306 |         model = self.regressor
307 |         if self.regressor.__class__.__name__ == 'Sequential':
308 |             callback = keras.callbacks.EarlyStopping(
309 |                 monitor='loss', patience=10)
310 |             bsize = int(0.1*X_train.shape[0])  # Was 0.1
311 |             if self.regressor.name == 'NeuralNet':
312 |                 model.fit(self.X_train, self.Y_train,
313 |                           epochs=250, batch_size=bsize, callbacks=[callback], verbose=0)
314 |             else:
315 |                 # This is RNN, mainly have different epochs
316 |                 model.fit(X_train, Y_train,
317 |                           epochs=10, batch_size=bsize, callbacks=[callback], verbose=0)
318 |             self.ICP_fitted_func.append(model)
319 |         else:
320 |             self.ICP_fitted_func.append(self.regressor.fit(X_train, Y_train))
321 |         predictions_calibrate = self.ICP_fitted_func[0].predict(
322 |             X_calibrate).flatten()
323 |         calibrate_residuals = np.abs(Y_calibrate-predictions_calibrate)
324 |         out_sample_predict = self.ICP_fitted_func[0].predict(
325 |             self.X_predict).flatten()
326 |         out_sample_residuals = np.abs(self.Y_predict-out_sample_predict)
327 |         self.ICP_resid = np.append(
328 |             self.ICP_resid, calibrate_residuals)  # length n-l
329 |         self.ICP_resid = np.append(
330 |             self.ICP_resid, out_sample_residuals)  # length n-l
331 |         resid_strided = util.strided_app(
332 |             self.ICP_resid[:-1], n-l, stride)
333 |         ind_q = math.ceil(100*(1-alpha))  # 1-alpha%
334 |         print(f'Shape of slided residual lists is {resid_strided.shape}')
335 |         num_unique_resid = resid_strided.shape[0]
336 |         width = np.zeros(num_unique_resid)
337 |         for i in range(num_unique_resid):
338 |             past_resid = resid_strided[i, :]
339 |             width[i] = np.percentile(past_resid, ind_q)
340 |         width = np.repeat(width, stride)
341 |         PIs_ICP = pd.DataFrame(np.c_[out_sample_predict-width,
342 |                                      out_sample_predict+width], columns=['lower', 'upper'])
343 |         return PIs_ICP
344 | 
345 |     '''
346 |         Weighted Conformal Prediction
347 |     '''
348 | 
349 |     def compute_PIs_Weighted_ICP(self, alpha, l):
350 |         '''The residuals are weighted by fitting a logistic regression on
351 |            (X_calibrate, C=0) \cup (X_predict, C=1'''
352 |         n = len(self.X_train)
353 |         n1 = len(self.X_predict)
354 |         proper_train = np.random.choice(n, l, replace=False)
355 |         X_train = self.X_train[proper_train, :]
356 |         Y_train = self.Y_train[proper_train]
357 |         X_calibrate = np.delete(self.X_train, proper_train, axis=0)
358 |         Y_calibrate = np.delete(self.Y_train, proper_train)
359 |         # Main difference from ICP
360 |         C_calibrate = np.zeros(n-l)
361 |         C_predict = np.ones(n1)
362 |         X_weight = np.r_[X_calibrate, self.X_predict]
363 |         C_weight = np.r_[C_calibrate, C_predict]
364 |         if len(X_weight.shape) > 2:
365 |             # Reshape for RNN
366 |             tot, _, shap = X_weight.shape
367 |             X_weight = X_weight.reshape((tot, shap))
368 |         clf = LogisticRegression(random_state=0).fit(X_weight, C_weight)
369 |         Prob = clf.predict_proba(X_weight)
370 |         Weights = Prob[:, 1]/(1-Prob[:, 0])  # n-l+n1 in length
371 |         model = self.regressor
372 |         if self.regressor.__class__.__name__ == 'Sequential':
373 |             callback = keras.callbacks.EarlyStopping(
374 |                 monitor='loss', patience=10)
375 |             bsize = int(0.1*X_train.shape[0])  # Was 0.1
376 |             if self.regressor.name == 'NeuralNet':
377 |                 model.fit(self.X_train, self.Y_train,
378 |                           epochs=250, batch_size=bsize, callbacks=[callback], verbose=0)
379 |             else:
380 |                 # This is RNN, mainly have different epochs
381 |                 model.fit(X_train, Y_train,
382 |                           epochs=10, batch_size=bsize, callbacks=[callback], verbose=0)
383 |             self.ICP_fitted_func.append(model)
384 |         else:
385 |             self.ICP_fitted_func.append(self.regressor.fit(X_train, Y_train))
386 |         predictions_calibrate = self.ICP_fitted_func[0].predict(
387 |             X_calibrate).flatten()
388 |         calibrate_residuals = np.abs(Y_calibrate-predictions_calibrate)
389 |         out_sample_predict = self.ICP_fitted_func[0].predict(
390 |             self.X_predict).flatten()
391 |         self.WeightCP_online_resid = np.append(
392 |             self.WeightCP_online_resid, calibrate_residuals)  # length n-1
393 |         width = np.abs(util.weighted_quantile(values=self.WeightCP_online_resid, quantiles=1-alpha,
394 |                                               sample_weight=Weights[:n-l]))
395 |         PIs_ICP = pd.DataFrame(np.c_[out_sample_predict-width,
396 |                                      out_sample_predict+width], columns=['lower', 'upper'])
397 |         # print(time.time()-start)
398 |         return PIs_ICP
399 | 
400 |     def compute_PIs_Weighted_ICP_online(self, alpha, l, stride=1):
401 |         '''The residuals are weighted by fitting a logistic regression on
402 |            (X_calibrate, C=0) \cup (X_predict, C=1'''
403 |         n = len(self.X_train)
404 |         n1 = len(self.X_predict)
405 |         proper_train = np.random.choice(n, l, replace=False)
406 |         X_train = self.X_train[proper_train, :]
407 |         Y_train = self.Y_train[proper_train]
408 |         X_calibrate = np.delete(self.X_train, proper_train, axis=0)
409 |         Y_calibrate = np.delete(self.Y_train, proper_train)
410 |         # Main difference from ICP
411 |         C_calibrate = np.zeros(n-l)
412 |         C_predict = np.ones(n1)
413 |         X_weight = np.r_[X_calibrate, self.X_predict]
414 |         C_weight = np.r_[C_calibrate, C_predict]
415 |         if len(X_weight.shape) > 2:
416 |             # Reshape for RNN
417 |             tot, _, shap = X_weight.shape
418 |             X_weight = X_weight.reshape((tot, shap))
419 |         clf = LogisticRegression(random_state=0).fit(X_weight, C_weight)
420 |         Prob = clf.predict_proba(X_weight)
421 |         Weights = Prob[:, 1]/(1-Prob[:, 0])  # n-l+n1 in length
422 |         model = self.regressor
423 |         if self.regressor.__class__.__name__ == 'Sequential':
424 |             callback = keras.callbacks.EarlyStopping(
425 |                 monitor='loss', patience=10)
426 |             bsize = int(0.1*X_train.shape[0])  # Was 0.1
427 |             if self.regressor.name == 'NeuralNet':
428 |                 model.fit(self.X_train, self.Y_train,
429 |                           epochs=250, batch_size=bsize, callbacks=[callback], verbose=0)
430 |             else:
431 |                 # This is RNN, mainly have different epochs
432 |                 model.fit(X_train, Y_train,
433 |                           epochs=10, batch_size=bsize, callbacks=[callback], verbose=0)
434 |             self.ICP_fitted_func.append(model)
435 |         else:
436 |             self.ICP_fitted_func.append(self.regressor.fit(X_train, Y_train))
437 |         predictions_calibrate = self.ICP_fitted_func[0].predict(
438 |             X_calibrate).flatten()
439 |         calibrate_residuals = np.abs(Y_calibrate-predictions_calibrate)
440 |         out_sample_predict = self.ICP_fitted_func[0].predict(
441 |             self.X_predict).flatten()
442 |         out_sample_residuals = np.abs(self.Y_predict-out_sample_predict)
443 |         self.WeightCP_online_resid = np.append(
444 |             self.WeightCP_online_resid, calibrate_residuals)  # length n-1
445 |         self.WeightCP_online_resid = np.append(
446 |             self.WeightCP_online_resid, out_sample_residuals)  # length n-1
447 |         resid_strided = util.strided_app(
448 |             self.WeightCP_online_resid[:-1], n-l, stride)
449 |         num_unique_resid = resid_strided.shape[0]
450 |         weight_strided = util.strided_app(
451 |             Weights[:-1], n-l, stride)
452 |         width = np.zeros(num_unique_resid)
453 |         for i in range(num_unique_resid):
454 |             past_resid = resid_strided[i, :]
455 |             past_weights = weight_strided[i, :]
456 |             width[i] = np.abs(util.weighted_quantile(values=past_resid,
457 |                                                      quantiles=1-alpha, sample_weight=past_weights))
458 |         width = np.repeat(width, stride)
459 |         PIs_WICP = pd.DataFrame(np.c_[out_sample_predict-width,
460 |                                       out_sample_predict+width], columns=['lower', 'upper'])
461 |         # print(time.time()-start)
462 |         return PIs_WICP
463 | 
464 |     def compute_PIs_tseries_online(self, alpha, name):
465 |         '''
466 |             Use train_size to form model and the rest to be out-sample-prediction
467 |         '''
468 |         # Concatenate training and testing together
469 |         data = pd.DataFrame(np.r_[self.Y_train, self.Y_predict])
470 |         # Train model
471 |         train_size = len(self.Y_train)
472 |         if name == 'ARIMA(10,1,10)':
473 |             training_mod = SARIMAX(data[:train_size], order=(10, 1, 10))
474 |             mod = SARIMAX(data, order=(10, 1, 10))
475 |         if name == 'ExpSmoothing':
476 |             training_mod = ExponentialSmoothing(
477 |                 data[:train_size], trend=True, damped_trend=True, seasonal=24)
478 |             mod = ExponentialSmoothing(
479 |                 data, trend=True, damped_trend=True, seasonal=24)
480 |         if name == 'DynamicFactor':
481 |             training_mod = DynamicFactorMQ(data[:train_size])
482 |             mod = DynamicFactorMQ(data)
483 |         print('training')
484 |         training_res = training_mod.fit(disp=0)
485 |         print('training done')
486 |         # Use in full model
487 |         res = mod.filter(training_res.params)
488 |         # Get the insample prediction interval (which is outsample prediction interval)
489 |         pred = res.get_prediction(
490 |             start=data.index[train_size], end=data.index[-1])
491 |         pred_int = pred.conf_int(alpha=alpha)  # prediction interval
492 |         PIs_res = pd.DataFrame(
493 |             np.c_[pred_int.iloc[:, 0], pred_int.iloc[:, 1]], columns=['lower', 'upper'])
494 |         return(PIs_res)
495 | 
496 |     def Winkler_score(self, PIs_ls, data_name, methods_name, alpha):
497 |         # Examine if each test point is in the intervals
498 |         # If in, then score += width of intervals
499 |         # If not,
500 |         # If true y falls under lower end, score += width + 2*(lower end-true y)/alpha
501 |         # If true y lies above upper end, score += width + 2*(true y-upper end)/alpha
502 |         n1 = len(self.Y_predict)
503 |         score_ls = []
504 |         for i in range(len(methods_name)):
505 |             score = 0
506 |             for j in range(n1):
507 |                 upper = PIs_ls[i].loc[j, 'upper']
508 |                 lower = PIs_ls[i].loc[j, 'lower']
509 |                 width = upper-lower
510 |                 truth = self.Y_predict[j]
511 |                 if (truth >= lower) & (truth <= upper):
512 |                     score += width
513 |                 elif truth < lower:
514 |                     score += width + 2 * (lower-truth)/alpha
515 |                 else:
516 |                     score += width + 2 * (truth-upper)/alpha
517 |             score_ls.append(score)
518 |         return(score_ls)
519 | 
520 |     '''
521 |         All together
522 |     '''
523 | 
524 |     def run_experiments(self, alpha, stride, data_name, itrial, true_Y_predict=[], get_plots=False, none_CP=False, methods=['Ensemble', 'ICP', 'Weighted_ICP'], smallT=False, non_EnbPI_online=False):
525 |         '''
526 |             NOTE: I added a "true_Y_predict" option, which will be used for calibrating coverage under missing data
527 |             In particular, this is needed when the Y_predict we use for training is NOT the same as true Y_predict
528 |             Update:
529 |                 smallT: bool, denotes whether we use ALL past LOO residuals or just a small set. Used in quickest detection (see Sec.6, Fig.7 in the paper)
530 |         '''
531 |         train_size = len(self.X_train)
532 |         np.random.seed(98765+itrial)
533 |         if none_CP:
534 |             results = pd.DataFrame(columns=['itrial', 'dataname',
535 |                                             'method', 'train_size', 'coverage', 'width'])
536 |             print('Not using Conformal Prediction Methods')
537 |             save_name = {'ARIMA(10,1,10)': 'ARIMA',
538 |                          'ExpSmoothing': 'ExpSmoothing',
539 |                          'DynamicFactor': 'DynamicFactor'}
540 |             PIs = []
541 |             for name in save_name.keys():
542 |                 print(f'Running {name}')
543 |                 PI_res = self.compute_PIs_tseries_online(alpha, name=name)
544 |                 if ('Solar' in data_name) | ('Wind' in data_name):
545 |                     PI_res['lower'] = np.maximum(PI_res['lower'], 0)
546 |                 coverage_res = ((np.array(PI_res['lower']) <= self.Y_predict) & (
547 |                     np.array(PI_res['upper']) >= self.Y_predict)).mean()
548 |                 print(f'Average Coverage is {coverage_res}')
549 |                 width_res = (PI_res['upper'] - PI_res['lower']).mean()
550 |                 print(f'Average Width is {width_res}')
551 |                 results.loc[len(results)] = [itrial, data_name, save_name[name],
552 |                                              train_size, coverage_res, width_res]
553 |                 PIs.append(PI_res)
554 |         else:
555 |             results = pd.DataFrame(columns=['itrial', 'dataname', 'muh_fun',
556 |                                             'method', 'train_size', 'coverage', 'width'])
557 |             PIs = []
558 |             for method in methods:
559 |                 print(f'Runnning {method}')
560 |                 if method == 'JaB':
561 |                     PI = self.compute_PIs_JaB(alpha)
562 |                 elif method == 'Ensemble':
563 |                     PI = eval(f'compute_PIs_{method}_online({alpha},{stride},{smallT})',
564 |                               globals(), {k: getattr(self, k) for k in dir(self)})
565 |                 else:
566 |                     l = math.ceil(0.5*len(self.X_train))
567 |                     if non_EnbPI_online:
568 |                         PI = eval(f'compute_PIs_{method}_online({alpha},{l},{stride})',
569 |                                   globals(), {k: getattr(self, k) for k in dir(self)})
570 |                     else:
571 |                         PI = eval(f'compute_PIs_{method}({alpha},{l})',
572 |                                   globals(), {k: getattr(self, k) for k in dir(self)})
573 |                 if ('Solar' in data_name) | ('Wind' in data_name):
574 |                     PI['lower'] = np.maximum(PI['lower'], 0)
575 |                 PIs.append(PI)
576 |                 coverage = ((np.array(PI['lower']) <= self.Y_predict) & (
577 |                     np.array(PI['upper']) >= self.Y_predict)).mean()
578 |                 if len(true_Y_predict) > 0:
579 |                     coverage = ((np.array(PI['lower']) <= true_Y_predict) & (
580 |                         np.array(PI['upper']) >= true_Y_predict)).mean()
581 |                 print(f'Average Coverage is {coverage}')
582 |                 width = (PI['upper'] - PI['lower']).mean()
583 |                 print(f'Average Width is {width}')
584 |                 results.loc[len(results)] = [itrial, data_name,
585 |                                              self.regressor.__class__.__name__, method, train_size, coverage, width]
586 |         if get_plots:
587 |             PIs.append(results)
588 |             return(PIs)
589 |         else:
590 |             return(results)
591 | 
592 | 
593 | class QOOB_or_adaptive_CI():
594 |     '''
595 |         Implementation of the QOOB method (Gupta et al., 2021) or the adaptive CI (Gibbs et al., 2022)
596 |     '''
597 | 
598 |     def __init__(self, fit_func, X_train, X_predict, Y_train, Y_predict):
599 |         self.regressor = fit_func
600 |         self.X_train = X_train
601 |         self.X_predict = X_predict
602 |         self.Y_train = Y_train
603 |         self.Y_predict = Y_predict
604 |     ##############################
605 |     # First on QOOB
606 | 
607 |     def fit_bootstrap_agg_get_lower_upper(self, B, beta_quantiles):
608 |         '''
609 |           Train B bootstrap estimators from subsets of (X_train, Y_train), compute aggregated predictors, compute scors r_i(X_i,Y_i), and finally get the intervals [l_i(X_n+j),u_i(X_n+j)] for each LOO predictor and the jth prediction in test sample
610 |         '''
611 |         n = len(self.X_train)
612 |         n1 = len(self.X_predict)
613 |         # hold indices of training data for each f^b
614 |         boot_samples_idx = util.generate_bootstrap_samples(n, n, B)
615 |         # hold lower and upper quantile predictions from each f^b
616 |         boot_predictions_lower = np.zeros((B, (n+n1)), dtype=float)
617 |         boot_predictions_upper = np.zeros((B, (n+n1)), dtype=float)
618 |         # for i^th column, it shows which f^b uses i in training (so exclude in aggregation)
619 |         in_boot_sample = np.zeros((B, n), dtype=bool)
620 |         out_sample_predict_lower = np.zeros((n, n1))
621 |         out_sample_predict_upper = np.zeros((n, n1))
622 |         start = time.time()
623 |         for b in range(B):
624 |             # Fit quantile random forests
625 |             model = self.regressor
626 |             model = model.fit(self.X_train[boot_samples_idx[b], :],
627 |                               self.Y_train[boot_samples_idx[b], ])
628 |             pred_boot = model.predict_quantiles(
629 |                 np.r_[self.X_train, self.X_predict], quantiles=beta_quantiles)
630 |             boot_predictions_lower[b] = pred_boot[:, 0]
631 |             boot_predictions_upper[b] = pred_boot[:, 1]
632 |             in_boot_sample[b, boot_samples_idx[b]] = True
633 |         print(
634 |             f'Finish Fitting B Bootstrap models, took {time.time()-start} secs.')
635 |         start = time.time()
636 |         self.QOOB_rXY = []  # the non-conformity scores
637 |         for i in range(n):
638 |             b_keep = np.argwhere(~(in_boot_sample[:, i])).reshape(-1)
639 |             if(len(b_keep) > 0):
640 |                 # NOTE: Append these training centers too see their magnitude
641 |                 # The reason is sometimes they are TOO close to actual Y.
642 |                 quantile_lower = boot_predictions_lower[b_keep, i].mean()
643 |                 quantile_upper = boot_predictions_upper[b_keep, i].mean()
644 |                 out_sample_predict_lower[i] = boot_predictions_lower[b_keep, n:].mean(
645 |                     0)
646 |                 out_sample_predict_upper[i] = boot_predictions_upper[b_keep, n:].mean(
647 |                     0)
648 |             else:  # if aggregating an empty set of models, predict zero everywhere
649 |                 print(f'No bootstrap estimator for {i}th LOO estimator')
650 |                 quantile_lower = np.percentile(
651 |                     self.Y_train, beta_quantiles[0]*100)
652 |                 quantile_upper = np.percentile(
653 |                     self.Y_train, beta_quantiles[1]*100)
654 |                 out_sample_predict_lower[i] = np.repeat(quantile_lower, n1)
655 |                 out_sample_predict_upper[i] = np.repeat(quantile_upper, n1)
656 |             self.QOOB_rXY.append(self.get_rXY(
657 |                 self.Y_train[i], quantile_lower, quantile_upper))
658 |         # print('Finish Computing QOOB training' +
659 |         #       r'$\{r_i(X_i,Y_i)\}_{i=1}^N$'+f', took {time.time()-start} secs.')
660 |         # Finally, subtract/add the QOOB_rXY from the predictions
661 |         self.QOOB_rXY = np.array(self.QOOB_rXY)
662 |         out_sample_predict_lower = (
663 |             out_sample_predict_lower.transpose()-self.QOOB_rXY).transpose()
664 |         out_sample_predict_upper = (
665 |             out_sample_predict_upper.transpose()+self.QOOB_rXY).transpose()
666 |         F_minus_i_out_sample = np.r_[
667 |             out_sample_predict_lower, out_sample_predict_upper]
668 |         return F_minus_i_out_sample  # Matrix of shape 2n-by-n1
669 | 
670 |     def compute_QOOB_intervals(self, data_name, itrial, B, alpha=0.1, get_plots=False):
671 |         results = pd.DataFrame(columns=['itrial', 'dataname', 'muh_fun',
672 |                                         'method', 'train_size', 'coverage', 'width'])
673 |         beta_quantiles = [alpha*2, 1-alpha*2]
674 |         # beta_quantiles = [alpha/2, 1-alpha/2]  # Even make thresholds smaller, still not good
675 |         F_minus_i_out_sample = self.fit_bootstrap_agg_get_lower_upper(
676 |             B, beta_quantiles)
677 |         n1 = F_minus_i_out_sample.shape[1]
678 |         PIs = []
679 |         for i in range(n1):
680 |             curr_lower_upper = F_minus_i_out_sample[:, i]
681 |             # print(f'Test point {i}')
682 |             PIs.append(self.get_lower_upper_n_plus_i(curr_lower_upper, alpha))
683 |         PIs = pd.DataFrame(PIs, columns=['lower', 'upper'])
684 |         self.PIs = PIs
685 |         if 'Solar' in data_name:
686 |             PIs['lower'] = np.maximum(PIs['lower'], 0)
687 |         coverage, width = util.ave_cov_width(PIs, self.Y_predict)
688 |         results.loc[len(results)] = [itrial, data_name,
689 |                                      self.regressor.__class__.__name__, 'QOOB', self.X_train.shape[0], coverage, width]
690 |         if get_plots:
691 |             return [PIs, results]
692 |         else:
693 |             return results
694 |     # QOOB helpers
695 | 
696 |     def get_rXY(self, Ytrain_i, quantile_lower, quantile_upper):
697 |         # Get r_i(X_i,Y_i) as in Eq. (2) of QOOB
698 |         if Ytrain_i < quantile_lower:
699 |             return quantile_lower-Ytrain_i
700 |         elif Ytrain_i > quantile_upper:
701 |             return Ytrain_i-quantile_upper  # There was a small error here
702 |         else:
703 |             return 0
704 | 
705 |     # AdaptCI helpers
706 |     def get_Ei(self, Ytrain_i, quantile_lower, quantile_upper):
707 |         return np.max([quantile_lower-Ytrain_i, Ytrain_i-quantile_upper])
708 | 
709 |     def get_lower_upper_n_plus_i(self, curr_lower_upper, alpha):
710 |         # This implements Algorithm 1 of QOOB
711 |         # See https://github.com/AIgen/QOOB/blob/master/MATLAB/methods/QOOB_interval.m for matlab implementation
712 |         n2 = len(curr_lower_upper)
713 |         n = int(n2/2)
714 |         S_ls = np.r_[np.repeat(1, n), np.repeat(0, n)]
715 |         idx_sort = np.argsort(curr_lower_upper)  # smallest to larget
716 |         S_ls = S_ls[idx_sort]
717 |         curr_lower_upper = curr_lower_upper[idx_sort]
718 |         count = 0
719 |         lower_i = np.inf
720 |         upper_i = -np.inf
721 |         threshold = alpha*(n+1)-1
722 |         for i in range(n2):
723 |             if S_ls[i] == 1:
724 |                 count += 1
725 |                 if count > threshold and count-1 <= threshold and lower_i == np.inf:
726 |                     lower_i = curr_lower_upper[i]
727 |                     # print(f'QOOB lower_end {lower_i}')
728 |             else:
729 |                 if count > threshold and count-1 <= threshold and upper_i == -np.inf:
730 |                     upper_i = curr_lower_upper[i]
731 |                     # print(f'QOOB upper_end {upper_i}')
732 |                 count -= 1
733 |         return [lower_i, upper_i]
734 | 
735 |     ##############################
736 |     # Next on AdaptiveCI
737 | 
738 |     def compute_AdaptiveCI_intervals(self, data_name, itrial, l, alpha=0.1, get_plots=False):
739 |         results = pd.DataFrame(columns=['itrial', 'dataname', 'muh_fun',
740 |                                         'method', 'train_size', 'coverage', 'width'])
741 |         n = len(self.X_train)
742 |         proper_train = np.arange(l)
743 |         X_train = self.X_train[proper_train, :]
744 |         Y_train = self.Y_train[proper_train]
745 |         X_calibrate = np.delete(self.X_train, proper_train, axis=0)
746 |         Y_calibrate = np.delete(self.Y_train, proper_train)
747 |         # NOTE: below works when the model can takes in MULTIPLE quantiles together (e.g., the RangerForest)
748 |         model = self.regressor
749 |         model = model.fit(X_train, Y_train)
750 |         quantile_pred = model.predict_quantiles(
751 |             np.r_[X_calibrate, self.X_predict], quantiles=[alpha/2, 1-alpha/2])
752 |         # NOTE: below works for sklearn linear quantile: https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.QuantileRegressor.html#sklearn.linear_model.QuantileRegressor
753 |         # # In particular, it is much slower than the quantile RF with similar results
754 |         # model_l, model_u = self.regressor
755 |         # qpred_l, qpred_u = model_l.fit(X_train, Y_train).predict(np.r_[X_calibrate, self.X_predict]), model_u.fit(
756 |         #     X_train, Y_train).predict(np.r_[X_calibrate, self.X_predict])
757 |         # quantile_pred = np.c_[qpred_l, qpred_u]
758 |         self.quantile_pred = quantile_pred
759 |         Dcal_pred = quantile_pred[:n-l]
760 |         Test_pred = quantile_pred[n-l:]
761 |         # TODO: I guess I can use the QOOB idea, by using "get_rXY"
762 |         Dcal_scores = np.array([self.get_Ei(Ycal, quantile_lower, quantile_upper) for Ycal,
763 |                                 quantile_lower, quantile_upper in zip(Y_calibrate, Dcal_pred[:, 0], Dcal_pred[:, 1])])
764 |         self.Escore = Dcal_scores
765 |         # Sequentially get the intervals with adaptive alpha
766 |         alpha_t = alpha
767 |         errs = []
768 |         gamma = 0.005
769 |         method = 'simple'  # 'simple' or 'complex'
770 |         PIs = []
771 |         self.alphas = [alpha_t]
772 |         for t, preds in enumerate(Test_pred):
773 |             lower_pred, upper_pred = preds
774 |             width = np.percentile(Dcal_scores, 100*(1-alpha_t))
775 |             # print(f'At test time {t}')
776 |             # print(f'alpha={alpha_t} & width={width}')
777 |             lower_t, upper_t = lower_pred-width, upper_pred+width
778 |             PIs.append([lower_t, upper_t])
779 |             # Check coverage and update alpha_t
780 |             Y_t = self.Y_predict[t]
781 |             err = 1 if Y_t < lower_t or Y_t > upper_t else 0
782 |             errs.append(err)
783 |             alpha_t = util.adjust_alpha_t(alpha_t, alpha, errs, gamma, method)
784 |             if alpha_t > 1:
785 |                 alpha_t = 1
786 |             if alpha_t < 0:
787 |                 alpha_t = 0
788 |             self.alphas.append(alpha_t)
789 |         PIs = pd.DataFrame(PIs, columns=['lower', 'upper'])
790 |         if 'Solar' in data_name:
791 |             PIs['lower'] = np.maximum(PIs['lower'], 0)
792 |         self.errs = errs
793 |         self.PIs = PIs
794 |         coverage, width = util.ave_cov_width(PIs, self.Y_predict)
795 |         results.loc[len(results)] = [itrial, data_name,
796 |                                      self.regressor.__class__.__name__, 'Adaptive_CI', self.X_train.shape[0], coverage, width]
797 |         if get_plots:
798 |             return [PIs, results]
799 |         else:
800 |             return results
801 |         # TODO: I guess I can use the QOOB idea, by using "get_rXY"
802 |         Dcal_scores = np.array([self.get_Ei(Ycal, quantile_lower, quantile_upper) for Ycal,
803 |                                 quantile_lower, quantile_upper in zip(Y_calibrate, Dcal_pred[:, 0], Dcal_pred[:, 1])])
804 |         self.Escore = Dcal_scores
805 |         # Sequentially get the intervals with adaptive alpha
806 |         alpha_t = alpha
807 |         errs = []
808 |         gamma = 0.005
809 |         method = 'simple'  # 'simple' or 'complex'
810 |         PIs = []
811 |         self.alphas = [alpha_t]
812 |         for t, preds in enumerate(Test_pred):
813 |             lower_pred, upper_pred = preds
814 |             width = np.percentile(Dcal_scores, 100*(1-alpha_t))
815 |             # print(f'At test time {t}')
816 |             # print(f'alpha={alpha_t} & width={width}')
817 |             lower_t, upper_t = lower_pred-width, upper_pred+width
818 |             PIs.append([lower_t, upper_t])
819 |             # Check coverage and update alpha_t
820 |             Y_t = self.Y_predict[t]
821 |             err = 1 if Y_t < lower_t or Y_t > upper_t else 0
822 |             errs.append(err)
823 |             alpha_t = util.adjust_alpha_t(alpha_t, alpha, errs, gamma, method)
824 |             if alpha_t > 1:
825 |                 alpha_t = 1
826 |             if alpha_t < 0:
827 |                 alpha_t = 0
828 |             self.alphas.append(alpha_t)
829 |         PIs = pd.DataFrame(PIs, columns=['lower', 'upper'])
830 |         if 'Solar' in data_name:
831 |             PIs['lower'] = np.maximum(PIs['lower'], 0)
832 |         self.errs = errs
833 |         self.PIs = PIs
834 |         coverage, width = util.ave_cov_width(PIs, self.Y_predict)
835 |         results.loc[len(results)] = [itrial, data_name,
836 |                                      self.regressor.__class__.__name__, 'Adaptive_CI', self.X_train.shape[0], coverage, width]
837 |         if get_plots:
838 |             return [PIs, results]
839 |         else:
840 |             return results
841 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # SPCI code
 2 |  Official implementation of the work [Sequential Predictive Conformal Inference for Time Series](https://openreview.net/forum?id=jJeY7w8YRz) (ICML 2023). [Slide](https://bpb-us-w2.wpmucdn.com/sites.gatech.edu/dist/9/2055/files/2023/06/SPCI_slide.pdf) and [Poster](https://bpb-us-w2.wpmucdn.com/sites.gatech.edu/dist/9/2055/files/2023/09/SPCI_poster.pdf) are also available.
 3 |  
 4 |  Please direct questions regarding implementation to cxu310@gatech.edu.
 5 |  
 6 |  See [tutorial_electric_EnbPI_SPCI.ipynb](https://github.com/hamrel-cxu/SPCI-code/blob/main/tutorial_electric_EnbPI_SPCI.ipynb) for comparing SPCI against [EnbPI](https://ieeexplore.ieee.org/abstract/document/10121511), which is an earlier method of ours. We demonstrate significant reduction in interval width on the electric dataset, which is also used in [Nex-CP](https://arxiv.org/abs/2202.13415) (Barber et al., 2022).
 7 | 
 8 |  Installation of dependency:
 9 | 
10 |  ```
11 | pip install -r requirements.txt
12 | ```
13 |  
14 |  If you find our work useful, please consider citing it.
15 |  ```
16 | @InProceedings{xu2023SPCI,
17 |   title = 	 {Sequential Predictive Conformal Inference for Time Series},
18 |   author =       {Xu, Chen and Xie, Yao},
19 |   booktitle = 	 {Proceedings of the 40th International Conference on Machine Learning},
20 |   pages = 	 {38707--38727},
21 |   year = 	 {2023},
22 |   editor = 	 {Krause, Andreas and Brunskill, Emma and Cho, Kyunghyun and Engelhardt, Barbara and Sabato, Sivan and Scarlett, Jonathan},
23 |   volume = 	 {202},
24 |   series = 	 {Proceedings of Machine Learning Research},
25 |   month = 	 {23--29 Jul},
26 |   publisher =    {PMLR},
27 |   pdf = 	 {https://proceedings.mlr.press/v202/xu23r/xu23r.pdf},
28 |   url = 	 {https://proceedings.mlr.press/v202/xu23r.html}
29 | }
30 |  ```
31 | 


--------------------------------------------------------------------------------
/SPCI_class.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import importlib as ipb
  3 | import pandas as pd
  4 | import numpy as np
  5 | import math
  6 | import time as time
  7 | import utils_SPCI as utils
  8 | import warnings
  9 | import torch
 10 | import pickle
 11 | import matplotlib.pyplot as plt
 12 | from sklearn.ensemble import RandomForestRegressor
 13 | from sklearn.linear_model import LinearRegression
 14 | import pdb
 15 | import data
 16 | import torch.nn as nn
 17 | from sklearn_quantile import RandomForestQuantileRegressor, SampleRandomForestQuantileRegressor
 18 | from numpy.lib.stride_tricks import sliding_window_view
 19 | # from neuralprophet import NeuralProphet
 20 | from skranger.ensemble import RangerForestRegressor
 21 | warnings.filterwarnings("ignore")
 22 | # device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 23 | device = torch.device("cpu")
 24 | 
 25 | #### Main Class ####
 26 | 
 27 | 
 28 | def detach_torch(input):
 29 |     return input.cpu().detach().numpy()
 30 | 
 31 | 
 32 | class SPCI_and_EnbPI():
 33 |     '''
 34 |         Create prediction intervals assuming Y_t = f(X_t) + \sigma(X_t)\eps_t
 35 |         Currently, assume the regression function is by default MLP implemented with PyTorch, as it needs to estimate BOTH f(X_t) and \sigma(X_t), where the latter is impossible to estimate using scikit-learn modules
 36 | 
 37 |         Most things carry out, except that we need to have different estimators for f and \sigma.
 38 | 
 39 |         fit_func = None: use MLP above
 40 |     '''
 41 | 
 42 |     def __init__(self, X_train, X_predict, Y_train, Y_predict, fit_func=None):
 43 |         self.regressor = fit_func
 44 |         self.X_train = X_train
 45 |         self.X_predict = X_predict
 46 |         self.Y_train = Y_train
 47 |         self.Y_predict = Y_predict
 48 |         # Predicted training data centers by EnbPI
 49 |         n, n1 = len(self.X_train), len(self.X_predict)
 50 |         self.Ensemble_train_interval_centers = np.ones(n)*np.inf
 51 |         self.Ensemble_train_interval_sigma = np.ones(n)*np.inf
 52 |         # Predicted test data centers by EnbPI
 53 |         self.Ensemble_pred_interval_centers = np.ones(n1)*np.inf
 54 |         self.Ensemble_pred_interval_sigma = np.ones(n1)*np.inf
 55 |         self.Ensemble_online_resid = np.ones(n+n1)*np.inf  # LOO scores
 56 |         self.beta_hat_bins = []
 57 |         ### New ones under development ###
 58 |         self.use_NeuralProphet = False
 59 |         #### Other hyperparameters for training (mostly simulation) ####
 60 |         # point predictor \hat f
 61 |         self.use_WLS = True # Whether to use WLS for fitting (compare with Nex-CP)
 62 |         self.WLS_c = 0.99
 63 |         # QRF training & how it treats the samples
 64 |         self.weigh_residuals = False # Whether we weigh current residuals more.
 65 |         self.c = 0.995 # If self.weight_residuals, weights[s] = self.c ** s, s\geq 0
 66 |         self.n_estimators = 10 # Num trees for QRF
 67 |         self.max_d = 2 # Max depth for fitting QRF
 68 |         self.criterion = 'mse' # 'mse' or 'mae'
 69 |         # search of \beta^* \in [0,\alpha]
 70 |         self.bins = 5 # break [0,\alpha] into bins
 71 |         # how many LOO training residuals to use for training current QRF 
 72 |         self.T1 = None # None = use all
 73 |     def one_boot_prediction(self, Xboot, Yboot, Xfull):
 74 |         if self.use_NeuralProphet:
 75 |             '''
 76 |                 Added NeuralPropeht in
 77 |                 Note, hyperparameters not tuned yet
 78 |             '''
 79 |             Xboot, Yboot = detach_torch(Xboot), detach_torch(Yboot)
 80 |             nlags = 1
 81 |             model = NeuralProphet(
 82 |                 n_forecasts=1,
 83 |                 n_lags=nlags,
 84 |             )
 85 |             df_tmp, _ = utils.make_NP_df(Xboot, Yboot)
 86 |             model = model.add_lagged_regressor(names=self.Xnames)
 87 |             _ = model.fit(df_tmp, freq="D")  # Also outputs the metrics
 88 |             boot_pred = model.predict(self.df_full)['yhat1'].to_numpy()
 89 |             boot_pred[:nlags] = self.Y_train[:nlags]
 90 |             boot_pred = boot_pred.astype(np.float)
 91 |             boot_fX_pred = torch.from_numpy(boot_pred.flatten()).to(device)
 92 |             boot_sigma_pred = 0
 93 |         else:
 94 |             if self.regressor.__class__.__name__ == 'NoneType':
 95 |                 start1 = time.time()
 96 |                 model_f = MLP(self.d).to(device)
 97 |                 optimizer_f = torch.optim.Adam(
 98 |                     model_f.parameters(), lr=1e-3)
 99 |                 if self.fit_sigmaX:
100 |                     model_sigma = MLP(self.d, sigma=True).to(device)
101 |                     optimizer_sigma = torch.optim.Adam(
102 |                         model_sigma.parameters(), lr=2e-3)
103 |                 for epoch in range(300):
104 |                     fXhat = model_f(Xboot)
105 |                     sigmaXhat = torch.ones(len(fXhat)).to(device)
106 |                     if self.fit_sigmaX:
107 |                         sigmaXhat = model_sigma(Xboot)
108 |                     loss = ((Yboot - fXhat)
109 |                             / sigmaXhat).pow(2).mean() / 2
110 |                     optimizer_f.zero_grad()
111 |                     if self.fit_sigmaX:
112 |                         optimizer_sigma.zero_grad()
113 |                     loss.backward()
114 |                     optimizer_f.step()
115 |                     if self.fit_sigmaX:
116 |                         optimizer_sigma.step()
117 |                 with torch.no_grad():
118 |                     boot_fX_pred = model_f(
119 |                         Xfull).flatten().cpu().detach().numpy()
120 |                     boot_sigma_pred = 0
121 |                     if self.fit_sigmaX:
122 |                         boot_sigma_pred = model_sigma(
123 |                             Xfull).flatten().cpu().detach().numpy()
124 |                 print(
125 |                     f'Took {time.time()-start1} secs to finish the {self.b}th boostrap model')
126 |             else:
127 |                 Xboot, Yboot = detach_torch(Xboot), detach_torch(Yboot)
128 |                 Xfull = detach_torch(Xfull)
129 |                 # NOTE, NO sigma estimation because these methods by deFAULT are fitting Y, but we have no observation of errors
130 |                 model = self.regressor
131 |                 if self.use_WLS and isinstance(model,LinearRegression):
132 |                     # To compare with Nex-CP when using WLS
133 |                     # Taken from Nex-CP code
134 |                     n = len(Xboot)
135 |                     tags=self.WLS_c**(np.arange(n,0,-1))
136 |                     model.fit(Xboot, Yboot, sample_weight=tags)
137 |                 else:
138 |                     model.fit(Xboot, Yboot)
139 |                 boot_fX_pred = torch.from_numpy(
140 |                     model.predict(Xfull).flatten()).to(device)
141 |                 boot_sigma_pred = 0
142 |             return boot_fX_pred, boot_sigma_pred
143 | 
144 |     def fit_bootstrap_models_online_multistep(self, B, fit_sigmaX=True, stride=1):
145 |         '''
146 |           Train B bootstrap estimators from subsets of (X_train, Y_train), compute aggregated predictors, and compute the residuals
147 |           fit_sigmaX: If False, just avoid predicting \sigma(X_t) by defaulting it to 1
148 | 
149 |           stride: int. If > 1, then we perform multi-step prediction, where we have to fit stride*B boostrap predictors.
150 |             Idea: train on (X_i,Y_i), i=1,...,n-stride
151 |             Then predict on X_1,X_{1+s},...,X_{1+k*s} where 1+k*s <= n+n1
152 |             Note, when doing LOO prediction thus only care above the points above
153 |         '''
154 |         n, self.d = self.X_train.shape
155 |         self.fit_sigmaX = fit_sigmaX
156 |         n1 = len(self.X_predict)
157 |         N = n-stride+1  # Total training data each one-step predictor sees
158 |         # We make prediction every s step ahead, so these are feature the model sees
159 |         train_pred_idx = np.arange(0, n, stride)
160 |         # We make prediction every s step ahead, so these are feature the model sees
161 |         test_pred_idx = np.arange(n, n+n1, stride)
162 |         self.train_idx = train_pred_idx
163 |         self.test_idx = test_pred_idx
164 |         # Only contains features that are observed every stride steps
165 |         Xfull = torch.vstack(
166 |             [self.X_train[train_pred_idx], self.X_predict[test_pred_idx-n]])
167 |         nsub, n1sub = len(train_pred_idx), len(test_pred_idx)
168 |         for s in range(stride):
169 |             ''' 1. Create containers for predictions '''
170 |             # hold indices of training data for each f^b
171 |             boot_samples_idx = utils.generate_bootstrap_samples(N, N, B)
172 |             # for i^th column, it shows which f^b uses i in training (so exclude in aggregation)
173 |             in_boot_sample = np.zeros((B, N), dtype=bool)
174 |             # hold predictions from each f^b for fX and sigma&b for sigma
175 |             boot_predictionsFX = np.zeros((B, nsub+n1sub))
176 |             boot_predictionsSigmaX = np.ones((B, nsub+n1sub))
177 |             # We actually would just use n1sub rows, as we only observe this number of features
178 |             out_sample_predictFX = np.zeros((n, n1sub))
179 |             out_sample_predictSigmaX = np.ones((n, n1sub))
180 | 
181 |             ''' 2. Start bootstrap prediction '''
182 |             start = time.time()
183 |             if self.use_NeuralProphet:
184 |                 self.df_full, self.Xnames = utils.make_NP_df(
185 |                     Xfull, np.zeros(n + n1))
186 |             for b in range(B):
187 |                 self.b = b
188 |                 Xboot, Yboot = self.X_train[boot_samples_idx[b],
189 |                                             :], self.Y_train[s:s+N][boot_samples_idx[b], ]
190 |                 in_boot_sample[b, boot_samples_idx[b]] = True
191 |                 boot_fX_pred, boot_sigma_pred = self.one_boot_prediction(
192 |                     Xboot, Yboot, Xfull)
193 |                 boot_predictionsFX[b] = boot_fX_pred
194 |                 if self.fit_sigmaX:
195 |                     boot_predictionsSigmaX[b] = boot_sigma_pred
196 |             print(
197 |                 f'{s+1}/{stride} multi-step: finish Fitting {B} Bootstrap models, took {time.time()-start} secs.')
198 | 
199 |             ''' 3. Obtain LOO residuals (train and test) and prediction for test data '''
200 |             start = time.time()
201 |             # Consider LOO, but here ONLY for the indices being predicted
202 |             for j, i in enumerate(train_pred_idx):
203 |                 # j: counter and i: actual index X_{0+j*stride}
204 |                 if i < N:
205 |                     b_keep = np.argwhere(
206 |                         ~(in_boot_sample[:, i])).reshape(-1)
207 |                     if len(b_keep) == 0:
208 |                         # All bootstrap estimators are trained on this model
209 |                         b_keep = 0  # More rigorously, it should be None, but in practice, the difference is minor
210 |                 else:
211 |                     # This feature is not used in training, but used in prediction
212 |                     b_keep = range(B)
213 |                 pred_iFX = boot_predictionsFX[b_keep, j].mean()
214 |                 pred_iSigmaX = boot_predictionsSigmaX[b_keep, j].mean()
215 |                 pred_testFX = boot_predictionsFX[b_keep, nsub:].mean(0)
216 |                 pred_testSigmaX = boot_predictionsSigmaX[b_keep, nsub:].mean(0)
217 |                 # Populate the training prediction
218 |                 # We add s because of multi-step procedure, so f(X_t) is for Y_t+s
219 |                 true_idx = min(i+s, n-1)
220 |                 self.Ensemble_train_interval_centers[true_idx] = pred_iFX
221 |                 self.Ensemble_train_interval_sigma[true_idx] = pred_iSigmaX
222 |                 resid_LOO = (detach_torch(
223 |                     self.Y_train[true_idx]) - pred_iFX) / pred_iSigmaX
224 |                 out_sample_predictFX[i] = pred_testFX
225 |                 out_sample_predictSigmaX[i] = pred_testSigmaX
226 |                 self.Ensemble_online_resid[true_idx] = resid_LOO.item()
227 |             sorted_out_sample_predictFX = out_sample_predictFX[train_pred_idx].mean(
228 |                 0)  # length ceil(n1/stride)
229 |             sorted_out_sample_predictSigmaX = out_sample_predictSigmaX[train_pred_idx].mean(
230 |                 0)  # length ceil(n1/stride)
231 |             pred_idx = np.minimum(test_pred_idx-n+s, n1-1)
232 |             self.Ensemble_pred_interval_centers[pred_idx] = sorted_out_sample_predictFX
233 |             self.Ensemble_pred_interval_sigma[pred_idx] = sorted_out_sample_predictSigmaX
234 |             pred_full_idx = np.minimum(test_pred_idx+s, n+n1-1)
235 |             resid_out_sample = (
236 |                 detach_torch(self.Y_predict[pred_idx]) - sorted_out_sample_predictFX) / sorted_out_sample_predictSigmaX
237 |             self.Ensemble_online_resid[pred_full_idx] = resid_out_sample
238 |         # Sanity check
239 |         num_inf = (self.Ensemble_online_resid == np.inf).sum()
240 |         if num_inf > 0:
241 |             print(
242 |                 f'Something can be wrong, as {num_inf}/{n+n1} residuals are not all computed')
243 |             print(np.where(self.Ensemble_online_resid == np.inf))
244 | 
245 |     def compute_PIs_Ensemble_online(self, alpha, stride=1, smallT=True, past_window=100, use_SPCI=False, quantile_regr='RF'):
246 |         '''
247 |             stride: control how many steps we predict ahead
248 |             smallT: if True, we would only start with the last n number of LOO residuals, rather than use the full length T ones. Used in change detection
249 |                 NOTE: smallT can be important if time-series is very dynamic, in which case training MORE data may actaully be worse (because quantile longer)
250 |                 HOWEVER, if fit quantile regression, set it to be FALSE because we want to have many training pts for the quantile regressor
251 |             use_SPCI: if True, we fit conditional quantile to compute the widths, rather than simply using empirical quantile
252 |         '''
253 |         self.alpha = alpha
254 |         n1 = len(self.X_train)
255 |         self.past_window = past_window # For SPCI, this is the "lag" for predicting quantile
256 |         if smallT:
257 |             # Namely, for special use of EnbPI, only use at most past_window number of LOO residuals.
258 |             n1 = min(self.past_window, len(self.X_train))
259 |         # Now f^b and LOO residuals have been constructed from earlier
260 |         out_sample_predict = self.Ensemble_pred_interval_centers
261 |         out_sample_predictSigmaX = self.Ensemble_pred_interval_sigma
262 |         start = time.time()
263 |         # Matrix, where each row is a UNIQUE slice of residuals with length stride.
264 |         if use_SPCI:
265 |             s = stride
266 |             stride = 1
267 |         # NOTE, NOT ALL rows are actually "observable" in multi-step context, as this is rolling
268 |         resid_strided = utils.strided_app(
269 |             self.Ensemble_online_resid[len(self.X_train) - n1:-1], n1, stride)
270 |         print(f'Shape of slided residual lists is {resid_strided.shape}')
271 |         num_unique_resid = resid_strided.shape[0]
272 |         width_left = np.zeros(num_unique_resid)
273 |         width_right = np.zeros(num_unique_resid)
274 |         # # NEW, alpha becomes alpha_t. Uncomment things below if we decide to use this upgraded EnbPI
275 |         # alpha_t = alpha
276 |         # errs = []
277 |         # gamma = 0.005
278 |         # method = 'simple'  # 'simple' or 'complex'
279 |         # self.alphas = []
280 |         # NOTE: 'max_features='log2', max_depth=2' make the model "simpler", which improves performance in practice
281 |         self.QRF_ls = []
282 |         self.i_star_ls = []
283 |         for i in range(num_unique_resid):
284 |             curr_SigmaX = out_sample_predictSigmaX[i].item()
285 |             if use_SPCI:
286 |                 remainder = i % s
287 |                 if remainder == 0:
288 |                     # Update QRF
289 |                     past_resid = resid_strided[i, :]
290 |                     n2 = self.past_window
291 |                     resid_pred = self.multi_step_QRF(past_resid, i, s, n2)
292 |                 # Use the fitted regressor.
293 |                 # NOTE, residX is NOT the same as before, as it depends on
294 |                 # "past_resid", which has most entries replaced.
295 |                 rfqr= self.QRF_ls[remainder]
296 |                 i_star = self.i_star_ls[remainder]
297 |                 wid_all = rfqr.predict(resid_pred)
298 |                 num_mid = int(len(wid_all)/2)
299 |                 wid_left = wid_all[i_star]
300 |                 wid_right = wid_all[num_mid+i_star]
301 |                 width_left[i] = curr_SigmaX * wid_left
302 |                 width_right[i] = curr_SigmaX * wid_right
303 |                 num_print = int(num_unique_resid / 20)
304 |                 if num_print == 0:
305 |                     print(
306 |                             f'Width at test {i} is {width_right[i]-width_left[i]}')
307 |                 else:
308 |                     if i % num_print == 0:
309 |                         print(
310 |                             f'Width at test {i} is {width_right[i]-width_left[i]}')
311 |             else:
312 |                 past_resid = resid_strided[i, :]
313 |                 # Naive empirical quantile, where we use the SAME residuals for multi-step prediction
314 |                 # The number of bins will be determined INSIDE binning
315 |                 beta_hat_bin = utils.binning(past_resid, alpha)
316 |                 # beta_hat_bin = utils.binning(past_resid, alpha_t)
317 |                 self.beta_hat_bins.append(beta_hat_bin)
318 |                 width_left[i] = curr_SigmaX * np.percentile(
319 |                     past_resid, math.ceil(100 * beta_hat_bin))
320 |                 width_right[i] = curr_SigmaX * np.percentile(
321 |                     past_resid, math.ceil(100 * (1 - alpha + beta_hat_bin)))
322 |         print(
323 |             f'Finish Computing {num_unique_resid} UNIQUE Prediction Intervals, took {time.time()-start} secs.')
324 |         Ntest = len(out_sample_predict)
325 |         # This is because |width|=T1/stride.
326 |         width_left = np.repeat(width_left, stride)[:Ntest]
327 |         # This is because |width|=T1/stride.
328 |         width_right = np.repeat(width_right, stride)[:Ntest]
329 |         PIs_Ensemble = pd.DataFrame(np.c_[out_sample_predict + width_left,
330 |                                           out_sample_predict + width_right], columns=['lower', 'upper'])
331 |         self.PIs_Ensemble = PIs_Ensemble
332 |     '''
333 |         Get Multi-step QRF
334 |     '''
335 | 
336 |     def multi_step_QRF(self, past_resid, i, s, n2):
337 |         '''
338 |             Train multi-step QRF with the most recent residuals
339 |             i: prediction index
340 |             s: num of multi-step, same as stride
341 |             n2: past window w
342 |         '''
343 |         # 1. Get "past_resid" into an auto-regressive fashion
344 |         # This should be more carefully examined, b/c it depends on how long \hat{\eps}_t depends on the past
345 |         # From practice, making it small make intervals wider
346 |         num = len(past_resid)
347 |         resid_pred = past_resid[-n2:].reshape(1, -1)
348 |         residX = sliding_window_view(past_resid[:num-s+1], window_shape=n2)
349 |         for k in range(s):
350 |             residY = past_resid[n2+k:num-(s-k-1)]
351 |             self.train_QRF(residX, residY)
352 |             if i == 0:
353 |                 # Initial training, append QRF to QRF_ls
354 |                 self.QRF_ls.append(self.rfqr)
355 |                 self.i_star_ls.append(self.i_star)
356 |             else:
357 |                 # Retraining, update QRF to QRF_ls
358 |                 self.QRF_ls[k] = self.rfqr
359 |                 self.i_star_ls[k] = self.i_star
360 |         return resid_pred
361 | 
362 |     def train_QRF(self, residX, residY):
363 |         alpha = self.alpha
364 |         beta_ls = np.linspace(start=0, stop=alpha, num=self.bins)
365 |         full_alphas = np.append(beta_ls, 1 - alpha + beta_ls)
366 |         self.common_params = dict(n_estimators = self.n_estimators,
367 |                                   max_depth = self.max_d,
368 |                                   criterion = self.criterion,
369 |                                   n_jobs = -1)
370 |         if residX[:-1].shape[0] > 10000:
371 |             # see API ref. https://sklearn-quantile.readthedocs.io/en/latest/generated/sklearn_quantile.RandomForestQuantileRegressor.html?highlight=RandomForestQuantileRegressor#sklearn_quantile.RandomForestQuantileRegressor
372 |             # NOTE, should NOT warm start, as it makes result poor
373 |             self.rfqr = SampleRandomForestQuantileRegressor(
374 |                 **self.common_params, q=full_alphas)
375 |         else:
376 |             self.rfqr = RandomForestQuantileRegressor(
377 |                 **self.common_params, q=full_alphas)
378 |         # 3. Find best \hat{\beta} via evaluating many quantiles
379 |         # rfqr.fit(residX[:-1], residY)
380 |         sample_weight = None
381 |         if self.weigh_residuals:
382 |             sample_weight = self.c ** np.arange(len(residY), 0, -1)
383 |         if self.T1 is not None:
384 |             self.T1 = min(self.T1, len(residY)) # Sanity check to make sure no errors in training
385 |             self.i_star, _, _, _ = utils.binning_use_RF_quantile_regr(
386 |                 self.rfqr, residX[-(self.T1+1):-1], residY[-self.T1:], residX[-1], beta_ls, sample_weight)
387 |         else:
388 |             self.i_star, _, _, _ = utils.binning_use_RF_quantile_regr(
389 |                 self.rfqr, residX[:-1], residY, residX[-1], beta_ls, sample_weight)
390 |     '''
391 |         All together
392 |     '''
393 | 
394 |     def get_results(self, alpha, data_name, itrial, true_Y_predict=[], method='Ensemble'):
395 |         '''
396 |             NOTE: I added a "true_Y_predict" option, which will be used for calibrating coverage under missing data
397 |             In particular, this is needed when the Y_predict we use for training is NOT the same as true Y_predict
398 |         '''
399 |         results = pd.DataFrame(columns=['itrial', 'dataname', 'muh_fun',
400 |                                         'method', 'train_size', 'coverage', 'width'])
401 |         train_size = len(self.X_train)
402 |         if method == 'Ensemble':
403 |             PI = self.PIs_Ensemble
404 |         Ytest = self.Y_predict.cpu().detach().numpy()
405 |         coverage = ((np.array(PI['lower']) <= Ytest) & (
406 |             np.array(PI['upper']) >= Ytest)).mean()
407 |         if len(true_Y_predict) > 0:
408 |             coverage = ((np.array(PI['lower']) <= true_Y_predict) & (
409 |                 np.array(PI['upper']) >= true_Y_predict)).mean()
410 |         print(f'Average Coverage is {coverage}')
411 |         width = (PI['upper'] - PI['lower']).mean()
412 |         print(f'Average Width is {width}')
413 |         results.loc[len(results)] = [itrial, data_name,
414 |                                      'torch_MLP', method, train_size, coverage, width]
415 |         return results
416 | 
417 | 
418 | class MLP(nn.Module):
419 |     def __init__(self, d, sigma=False):
420 |         super(MLP, self).__init__()
421 |         H = 64
422 |         layers = [nn.Linear(d, H), nn.ReLU(), nn.Linear(
423 |             H, H), nn.ReLU(), nn.Linear(H, 1)]
424 |         self.sigma = sigma
425 |         if self.sigma:
426 |             layers.append(nn.ReLU())
427 |         self.layers = nn.Sequential(*layers)
428 | 
429 |     def forward(self, x):
430 |         perturb = 1e-3 if self.sigma else 0
431 |         return self.layers(x) + perturb
432 | 
433 | 
434 | #### Competing Methods ####
435 | 
436 | 
437 | class QOOB_or_adaptive_CI():
438 |     '''
439 |         Implementation of the QOOB method (Gupta et al., 2021) or the adaptive CI (Gibbs et al., 2022)
440 |     '''
441 | 
442 |     def __init__(self, fit_func, X_train, X_predict, Y_train, Y_predict):
443 |         self.regressor = fit_func
444 |         self.X_train = X_train
445 |         self.X_predict = X_predict
446 |         self.Y_train = Y_train
447 |         self.Y_predict = Y_predict
448 |     ##############################
449 |     # First on QOOB
450 | 
451 |     def fit_bootstrap_agg_get_lower_upper(self, B, beta_quantiles):
452 |         '''
453 |           Train B bootstrap estimators from subsets of (X_train, Y_train), compute aggregated predictors, compute scors r_i(X_i,Y_i), and finally get the intervals [l_i(X_n+j),u_i(X_n+j)] for each LOO predictor and the jth prediction in test sample
454 |         '''
455 |         n = len(self.X_train)
456 |         n1 = len(self.X_predict)
457 |         # hold indices of training data for each f^b
458 |         boot_samples_idx = utils.generate_bootstrap_samples(n, n, B)
459 |         # hold lower and upper quantile predictions from each f^b
460 |         boot_predictions_lower = np.zeros((B, (n + n1)), dtype=float)
461 |         boot_predictions_upper = np.zeros((B, (n + n1)), dtype=float)
462 |         # for i^th column, it shows which f^b uses i in training (so exclude in aggregation)
463 |         in_boot_sample = np.zeros((B, n), dtype=bool)
464 |         out_sample_predict_lower = np.zeros((n, n1))
465 |         out_sample_predict_upper = np.zeros((n, n1))
466 |         start = time.time()
467 |         for b in range(B):
468 |             # Fit quantile random forests
469 |             model = self.regressor
470 |             model = model.fit(self.X_train[boot_samples_idx[b], :],
471 |                               self.Y_train[boot_samples_idx[b], ])
472 |             pred_boot = model.predict_quantiles(
473 |                 np.r_[self.X_train, self.X_predict], quantiles=beta_quantiles)
474 |             boot_predictions_lower[b] = pred_boot[:, 0]
475 |             boot_predictions_upper[b] = pred_boot[:, 1]
476 |             in_boot_sample[b, boot_samples_idx[b]] = True
477 |         print(
478 |             f'Finish Fitting B Bootstrap models, took {time.time()-start} secs.')
479 |         start = time.time()
480 |         self.QOOB_rXY = []  # the non-conformity scores
481 |         for i in range(n):
482 |             b_keep = np.argwhere(~(in_boot_sample[:, i])).reshape(-1)
483 |             if(len(b_keep) > 0):
484 |                 # NOTE: Append these training centers too see their magnitude
485 |                 # The reason is sometimes they are TOO close to actual Y.
486 |                 quantile_lower = boot_predictions_lower[b_keep, i].mean()
487 |                 quantile_upper = boot_predictions_upper[b_keep, i].mean()
488 |                 out_sample_predict_lower[i] = boot_predictions_lower[b_keep, n:].mean(
489 |                     0)
490 |                 out_sample_predict_upper[i] = boot_predictions_upper[b_keep, n:].mean(
491 |                     0)
492 |             else:  # if aggregating an empty set of models, predict zero everywhere
493 |                 print(f'No bootstrap estimator for {i}th LOO estimator')
494 |                 quantile_lower = np.percentile(
495 |                     self.Y_train, beta_quantiles[0] * 100)
496 |                 quantile_upper = np.percentile(
497 |                     self.Y_train, beta_quantiles[1] * 100)
498 |                 out_sample_predict_lower[i] = np.repeat(quantile_lower, n1)
499 |                 out_sample_predict_upper[i] = np.repeat(quantile_upper, n1)
500 |             self.QOOB_rXY.append(self.get_rXY(
501 |                 self.Y_train[i], quantile_lower, quantile_upper))
502 |         # print('Finish Computing QOOB training' +
503 |         #       r'$\{r_i(X_i,Y_i)\}_{i=1}^N$'+f', took {time.time()-start} secs.')
504 |         # Finally, subtract/add the QOOB_rXY from the predictions
505 |         self.QOOB_rXY = np.array(self.QOOB_rXY)
506 |         out_sample_predict_lower = (
507 |             out_sample_predict_lower.transpose() - self.QOOB_rXY).transpose()
508 |         out_sample_predict_upper = (
509 |             out_sample_predict_upper.transpose() + self.QOOB_rXY).transpose()
510 |         F_minus_i_out_sample = np.r_[
511 |             out_sample_predict_lower, out_sample_predict_upper]
512 |         return F_minus_i_out_sample  # Matrix of shape 2n-by-n1
513 | 
514 |     def compute_QOOB_intervals(self, data_name, itrial, B, alpha=0.1, get_plots=False):
515 |         results = pd.DataFrame(columns=['itrial', 'dataname', 'muh_fun',
516 |                                         'method', 'train_size', 'coverage', 'width'])
517 |         beta_quantiles = [alpha * 2, 1 - alpha * 2]
518 |         # beta_quantiles = [alpha/2, 1-alpha/2]  # Even make thresholds smaller, still not good
519 |         F_minus_i_out_sample = self.fit_bootstrap_agg_get_lower_upper(
520 |             B, beta_quantiles)
521 |         n1 = F_minus_i_out_sample.shape[1]
522 |         PIs = []
523 |         for i in range(n1):
524 |             curr_lower_upper = F_minus_i_out_sample[:, i]
525 |             # print(f'Test point {i}')
526 |             PIs.append(self.get_lower_upper_n_plus_i(curr_lower_upper, alpha))
527 |         PIs = pd.DataFrame(PIs, columns=['lower', 'upper'])
528 |         self.PIs = PIs
529 |         if 'Solar' in data_name:
530 |             PIs['lower'] = np.maximum(PIs['lower'], 0)
531 |         coverage, width = utils.ave_cov_width(PIs, self.Y_predict)
532 |         results.loc[len(results)] = [itrial, data_name,
533 |                                      self.regressor.__class__.__name__, 'QOOB', self.X_train.shape[0], coverage, width]
534 |         if get_plots:
535 |             return [PIs, results]
536 |         else:
537 |             return results
538 |     # QOOB helpers
539 | 
540 |     def get_rXY(self, Ytrain_i, quantile_lower, quantile_upper):
541 |         # Get r_i(X_i,Y_i) as in Eq. (2) of QOOB
542 |         if Ytrain_i < quantile_lower:
543 |             return quantile_lower - Ytrain_i
544 |         elif Ytrain_i > quantile_upper:
545 |             return Ytrain_i - quantile_upper  # There was a small error here
546 |         else:
547 |             return 0
548 | 
549 |     # AdaptCI helpers
550 |     def get_Ei(self, Ytrain_i, quantile_lower, quantile_upper):
551 |         return np.max([quantile_lower - Ytrain_i, Ytrain_i - quantile_upper])
552 | 
553 |     def get_lower_upper_n_plus_i(self, curr_lower_upper, alpha):
554 |         # This implements Algorithm 1 of QOOB
555 |         # See https://github.com/AIgen/QOOB/blob/master/MATLAB/methods/QOOB_interval.m for matlab implementation
556 |         n2 = len(curr_lower_upper)
557 |         n = int(n2 / 2)
558 |         S_ls = np.r_[np.repeat(1, n), np.repeat(0, n)]
559 |         idx_sort = np.argsort(curr_lower_upper)  # smallest to larget
560 |         S_ls = S_ls[idx_sort]
561 |         curr_lower_upper = curr_lower_upper[idx_sort]
562 |         count = 0
563 |         lower_i = np.inf
564 |         upper_i = -np.inf
565 |         threshold = alpha * (n + 1) - 1
566 |         for i in range(n2):
567 |             if S_ls[i] == 1:
568 |                 count += 1
569 |                 if count > threshold and count - 1 <= threshold and lower_i == np.inf:
570 |                     lower_i = curr_lower_upper[i]
571 |                     # print(f'QOOB lower_end {lower_i}')
572 |             else:
573 |                 if count > threshold and count - 1 <= threshold and upper_i == -np.inf:
574 |                     upper_i = curr_lower_upper[i]
575 |                     # print(f'QOOB upper_end {upper_i}')
576 |                 count -= 1
577 |         return [lower_i, upper_i]
578 | 
579 |     ##############################
580 |     # Next on AdaptiveCI
581 | 
582 |     def compute_AdaptiveCI_intervals(self, data_name, itrial, l, alpha=0.1, get_plots=False):
583 |         results = pd.DataFrame(columns=['itrial', 'dataname', 'muh_fun',
584 |                                         'method', 'train_size', 'coverage', 'width'])
585 |         n = len(self.X_train)
586 |         proper_train = np.arange(l)
587 |         X_train = self.X_train[proper_train, :]
588 |         Y_train = self.Y_train[proper_train]
589 |         X_calibrate = np.delete(self.X_train, proper_train, axis=0)
590 |         Y_calibrate = np.delete(self.Y_train, proper_train)
591 |         # NOTE: below works when the model can takes in MULTIPLE quantiles together (e.g., the RangerForest)
592 |         model = self.regressor
593 |         model = model.fit(X_train, Y_train)
594 |         quantile_pred = model.predict_quantiles(
595 |             np.r_[X_calibrate, self.X_predict], quantiles=[alpha / 2, 1 - alpha / 2])
596 |         # NOTE: below works for sklearn linear quantile: https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.QuantileRegressor.html#sklearn.linear_model.QuantileRegressor
597 |         # # In particular, it is much slower than the quantile RF with similar results
598 |         # model_l, model_u = self.regressor
599 |         # qpred_l, qpred_u = model_l.fit(X_train, Y_train).predict(np.r_[X_calibrate, self.X_predict]), model_u.fit(
600 |         #     X_train, Y_train).predict(np.r_[X_calibrate, self.X_predict])
601 |         # quantile_pred = np.c_[qpred_l, qpred_u]
602 |         self.quantile_pred = quantile_pred
603 |         Dcal_pred = quantile_pred[:n - l]
604 |         Test_pred = quantile_pred[n - l:]
605 |         # TODO: I guess I can use the QOOB idea, by using "get_rXY"
606 |         Dcal_scores = np.array([self.get_Ei(Ycal, quantile_lower, quantile_upper) for Ycal,
607 |                                 quantile_lower, quantile_upper in zip(Y_calibrate, Dcal_pred[:, 0], Dcal_pred[:, 1])])
608 |         self.Escore = Dcal_scores
609 |         # Sequentially get the intervals with adaptive alpha
610 |         alpha_t = alpha
611 |         errs = []
612 |         gamma = 0.005
613 |         method = 'simple'  # 'simple' or 'complex'
614 |         PIs = []
615 |         self.alphas = [alpha_t]
616 |         for t, preds in enumerate(Test_pred):
617 |             lower_pred, upper_pred = preds
618 |             width = np.percentile(Dcal_scores, 100 * (1 - alpha_t))
619 |             # print(f'At test time {t}')
620 |             # print(f'alpha={alpha_t} & width={width}')
621 |             lower_t, upper_t = lower_pred - width, upper_pred + width
622 |             PIs.append([lower_t, upper_t])
623 |             # Check coverage and update alpha_t
624 |             Y_t = self.Y_predict[t]
625 |             err = 1 if Y_t < lower_t or Y_t > upper_t else 0
626 |             errs.append(err)
627 |             alpha_t = utils.adjust_alpha_t(alpha_t, alpha, errs, gamma, method)
628 |             if alpha_t > 1:
629 |                 alpha_t = 1
630 |             if alpha_t < 0:
631 |                 alpha_t = 0
632 |             self.alphas.append(alpha_t)
633 |         PIs = pd.DataFrame(PIs, columns=['lower', 'upper'])
634 |         if 'Solar' in data_name:
635 |             PIs['lower'] = np.maximum(PIs['lower'], 0)
636 |         self.errs = errs
637 |         self.PIs = PIs
638 |         coverage, width = utils.ave_cov_width(PIs, self.Y_predict)
639 |         results.loc[len(results)] = [itrial, data_name,
640 |                                      self.regressor.__class__.__name__, 'Adaptive_CI', self.X_train.shape[0], coverage, width]
641 |         if get_plots:
642 |             return [PIs, results]
643 |         else:
644 |             return results
645 |         # TODO: I guess I can use the QOOB idea, by using "get_rXY"
646 |         Dcal_scores = np.array([self.get_Ei(Ycal, quantile_lower, quantile_upper) for Ycal,
647 |                                 quantile_lower, quantile_upper in zip(Y_calibrate, Dcal_pred[:, 0], Dcal_pred[:, 1])])
648 |         self.Escore = Dcal_scores
649 |         # Sequentially get the intervals with adaptive alpha
650 |         alpha_t = alpha
651 |         errs = []
652 |         gamma = 0.005
653 |         method = 'simple'  # 'simple' or 'complex'
654 |         PIs = []
655 |         self.alphas = [alpha_t]
656 |         for t, preds in enumerate(Test_pred):
657 |             lower_pred, upper_pred = preds
658 |             width = np.percentile(Dcal_scores, 100 * (1 - alpha_t))
659 |             # print(f'At test time {t}')
660 |             # print(f'alpha={alpha_t} & width={width}')
661 |             lower_t, upper_t = lower_pred - width, upper_pred + width
662 |             PIs.append([lower_t, upper_t])
663 |             # Check coverage and update alpha_t
664 |             Y_t = self.Y_predict[t]
665 |             err = 1 if Y_t < lower_t or Y_t > upper_t else 0
666 |             errs.append(err)
667 |             alpha_t = utils.adjust_alpha_t(alpha_t, alpha, errs, gamma, method)
668 |             if alpha_t > 1:
669 |                 alpha_t = 1
670 |             if alpha_t < 0:
671 |                 alpha_t = 0
672 |             self.alphas.append(alpha_t)
673 |         PIs = pd.DataFrame(PIs, columns=['lower', 'upper'])
674 |         if 'Solar' in data_name:
675 |             PIs['lower'] = np.maximum(PIs['lower'], 0)
676 |         self.errs = errs
677 |         self.PIs = PIs
678 |         coverage, width = utils.ave_cov_width(PIs, self.Y_predict)
679 |         results.loc[len(results)] = [itrial, data_name,
680 |                                      self.regressor.__class__.__name__, 'Adaptive_CI', self.X_train.shape[0], coverage, width]
681 |         if get_plots:
682 |             return [PIs, results]
683 |         else:
684 |             return results
685 | 
686 | 
687 | def NEX_CP(X, Y, x, alpha, weights=[], tags=[], seed=1103):
688 |     '''
689 |     # Barber et al. 2022: Nex-CP
690 |     # weights are used for computing quantiles for the prediction interval
691 |     # tags are used as weights in weighted least squares regression
692 |     '''
693 |     n = len(Y)
694 | 
695 |     if(len(tags) == 0):
696 |         tags = np.ones(n + 1)
697 | 
698 |     if(len(weights) == 0):
699 |         weights = np.ones(n + 1)
700 |     if(len(weights) == n):
701 |         weights = np.r_[weights, 1]
702 |     weights = weights / np.sum(weights)
703 |     np.random.seed(seed)
704 |     # randomly permute one weight for the regression
705 |     random_ind = int(np.where(np.random.multinomial(1, weights, 1))[1])
706 |     tags[np.c_[random_ind, n]] = tags[np.c_[n, random_ind]]
707 | 
708 |     XtX = (X.T * tags[:-1]).dot(X) + np.outer(x, x) * tags[-1]
709 |     a = Y - X.dot(np.linalg.solve(XtX, (X.T * tags[:-1]).dot(Y)))
710 |     b = -X.dot(np.linalg.solve(XtX, x)) * tags[-1]
711 |     a1 = -x.T.dot(np.linalg.solve(XtX, (X.T * tags[:-1]).dot(Y)))
712 |     b1 = 1 - x.T.dot(np.linalg.solve(XtX, x)) * tags[-1]
713 |     # if we run weighted least squares on (X[1,],Y[1]),...(X[n,],Y[n]),(x,y)
714 |     # then a + b*y = residuals of data points 1,..,n
715 |     # and a1 + b1*y = residual of data point n+1
716 | 
717 |     y_knots = np.sort(
718 |         np.unique(np.r_[((a - a1) / (b1 - b))[b1 - b != 0], ((-a - a1) / (b1 + b))[b1 + b != 0]]))
719 |     y_inds_keep = np.where(((np.abs(np.outer(a1 + b1 * y_knots, np.ones(n)))
720 |                              > np.abs(np.outer(np.ones(len(y_knots)), a) + np.outer(y_knots, b))) *
721 |                             weights[:-1]).sum(1) <= 1 - alpha)[0]
722 |     y_PI = np.array([y_knots[y_inds_keep.min()], y_knots[y_inds_keep.max()]])
723 |     if(weights[:-1].sum() <= 1 - alpha):
724 |         y_PI = np.array([-np.inf, np.inf])
725 |     return y_PI
726 | 
727 | #### Testing functions based on methods above #####
728 | 
729 | 
730 | wind_loc = 0  # Can change this to try wind prediction on different locations
731 | 
732 | 
733 | def test_EnbPI_or_SPCI(main_condition, results_EnbPI_SPCI, itrial=0):
734 |     '''
735 |     Arguments:
736 | 
737 |         main_condition: Contain these three below:
738 |             bool. simulation:  True use simulated data. False use solar
739 |                 simul_type: int. 1 = simple state-space. 2 = non-statioanry. 3 = heteroskedastic
740 |                 The latter 2 follows from case 3 in paper
741 |             bool. use_SPCI: True use `quantile_regr`. False use empirical quatile
742 |             str. quantile_regr:  Which quantile regression to fit residuals (e.g., "RF", "LR")
743 | 
744 |     Other (not arguments)
745 | 
746 |         fit_func: None or sklearn module with methods `.fit` & `.predict`. If None, use MLP above
747 | 
748 |         fit_sigmaX: bool. True if to fit heteroskedastic errors. ONLY activated if fit_func is NONE (i.e. MLP), because errors are unobserved so `.fit()` does not work
749 | 
750 |         smallT: bool. True if empirical quantile uses not ALL T residual in the past to get quantile (should be tuned as sometimes longer memory causes poor coverage)
751 |             past_window: int. If smallT True, EnbPI uses `past_window` most residuals to get width. FOR quantile_regr of residuals, it determines the dimension of the "feature" that predict new quantile of residuals autoregressively
752 | 
753 |     Results:
754 |         dict: contains dictionary of coverage and width under different training fraction (fix alpha) under various argument combinations
755 |     '''
756 |     simulation, use_SPCI, quantile_regr, use_NeuralProphet = main_condition
757 |     non_stat_solar, save_dict_rolling = results_EnbPI_SPCI.other_conditions
758 |     train_ls, alpha = results_EnbPI_SPCI.train_ls, results_EnbPI_SPCI.alpha
759 |     univariate, filter_zero = results_EnbPI_SPCI.data_conditions
760 |     result_cov, result_width = [], []
761 |     for train_frac in train_ls:
762 |         print('########################################')
763 |         print(f'Train frac at {train_frac}')
764 |         ''' Get Data '''
765 |         if simulation:
766 |             simul_type = results_EnbPI_SPCI.simul_type  # 1, 2, 3
767 |             fit_sigmaX = True if simul_type == 3 else False  # If we fit variance given X_t
768 |             simul_name_dict = {1: 'simulation_state_space',
769 |                                2: 'simulate_nonstationary', 3: 'simulate_heteroskedastic'}
770 |             data_name = simul_name_dict[simul_type]
771 |             simul_loader = data.simulate_data_loader()
772 |             Data_dict = simul_loader.get_simul_data(simul_type)
773 |             X_full, Y_full = Data_dict['X'].to(
774 |                 device), Data_dict['Y'].to(device)
775 |             B = 20
776 |             past_window = 500
777 |             fit_func = None
778 |             # if simul_type == 3:
779 |             #     fit_func = None  # It is MLP above
780 |             # else:
781 |             #     fit_func = RandomForestRegressor(n_estimators=10, criterion='mse',
782 |             #                                      bootstrap=False, n_jobs=-1, random_state=1103+itrial)
783 |         else:
784 |             data_name = results_EnbPI_SPCI.data_name
785 |             dloader = data.real_data_loader()
786 |             solar_args = [univariate, filter_zero, non_stat_solar]
787 |             wind_args = [wind_loc]
788 |             X_full, Y_full = dloader.get_data(data_name, solar_args, wind_args)
789 |             RF_seed = 1103+itrial
790 |             if data_name == 'solar':
791 |                 fit_func = RandomForestRegressor(n_estimators=10, criterion='mse',
792 |                                                  bootstrap=False, n_jobs=-1, random_state=RF_seed)
793 |                 past_window = 200 if use_SPCI else 300
794 |             if data_name == 'electric':
795 |                 fit_func = RandomForestRegressor(n_estimators=10, max_depth=1, criterion='mse',
796 |                                                  bootstrap=False, n_jobs=-1, random_state=RF_seed)
797 |                 past_window = 300
798 |             if data_name == 'wind':
799 |                 fit_func = RandomForestRegressor(n_estimators=10, max_depth=1, criterion='mse',
800 |                                                  bootstrap=False, n_jobs=-1, random_state=RF_seed)
801 |                 past_window = 300
802 |             Y_full, X_full = torch.from_numpy(Y_full).float().to(
803 |                 device), torch.from_numpy(X_full).float().to(device)
804 |             fit_sigmaX = False
805 |             B = 25
806 |         N = int(X_full.shape[0] * train_frac)
807 |         X_train, X_predict, Y_train, Y_predict = X_full[:
808 |                                                         N], X_full[N:], Y_full[:N], Y_full[N:]
809 | 
810 |         ''' Train '''
811 |         EnbPI = SPCI_and_EnbPI(
812 |             X_train, X_predict, Y_train, Y_predict, fit_func=fit_func)
813 |         EnbPI.use_NeuralProphet = use_NeuralProphet
814 |         stride = results_EnbPI_SPCI.stride
815 |         EnbPI.fit_bootstrap_models_online_multistep(
816 |             B, fit_sigmaX=fit_sigmaX, stride=stride)
817 |         # Under cond quantile, we are ALREADY using the last window for prediction so smallT is FALSE, instead, we use ALL residuals in the past (in a sliding window fashion) for training the quantile regressor
818 |         smallT = not use_SPCI
819 |         EnbPI.compute_PIs_Ensemble_online(
820 |             alpha, smallT=smallT, past_window=past_window, use_SPCI=use_SPCI,
821 |             quantile_regr=quantile_regr, stride=stride)
822 |         results = EnbPI.get_results(alpha, data_name, itrial)
823 | 
824 |         ''' Save results '''
825 |         result_cov.append(results['coverage'].item())
826 |         result_width.append(results['width'].item())
827 |         PI = EnbPI.PIs_Ensemble
828 |         if use_SPCI:
829 |             if use_NeuralProphet:
830 |                 results_EnbPI_SPCI.PIs_SPCINeuralProphet = PI
831 |             else:
832 |                 results_EnbPI_SPCI.PIs_SPCI = PI
833 |         else:
834 |             results_EnbPI_SPCI.PIs_EnbPI = PI
835 |         Ytest = EnbPI.Y_predict.cpu().detach().numpy()
836 |         results_EnbPI_SPCI.dict_rolling[f'Itrial{itrial}'] = PI
837 |         name = 'SPCI' if use_SPCI else 'EnbPI'
838 |         if use_NeuralProphet:
839 |             name = 'SPCI-NeuralProphet'
840 |         if save_dict_rolling:
841 |             with open(f'{name}_{data_name}_train_frac_{np.round(train_frac,2)}_alpha_{alpha}.p', 'wb') as fp:
842 |                 pickle.dump(results_EnbPI_SPCI.dict_rolling, fp,
843 |                             protocol=pickle.HIGHEST_PROTOCOL)
844 |         if simulation:
845 |             # # Examine recovery of F and Sigma
846 |             fig, ax = plt.subplots(2, 2, figsize=(12, 8))
847 |             ax[0, 0].plot(Data_dict['f(X)'])
848 |             Y_t_hat = EnbPI.Ensemble_pred_interval_centers
849 |             ax[0, 1].plot(Y_t_hat)
850 |             ax[1, 0].plot(Data_dict['Eps'])
851 |             ax[1, 1].plot(EnbPI.Ensemble_online_resid)
852 |             titles = [r'True $f(X)$', r'Est $f(X)$',
853 |                       r'True $\epsilon$', r'Est $\epsilon$']
854 |             fig.tight_layout()
855 |             for i, ax_i in enumerate(ax.flatten()):
856 |                 ax_i.set_title(titles[i])
857 |             fig.tight_layout()
858 |             plt.show()
859 |             plt.close()
860 |     results_EnbPI_SPCI.dict_full[name] = np.vstack(
861 |         [result_cov, result_width])
862 |     results_EnbPI_SPCI.Ytest = Ytest
863 |     results_EnbPI_SPCI.train_size = N
864 |     results_EnbPI_SPCI.data_name = data_name
865 |     utils.dict_to_latex(results_EnbPI_SPCI.dict_full, train_ls)
866 |     return results_EnbPI_SPCI
867 | 
868 | 
869 | def test_adaptive_CI(results_Adapt_CI, itrial=0):
870 |     train_ls, alpha = results_Adapt_CI.train_ls, results_Adapt_CI.alpha
871 |     non_stat_solar, save_dict_rolling = results_Adapt_CI.other_conditions
872 |     univariate, filter_zero = results_Adapt_CI.data_conditions
873 |     # NOTE: the variance of this method seems high, and I often need to tune a LOT to avoid yielding very very high coverage.
874 |     data_name = results_Adapt_CI.data_name
875 |     cov_ls, width_ls = [], []
876 |     for train_frac in train_ls:
877 |         # As it is split conformal, the result can be random, so we repeat over seed
878 |         seeds = [524, 1103, 1111, 1214, 1228]
879 |         seeds = [seed+itrial+1231 for seed in seeds]
880 |         cov_tmp_ls, width_tmp_ls = [], []
881 |         print('########################################')
882 |         print(f'Train frac at {train_frac} over {len(seeds)} seeds')
883 |         PI_ls = []
884 |         for seed in seeds:
885 |             data_name = results_Adapt_CI.data_name
886 |             dloader = data.real_data_loader()
887 |             solar_args = [univariate, filter_zero, non_stat_solar]
888 |             wind_args = [wind_loc]
889 |             X_full, Y_full = dloader.get_data(data_name, solar_args, wind_args)
890 |             N = int(X_full.shape[0] * train_frac)
891 |             X_train, X_predict, Y_train, Y_predict = X_full[:
892 |                                                             N], X_full[N:], Y_full[:N], Y_full[N:]
893 |             if non_stat_solar:
894 |                 # More complex yields wider intervals and more conservative coverage
895 |                 fit_func = RangerForestRegressor(
896 |                     n_estimators=5, quantiles=True, seed=seed)
897 |             else:
898 |                 fit_func = RangerForestRegressor(
899 |                     n_estimators=10, quantiles=True, seed=seed)
900 |             PI_test_adaptive = QOOB_or_adaptive_CI(
901 |                 fit_func, X_train, X_predict, Y_train, Y_predict)
902 |             PI_test_adaptive.compute_AdaptiveCI_intervals(
903 |                 data_name, 0, l=int(0.75 * X_train.shape[0]),
904 |                 alpha=alpha)
905 |             PIs_AdaptiveCI = PI_test_adaptive.PIs
906 |             PI_ls.append(PIs_AdaptiveCI)
907 |             Ytest = PI_test_adaptive.Y_predict
908 |             coverage = ((np.array(PIs_AdaptiveCI['lower']) <= Ytest)
909 |                         & (np.array(PIs_AdaptiveCI['upper']) >= Ytest))
910 |             width = (
911 |                 (np.array(PIs_AdaptiveCI['upper']) - np.array(PIs_AdaptiveCI['lower'])))
912 |             cov_tmp_ls.append(coverage)
913 |             width_tmp_ls.append(width)
914 |         lowers = np.mean([a['lower'] for a in PI_ls], axis=0)
915 |         uppers = np.mean([a['upper'] for a in PI_ls], axis=0)
916 |         PIs_AdaptiveCI = pd.DataFrame(
917 |             np.c_[lowers, uppers], columns=['lower', 'upper'])
918 |         coverage = np.vstack(cov_tmp_ls).mean(axis=0)
919 |         width = np.vstack(width_tmp_ls).mean(axis=0)
920 |         results_Adapt_CI.PIs_AdaptiveCI = PIs_AdaptiveCI
921 |         results_Adapt_CI.dict_rolling[f'Itrial{itrial}'] = PIs_AdaptiveCI
922 |         if save_dict_rolling:
923 |             with open(f'AdaptiveCI_{data_name}_train_frac_{np.round(train_frac,2)}_alpha_{alpha}.p', 'wb') as fp:
924 |                 pickle.dump(results_Adapt_CI.dict_rolling, fp,
925 |                             protocol=pickle.HIGHEST_PROTOCOL)
926 |         cov_ls.append(np.mean(coverage))
927 |         width_ls.append(np.mean(width))
928 |     results_Adapt_CI.dict_full['AdaptiveCI'] = np.vstack(
929 |         [cov_ls, width_ls])
930 |     utils.dict_to_latex(results_Adapt_CI.dict_full, train_ls)
931 |     return results_Adapt_CI
932 | 
933 | 
934 | def test_NEX_CP(results_NEX_CP, itrial=0):
935 |     train_ls, alpha = results_NEX_CP.train_ls, results_NEX_CP.alpha
936 |     non_stat_solar, save_dict_rolling = results_NEX_CP.other_conditions
937 |     univariate, filter_zero = results_NEX_CP.data_conditions
938 |     cov, width = [], []
939 |     data_name = results_NEX_CP.data_name
940 |     dloader = data.real_data_loader()
941 |     solar_args = [univariate, filter_zero, non_stat_solar]
942 |     wind_args = [wind_loc]
943 |     X_full, Y_full = dloader.get_data(data_name, solar_args, wind_args)
944 |     N = len(X_full)
945 |     for train_frac in train_ls:
946 |         train_size = int(train_frac * N)
947 |         PI_nexCP_WLS = np.zeros((N, 2))
948 |         for n in np.arange(train_size, N):
949 |             # weights and tags (parameters for new methods)
950 |             rho = 0.99
951 |             rho_LS = 0.99
952 |             weights = rho**(np.arange(n, 0, -1))
953 |             tags = rho_LS**(np.arange(n, -1, -1))
954 |             PI_nexCP_WLS[n, :] = NEX_CP(X_full[:n, :], Y_full[:n], X_full[n, :], alpha,
955 |                                         weights=weights, tags=tags, seed=1103+itrial)
956 |             inc = int((N - train_size) / 20)
957 |             if (n - train_size) % inc == 0:
958 |                 print(
959 |                     f'NEX-CP WLS width at {n-train_size} is: {PI_nexCP_WLS[n,1] - PI_nexCP_WLS[n,0]}')
960 |         cov_nexCP_WLS = (PI_nexCP_WLS[train_size:, 0] <= Y_full[train_size:N]) *\
961 |             (PI_nexCP_WLS[train_size:, 1] >= Y_full[train_size:N])
962 |         PI_width_nexCP_WLS = PI_nexCP_WLS[train_size:,
963 |                                           1] - PI_nexCP_WLS[train_size:, 0]
964 |         PI_nexCP_WLS = PI_nexCP_WLS[train_size:]
965 |         PI_nexCP_WLS = pd.DataFrame(PI_nexCP_WLS, columns=['lower', 'upper'])
966 |         cov.append(np.mean(cov_nexCP_WLS))
967 |         width.append(np.mean(PI_width_nexCP_WLS))
968 |         print(
969 |             f'At {train_frac} tot data \n cov: {cov[-1]} & width: {width[-1]}')
970 |         # Rolling coverage and width
971 |         # cov_moving = utils.rolling_avg(cov_nexCP_WLS)
972 |         # width_moving = utils.rolling_avg(PI_width_nexCP_WLS)
973 |         results_NEX_CP.PI_nexCP_WLS = PI_nexCP_WLS
974 |         results_NEX_CP.dict_rolling[f'Itrial{itrial}'] = PI_nexCP_WLS
975 |         if save_dict_rolling:
976 |             with open(f'NEXCP_{data_name}_train_frac_{np.round(train_frac,2)}_alpha_{alpha}.p', 'wb') as fp:
977 |                 pickle.dump(results_NEX_CP.dict_rolling, fp,
978 |                             protocol=pickle.HIGHEST_PROTOCOL)
979 |     results_NEX_CP.dict_full['NEXCP'] = np.vstack([cov, width])
980 |     utils.dict_to_latex(results_NEX_CP.dict_full, train_ls)
981 |     return results_NEX_CP
982 | 
983 | ################################################################
984 | ################################################################
985 | 
986 | ################################################################
987 | ################################################################
988 | ################################################################
989 | ################################################################
990 | ################################################################
991 | ################################################################
992 | ################################################################
993 | ################################################################
994 | ################################################################
995 | 


--------------------------------------------------------------------------------
/data.py:
--------------------------------------------------------------------------------
  1 | from scipy.sparse import random
  2 | import pandas as pd
  3 | import numpy as np
  4 | import warnings
  5 | import torch
  6 | import pickle
  7 | import utils_EnbPI
  8 | import matplotlib.pyplot as plt
  9 | from sklearn.preprocessing import OneHotEncoder
 10 | import os
 11 | warnings.filterwarnings("ignore")
 12 | 
 13 | 
 14 | class real_data_loader():
 15 |     def __init__(self):
 16 |         pass
 17 | 
 18 |     def get_data(self, data_name, solar_args=None, wind_args=None):
 19 |         if data_name == 'solar':
 20 |             # Get solar data WITH time t as covariate
 21 |             univariate, filter_zero, non_stat_solar = solar_args
 22 |             Y_full, X_full_old, X_full_nonstat = self.get_non_stationary_solar(
 23 |                 univariate=univariate, filter_zero=filter_zero)
 24 |             if non_stat_solar:
 25 |                 X_full = X_full_nonstat
 26 |             else:
 27 |                 X_full = X_full_old
 28 |         if data_name == 'electric':
 29 |             X_full, Y_full = self.electric_dataset()
 30 |         if data_name == 'wind':
 31 |             wind_loc = wind_args[0]
 32 |             X_full, Y_full = self.get_wind_real(location=wind_loc)
 33 |         return X_full, Y_full
 34 | 
 35 |     def get_wind_real(self, location=0):
 36 |         # Stationary real
 37 |         rootpath = 'Data/data_k30'
 38 |         wind = np.load(os.path.join(rootpath, 'sample_wind.npy'))
 39 |         # len-T vector, denoting wind speed
 40 |         speeds = wind[:, location, 1]
 41 |         data_y = speeds
 42 |         print(f'Shape of full data at location {location}')
 43 |         print(data_y.shape)
 44 |         data_x = rolling(data_y, window=10)
 45 |         N = len(data_x)
 46 |         return data_x, data_y[-N:]
 47 | 
 48 |     def get_non_stationary_solar(self, univariate=True, max_N=2000, filter_zero=False):
 49 |         # Stationary real
 50 |         data = utils_EnbPI.read_data(3, 'Data/Solar_Atl_data.csv', 10000)
 51 |         data_y = data['DHI'].to_numpy()  # Convert to numpy
 52 |         if univariate:
 53 |             # Univariate feature
 54 |             data_x_old = rolling(data_y, window=20)
 55 |         else:
 56 |             # Multivariate feature
 57 |             data_x_old = data.loc[:, data.columns
 58 |                                   != 'DHI'].to_numpy()  # Convert to numpy
 59 |         # Add one-hot-encoded DAY features using // (or hour features using %)
 60 |         hours = int(data_y.shape[0]/365)
 61 |         N = data_x_old.shape[0]
 62 |         day_feature = False
 63 |         if day_feature:
 64 |             # Day one-hot 0,...,364
 65 |             one_hot_feature = (np.arange(N) // hours).reshape(-1, 1)
 66 |         else:
 67 |             # Hourly one-hot 0,...,23
 68 |             one_hot_feature = (np.arange(N) % hours).reshape(-1, 1)
 69 |         one_hot_feature = OneHotEncoder().fit_transform(one_hot_feature).toarray()
 70 |         data_x_new = np.c_[one_hot_feature, data_x_old]
 71 |         data_y, data_x_old, data_x_new = data_y[-max_N:
 72 |                                                 ], data_x_old[-max_N:], data_x_new[-max_N:]
 73 |         if filter_zero:
 74 |             nonzero_idx = data_y > 0.2
 75 |             data_y, data_x_old, data_x_new = data_y[nonzero_idx], data_x_old[nonzero_idx], data_x_new[nonzero_idx]
 76 |         return data_y, data_x_old, data_x_new
 77 | 
 78 |     def electric_dataset(self):
 79 |         # ELEC2 data set
 80 |         # downloaded from https://www.kaggle.com/yashsharan/the-elec2-dataset
 81 |         data = pd.read_csv(f'Data/electricity-normalized.csv')
 82 |         col_names = data.columns
 83 |         data = data.to_numpy()
 84 | 
 85 |         # remove the first stretch of time where 'transfer' does not vary
 86 |         data = data[17760:]
 87 | 
 88 |         # set up variables for the task (predicting 'transfer')
 89 |         covariate_col = ['nswprice', 'nswdemand', 'vicprice', 'vicdemand']
 90 |         response_col = 'transfer'
 91 |         # keep data points for 9:00am - 12:00pm
 92 |         keep_rows = np.where((data[:, 2] > data[17, 2])
 93 |                              & (data[:, 2] < data[24, 2]))[0]
 94 | 
 95 |         X = data[keep_rows][:, np.where(
 96 |             [t in covariate_col for t in col_names])[0]]
 97 |         Y = data[keep_rows][:, np.where(
 98 |             col_names == response_col)[0]].flatten()
 99 |         X = X.astype('float64')
100 |         Y = Y.astype('float64')
101 | 
102 |         return X, Y
103 | 
104 | 
105 | class simulate_data_loader():
106 |     def __init__(self):
107 |         pass
108 | 
109 |     def get_simul_data(self, simul_type):
110 |         if simul_type == 1:
111 |             Data_dict = self.simulation_state_space(
112 |                 num_pts=2000, alpha=0.9, beta=0.9)
113 |         if simul_type == 2:
114 |             Data_dict = self.simulation_non_stationary()
115 |             Data_dict['X'] = torch.from_numpy(Data_dict['X']).float()
116 |             Data_dict['Y'] = torch.from_numpy(Data_dict['Y']).float()
117 |         if simul_type == 3:
118 |             # NOTE: somehow for this case, currently RF quantile regression does not yield shorter interval. We may tune past window to get different results (like decrease it to 250) if need
119 |             Data_dict = self.simultaion_heteroskedastic()
120 |         return Data_dict
121 | 
122 |     def simulation_state_space(self, num_pts, alpha, beta):
123 |         '''
124 |             Y_t = alpha*Y_{t-1}+\eps_t
125 |             \eps_t = beta*\eps_{t-1}+v_t
126 |             v_t ~ N(0,1)
127 |             So X_t = Y_{t-1}, f(X_t) = alpha*X_t
128 |             If t = 0:
129 |                 X_t = 0, Y_t=\eps_t = v_t
130 |         '''
131 |         v0 = torch.randn(1)
132 |         Y, X, fX, eps = [v0], [torch.zeros(1)], [torch.zeros(1)], [v0]
133 |         scale = torch.sqrt(torch.ones(1)*0.1)
134 |         for _ in range(num_pts-1):
135 |             vt = torch.randn(1)*scale
136 |             X.append(Y[-1])
137 |             fX.append(alpha*Y[-1])
138 |             eps.append(beta*eps[-1]+vt)
139 |             Y.append(fX[-1]+eps[-1])
140 |         Y, X, fX, eps = torch.hstack(Y), torch.vstack(
141 |             X), torch.vstack(fX), torch.hstack(eps)
142 |         return {'Y': Y.float(), 'X': X.float(), 'f(X)': fX, 'Eps': eps}
143 | 
144 |     def simulation_non_stationary(self):
145 |         with open(f'Data_nochangepts_nonlinear.p', 'rb') as fp:
146 |             Data_dc_old = pickle.load(fp)
147 |         fXold = Data_dc_old['f(X)']
148 |         gX = non_stationarity(len(fXold))
149 |         fXnew = gX*fXold
150 |         for _ in ['quick_plot']:
151 |             fig, ax = plt.subplots(figsize=(12, 3))
152 |             ax.plot(fXold, label='old f(X)')
153 |             ax.plot(fXnew, label='new f(X)')
154 |             ax.legend()
155 |         Data_dc_new = {}
156 |         for key in Data_dc_old.keys():
157 |             if key == 'Y':
158 |                 continue
159 |             if key == 'X':
160 |                 Data_dc_new[key] = np.c_[
161 |                     np.arange(Data_dc_old[key].shape[0]) % 12, Data_dc_old[key]]
162 |             elif key == 'f(X)':
163 |                 Data_dc_new[key] = fXnew
164 |             else:
165 |                 Data_dc_new[key] = Data_dc_old[key]
166 |         Data_dc_new['Y'] = Data_dc_new['f(X)']+Data_dc_new['Eps']
167 |         Data_dc_old['Y'] = Data_dc_new['Y']
168 |         Data_dc_old['f(X)'] = Data_dc_new['f(X)']
169 |         # return Data_dc_old, Data_dc_new
170 |         return Data_dc_new
171 | 
172 |     def simultaion_heteroskedastic(self):
173 |         ''' Note, the difference from earlier case 3 in paper is that
174 |             1) I reduce d from 100 to 20,
175 |             2) I let X to be different, so sigmaX differs
176 |                 The sigmaX is a linear model so this effect in X is immediate
177 |             I keep the same AR(1) eps & everything else.'''
178 |         def True_mod_nonlinear_pre(feature):
179 |             '''
180 |             Input:
181 |             Output:
182 |             Description:
183 |                 f(feature): R^d -> R
184 |             '''
185 |             # Attempt 3 Nonlinear model:
186 |             # f(X)=sqrt(1+(beta^TX)+(beta^TX)^2+(beta^TX)^3), where 1 is added in case beta^TX is zero
187 |             d = len(feature)
188 |             np.random.seed(0)
189 |             # e.g. 20% of the entries are NON-missing
190 |             beta1 = random(1, d, density=0.2).A
191 |             betaX = np.abs(beta1.dot(feature))
192 |             return (betaX + betaX**2 + betaX**3)**(1/4)
193 |         Tot, d = 1000, 20
194 |         Fmap = True_mod_nonlinear_pre
195 |         # Multiply each random feature by exponential component, which is repeated every Tot/365 elements
196 |         mult = np.exp(0.01*np.mod(np.arange(Tot), 100))
197 |         X = np.random.rand(Tot, d)*mult.reshape(-1, 1)
198 |         fX = np.array([Fmap(x) for x in X]).flatten()
199 |         beta_Sigma = np.ones(d)
200 |         sigmaX = np.maximum(X.dot(beta_Sigma).T, 0)
201 |         with open(f'Data_nochangepts_nonlinear.p', 'rb') as fp:
202 |             Data_dc = pickle.load(fp)
203 |         eps = Data_dc['Eps']
204 |         Y = fX + sigmaX*eps[:Tot]
205 |         np.random.seed(1103)
206 |         idx = np.random.choice(Tot, Tot, replace=False)
207 |         Y, X, fX, sigmaX, eps = Y[idx], X[idx], fX[idx], sigmaX[idx], eps[idx]
208 |         return {'Y': torch.from_numpy(Y).float(), 'X': torch.from_numpy(X).float(), 'f(X)': fX, 'sigma(X)': sigmaX, 'Eps': eps}
209 | 
210 | 
211 | ''' Data Helpers '''
212 | 
213 | 
214 | def non_stationarity(N):
215 |     '''
216 |         Compute g(t)=t'*sin(2*pi*t'/12), which is multiplicative on top of f(X), where
217 |         t' = t mod 12 (for seaonality)
218 |     '''
219 |     cycle = 12
220 |     trange = np.arange(N)
221 |     tprime = trange % cycle
222 |     term2 = np.sin(2*np.pi*tprime/cycle)
223 |     return tprime*term2
224 | 
225 | 
226 | def rolling(a, window):
227 |     shape = (a.size - window + 1, window)
228 |     strides = (a.itemsize, a.itemsize)
229 |     return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)
230 | 
231 | ###
232 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | statsmodels
2 | sklearn_quantile
3 | skranger


--------------------------------------------------------------------------------
/utils_EnbPI.py:
--------------------------------------------------------------------------------
   1 | from scipy.stats import skewnorm
   2 | from scipy.linalg import norm
   3 | from sklearn.linear_model import RidgeCV
   4 | from sklearn.ensemble import RandomForestRegressor
   5 | import seaborn as sns
   6 | import pickle
   7 | import matplotlib.pyplot as plt
   8 | import itertools
   9 | import pandas as pd
  10 | import numpy as np
  11 | import math
  12 | from scipy.sparse import random
  13 | import PI_class_EnbPI as EnbPI  # For me
  14 | import matplotlib.cm as cm
  15 | # from keras.layers import LSTM, Dense, Dropout
  16 | # from keras.models import Sequential
  17 | # from tensorflow.keras.optimizers import Adam
  18 | import calendar
  19 | import matplotlib.transforms as transforms
  20 | import importlib
  21 | import sys
  22 | importlib.reload(sys.modules['PI_class_EnbPI'])  # For me
  23 | titlesize = 20
  24 | plt.rcParams.update({'axes.labelsize': titlesize-2, 'axes.titlesize': titlesize,
  25 |                     'legend.fontsize': titlesize-2, 'xtick.labelsize': titlesize-4, 'ytick.labelsize': titlesize-4})
  26 | 
  27 | '''Simulation Section '''
  28 | '''Define True Models and Errors '''
  29 | 
  30 | 
  31 | def F_inv(alpha):
  32 |     '''
  33 |     Description:
  34 |         Used to compute oracle width when errors are not strongly mixing. It is a skewed normal.
  35 |     '''
  36 |     rv = skewnorm(a=5, loc=0, scale=1)  # a is skewness parameter
  37 |     return rv.ppf(alpha)
  38 | 
  39 | 
  40 | def F_inv_stronglymixing(alpha):
  41 |     '''
  42 |     Description:
  43 |         Used to compute oracle width when errors are strongly mixing. Hidden xi_t follow normal distribution
  44 |     '''
  45 |     rho = 0.6
  46 |     mean = 0 / (1 - rho)
  47 |     std = np.sqrt(0.1 / (1 - rho**2))
  48 |     return norm.ppf(alpha, loc=mean, scale=std)
  49 | 
  50 | 
  51 | def F_inv_stronglymixingDGP(alpha):
  52 |     return norm.ppf(alpha, loc=0, scale=np.sqrt(0.1))
  53 | 
  54 | 
  55 | def beta_star_comp(alpha, stronglymixing):
  56 |     # NOTE, just do this numerically, since F_inv typically do not have closed form so taking gradient to minimize the difference is not needed
  57 |     bins = 1000
  58 |     if stronglymixing:
  59 |         Finv = F_inv_stronglymixing
  60 |     else:
  61 |         Finv = F_inv
  62 |     beta_is = np.linspace(start=0, stop=alpha, num=bins)
  63 |     width = np.zeros(bins)
  64 |     for i in range(bins):
  65 |         width[i] = Finv(1 - alpha + beta_is[i]) - Finv(beta_is[i])
  66 |     i_star = np.argmin(width)
  67 |     return beta_is[i_star]
  68 | 
  69 | 
  70 | def True_mod_linear_pre(feature):
  71 |     '''
  72 |     Input:
  73 |     Output:
  74 |     Description:
  75 |         f(feature): R^d -> R
  76 |     '''
  77 |     # Attempt 0: Fit Linear model on this data
  78 |     d = len(feature)
  79 |     np.random.seed(0)
  80 |     beta0 = np.random.uniform(size=d)  # fully non-missing
  81 |     return beta0.dot(feature)
  82 | 
  83 | 
  84 | def True_mod_linear_post(feature):
  85 |     '''
  86 |     Input:
  87 |     Output:
  88 |     Description:
  89 |         f(feature): R^d -> R
  90 |     '''
  91 |     # Attempt 0: Fit Linear model on this data
  92 |     d = len(feature)
  93 |     np.random.seed(0)
  94 |     beta0 = np.random.uniform(high=5, size=d)  # fully non-missing
  95 |     return beta0.dot(feature)
  96 | 
  97 | 
  98 | def True_mod_lasso_pre(feature):
  99 |     '''
 100 |     Input:
 101 |     Output:
 102 |     Description:
 103 |         f(feature): R^d -> R
 104 |     '''
 105 |     # Attempt 2, pre change: High-dimensional linear model; coincide with the example I give for the assumption
 106 |     d = len(feature)
 107 |     np.random.seed(0)
 108 |     # e.g. 20% of the entries are NON-missing
 109 |     beta1 = random(1, d, density=0.2).A
 110 |     return beta1.dot(feature)
 111 | 
 112 | 
 113 | def True_mod_lasso_post(feature):
 114 |     '''
 115 |     Input:
 116 |     Output:
 117 |     Description:
 118 |         f(feature): R^d -> R
 119 |     '''
 120 |     # Attempt 2, post change: High-dimensional linear model; coincide with the example I give for the assumption
 121 |     d = len(feature)
 122 |     np.random.seed(1)
 123 |     # e.g. 40% of the entries are NON-missing
 124 |     beta1 = random(1, d, density=0.4).A
 125 |     return beta1.dot(feature)
 126 | 
 127 | 
 128 | def True_mod_nonlinear_pre(feature):
 129 |     '''
 130 |     Input:
 131 |     Output:
 132 |     Description:
 133 |         f(feature): R^d -> R
 134 |     '''
 135 |     # Attempt 3 Nonlinear model:
 136 |     # f(X)=sqrt(1+(beta^TX)+(beta^TX)^2+(beta^TX)^3), where 1 is added in case beta^TX is zero
 137 |     d = len(feature)
 138 |     np.random.seed(0)
 139 |     # e.g. 20% of the entries are NON-missing
 140 |     beta1 = random(1, d, density=0.2).A
 141 |     betaX = np.abs(beta1.dot(feature))
 142 |     return (betaX + betaX**2 + betaX**3)**(1/4)
 143 | 
 144 | 
 145 | def True_mod_nonlinear_post(feature, tseries=False):
 146 |     d = len(feature)
 147 |     np.random.seed(0)
 148 |     # e.g. 20% of the entries are NON-missing
 149 |     beta1 = random(1, d, density=0.2).A
 150 |     betaX = np.abs(beta1.dot(feature))
 151 |     return (betaX + betaX**2 + betaX**3)**(1/2)
 152 |     # if tseries:
 153 |     #     return betaX + betaX**2 + betaX**3
 154 |     # else:
 155 |     #     return (betaX + betaX**2 + betaX**3)**(2 / 3)
 156 | 
 157 | 
 158 | def DGP(True_mod_pre, True_mod_post='', T_tot=1000, tseries=False, high_dim=True, change_points=False, change_frac=0.6, stronglymixing=False):
 159 |     '''
 160 |     Description:
 161 |         Create Y_t=f(X_t)+eps_t, eps_t ~ F from above
 162 |         To draw eps_t ~ F, just use F^-1(U).
 163 |     '''
 164 |     np.random.seed(0)
 165 |     Y = np.zeros(T_tot)
 166 |     FX = np.zeros(T_tot)
 167 |     U = np.random.uniform(size=T_tot)
 168 |     Errs = np.zeros(T_tot)
 169 |     if stronglymixing:
 170 |         Finv = F_inv_stronglymixingDGP
 171 |         rho = 0.6
 172 |     else:
 173 |         Finv = F_inv
 174 |         rho = 0
 175 |     Errs[0] = Finv(U[0])
 176 |     for i in range(1, T_tot):
 177 |         Errs[i] = rho * Errs[i - 1] + Finv(U[i])
 178 |     # NOTE; T_tot is NOT Ttrain, so if d is too large, we may never recover it well...
 179 |     if tseries:
 180 |         if change_points:
 181 |             # where change point appears
 182 |             T_cut = math.ceil(change_frac * (T_tot - 100))
 183 |             pre_change = DGP_tseries(
 184 |                 True_mod_pre, T_cut + 100, Errs[:T_cut + 100])
 185 |             post_change = DGP_tseries(
 186 |                 True_mod_post, T_tot - T_cut, Errs[T_cut:], tseries=True)
 187 |             data_full = {}
 188 |             for key in pre_change.keys():
 189 |                 # Note, CANNOT use np.append, as arrays are 2D
 190 |                 data_full[key] = np.concatenate(
 191 |                     (pre_change[key], post_change[key]))
 192 |             return data_full
 193 |         else:
 194 |             return DGP_tseries(True_mod_pre, T_tot, Errs)
 195 |     else:
 196 |         if high_dim:
 197 |             # NOTE: When ||d||_0=c d I need d ~ (1-e^{-1})/c T = (1-e^{-1})/c * (T_tot * train_frac) to AT LEAST allow possible recovery by each S_b. So if I want better approximation (e.g. ||d||_0 = c2 |S_b|), I would let d ~ (1-e^{-1})/c * T_tot*train_frac*c_2. HERE, train_frac=0.5, c=0.2, so we can tweak c2 to roughly have d ~ 0.8 T_tot
 198 |             d = math.ceil(T_tot * 0.8)
 199 |         else:
 200 |             d = math.ceil(T_tot / 10)
 201 |         X = np.random.random((T_tot, d))
 202 |         if change_points:
 203 |             # where change point appears
 204 |             T_cut = math.ceil(change_frac * T_tot)
 205 |             for i in range(T_cut):
 206 |                 FX[i] = True_mod_pre(X[i])
 207 |                 Y[i] = FX[i] + Errs[i]
 208 |             for i in range(T_cut, T_tot):
 209 |                 FX[i] = True_mod_post(X[i])
 210 |                 Y[i] = FX[i] + Errs[i]
 211 |         else:
 212 |             for i in range(T_tot):
 213 |                 FX[i] = True_mod_pre(X[i])
 214 |                 Y[i] = FX[i] + Errs[i]
 215 |         return {'Y': Y, 'X': X, 'f(X)': FX, 'Eps': Errs}
 216 | 
 217 | 
 218 | def DGP_tseries(True_mod, T_tot, Errs, tseries=False):
 219 |     '''
 220 |     Description:
 221 |         Create Y_t=f(X_t)+eps_t, eps_t ~ F from above
 222 |         To draw eps_t ~ F, just use F^-1(U).
 223 |     '''
 224 |     np.random.seed(0)
 225 |     Y = np.zeros(T_tot)
 226 |     FX = np.zeros(T_tot)
 227 |     # NOTE; T_tot is NOT Ttrain, so if d is too large, we may never recover it well...
 228 |     d = 100  # Can be anything, which is the length of past window.
 229 |     X = np.zeros((T_tot - d, d))
 230 |     # Initialize the first two by hand, because "True_mod" must take a vector
 231 |     Y[0] = Errs[0]
 232 |     # Because I assume features are normalized
 233 |     FX[1] = np.random.uniform(size=1)
 234 |     Y[1] = FX[1] + Errs[1]
 235 |     for t in range(2, T_tot):
 236 |         if t < d:
 237 |             X_t = Y[:t]
 238 |             X_t = (X_t - np.mean(X_t)) / np.std(X_t)
 239 |             X_t = np.append(X_t, np.zeros(d-t))  # pad by zeros
 240 |         if t > d:
 241 |             X_t = Y[t - d:t]
 242 |             X_t = (X_t - np.mean(X_t)) / np.std(X_t)
 243 |         if t > d:
 244 |             X[t - d] = X_t
 245 |         if tseries:
 246 |             FX[t] = True_mod(X_t, tseries=True)
 247 |         else:
 248 |             FX[t] = True_mod(X_t)
 249 |         Y[t] = FX[t] + Errs[t]
 250 |     Y = Y[d:]
 251 |     FX = FX[d:]
 252 |     Errs = Errs[d:]
 253 |     return {'Y': Y, 'X': X, 'f(X)': FX, 'Eps': Errs}
 254 | 
 255 | 
 256 | def quick_plt(Data_dc, current_regr, tseries, stronglymixing, change_points=False, args=[]):
 257 |     # Easy visualization of data
 258 |     fig, ax = plt.subplots(figsize=(3, 3))
 259 |     if change_points:
 260 |         Tstar, _ = args
 261 |         start_plt = Tstar - 50
 262 |         end_plt = Tstar + 50
 263 | 
 264 |     else:
 265 |         start_plt = -100
 266 |         end_plt = -1
 267 |     ax.plot(Data_dc['Y'][start_plt:end_plt], label=r'$Y_t$')
 268 |     ax.plot(Data_dc['f(X)'][start_plt:end_plt], label=r'$f(X_t)$')
 269 |     ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.27), ncol=2)
 270 |     tse = '_tseries' if tseries else ''
 271 |     strongm = '_mixing' if stronglymixing else ''
 272 |     regr_name = '_' + current_regr.__class__.__name__
 273 |     if change_points:
 274 |         plt.savefig(
 275 |             f'Simulation/Raw_data_changepts{tse}{strongm}{regr_name}.pdf', dpi=300, bbox_inches='tight',
 276 |             pad_inches=0)
 277 |     else:
 278 |         plt.savefig(
 279 |             f'Simulation/Raw_data_nochangepts{tse}{strongm}{regr_name}.pdf', dpi=300, bbox_inches='tight',
 280 |             pad_inches=0)
 281 |     plt.show()
 282 | 
 283 | 
 284 | '''Fitting part'''
 285 | 
 286 | 
 287 | def split_and_train(Data_dc, train_frac, mathcalA, alpha, itrial, return_full=False, smallT=False):
 288 |     '''
 289 |     Input:
 290 |         alpha and itrial allow us to iterate over alpha and trial number
 291 |     '''
 292 |     data_y_numpy, data_x_numpy = Data_dc['Y'], Data_dc['X']
 293 |     total_data_points = data_y_numpy.shape[0]
 294 |     train_size = math.ceil(train_frac * total_data_points)
 295 |     # Optional, just because sometimes I want to cut of the single digits (e.g. 501->500)
 296 |     train_size = round(train_size / 10) * 10
 297 |     X_train = data_x_numpy[:train_size, :]
 298 |     X_predict = data_x_numpy[train_size:, :]
 299 |     Y_train = data_y_numpy[:train_size]
 300 |     Y_predict = data_y_numpy[train_size:]
 301 |     current_mod = EnbPI.prediction_interval(
 302 |         mathcalA,  X_train, X_predict, Y_train, Y_predict)
 303 |     if mathcalA.__class__.__name__ == 'Sequential':
 304 |         B = 25
 305 |     else:
 306 |         B = 50
 307 |     current_mod.fit_bootstrap_models_online(B, miss_test_idx=[])
 308 |     result = current_mod.run_experiments(
 309 |         alpha=alpha, stride=1, data_name='Anything', itrial=itrial, methods=['Ensemble'], smallT=smallT)
 310 |     # NOTE: 'current_mod' include estimated interval centers and widths, and 'results' JUST include average results and name
 311 |     if return_full:
 312 |         # For more detailed plot
 313 |         return [result, current_mod]
 314 |     else:
 315 |         # For average result
 316 |         return result
 317 | 
 318 | 
 319 | '''Visualize Actual vs. Predicted Error and intervals'''
 320 | 
 321 | 
 322 | def visualize_everything(Data_dc, results, train_frac=0.2, alpha=0.05, change_pts=False, refit=False, arg=[], save_fig=True, tseries=False, stronglymixing=False, first_run=True):
 323 |     # 'results' comes from 'split_and_train' above
 324 |     result_ave, result_mod = results
 325 |     true_errs = Data_dc['Eps']  # Include training data
 326 |     Ttrain = math.ceil(train_frac * len(true_errs))
 327 |     FX = Data_dc['f(X)']  # Include training data
 328 |     Y_predict = Data_dc['Y'][math.ceil(len(FX) * train_frac):]
 329 |     FXhat = result_mod.Ensemble_pred_interval_centers  # Only for T+1,...,T+T1
 330 |     PI = result_mod.Ensemble_pred_interval_ends  # Only for T+1,...,T+T1
 331 |     past_resid = result_mod.Ensemble_online_resid  # Include training LOO residuals
 332 |     beta_hat_bin = binning(past_resid, alpha)
 333 |     beta_hat_bin  # Estimate
 334 |     print(f'Beta^hat_bin is {beta_hat_bin}')
 335 |     if stronglymixing:
 336 |         savename = 'beta_star_stronglymixing.p'
 337 |     else:
 338 |         savename = 'beta_star_nostronglymixing.p'
 339 |     if first_run:
 340 |         beta_star = beta_star_comp(alpha, stronglymixing)  # Actual
 341 |         with open(savename, 'wb') as fp:
 342 |             pickle.dump(beta_star, fp, protocol=pickle.HIGHEST_PROTOCOL)
 343 |     else:
 344 |         with open(savename, 'rb') as fp:
 345 |             beta_star = pickle.load(fp)
 346 |     print(f'Beta^* is {beta_star}')
 347 |     # # NOTE: 0-3 below are useful and took me a while to make, but they may NOT be needed now.
 348 |     # # 0. Compare f(X_t) & hat f(X_t), t>T
 349 |     # fig_fx = EmpvsActual_F([FX[Ttrain:], FXhat])
 350 |     # # 1. & 2. Compare actual vs. empirical CDF & PDF
 351 |     # if change_pts:
 352 |     #     # at T*+T/2+1, for past T/2.
 353 |     #     # Same for refit/not refit because we just want to illustrate the benefit refitting brings
 354 |     #     Tstar, Thalf = arg
 355 |     #     fig_cdf = EmpvsActual_CDF(true_errs[Tstar:Tstar+Thalf], past_resid[Tstar:Tstar+Thalf])
 356 |     #     fig_pdf = EmpvsActual_Err(true_errs[Tstar:Tstar+Thalf], past_resid[Tstar:Tstar+Thalf])
 357 |     # else:
 358 |     #     # at T+1, for past T
 359 |     #     fig_cdf = EmpvsActual_CDF(true_errs[:Ttrain], past_resid[:Ttrain])
 360 |     #     fig_pdf = EmpvsActual_Err(true_errs[:Ttrain], past_resid[:Ttrain])
 361 |     # # 3. Compare actual vs. empirical f(X_t) +/- width, t>T
 362 |     # fig_ptwisewidth = EmpvsActual_PtwiseWidth(
 363 |     #     beta_star, alpha, FX[-len(FXhat):], FXhat, PI, Y_predict, stronglymixing)
 364 |     # 4. Create a simple version
 365 |     fig_ptwisewidth_simple = EmpvsActual_PtwiseWidth_simple(
 366 |         beta_star, alpha, FX[-len(FXhat):], FXhat, PI, Y_predict, stronglymixing)
 367 |     name = 'Simulation'
 368 |     if save_fig:
 369 |         if change_pts:
 370 |             if refit:
 371 |                 string = '_refit_changepts'
 372 |             else:
 373 |                 string = '_norefit_changepts'
 374 |         else:
 375 |             string = '_nochangepts'
 376 |         regr_name = result_ave['muh_fun'][0]
 377 |         tse = '_tseries' if tseries else ''
 378 |         strongm = '_mixing' if stronglymixing else ''
 379 |         # fig_fx.savefig(
 380 |         #     f'{name}/EmpvsActual_FX{string}_{regr_name}{tse}{strongm}.pdf', dpi=300, bbox_inches='tight',
 381 |         #     pad_inches=0)
 382 |         # fig_cdf.savefig(
 383 |         #     f'{name}/EmpvsActual_CDF{string}_{regr_name}{tse}{strongm}.pdf', dpi=300, bbox_inches='tight',
 384 |         #     pad_inches=0)
 385 |         # fig_pdf.savefig(
 386 |         #     f'{name}/EmpvsActual_PDF{string}_{regr_name}{tse}{strongm}.pdf', dpi=300, bbox_inches='tight',
 387 |         #     pad_inches=0)
 388 |         # fig_ptwisewidth.savefig(
 389 |         #     f'{name}/EmpvsActual_PtwiseWidth{string}_{regr_name}{tse}{strongm}.pdf', dpi=300, bbox_inches='tight',
 390 |         #     pad_inches=0)
 391 |         fig_ptwisewidth_simple.savefig(
 392 |             f'{name}/EmpvsActual_PtwiseWidth{string}_{regr_name}{tse}{strongm}_simple.pdf', dpi=300, bbox_inches='tight',
 393 |             pad_inches=0)
 394 | 
 395 | 
 396 | '''Real-data Section'''
 397 | '''Helpers for read data '''
 398 | 
 399 | 
 400 | def read_data(i, filename, max_data_size):
 401 |     if i == 0:
 402 |         '''
 403 |             All datasets are Multivariate time-series. They have respective Github for more details as well.
 404 |             1. Greenhouse Gas Observing Network Data Set
 405 |             Time from 5.10-7.31, 2010, with 4 samples everyday, 6 hours apart between data poits.
 406 |             Goal is to "use inverse methods to determine the optimal values of the weights in the weighted sum of 15 tracers that best matches the synthetic observations"
 407 |             In other words, find weights so that first 15 tracers will be as close to the last as possible.
 408 |             Note, data at many other grid cells are available. Others are in Downloads/🌟AISTATS Data/Greenhouse Data
 409 |             https://archive.ics.uci.edu/ml/datasets/Greenhouse+Gas+Observing+Network
 410 |         '''
 411 |         data = pd.read_csv(filename, header=None, sep=' ').T
 412 |         # data.shape  # 327, 16Note, rows are 16 time series (first 15 from tracers, last from synthetic).
 413 |     elif i == 1:
 414 |         '''
 415 |             2. Appliances energy prediction Data Set
 416 |             The data set is at 10 min for about 4.5 months.
 417 |             The column named 'Appliances' is the response. Other columns are predictors
 418 |             https://archive.ics.uci.edu/ml/datasets/Appliances+energy+prediction
 419 |         '''
 420 |         data = pd.read_csv(filename, delimiter=',')
 421 |         # data.shape  # (19736, 29)
 422 |         data.drop('date', inplace=True, axis=1)
 423 |         data.loc[:, data.columns != 'Appliances']
 424 |     elif i == 2:
 425 |         '''
 426 |             3. Beijing Multi-Site Air-Quality Data Data Set
 427 |             This data set includes hourly air pollutants data from 12 nationally-controlled air-quality monitoring sites.
 428 |             Time period from 3.1, 2013 to 2.28, 2017.
 429 |             PM2.5 or PM10 would be the response.
 430 |             https://archive.ics.uci.edu/ml/datasets/Beijing+Multi-Site+Air-Quality+Data
 431 |         '''
 432 |         data = pd.read_csv(filename)
 433 |         # data.shape  # 35064, 18
 434 |         # data.columns
 435 |         data.drop(columns=['No', 'year', 'month', 'day', 'hour',
 436 |                            'wd', 'station'], inplace=True, axis=1)
 437 |         data.dropna(inplace=True)
 438 |         # data.shape  # 32907, 11
 439 |         # data.head(5)
 440 |     else:
 441 |         """
 442 |             4 (Alternative). NREL Solar data at Atlanta Downtown in 2018. 24 observations per day and separately equally by 1H @ half an hour mark everytime
 443 |             Data descriptions see Solar Writeup
 444 |             Data download:
 445 |             (With API) https://nsrdb.nrel.gov/data-sets/api-instructions.html
 446 |             (Manual) https://maps.nrel.gov/nsrdb-viewer
 447 |         """
 448 |         data = pd.read_csv(filename, skiprows=2)
 449 |         # data.shape  # 8760, 14
 450 |         data.drop(columns=data.columns[0:5], inplace=True)
 451 |         data.drop(columns='Unnamed: 13', inplace=True)
 452 |         # data.shape  # 8760, 8
 453 |         # data.head(5)
 454 |     # pick maximum of X data points (for speed)
 455 |     data = data.iloc[:min(max_data_size, data.shape[0]), :]
 456 |     print(data.shape)
 457 |     return data
 458 | 
 459 | # Extra real-data for CA and Wind
 460 | 
 461 | 
 462 | def read_CA_data(filename):
 463 |     data = pd.read_csv(filename)
 464 |     # data.shape  # 8760, 14
 465 |     data.drop(columns=data.columns[0:6], inplace=True)
 466 |     return data
 467 | 
 468 | 
 469 | def read_wind_data():
 470 |     ''' Note, just use the 8760 hourly observation in 2019
 471 |     Github repo is here: https://github.com/Duvey314/austin-green-energy-predictor'''
 472 |     data_wind_19 = pd.read_csv('Data/Wind_Hackberry_Generation_2019_2020.csv')
 473 |     data_wind_19 = data_wind_19.iloc[:24 * 365, :]
 474 |     return data_wind_19
 475 | 
 476 | 
 477 | '''Binning Subroutine (used everywhere)'''
 478 | 
 479 | 
 480 | def binning(past_resid, alpha):
 481 |     '''
 482 |     Input:
 483 |         past residuals: evident
 484 |         alpha: signifance level
 485 |     Output:
 486 |         beta_hat_bin as argmin of the difference
 487 |     Description:
 488 |         Compute the beta^hat_bin from past_resid, by breaking [0,alpha] into bins (like 20). It is enough for small alpha
 489 |         number of bins are determined rather automatic, relative the size of whole domain
 490 |     '''
 491 |     bins = 5  # For computation, can just reduce it to like 10 or 5 in real data
 492 |     beta_is = np.linspace(start=0, stop=alpha, num=bins)
 493 |     width = np.zeros(bins)
 494 |     for i in range(bins):
 495 |         width[i] = np.percentile(past_resid, math.ceil(100 * (1 - alpha + beta_is[i]))) - \
 496 |             np.percentile(past_resid, math.ceil(100 * beta_is[i]))
 497 |     i_star = np.argmin(width)
 498 |     return beta_is[i_star]
 499 | 
 500 | 
 501 | '''Neural Networks Regressors'''
 502 | 
 503 | 
 504 | def keras_mod():
 505 |     # See explanation of Dropout here: https://towardsdatascience.com/machine-learning-part-20-dropout-keras-layers-explained-8c9f6dc4c9ab
 506 |     model = Sequential(name='NeuralNet')
 507 |     model.add(Dense(100, activation='relu'))
 508 |     model.add(Dense(100, activation='relu'))
 509 |     model.add(Dropout(0.2))
 510 |     model.add(Dense(100, activation='relu'))
 511 |     model.add(Dense(1, activation='relu'))
 512 |     opt = Adam(5e-4)
 513 |     model.compile(loss='mean_squared_error', optimizer=opt)
 514 |     return model
 515 | 
 516 | 
 517 | def keras_rnn():
 518 |     model = Sequential(name='RNN')
 519 |     # For fast cuDNN implementation, activation = 'relu' does not work
 520 |     model.add(LSTM(100, activation='tanh', return_sequences=True))
 521 |     model.add(LSTM(100, activation='tanh'))
 522 |     model.add(Dense(1, activation='relu'))
 523 |     opt = Adam(5e-4)
 524 |     model.compile(loss='mean_squared_error', optimizer=opt)
 525 |     return model
 526 | 
 527 | 
 528 | '''Helper for ensemble'''
 529 | 
 530 | 
 531 | def generate_bootstrap_samples(n, m, B):
 532 |     '''
 533 |       Return: B-by-m matrix, where row b gives the indices for b-th bootstrap sample
 534 |     '''
 535 |     samples_idx = np.zeros((B, m), dtype=int)
 536 |     for b in range(B):
 537 |         sample_idx = np.random.choice(n, m)
 538 |         samples_idx[b, :] = sample_idx
 539 |     return(samples_idx)
 540 | 
 541 | 
 542 | def one_dimen_transform(Y_train, Y_predict, d):
 543 |     n = len(Y_train)
 544 |     n1 = len(Y_predict)
 545 |     X_train = np.zeros((n - d, d))  # from d+1,...,n
 546 |     X_predict = np.zeros((n1, d))  # from n-d,...,n+n1-d
 547 |     for i in range(n - d):
 548 |         X_train[i, :] = Y_train[i:i + d]
 549 |     for i in range(n1):
 550 |         if i < d:
 551 |             X_predict[i, :] = np.r_[Y_train[n - d + i:], Y_predict[:i]]
 552 |         else:
 553 |             X_predict[i, :] = Y_predict[i - d:i]
 554 |     Y_train = Y_train[d:]
 555 |     return([X_train, X_predict, Y_train, Y_predict])
 556 | 
 557 | 
 558 | '''Helper for doing online residual'''
 559 | 
 560 | 
 561 | def strided_app(a, L, S):  # Window len = L, Stride len/stepsize = S
 562 |     nrows = ((a.size - L) // S) + 1
 563 |     n = a.strides[0]
 564 |     return np.lib.stride_tricks.as_strided(a, shape=(nrows, L), strides=(S * n, n))
 565 | 
 566 | 
 567 | '''Helper for Weighted ICP'''
 568 | 
 569 | 
 570 | def weighted_quantile(values, quantiles, sample_weight=None,
 571 |                       values_sorted=False, old_style=False):
 572 |     """ Very close to numpy.percentile, but supports weights.
 573 |     NOTE: quantiles should be in [0, 1]!
 574 |     :param values: numpy.array with data
 575 |     :param quantiles: array-like with many quantiles needed
 576 |     :param sample_weight: array-like of the same length as `array`
 577 |     :param values_sorted: bool, if True, then will avoid sorting of
 578 |         initial array
 579 |     :param old_style: if True, will correct output to be consistent
 580 |         with numpy.percentile.
 581 |     :return: numpy.array with computed quantiles.
 582 |     """
 583 |     values = np.array(values)
 584 |     quantiles = np.array(quantiles)
 585 |     if sample_weight is None:
 586 |         sample_weight = np.ones(len(values))
 587 |     sample_weight = np.array(sample_weight)
 588 |     assert np.all(quantiles >= 0) and np.all(quantiles <= 1), \
 589 |         'quantiles should be in [0, 1]'
 590 | 
 591 |     if not values_sorted:
 592 |         sorter = np.argsort(values)
 593 |         values = values[sorter]
 594 |         sample_weight = sample_weight[sorter]
 595 | 
 596 |     weighted_quantiles = np.cumsum(sample_weight) - 0.5 * sample_weight
 597 |     if old_style:
 598 |         # To be convenient with numpy.percentile
 599 |         weighted_quantiles -= weighted_quantiles[0]
 600 |         weighted_quantiles /= weighted_quantiles[-1]
 601 |     else:
 602 |         weighted_quantiles /= np.sum(sample_weight)
 603 |     return np.interp(quantiles, weighted_quantiles, values)
 604 | 
 605 | 
 606 | """
 607 | For comparing and plotting
 608 | (a) f(X_t) vs hat f(X_t)
 609 | (b) F vs. F_hat and {eps_t} vs. {eps_t hat}
 610 | """
 611 | # (a)
 612 | 
 613 | 
 614 | def EmpvsActual_F(value_ls):
 615 |     ''' Used for comparing actual vs. estimated CDF and PDF (Histogram)
 616 |         value_ls=[actual_errors,estimate_errors]
 617 |         which='CDF' or 'PDF' (e.g. Histogram)
 618 |     '''
 619 |     plt.rcParams.update({'font.size': 18})
 620 |     FX, FXhat = value_ls
 621 |     fig, ax = plt.subplots(figsize=(7, 3))
 622 |     ax.plot(FX[-100:], color="black",
 623 |             label=r"$f(X_t)$")
 624 |     ax.plot(FXhat[-100:], color="blue",
 625 |             label=r"$\hat{f}(X_t)$")
 626 |     ax.legend(loc='center', bbox_to_anchor=(0.5, 1.15), ncol=2)
 627 |     plt.show()
 628 |     return fig
 629 | 
 630 | # (b)
 631 | 
 632 | 
 633 | def val_to_pdf_or_cdf(value_ls, which):
 634 |     ''' Used for comparing actual vs. estimated CDF and PDF (Histogram)
 635 |         value_ls=[actual_errors,estimate_errors]
 636 |         which='CDF' or 'PDF' (e.g. Histogram)
 637 |     '''
 638 |     plt.rcParams.update({'font.size': 18})
 639 |     bins = 50
 640 |     # First on CDF
 641 |     count_t, bins_count_t = np.histogram(value_ls[0], bins=bins)
 642 |     count_e, bins_count_e = np.histogram(value_ls[1], bins=bins)
 643 |     pdf_t = count_t / sum(count_t)
 644 |     pdf_e = count_e / sum(count_e)
 645 |     cdf_t = np.cumsum(pdf_t)
 646 |     cdf_e = np.cumsum(pdf_e)
 647 |     fig, ax = plt.subplots(figsize=(3, 3))
 648 |     if which == 'PDF':
 649 |         ax.plot(bins_count_t[1:], pdf_t, color="black",
 650 |                 label=r"$\{\epsilon_t\}_{t=1}^T$")
 651 |         ax.plot(bins_count_e[1:], pdf_e, color="blue",
 652 |                 label=r"$\{\hat{\epsilon_t}\}_{t=1}^T$")
 653 |         ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.27), ncol=2)
 654 |     else:
 655 |         ax.plot(bins_count_t[1:], cdf_t, color="black", label=r"$F_{T+1}$")
 656 |         ax.plot(bins_count_e[1:], cdf_e, color="blue",
 657 |                 label=r'$\hat{F}_{T+1}$')
 658 |         ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.27), ncol=2)
 659 |     plt.show()
 660 |     return fig
 661 | 
 662 | 
 663 | def EmpvsActual_CDF(true_errs, past_resid):
 664 |     '''
 665 |     Description:
 666 |         (Before Prediction) Overlap empirical CDF on top of actual CDF
 667 |     '''
 668 |     # Example: https://www.geeksforgeeks.org/how-to-calculate-and-plot-a-cumulative-distribution-function-with-matplotlib-in-python/
 669 |     return(val_to_pdf_or_cdf([true_errs, past_resid], which='CDF'))
 670 | 
 671 | 
 672 | def EmpvsActual_Err(true_errs, past_resid):
 673 |     '''
 674 |     Description:
 675 |         (Before Prediction) Overlap empirical histogram/PDF on top of actual errors
 676 |     '''
 677 |     return(val_to_pdf_or_cdf([true_errs, past_resid], which='PDF'))
 678 | 
 679 | 
 680 | def EmpvsActual_PtwiseWidth(beta_star, alpha, FX, FXhat, PI, Y_predict, stronglymixing, change_pts=False, args=[]):
 681 |     '''
 682 |     Input:
 683 |         FX: True FX, size T1 for a particular T
 684 |         FXhat: Estimated FX from estimator, size T1 for a particular T
 685 |         PI: estimated upper and lower intervals, size [T1,2] for a particular T
 686 |     Description:
 687 |         (After Prediction)
 688 |         Side-by-side Plot f(X)+/- actual width vs. hat f(X)+/- estimated width.
 689 |         This is for a particular trial, since we plot over t >= T
 690 |     '''
 691 |     plt.rcParams.update({'font.size': 18,
 692 |                          'legend.fontsize': 15})
 693 |     if stronglymixing:
 694 |         Finv = F_inv_stronglymixing
 695 |     else:
 696 |         Finv = F_inv
 697 |     upper_t = FX + Finv(1 - alpha + beta_star)
 698 |     lower_t = FX + Finv(beta_star)
 699 |     upper_e, lower_e = np.array(PI['upper']), np.array(PI['lower'])
 700 |     fig, ax = plt.subplots(1, 2, figsize=(14, 3), sharey=True)
 701 |     legend_loc = (0.5, 1.65)
 702 |     if change_pts:
 703 |         Tstar, Tnew = args
 704 |         start_plt = Tstar - 50
 705 |         end_plt = Tstar + Tnew + 50
 706 | 
 707 |     else:
 708 |         start_plt = -100
 709 |         end_plt = -1
 710 |     # True values
 711 |     ax[0].plot(FX[start_plt:end_plt], color='black', label=r'$f(X_t)$')
 712 |     wid = 1
 713 |     ax[0].plot(Y_predict[start_plt:end_plt], color='red',
 714 |                label=r'$Y_t$', linewidth=wid)
 715 |     ax[0].plot(upper_t[start_plt:end_plt], color='blue',
 716 |                label=r'$f(X_t)+F_t^{-1}(1-\alpha+\beta^*)$')
 717 |     ax[0].plot(lower_t[start_plt:end_plt], color='orange',
 718 |                label=r'$f(X_t)+F_t^{-1}(\beta^*)$')
 719 |     ax[0].set_xlabel('Prediction Time Index')
 720 |     ax[0].legend(loc='upper center', bbox_to_anchor=legend_loc,
 721 |                  title=r'Oracle Intervals $C^{\alpha}_t$', ncol=2)
 722 |     # Estimated values
 723 |     ax[1].plot(FXhat[start_plt:end_plt],
 724 |                color='black', label=r'$\hat{f}(X_t)$')
 725 |     ax[1].plot(Y_predict[start_plt:end_plt], color='red',
 726 |                label=r'$Y_t$', linewidth=wid)
 727 |     ax[1].plot(upper_e[start_plt:end_plt], color='blue',
 728 |                label=r'$\hat{f}(X_t)+\hat{F}_t^{-1}(1-\alpha+\hat{\beta}_{\rm{bin}})$')
 729 |     ax[1].plot(lower_e[start_plt:end_plt], color='orange',
 730 |                label=r'$\hat{f}(X_t)+\hat{F}_t^{-1}(\hat{\beta}_{\rm{bin}})$')
 731 |     legend_loc = (0.5, 1.65)
 732 |     ax[1].legend(loc='upper center', bbox_to_anchor=legend_loc,
 733 |                  title=r'Estimated Intervals $\hat{C}^{\alpha}_t$', ncol=2)
 734 |     ax[1].set_xlabel('Prediction Time Index')
 735 |     # hide tick and tick label of the big axis
 736 |     plt.show()
 737 |     # plt.tick_params(labelcolor='none', which='both', top=False,
 738 |     #                 bottom=False, left=False, right=False)
 739 |     # plt.xlabel(r"Prediction Index $t$")
 740 |     # plt.ylabel(r"Response Value $Y$")
 741 |     return fig
 742 | 
 743 | 
 744 | def EmpvsActual_PtwiseWidth_simple(beta_star, alpha, FX, FXhat, PI, Y_predict, stronglymixing, change_pts=False, args=[]):
 745 |     '''
 746 |     # NOTE: this is modified from "EmpvsActual_PtwiseWidth", so some inputs are not used
 747 |     Input:
 748 |         FX: True FX, size T1 for a particular T
 749 |         FXhat: Estimated FX from estimator, size T1 for a particular T
 750 |         PI: estimated upper and lower intervals, size [T1,2] for a particular T
 751 |     Description:
 752 |         (After Prediction)
 753 |         Side-by-side Plot f(X)+/- actual width vs. hat f(X)+/- estimated width.
 754 |         This is for a particular trial, since we plot over t >= T
 755 |     '''
 756 |     titlesize = 24
 757 |     plt.rcParams.update({'axes.labelsize': titlesize-2, 'axes.titlesize': titlesize,
 758 |                         'legend.fontsize': titlesize-2, 'xtick.labelsize': titlesize-4, 'ytick.labelsize': titlesize-4})
 759 |     if stronglymixing:
 760 |         Finv = F_inv_stronglymixing
 761 |     else:
 762 |         Finv = F_inv
 763 |     upper_e, lower_e = np.array(PI['upper']), np.array(PI['lower'])
 764 |     if change_pts:
 765 |         fig, ax = plt.subplots(figsize=(8, 4))
 766 |     else:
 767 |         fig, ax = plt.subplots(figsize=(5, 4))
 768 |     legend_loc = (0.5, 1.65)
 769 |     if change_pts:
 770 |         Tstar, Tnew = args
 771 |         start_plt = Tstar - 50
 772 |         end_plt = Tstar + Tnew + 50
 773 | 
 774 |     else:
 775 |         start_plt = -100
 776 |         end_plt = -1
 777 |     # True values
 778 |     ax.plot(Y_predict[start_plt:end_plt], color='orange', label=r'$Y_t$')
 779 |     ax.plot(FXhat[start_plt:end_plt], color='blue', label=r'$\hat{Y_t}$')
 780 |     ax.fill_between(range(np.abs(start_plt - end_plt)),
 781 |                     lower_e[start_plt:end_plt], upper_e[start_plt:end_plt], color='blue', alpha=0.2)
 782 |     ax.set_xlabel('Prediction Time Index')
 783 |     # legend_loc = (0.5, 1.1)
 784 |     # ax.legend(loc='upper left', ncol=2)
 785 |     # ax.set_title('Estimation and Prediction Intervals')
 786 |     plt.show()
 787 |     return fig
 788 | 
 789 | 
 790 | def EmpvsActual_AveWidth(beta_star, alpha, mean_width_dic, mean_cov_dic, stronglymixing=False, cond_cov=False):
 791 |     '''
 792 |     Input:
 793 |         mean_width_dic: {T_ls: est_mean_width}, contains what is the average
 794 |             width of intervals over test points when T_ls fraction of total data is used for training
 795 |     Description:
 796 |         (After Prediction) average est. widths vs. oracle widths (horizontal line)
 797 |         This is for different training sizes T
 798 |     '''
 799 |     titlesize = 25
 800 |     plt.rcParams.update({'axes.labelsize': titlesize-2, 'axes.titlesize': titlesize,
 801 |                         'legend.fontsize': titlesize-2, 'xtick.labelsize': titlesize-4, 'ytick.labelsize': titlesize-4})
 802 |     if cond_cov:
 803 |         fig, ax = plt.subplots(figsize=(7, 4.5))
 804 |     else:
 805 |         fig, ax = plt.subplots(figsize=(7, 4.5))
 806 |     dic = pd.DataFrame(mean_width_dic.items(), columns=[
 807 |                        'T_ls', 'est_mean_width'])
 808 |     est_mean_width, T_ls = dic['est_mean_width'], dic['T_ls']
 809 |     ax.plot(T_ls, est_mean_width, marker='o', color='blue',
 810 |             label='EnbPI')  # plot x and y using blue circle markers
 811 |     if stronglymixing:
 812 |         Finv = F_inv_stronglymixing
 813 |     else:
 814 |         Finv = F_inv
 815 |     oracle = Finv(1 - alpha + beta_star) - Finv(beta_star)
 816 |     ax.axhline(y=oracle,
 817 |                color='blue', linestyle='dashed', label='Oracle')
 818 |     trans = transforms.blended_transform_factory(
 819 |         ax.get_yticklabels()[0].get_transform(), ax.transData)
 820 |     # ax.text(0.1, oracle+0.05, "{:.1f}".format(oracle), color='blue', transform=trans,
 821 |     #         ha="right", va="center", weight='bold')
 822 |     # ax.tick_params(axis='y', pad=16)
 823 |     [t.set_color('blue') for t in ax.yaxis.get_ticklabels()]
 824 |     ax.set_xlabel(r'$\%$ of Total Data')
 825 |     # ax.legend(loc='center right', bbox_to_anchor=(0.5, 1.2), title='Width', ncol=2)
 826 |     ax2 = ax.twinx()
 827 |     dic = pd.DataFrame(mean_cov_dic.items(), columns=['T_ls', 'est_mean_cov'])
 828 |     est_mean_cov, T_ls = dic['est_mean_cov'], dic['T_ls']
 829 |     # plot x and y using blue circle markers
 830 |     ax2.plot(T_ls, est_mean_cov, marker='o', color='red', label='EnbPI')
 831 |     [t.set_color('red') for t in ax2.yaxis.get_ticklabels()]
 832 |     ax2.axhline(y=1 - alpha,
 833 |                 color='red', linestyle='dotted', label='Target')
 834 |     ax2.set_ylim(0.8, 1)
 835 |     ax.tick_params(axis='y', color='red')
 836 |     # ax2.legend(loc='center left', bbox_to_anchor=(0.5, 1.2), title='Marginal Coverage', ncol=2)
 837 |     if cond_cov:
 838 |         plt.rcParams.update(
 839 |             {'legend.fontsize': 11, 'legend.title_fontsize': 11})
 840 |         # x1 = 0
 841 |         same = 0.24
 842 |         # ax.legend(loc='lower left', title='Conditional Width',
 843 |         #           bbox_to_anchor=(x1, same), ncol=2)
 844 |         # ax2.legend(loc='lower left', bbox_to_anchor=(
 845 |         #     x1 + 0.32, same), title='Conditional Coverage', ncol=2)
 846 |         ax2.set_ylim(0.85, 1.05)
 847 |     else:
 848 |         same = 0.05
 849 |         # ax.legend(loc='lower left', title='Width',
 850 |         #           bbox_to_anchor=(0, same), ncol=2)
 851 |         # ax2.legend(loc='lower left', bbox_to_anchor=(
 852 |         #     0.4, same), title='Marginal Coverage', ncol=2)
 853 |     plt.show()
 854 |     return fig
 855 | 
 856 | 
 857 | """
 858 | For Plotting results: average width and marginal coverage plots
 859 | """
 860 | 
 861 | 
 862 | def flip(items, ncol):
 863 |     return itertools.chain(*[items[i::ncol] for i in range(ncol)])
 864 | 
 865 | 
 866 | def plot_average_new(x_axis, x_axis_name, save=True, Dataname=['Solar_Atl'], two_rows=True):
 867 |     """Plot mean coverage and width for different PI methods and regressor combinations side by side,
 868 |        over rho or train_size or alpha_ls
 869 |        Parameters:
 870 |         data_type: simulated (2-by-3) or real data (2-by-2)
 871 |         x_axis: either list of train_size, or alpha
 872 |         x_axis_name: either train_size or alpha
 873 |     """
 874 |     ncol = 2
 875 |     Dataname.append(Dataname[0])  # for 1D results
 876 |     if two_rows:
 877 |         fig, ax = plt.subplots(2, 2, figsize=(8, 8), sharex=True)
 878 |     else:
 879 |         fig, ax = plt.subplots(1, 4, figsize=(16, 4), sharex=True)
 880 |     j = 0
 881 |     filename = {'alpha': 'alpha', 'train_size': 'train'}
 882 |     one_D = False
 883 |     for data_name in Dataname:
 884 |         # load appropriate data
 885 |         if j == 1 or one_D:
 886 |             results = pd.read_csv(
 887 |                 f'Results/{data_name}_many_{filename[x_axis_name]}_new_1d.csv')
 888 |         else:
 889 |             results = pd.read_csv(
 890 |                 f'Results/{data_name}_many_{filename[x_axis_name]}_new.csv')
 891 |         methods_name = ['ARIMA', 'ExpSmoothing', 'DynamicFactor', 'Ensemble']
 892 |         cov_together = []
 893 |         width_together = []
 894 |         # Loop through dataset name and plot average coverage and width for the particular regressor
 895 |         # First ARIMA, then Ensemble
 896 |         muh_fun = np.unique(results[(results.method == 'Ensemble') & (
 897 |             results.muh_fun != 'Sequential')]['muh_fun'])
 898 |         tseries_mtd = methods_name[:3]
 899 |         for method in methods_name:
 900 |             print(method)
 901 |             if method in tseries_mtd:
 902 |                 results_method = results[(results['method'] == method)]
 903 |                 if data_name == 'Network':
 904 |                     method_cov = results_method.groupby(
 905 |                         by=[x_axis_name, 'node'], as_index=False).mean().groupby(x_axis_name)['coverage'].describe()  # Column with 50% is median
 906 |                     method_width = results_method.groupby(
 907 |                         by=[x_axis_name, 'node'], as_index=False).mean().groupby(x_axis_name)['width'].describe()  # Column with 50% is median
 908 |                 else:
 909 |                     method_cov = results_method.groupby(
 910 |                         x_axis_name)['coverage'].describe()  # Column with 50% is median
 911 |                     method_width = results_method.groupby(
 912 |                         x_axis_name)['width'].describe()  # Column with 50% is median
 913 |                     method_cov['se'] = method_cov['std'] / \
 914 |                         np.sqrt(method_cov['count'])
 915 |                     method_width['se'] = method_width['std'] / \
 916 |                         np.sqrt(method_width['count'])
 917 |                     cov_together.append(method_cov)
 918 |                     width_together.append(method_width)
 919 |             else:
 920 |                 for fit_func in muh_fun:
 921 |                     results_method = results[(results['method'] == method)
 922 |                                              & (results['muh_fun'] == fit_func)]
 923 |                     if data_name == 'Network':
 924 |                         method_cov = results_method.groupby(
 925 |                             by=[x_axis_name, 'node'], as_index=False).mean().groupby(x_axis_name)['coverage'].describe()  # Column with 50% is median
 926 |                         method_width = results_method.groupby(
 927 |                             by=[x_axis_name, 'node'], as_index=False).mean().groupby(x_axis_name)['width'].describe()  # Column with 50% is median
 928 |                     else:
 929 |                         method_cov = results_method.groupby(
 930 |                             x_axis_name)['coverage'].describe()  # Column with 50% is median
 931 |                         method_width = results_method.groupby(
 932 |                             x_axis_name)['width'].describe()  # Column with 50% is median
 933 |                     method_cov['se'] = method_cov['std'] / \
 934 |                         np.sqrt(method_cov['count'])
 935 |                     method_width['se'] = method_width['std'] / \
 936 |                         np.sqrt(method_width['count'])
 937 |                     cov_together.append(method_cov)
 938 |                     width_together.append(method_width)
 939 |         # Plot
 940 |         # Parameters
 941 |         num_method = len(tseries_mtd) + len(muh_fun)  # ARIMA + EnbPI
 942 |         colors = cm.rainbow(np.linspace(0, 1, num_method))
 943 |         mtds = np.append(tseries_mtd, muh_fun)
 944 |         # label_names = methods_name
 945 |         label_names = {'ARIMA': 'ARIMA',
 946 |                        'ExpSmoothing': 'ExpSmoothing',
 947 |                        'DynamicFactor': 'DynamicFactor',
 948 |                        'RidgeCV': 'EnbPI Ridge',
 949 |                        'RandomForestRegressor': 'EnbPI RF', 'Sequential': 'EnbPI NN', 'RNN': 'EnbPI RNN'}
 950 |         first = 0
 951 |         second = 1
 952 |         if one_D:
 953 |             first = 2
 954 |             second = 3
 955 |         name = 'mean'
 956 |         print(mtds)
 957 |         for i in range(num_method):
 958 |             if two_rows:
 959 |                 # Coverage
 960 |                 ax[j, first].plot(x_axis, cov_together[i][name], linestyle='-',
 961 |                                   marker='o', label=label_names[mtds[i]], color=colors[i])
 962 |                 ax[j, first].fill_between(x_axis, cov_together[i][name] - cov_together[i]['se'],
 963 |                                           cov_together[i][name] + cov_together[i]['se'], alpha=0.35, facecolor=colors[i])
 964 |                 ax[j, first].set_ylim(0.7, 1)
 965 |                 ax[j, first].tick_params(
 966 |                     axis='both', which='major')
 967 |                 # Width
 968 |                 ax[j, second].plot(x_axis, width_together[i][name], linestyle='-',
 969 |                                    marker='o', label=label_names[mtds[i]], color=colors[i])
 970 |                 ax[j, second].fill_between(x_axis, width_together[i][name] - width_together[i]['se'],
 971 |                                            width_together[i][name] + width_together[i]['se'], alpha=0.35, facecolor=colors[i])
 972 |                 ax[j, second].tick_params(
 973 |                     axis='both', which='major')
 974 |                 # Legends, target coverage, labels...
 975 |                 # Set label
 976 |                 ax[j, first].plot(
 977 |                     x_axis, x_axis, linestyle='-.', color='green')
 978 |                 # x_ax = ax[j, first].axes.get_xaxis()
 979 |                 # x_ax.set_visible(False)
 980 |                 nrow = len(Dataname)
 981 |                 ax[nrow - 1, 0].set_xlabel(r'$1-\alpha$')
 982 |                 ax[nrow - 1, 1].set_xlabel(r'$1-\alpha$')
 983 |             else:
 984 |                 # Coverage
 985 |                 ax[first].plot(x_axis, cov_together[i][name], linestyle='-',
 986 |                                marker='o', label=label_names[mtds[i]], color=colors[i])
 987 |                 ax[first].fill_between(x_axis, cov_together[i][name] - cov_together[i]['se'],
 988 |                                        cov_together[i][name] + cov_together[i]['se'], alpha=0.35, facecolor=colors[i])
 989 |                 ax[first].set_ylim(0.65, 1)
 990 |                 ax[first].tick_params(
 991 |                     axis='both', which='major')
 992 |                 # Width
 993 |                 ax[second].plot(x_axis, width_together[i][name], linestyle='-',
 994 |                                 marker='o', label=label_names[mtds[i]], color=colors[i])
 995 |                 ax[second].fill_between(x_axis, width_together[i][name] - width_together[i]['se'],
 996 |                                         width_together[i][name] + width_together[i]['se'], alpha=0.35, facecolor=colors[i])
 997 |                 ax[second].tick_params(
 998 |                     axis='both', which='major')
 999 |                 # Legends, target coverage, labels...
1000 |                 # Set label
1001 |                 ax[first].plot(x_axis, x_axis, linestyle='-.', color='green')
1002 |                 # x_ax = ax[j, first].axes.get_xaxis()
1003 |                 # x_ax.set_visible(False)
1004 |                 ax[first].set_xlabel(r'$1-\alpha$')
1005 |                 ax[second].set_xlabel(r'$1-\alpha$')
1006 |         if two_rows:
1007 |             j += 1
1008 |         else:
1009 |             one_D = True
1010 |     if two_rows:
1011 |         ax[0, 0].set_title('Coverage')
1012 |         ax[0, 1].set_title('Width')
1013 |     else:
1014 |         ax[0].set_title('Coverage')
1015 |         ax[1].set_title('Width')
1016 |         ax[2].set_title('Coverage')
1017 |         ax[3].set_title('Width')
1018 |     if two_rows:
1019 |         ax[0, 0].set_ylabel('Multivariate')
1020 |         ax[1, 0].set_ylabel('Unitivariate')
1021 |     else:
1022 |         ax[0].set_ylabel('Multivariate')
1023 |         ax[2].set_ylabel('Unitivariate')
1024 |     fig.tight_layout(pad=0)
1025 |     if two_rows:
1026 |         # ax[0, 1].legend(loc='upper left', fontsize=axisfont-2)
1027 |         ax[1, 1].legend(loc='upper center',
1028 |                         bbox_to_anchor=(-0.08, -0.18), ncol=3)
1029 |     else:
1030 |         # # With only ARIMA
1031 |         # ax[3].legend(loc='upper center',
1032 |         #              bbox_to_anchor=(-0.75, -0.18), ncol=5, fontsize=axisfont-2)
1033 |         cols = 3
1034 |         handles, labels = ax[3].get_legend_handles_labels()
1035 |         ax[3].legend(flip(handles, cols), flip(labels, cols), loc='upper center',
1036 |                      bbox_to_anchor=(-0.3, -0.18), ncol=cols)
1037 |         # ax[3].legend(loc='upper center',
1038 |         #              bbox_to_anchor=(-0.68, -0.18), ncol=3, fontsize=axisfont - 2)
1039 |     if save:
1040 |         if two_rows:
1041 |             fig.savefig(
1042 |                 f'{Dataname[0]}_mean_coverage_width_{x_axis_name}.pdf', dpi=300, bbox_inches='tight',
1043 |                 pad_inches=0)
1044 |         else:
1045 |             fig.savefig(
1046 |                 f'{Dataname[0]}_mean_coverage_width_{x_axis_name}_one_row.pdf', dpi=300, bbox_inches='tight',
1047 |                 pad_inches=0)
1048 | 
1049 | 
1050 | def grouped_box_new(dataname, type, alpha=0.1, extra_save=''):
1051 |     '''First (Second) row contains grouped boxplots for multivariate (univariate) for Ridge, RF, and NN.
1052 |        Each boxplot contains coverage and width for all three PI methods over 3 (0.1, 0.3, 0.5) train/total data, so 3*3 boxes in total
1053 |        extra_save is for special suffix of plot (such as comparing NN and RNN)'''
1054 |     results = pd.read_csv(f'Results/{dataname}_many_train_new{extra_save}.csv')
1055 |     results.sort_values('method', inplace=True, ascending=True)
1056 |     results.loc[results.method == 'Ensemble', 'method'] = 'EnbPI'
1057 |     results.loc[results.method == 'Weighted_ICP', 'method'] = 'Weighted ICP'
1058 |     results_1d = pd.read_csv(
1059 |         f'Results/{dataname}_many_train_new_1d{extra_save}.csv')
1060 |     results_1d.sort_values('method', inplace=True, ascending=True)
1061 |     results_1d.loc[results_1d.method == 'Ensemble', 'method'] = 'EnbPI'
1062 |     results_1d.loc[results_1d.method
1063 |                    == 'Weighted_ICP', 'method'] = 'Weighted ICP'
1064 |     if 'Sequential' in np.array(results.muh_fun):
1065 |         results['muh_fun'].replace({'Sequential': 'NeuralNet'}, inplace=True)
1066 |         results_1d['muh_fun'].replace(
1067 |             {'Sequential': 'NeuralNet'}, inplace=True)
1068 |     regrs = np.unique(results.muh_fun)
1069 |     regrs_label = {'RidgeCV': 'Ridge', 'LassoCV': 'Lasso', 'RandomForestRegressor': "RF",
1070 |                    'NeuralNet': "NN", 'RNN': 'RNN', 'GaussianProcessRegressor': 'GP'}
1071 |     # Set up plot
1072 |     ncol = 2  # Compare RNN vs NN
1073 |     if len(regrs) > 2:
1074 |         ncol = 3  # Ridge, RF, NN
1075 |         regrs = ['RidgeCV', 'RandomForestRegressor', 'NeuralNet']
1076 |     if type == 'coverage':
1077 |         f, ax = plt.subplots(2, ncol, figsize=(
1078 |             3 * ncol, 6), sharex=True, sharey=True)
1079 |     else:
1080 |         # all plots in same row share y-axis
1081 |         f, ax = plt.subplots(2, ncol, figsize=(
1082 |             3 * ncol, 6), sharex=True, sharey=True)
1083 |     f.tight_layout(pad=0)
1084 |     # Prepare for plot
1085 |     d = 20
1086 |     results_1d.train_size += d  # for plotting purpose
1087 |     tot_data = math.ceil(max(results.train_size) / 0.278)
1088 |     results['ratio'] = np.round(results.train_size / tot_data, 2)
1089 |     results_1d['ratio'] = np.round(results_1d.train_size / tot_data, 2)
1090 |     j = 0  # column, denote aggregator
1091 |     ratios = np.unique(results['ratio'])
1092 |     # train_size_for_plot = [ratios[2], ratios[4], ratios[6], ratios[9]] # This was for 4 boxplots in one figure
1093 |     train_size_for_plot = ratios
1094 |     for regr in regrs:
1095 |         mtd = ['EnbPI', 'ICP', 'Weighted ICP']
1096 |         mtd_colors = ['red', 'royalblue', 'black']
1097 |         color_dict = dict(zip(mtd, mtd_colors))  # specify colors for each box
1098 |         # Start plotting
1099 |         which_train_idx = [
1100 |             fraction in train_size_for_plot for fraction in results.ratio]
1101 |         which_train_idx_1d = [
1102 |             fraction in train_size_for_plot for fraction in results_1d.ratio]
1103 |         results_plt = results.iloc[which_train_idx, ]
1104 |         results_1d_plt = results_1d.iloc[which_train_idx_1d, ]
1105 |         sns.boxplot(y=type, x='ratio',
1106 |                     data=results_plt[results_plt.muh_fun == regr],
1107 |                     palette=color_dict,
1108 |                     hue='method', ax=ax[0, j], showfliers=False, linewidth=0.4)
1109 |         sns.boxplot(y=type, x='ratio',
1110 |                     data=results_1d_plt[results_1d_plt.muh_fun == regr],
1111 |                     palette=color_dict,
1112 |                     hue='method', ax=ax[1, j], showfliers=False, linewidth=0.4)
1113 |         # # Add text, as the boxes are too things to be distinguishable
1114 |         # for i in range(len(mtd)):
1115 |         #     m = mtd[i]
1116 |         #     for k in [0, 1]:
1117 |         #         ax[k, j].text(i-1, results_plt[(results_plt.muh_fun == regr) & (
1118 |         #             results_plt.method == m)][type].max()+1, m, ha='center', color=mtd_colors[i], fontsize=12)
1119 |         for i in range(2):
1120 |             # if type == 'coverage' and extra_save == '_online':
1121 |             #     ax[i, j].set_ylim([0.8, 0.95])
1122 |             ax[i, j].tick_params(axis='both', which='major', labelsize=14)
1123 |             if type == 'coverage':
1124 |                 ax[i, j].axhline(y=0.9, color='black', linestyle='dashed')
1125 |             # Control legend
1126 |             ax[i, j].get_legend().remove()
1127 |             # Control y and x-label
1128 |             if j == 0:
1129 |                 # Y-label on
1130 |                 ax[0, 0].set_ylabel('Multivariate')
1131 |                 ax[1, 0].set_ylabel('Univariate')
1132 |                 if i == 1:
1133 |                     # X-label on
1134 |                     ax[1, j].set_xlabel(
1135 |                         r'$\%$ of Total Data')
1136 |                 else:
1137 |                     # X-label off
1138 |                     x_axis = ax[i, j].axes.get_xaxis()
1139 |                     x_axis.set_visible(False)
1140 |             else:
1141 |                 y_label = ax[i, j].axes.get_yaxis().get_label()
1142 |                 y_label.set_visible(False)
1143 |                 if type == 'coverage':
1144 |                     # Y-label off
1145 |                     y_axis = ax[i, j].axes.get_yaxis()
1146 |                     y_axis.set_visible(False)
1147 |                 if i == 1:
1148 |                     # X-label on
1149 |                     ax[1, j].set_xlabel(
1150 |                         r'$\%$ of Total Data')
1151 |                 else:
1152 |                     # X-label off
1153 |                     x_axis = ax[i, j].axes.get_xaxis()
1154 |                     x_axis.set_visible(False)
1155 |             # Control Title
1156 |             if i == 0:
1157 |                 ax[0, j].set_title(regrs_label[regr])
1158 |         j += 1
1159 |         # Legend lastly
1160 |     # Assign to top middle
1161 |     # ax[1, 1].legend(loc='upper center',
1162 |     #                 bbox_to_anchor=(0.5, -0.25), ncol=3)
1163 |     plt.legend(loc='upper center',
1164 |                bbox_to_anchor=(-0.15, -0.25), ncol=3)
1165 |     plt.savefig(
1166 |         f'{dataname}_boxplot_{type}{extra_save}.pdf', dpi=300, bbox_inches='tight',
1167 |         pad_inches=0)
1168 | 
1169 | 
1170 | def set_share_axes(axs, target=None, sharex=False, sharey=False):
1171 |     if target is None:
1172 |         target = axs.flat[0]
1173 |     # Manage share using grouper objects
1174 |     for ax in axs.flat:
1175 |         if sharex:
1176 |             target._shared_x_axes.join(target, ax)
1177 |         if sharey:
1178 |             target._shared_y_axes.join(target, ax)
1179 |     # Turn off x tick labels and offset text for all but the bottom row
1180 |     if sharex and axs.ndim > 1:
1181 |         for ax in axs[:-1, :].flat:
1182 |             ax.xaxis.set_tick_params(
1183 |                 which='both', labelbottom=False, labeltop=False)
1184 |             ax.xaxis.offsetText.set_visible(False)
1185 |     # Turn off y tick labels and offset text for all but the left most column
1186 |     if sharey and axs.ndim > 1:
1187 |         for ax in axs[:, 1:].flat:
1188 |             ax.yaxis.set_tick_params(
1189 |                 which='both', labelleft=False, labelright=False)
1190 |             ax.yaxis.offsetText.set_visible(False)
1191 | 
1192 | 
1193 | def grouped_box_new_with_MoreCPMethods(type):
1194 |     font_size = 18
1195 |     label_size = 20
1196 |     results = pd.read_csv(
1197 |         f'Solar_QOOB_Adaptive_JaB_EnbPI_marginal_Multi-variate.csv')
1198 |     results.sort_values('method', inplace=True, ascending=True)
1199 |     results.loc[results.method == 'Ensemble', 'method'] = 'EnbPI'
1200 |     results.loc[results.method == 'JaB', 'method'] = 'J+aB'
1201 |     results_1d = pd.read_csv(
1202 |         f'Solar_QOOB_Adaptive_JaB_EnbPI_marginal_Uni-variate.csv')
1203 |     results_1d.sort_values('method', inplace=True, ascending=True)
1204 |     results_1d.loc[results_1d.method == 'Ensemble', 'method'] = 'EnbPI'
1205 |     results_1d.loc[results_1d.method == 'JaB', 'method'] = 'J+aB'
1206 |     results['muh_fun'].replace(
1207 |         {'RangerForestRegressor': 'RF'}, inplace=True)
1208 |     results['muh_fun'].replace(
1209 |         {'RandomForestRegressor': 'RF'}, inplace=True)
1210 |     results_1d['muh_fun'].replace(
1211 |         {'RangerForestRegressor': 'RF'}, inplace=True)
1212 |     results_1d['muh_fun'].replace(
1213 |         {'RandomForestRegressor': 'RF'}, inplace=True)
1214 |     # Set up plot
1215 |     fig, ax = plt.subplots(2, 1, figsize=(10, 6), sharex=True)
1216 |     # Prepare for plot
1217 |     tot_data = math.ceil(max(results.train_size) / 0.278)
1218 |     results['ratio'] = np.round(results.train_size / tot_data, 2)
1219 |     results_1d['ratio'] = np.round(results.train_size / tot_data, 2)
1220 |     ratios = np.unique(results['ratio'])
1221 |     train_size_for_plot = ratios
1222 |     mtd = [
1223 |         'EnbPI', 'QOOB', 'Adaptive_CI', 'J+aB']
1224 |     mtd_colors = ['red', 'royalblue', 'black', 'orange']
1225 |     color_dict = dict(zip(mtd, mtd_colors))  # specify colors for each box
1226 |     # Start plotting
1227 |     which_train_idx = [
1228 |         fraction in train_size_for_plot for fraction in results.ratio]
1229 |     results_plt = results.iloc[which_train_idx, ]
1230 |     results_1d_plt = results_1d.iloc[which_train_idx, ]
1231 |     sns.boxplot(y=type, x='ratio',
1232 |                 data=results_plt[results_plt.muh_fun == 'RF'],
1233 |                 palette=color_dict,
1234 |                 hue='method', ax=ax[0], showfliers=False, width=1, saturation=1, linewidth=0.4)
1235 |     sns.boxplot(y=type, x='ratio',
1236 |                 data=results_1d_plt[results_1d_plt.muh_fun == 'RF'],
1237 |                 palette=color_dict,
1238 |                 hue='method', ax=ax[1], showfliers=False, width=1, saturation=1, linewidth=0.4)
1239 |     for j in range(2):
1240 |         ax[j].tick_params(axis='both', which='major', labelsize=14)
1241 |         if type == 'coverage':
1242 |             ax[j].axhline(y=0.9, color='black',
1243 |                           linestyle='dashed', linewidth=1)
1244 |         # Control legend
1245 |         ax[j].get_legend().remove()
1246 |         # Control y and x-label
1247 |         ax[0].axes.get_xaxis().set_visible(False)
1248 |         ax[1].set_xlabel(r'$\%$ of Total Data', fontsize=label_size)
1249 |     ax[0].set_ylabel('Multivariate', fontsize=label_size)
1250 |     ax[1].set_ylabel('Univariate', fontsize=label_size)
1251 |     # Control Title
1252 |     ax[0].set_title('RF', fontsize=label_size)
1253 |     plt.tight_layout(pad=0)
1254 |     plt.legend(loc='upper right',
1255 |                bbox_to_anchor=(1, -0.3), ncol=4, fontsize=font_size)
1256 |     return fig
1257 | 
1258 | 
1259 | '''For Conditional Coverage---Preprocessing'''
1260 | 
1261 | 
1262 | def missing_data(data, missing_frac, update=False):
1263 |     n = len(data)
1264 |     idx = np.random.choice(n, size=int(missing_frac * n), replace=False)
1265 |     if update:
1266 |         data = np.delete(data, idx, 0)
1267 |     idx = idx.tolist()
1268 |     return (data, idx)
1269 | 
1270 | 
1271 | def restructure_X_t(darray):
1272 |     '''
1273 |     For each row i after the first row, take i-1 last entries of the first row and then impute the rest
1274 |     Imputation is just generating random N(Y_train_mean, Y_train_std), where
1275 |     Y_train is the first row.
1276 |     '''
1277 |     s = darray.shape[1]
1278 |     copy = np.copy(darray)
1279 |     for i in range(1, min(s, darray.shape[0])):
1280 |         copy[i, :s - i] = copy[0, i:]
1281 |         imputed_val = np.abs(np.random.normal(loc=np.mean(
1282 |             copy[0]), scale=np.std(copy[0]), size=i))
1283 |         copy[i, s - i:] = imputed_val
1284 |     return copy
1285 | 
1286 | 
1287 | def further_preprocess(data, response_name='DHI', suffix=''):
1288 |     '''Extract non-zero hours and also hours between 10AM-2PM (where radiation is high) '''
1289 |     max_recorder = pd.DataFrame(np.zeros(24), index=range(0, 24))
1290 |     for i in range(0, 24):
1291 |         # Check at what times max recording is 0 (meaning no recording yet)
1292 |         # 12:00 AM every day. for every later hour, + i \in \{1,...,23\}
1293 |         time = np.arange(365) * 24 + i
1294 |         max_record = np.max(data[response_name][time])
1295 |         max_recorder.iloc[i] = max_record
1296 |     # Drop these non-zero things
1297 |     data_sub = data.copy()
1298 |     to_be_droped = np.where(max_recorder == 0)[0]
1299 |     print(to_be_droped)
1300 |     drop_idx = []
1301 |     if len(to_be_droped) > 0:
1302 |         for i in to_be_droped:
1303 |             drop_idx.append(np.arange(365) * 24 + i)
1304 |         drop_idx = np.hstack(drop_idx)
1305 |         data_sub.drop(drop_idx, inplace=True)
1306 |     else:
1307 |         data_sub = []
1308 |     # Create near_noon data between 10AM-2PM
1309 |     if suffix == '':
1310 |         to_be_included = np.array([10, 11, 12, 13, 14])
1311 |     if suffix == '_8_9_15_16_17':
1312 |         to_be_included = np.array([8, 9, 15, 16, 17])
1313 |     if suffix == '_10_14':
1314 |         to_be_included = np.array([10, 11, 12, 13, 14])
1315 |     to_be_droped = np.delete(np.arange(24), to_be_included)
1316 |     data_near_noon = data.copy()
1317 |     drop_idx = []
1318 |     for i in to_be_droped:
1319 |         drop_idx.append(np.arange(365) * 24 + i)
1320 |     drop_idx = np.hstack(drop_idx)
1321 |     data_near_noon.drop(drop_idx, inplace=True)
1322 |     return [data_sub, data_near_noon]
1323 | 
1324 | 
1325 | def big_transform_s_beyond_1(sub, cities, current_city, one_dim, missing, miss_frac=0.25):
1326 |     '''Overall, include ALL other cities' data in the CURRENT city being considered.
1327 |        1. Check what data is used (full, sub, or near-noon), need sub, but it is now suppressed.
1328 |        # NOTE, 1 is suppressed for now, since we are uncertain whether sub or near-noon is needed for Californian results
1329 |        2. If missing, process these training and testing data before transform
1330 |        -->> Current city and neighbors are assumed to have DIFFERENT missing fractions.
1331 |        3. Then, if one_dim, transform data (include past), but since s>1, apply *restructure_X_t* to s rows a time'''
1332 |     big_X_train = []
1333 |     big_X_predict = []
1334 |     big_Y_train = []
1335 |     big_Y_predict = []
1336 |     stride_ls = []
1337 |     for city in cities:
1338 |         print(city)
1339 |         # Start 1
1340 |         if 'Solar_Atl' in city:
1341 |             data_full = read_data(3, 'Data/Solar_Atl_data.csv', 10000)
1342 |             suffix = city[9:]
1343 |             _, data = further_preprocess(data_full, suffix=suffix)
1344 |             if suffix == '_10_14':
1345 |                 stride = 5
1346 |             if suffix == '_8_9_15_16_17':
1347 |                 stride = 5
1348 |         else:
1349 |             if city == 'Wind_Austin':
1350 |                 data_full = read_wind_data()
1351 |                 data_sub, data_near_noon = further_preprocess(
1352 |                     data_full, response_name='MWH')
1353 |             else:
1354 |                 data_full = read_CA_data(f'Data/{city}_data.csv')
1355 |                 data_sub, data_near_noon = further_preprocess(data_full)
1356 |             if sub == 0:
1357 |                 data = data_full
1358 |                 stride = 24
1359 |             elif sub == 1:
1360 |                 data = data_sub
1361 |                 stride = int(len(data) / 365)
1362 |             else:
1363 |                 data = data_near_noon
1364 |                 stride = 5
1365 |         train_size = 92 * stride
1366 |         col_name = 'MWH' if city == 'Wind_Austin' else 'DHI'
1367 |         data_x = data.loc[:, data.columns != col_name]
1368 |         data_y = data[col_name]
1369 |         data_x_numpy = data_x.to_numpy()  # Convert to numpy
1370 |         data_y_numpy = data_y.to_numpy()  # Convert to numpy
1371 |         X_train = data_x_numpy[:train_size, :]
1372 |         X_predict = data_x_numpy[train_size:, :]
1373 |         Y_train_del = data_y_numpy[:train_size]
1374 |         Y_predict_del = data_y_numpy[train_size:]
1375 |         # Finish 1
1376 |         # Start 2
1377 |         if missing:
1378 |             X_train, miss_train_idx = missing_data(
1379 |                 X_train, missing_frac=miss_frac, update=True)
1380 |             Y_train_del = np.delete(Y_train_del, miss_train_idx)
1381 |             Y_predict_del, miss_test_idx = missing_data(
1382 |                 Y_predict_del, missing_frac=miss_frac, update=False)
1383 |             if city == current_city:
1384 |                 # Need an additional Y_truth
1385 |                 Y_train = Y_train_del
1386 |                 Y_predict = Y_predict_del.copy()
1387 |                 true_miss_text_idx = miss_test_idx
1388 |             Y_predict_del[miss_test_idx] = np.abs(np.random.normal(loc=np.mean(
1389 |                 Y_train_del), scale=np.std(Y_train_del), size=len(miss_test_idx)))
1390 | 
1391 |         else:
1392 |             true_miss_text_idx = []
1393 |             if city == current_city:
1394 |                 Y_train = Y_train_del
1395 |                 Y_predict = Y_predict_del
1396 |         # Finish 2
1397 |         # Start 3
1398 |         if one_dim:
1399 |             X_train, X_predict, Y_train_del, Y_predict_del = one_dimen_transform(
1400 |                 Y_train_del, Y_predict_del, d=min(stride, 24))  # Note: this handles 'no_slide (stride=infty)' case
1401 |             j = 0
1402 |             for k in range(len(X_predict) // stride + 1):
1403 |                 X_predict[j * k:min((j + 1) * k, len(X_predict))
1404 |                           ] = restructure_X_t(X_predict[j * k:min((j + 1) * k, len(X_predict))])
1405 |                 j += 1
1406 |             big_X_train.append(X_train)
1407 |             big_X_predict.append(X_predict)
1408 |             if city == current_city:
1409 |                 Y_train = Y_train_del
1410 |                 Y_predict = Y_predict_del
1411 |         else:
1412 |             big_X_train.append(X_train)
1413 |             big_X_predict.append(X_predict)
1414 |         # Finish 3
1415 |     X_train = np.hstack(big_X_train)
1416 |     X_predict = np.hstack(big_X_predict)
1417 |     return([X_train, X_predict, Y_train, Y_predict, true_miss_text_idx, stride])
1418 | 
1419 | 
1420 | def all_together(Data_name, sub, no_slide, missing, miss_frac=0.25, one_dim=False, use_EnbPI=True):
1421 |     methods = ['Ensemble'] if use_EnbPI else ['QOOB', 'Adaptive_CI']
1422 |     train_days = 92
1423 |     itrial = 1
1424 |     results_ls = {}
1425 |     alpha = 0.1
1426 |     B = 50  # number of bootstrap samples
1427 |     if 'Solar_Atl' in Data_name:
1428 |         Data_name = ['Solar_Atl_8_9_15_16_17', 'Solar_Atl_10_14']
1429 |     XY_ls = []
1430 |     for data_name in Data_name:
1431 |         np.random.seed(98765)
1432 |         # Note, this is necessary because a model may "remember the past"
1433 |         nnet = keras_mod()
1434 |         if 'Solar_Atl' in data_name:
1435 |             X_train, X_predict, Y_train, Y_predict, miss_test_idx, stride = big_transform_s_beyond_1(
1436 |                 sub, [data_name], data_name, one_dim, missing)
1437 |         else:
1438 |             X_train, X_predict, Y_train, Y_predict, miss_test_idx, stride = big_transform_s_beyond_1(
1439 |                 sub, Data_name, data_name, one_dim, missing)
1440 |         train_size = 92 * stride
1441 |         print(f'At train_size={train_size}')
1442 |         print(f'For {data_name}')
1443 |         if no_slide:
1444 |             stride = int((365 - 92) * stride)  # No slide at all
1445 |         print(f'Stride = {stride}')
1446 |         if use_EnbPI:
1447 |             # Run NN, RNN, RF, and Linear models
1448 |             nnet = keras_mod()
1449 |             min_alpha = 0.0001
1450 |             max_alpha = 10
1451 |             ridge_cv = RidgeCV(alphas=np.linspace(min_alpha, max_alpha, 10))
1452 |             random_forest = RandomForestRegressor(n_estimators=10, criterion='mse',
1453 |                                                   bootstrap=False, max_depth=2, n_jobs=-1)
1454 |             ridge_results = EnbPI.prediction_interval(
1455 |                 ridge_cv,  X_train, X_predict, Y_train, Y_predict)
1456 |             ridge_results.fit_bootstrap_models_online(B, miss_test_idx)
1457 |             rf_results = EnbPI.prediction_interval(
1458 |                 random_forest,  X_train, X_predict, Y_train, Y_predict)
1459 |             rf_results.fit_bootstrap_models_online(B, miss_test_idx)
1460 |             # For CP Methods
1461 |             print(f'regressor is {ridge_cv.__class__.__name__}')
1462 |             result_ridge = ridge_results.run_experiments(
1463 |                 alpha, stride, data_name, itrial, methods=methods, get_plots=True)
1464 |             result_ridge[0]['center'] = ridge_results.Ensemble_pred_interval_centers
1465 |             print(f'regressor is {random_forest.__class__.__name__}')
1466 |             result_rf = rf_results.run_experiments(
1467 |                 alpha, stride, data_name, itrial, methods=methods, get_plots=True)
1468 |             result_rf[0]['center'] = rf_results.Ensemble_pred_interval_centers
1469 |             results_ls[data_name] = [result_ridge, result_rf, stride, Y_train, Y_predict, ridge_results.Ensemble_online_resid[:train_days],
1470 |                                      rf_results.Ensemble_online_resid[:train_days]]
1471 |             # # NN takes a bit too long w/o much benefit so do not use it
1472 |             # print(f'regressor is {nnet.name}')
1473 |             # nn_results = EnbPI.prediction_interval(
1474 |             #     nnet,  X_train, X_predict, Y_train, Y_predict)
1475 |             # nn_results.fit_bootstrap_models_online(B, miss_test_idx)
1476 |             # result_nn = nn_results.run_experiments(
1477 |             #     alpha, stride, data_name, itrial, methods=methods, get_plots=True)
1478 |             # result_nn[0]['center'] = nn_results.Ensemble_pred_interval_centers
1479 |             # results_ls[data_name] = [result_ridge, result_rf, result_nn, stride, Y_train, Y_predict, ridge_results.Ensemble_online_resid[:train_days],
1480 |             #                          rf_results.Ensemble_online_resid[:train_days], nn_results.Ensemble_online_resid[:train_days]]
1481 |         else:
1482 |             # For quantile RF
1483 |             XY_ls.append([X_train, X_predict, Y_train, Y_predict])
1484 |     if use_EnbPI:
1485 |         return results_ls
1486 |     else:
1487 |         return XY_ls
1488 | 
1489 | 
1490 | def small_helper(results_ls):
1491 |     names = list(results_ls.keys())
1492 |     result_ridge_ls = []
1493 |     result_rf_ls = []
1494 |     result_nn_ls = []
1495 |     Y_train_ls = []
1496 |     Y_predict_ls = []
1497 |     stride_ls = []
1498 |     ridge_resid_ls = []
1499 |     rf_resid_ls = []
1500 |     nn_resid_ls = []
1501 |     for data_name in names:
1502 |         # result_ridge, result_rf, result_nn, stride, Y_train, Y_predict, ridge_resid, rf_resid, nn_resid = results_ls[
1503 |         #     data_name]
1504 |         result_ridge, result_rf, stride, Y_train, Y_predict, ridge_resid, rf_resid = results_ls[
1505 |             data_name]
1506 |         result_ridge_ls.append(result_ridge[0])
1507 |         ridge_resid_ls.append(ridge_resid)
1508 |         result_rf_ls.append(result_rf[0])
1509 |         rf_resid_ls.append(rf_resid)
1510 |         # result_nn_ls.append(result_nn[0])
1511 |         # nn_resid_ls.append(nn_resid)
1512 |         Y_train_ls.append(Y_train)
1513 |         Y_predict_ls.append(Y_predict)
1514 |         stride_ls.append(stride)
1515 |     # results_dict = {'Ridge': [result_ridge_ls, ridge_resid_ls], 'RF': [
1516 |     #     result_rf_ls, rf_resid_ls], 'NN': [result_nn_ls, nn_resid_ls]}
1517 |     results_dict = {'Ridge': [result_ridge_ls, ridge_resid_ls], 'RF': [
1518 |         result_rf_ls, rf_resid_ls]}
1519 |     return [results_dict, Y_train_ls, Y_predict_ls, stride_ls]
1520 | 
1521 | 
1522 | '''For Conditional Coverage---Plotting'''
1523 | 
1524 | 
1525 | def PI_on_series_plus_cov_or_not(results, stride, which_hours, which_method, regr_method, Y_predict, no_slide=False, five_in_a_row=True):
1526 |     # Plot PIs on predictions for the particular hour
1527 |     # At most three plots in a row (so that figures look appropriately large)
1528 |     # plt.rcParams.update({'font.size': 18})
1529 |     titlesize = 28
1530 |     plt.rcParams.update({'axes.labelsize': titlesize-2, 'axes.titlesize': titlesize,
1531 |                         'legend.fontsize': titlesize-2, 'xtick.labelsize': titlesize-2, 'ytick.labelsize': titlesize-2})
1532 |     if five_in_a_row:
1533 |         ncol = 5
1534 |     else:
1535 |         ncol = 4
1536 |     nrow = np.ceil(len(which_hours) / ncol).astype(int)
1537 |     if stride == 24 or stride == 14 or stride == 15:
1538 |         # Multi-row
1539 |         fig, ax = plt.subplots(nrow * 2, ncol, figsize=(ncol * 5, nrow * 6), sharex='row',
1540 |                                sharey='row', constrained_layout=True)
1541 |     else:
1542 |         fig, ax = plt.subplots(2, 5, figsize=(6 * 5, 6), sharex='row',
1543 |                                sharey='row', constrained_layout=True)
1544 |     if stride > 24:
1545 |         # Because we focused on near-noon-data
1546 |         n1 = int(results[0].shape[0] / 5)
1547 |     else:
1548 |         n1 = int(results[0].shape[0] / stride)
1549 |     plot_length = 91  # Plot 3 months, April-June
1550 |     method_ls = {'Ensemble': 0, 'ICP': 1, 'WeightedICP': 2}
1551 |     results_by_method = results[method_ls[which_method]]
1552 |     for i in range(len(which_hours)):
1553 |         hour = which_hours[i]
1554 |         if stride > 24:
1555 |             indices_at_hour = np.arange(n1) * 5 + hour
1556 |         else:
1557 |             indices_at_hour = np.arange(n1) * stride + hour
1558 |         to_plot = indices_at_hour[:plot_length]
1559 |         row = (i // ncol) * 2
1560 |         col = np.mod(i, ncol)
1561 |         covered_or_not = []
1562 |         for j in range(n1):
1563 |             if Y_predict[indices_at_hour[j]] >= results_by_method['lower'][indices_at_hour[j]] and Y_predict[indices_at_hour[j]] <= results_by_method['upper'][indices_at_hour[j]]:
1564 |                 covered_or_not.append(1)
1565 |             else:
1566 |                 covered_or_not.append(0)
1567 |         coverage = np.mean(covered_or_not)
1568 |         coverage = np.round(coverage, 2)
1569 |         # Plot PI on data
1570 |         train_size = 92
1571 |         rot_angle = 15
1572 |         x_axis = np.arange(plot_length)
1573 |         if stride == 24 or stride == 14 or stride == 15:
1574 |             current_figure = ax[row, col]
1575 |         else:
1576 |             col = np.mod(i, 5)
1577 |             current_figure = ax[0, col]
1578 |         current_figure.scatter(
1579 |             x_axis, Y_predict[to_plot], marker='.', s=4, color='black')
1580 |         current_figure.plot(
1581 |             x_axis, results_by_method['center'][to_plot], color='red', linewidth=0.7)
1582 |         lower_vals = np.maximum(0, results_by_method['lower'][to_plot])
1583 |         upper_vals = np.maximum(0, results_by_method['upper'][to_plot])
1584 |         current_figure.fill_between(x_axis, lower_vals, upper_vals, alpha=0.3)
1585 |         # current_figure.plot(x_axis, np.maximum(0, results_by_method['upper'][to_plot]))
1586 |         # current_figure.plot(x_axis, np.maximum(0, results_by_method['lower'][to_plot]))
1587 |         # For axis purpose, subtract June
1588 |         xticks = np.linspace(0, plot_length - 30, 3).astype(int)
1589 |         xtick_labels = [calendar.month_name[int(i / 30) + 4]
1590 |                         for i in xticks]  # Get months, start from April
1591 |         current_figure.set_xticks(xticks)
1592 |         current_figure.set_xticklabels(xtick_labels)
1593 |         current_figure.tick_params(axis='x', rotation=rot_angle)
1594 |         # Title
1595 |         if stride == 24:
1596 |             current_figure.set_title(f'At {hour}:00 \n Coverage is {coverage}')
1597 |         elif stride == 5 or no_slide:
1598 |             current_figure.set_title(
1599 |                 f'At {hour+10}:00 \n Coverage is {coverage}')
1600 |         else:
1601 |             if stride == 15:
1602 |                 current_figure.set_title(
1603 |                     f'At {hour+5}:00 \n Coverage is {coverage}')
1604 |             else:
1605 |                 current_figure.set_title(
1606 |                     f'At {hour+6}:00 \n Coverage is {coverage}')
1607 |         # if stride == 14:
1608 |         #     # Sub data`
1609 |         #     current_figure.set_title(f'At {hour+6}:00 \n Coverage is {coverage}')
1610 |         # elif stride == 24:
1611 |         #     # Full data
1612 |         #     current_figure.set_title(f'At {hour}:00 \n Coverage is {coverage}')
1613 |         # else:
1614 |         #     # Near noon data
1615 |         #     current_figure.set_title(f'At {hour+10}:00 \n Coverage is {coverage}')
1616 |         # Plot cover or not over test period
1617 |         x_axis = np.arange(n1)
1618 |         if stride == 24 or stride == 14 or stride == 15:
1619 |             current_figure = ax[row + 1, col]
1620 |         else:
1621 |             col = np.mod(i, 5)
1622 |             current_figure = ax[1, col]
1623 |         current_figure.scatter(x_axis, covered_or_not, marker='.', s=0.4)
1624 |         current_figure.set_ylim([-1, 2])
1625 |         # For axis purpose, subtract December
1626 |         xticks = np.linspace(0, n1 - 31, 3).astype(int)
1627 |         xtick_labels = [calendar.month_name[int(
1628 |             i / 30) + 4] for i in xticks]  # Get months
1629 |         current_figure.set_xticks(xticks)
1630 |         current_figure.set_xticklabels(xtick_labels)
1631 |         current_figure.tick_params(axis='x', rotation=rot_angle)
1632 |         yticks = [0, 1]
1633 |         current_figure.set_yticks(yticks)
1634 |         current_figure.set_yticklabels(['Uncovered', 'Covered'])
1635 |         # xticks = current_figure.get_xticks()  # Actual numbers
1636 |         # xtick_labels = [f'T+{int(i)}' for i in xticks]
1637 |         # current_figure.set_xticklabels(xtick_labels)
1638 |     # if no_slide:
1639 |     #     fig.suptitle(
1640 |     #         f'EnbPI Intervals under {regr_method} without sliding', fontsize=22)
1641 |     # else:
1642 |     #     fig.suptitle(
1643 |     #         f'EnbPI Intervals under {regr_method} with s={stride}', fontsize=22)
1644 |     return fig
1645 | 
1646 | 
1647 | def make_cond_plots(Data_name, results_ls, no_slide, missing, one_d, five_in_a_row=True):
1648 |     for data_name in Data_name:
1649 |         # result_ridge, result_rf, result_nn, stride, Y_predict = results_ls[data_name]
1650 |         # res = [result_ridge, result_rf, result_nn]
1651 |         result_ridge, result_rf, stride, Y_predict = results_ls[data_name]
1652 |         res = [result_ridge, result_rf]
1653 |         if no_slide:
1654 |             which_hours = [0, 1, 2, 3, 4]  # 10AM-2PM
1655 |         else:
1656 |             if stride == 24:
1657 |                 if five_in_a_row:
1658 |                     which_hours = [7, 8, 9, 16, 17, 10, 11, 12, 13, 14]
1659 |                 else:
1660 |                     which_hours = [7, 8, 10, 11, 12, 13, 14, 16, 17]
1661 |             elif stride == 5:
1662 |                 which_hours = [0, 1, 2, 3, 4]
1663 |             else:
1664 |                 if five_in_a_row:
1665 |                     if data_name == 'Solar_Atl':
1666 |                         which_hours = [i - 6 for i in [7, 8,
1667 |                                                        9, 16, 17, 10, 11, 12, 13, 14]]
1668 |                     else:
1669 |                         which_hours = [i - 5 for i in [7, 8,
1670 |                                                        9, 16, 17, 10, 11, 12, 13, 14]]
1671 |                 else:
1672 |                     if data_name == 'Solar_Atl':
1673 |                         # which_hours = [i-6 for i in [7, 8, 10, 11, 12, 13, 14, 16, 17]]
1674 |                         which_hours = [
1675 |                             i - 6 for i in [8, 9, 16, 17, 11, 12, 13, 14]]
1676 |                     else:
1677 |                         # which_hours = [i-5 for i in [7, 8, 10, 11, 12, 13, 14, 16, 17]]
1678 |                         which_hours = [
1679 |                             i - 6 for i in [8, 9, 16, 17, 11, 12, 13, 14]]
1680 |         which_method = 'Ensemble'
1681 |         regr_methods = {0: 'Ridge', 1: 'RF', 2: 'NN'}
1682 |         X_data_type = {True: 'uni', False: 'multi'}
1683 |         Xtype = X_data_type[one_d]
1684 |         slide = '_no_slide' if no_slide else '_daily_slide'
1685 |         Dtype = {24: '_fulldata', 14: '_subdata',
1686 |                  15: '_subdata', 5: '_near_noon_data'}
1687 |         if no_slide:
1688 |             dtype = ''
1689 |         else:
1690 |             dtype = Dtype[stride]
1691 |         miss = '_with_missing' if missing else ''
1692 |         for i in range(len(res)):
1693 |             regr_method = regr_methods[i]
1694 |             fig = PI_on_series_plus_cov_or_not(
1695 |                 res[i], stride, which_hours, which_method, regr_method, Y_predict, no_slide, five_in_a_row)
1696 |             fig.savefig(f'{data_name}_{regr_method}_{Xtype}_PI_on_series_plus_cov_or_not{slide}{dtype}{miss}.pdf', dpi=300, bbox_inches='tight',
1697 |                         pad_inches=0)
1698 | 
1699 | 
1700 | def make_cond_plots_Solar_Atl(results_dict, regr_name, Y_predict_ls, stride_ls, use_EnbPI=True):
1701 |     fig, ax = plt.subplots(4, 4, figsize=(4 * 7, 6 * 2), sharex='row',
1702 |                            sharey='row', constrained_layout=True)
1703 |     titlesize = 28
1704 |     plt.rcParams.update({'axes.labelsize': titlesize, 'axes.titlesize': titlesize,
1705 |                         'legend.fontsize': titlesize, 'xtick.labelsize': titlesize, 'ytick.labelsize': titlesize})
1706 |     if use_EnbPI:
1707 |         results_ls, resid_ls = results_dict[regr_name]
1708 |         zipper = zip(results_ls, Y_predict_ls, stride_ls, resid_ls)
1709 |     else:
1710 |         results_ls = results_dict[regr_name]
1711 |         zipper = zip(results_ls, Y_predict_ls, stride_ls)
1712 |     i = 0
1713 |     row_ix = 0
1714 |     col_ix = 0
1715 |     hour_label = {0: [8, 9, 15, 16], 1: [10, 11, 12, 13]}
1716 |     k = 0
1717 |     cov_width_hour = {}
1718 |     for zip_tmp in zipper:
1719 |         if use_EnbPI:
1720 |             result, Y_predict, stride, resid = zip_tmp
1721 |         else:
1722 |             result, Y_predict, stride = zip_tmp
1723 |         tot_hour = min([stride, 4])
1724 |         n1 = int(Y_predict.shape[0] / stride)
1725 |         for hour in range(tot_hour):
1726 |             if k <= 3:
1727 |                 row = 0
1728 |                 col = k
1729 |             else:
1730 |                 row = 2
1731 |                 col = k - 4
1732 |             k += 1
1733 |             indices_at_hour = np.arange(n1) * tot_hour + hour
1734 |             covered_or_not = []
1735 |             for j in range(n1):
1736 |                 if Y_predict[indices_at_hour[j]] >= result['lower'][indices_at_hour[j]] and Y_predict[indices_at_hour[j]] <= result['upper'][indices_at_hour[j]]:
1737 |                     covered_or_not.append(1)
1738 |                 else:
1739 |                     covered_or_not.append(0)
1740 |             coverage = np.mean(covered_or_not)
1741 |             coverage = np.round(coverage, 2)
1742 |             width = np.round(
1743 |                 np.mean(result['upper'][indices_at_hour]-result['lower'][indices_at_hour]), 2)
1744 |             # Plot
1745 |             current_figure = ax[row, col]
1746 |             plot_length = 92
1747 |             x_axis = np.arange(plot_length)
1748 |             to_plot = indices_at_hour[:plot_length]
1749 |             current_figure.scatter(
1750 |                 x_axis, Y_predict[to_plot], marker='.', s=4, color='black')
1751 |             if use_EnbPI:
1752 |                 current_figure.plot(
1753 |                     x_axis, result['center'][to_plot], color='red', linewidth=0.7)
1754 |             lower_vals = np.maximum(0, result['lower'][to_plot])
1755 |             upper_vals = np.maximum(0, result['upper'][to_plot])
1756 |             current_figure.fill_between(
1757 |                 x_axis, lower_vals, upper_vals, alpha=0.3)
1758 |             xticks = np.linspace(0, plot_length - 30, 3).astype(int)  #
1759 |             xtick_labels = [calendar.month_name[int(i / 30) + 4]
1760 |                             for i in xticks]  # Get months, start from April
1761 |             current_figure.set_xticks(xticks)
1762 |             current_figure.set_xticklabels(xtick_labels)
1763 |             current_figure.tick_params(
1764 |                 axis='x', rotation=15, labelsize=titlesize)
1765 |             current_figure.tick_params(axis='y', labelsize=titlesize)
1766 |             hour_name = hour_label[i][hour]
1767 |             # current_figure.set_title(
1768 |             #     f'At {hour_name+1}:00 \n Coverage={coverage} & Width={width}')
1769 |             current_figure.set_title(
1770 |                 f'At {hour_name+1}:00')
1771 |             cov_width_hour[hour_name+1] = [coverage, width]
1772 |             current_figure = ax[row + 1, col]
1773 |             # # Histogram plot
1774 |             # sns.histplot(resid, bins=15, kde=True, ax=current_figure)
1775 |             # current_figure.axes.get_yaxis().set_visible(False)
1776 |             # current_figure.set_title(r'Histogram of $\{\hat{\epsilon_t}\}_{t=1}^T$')
1777 |             # Moving coverage
1778 |             N = 30  # e.g. average over past 20 days
1779 |             moving_cov = np.convolve(
1780 |                 covered_or_not, np.ones(N) / N, mode='valid')
1781 |             current_figure.plot(moving_cov, color='red',
1782 |                                 label='Sliding Coverage')
1783 |             # For axis purpose, subtract December
1784 |             xticks = np.linspace(0, len(covered_or_not)
1785 |                                  - 31 - N + 1, 3).astype(int)
1786 |             xtick_labels = [calendar.month_name[int(
1787 |                 i / 30) + 4 + N // 30] for i in xticks]  # Get months
1788 |             current_figure.set_xticks(xticks)
1789 |             current_figure.set_xticklabels(xtick_labels)
1790 |             current_figure.tick_params(
1791 |                 axis='x', rotation=15, labelsize=titlesize)
1792 |             current_figure.tick_params(axis='y', labelsize=titlesize)
1793 |             current_figure.axhline(
1794 |                 0.9, color='black', linewidth=3, linestyle='--')
1795 |         i += 1
1796 |     if use_EnbPI:
1797 |         fig2, ax = plt.subplots(2, 1, figsize=(7, 6 * 2))
1798 |         # Histogram plot
1799 |         sns.histplot(resid_ls[0], bins=15, kde=True, ax=ax[0])
1800 |         ax[0].axes.get_yaxis().set_visible(False)
1801 |         # ax[0].set_title(r'Histogram of $\{\hat{\epsilon_t}\}_{t=1}^T$')
1802 |         sns.histplot(resid_ls[1], bins=15, kde=True, ax=ax[1])
1803 |         ax[1].axes.get_yaxis().set_visible(False)
1804 |         fig2.tight_layout(pad=2)
1805 |         return [fig, fig2]
1806 |     else:
1807 |         return [fig, cov_width_hour]
1808 | 
1809 | 
1810 | '''Other helpers'''
1811 | 
1812 | 
1813 | def ave_cov_width(df, Y):
1814 |     coverage_res = ((np.array(df['lower']) <= Y) & (
1815 |         np.array(df['upper']) >= Y)).mean()
1816 |     print(f'Average Coverage is {coverage_res}')
1817 |     width_res = (df['upper'] - df['lower']).mean()
1818 |     print(f'Average Width is {width_res}')
1819 |     return [coverage_res, width_res]
1820 | 
1821 | 
1822 | def adjust_alpha_t(alpha_t, alpha, errs, gamma=0.005, method='simple'):
1823 |     if method == 'simple':
1824 |         # Eq. (2) of Adaptive CI
1825 |         return alpha_t+gamma*(alpha-errs[-1])
1826 |     else:
1827 |         # Eq. (3) of Adaptive CI with particular w_s as given
1828 |         t = len(errs)
1829 |         errs = np.array(errs)
1830 |         w_s_ls = np.array([0.95**(t-i) for i in range(t)]
1831 |                           )  # Furtherest to Most recent
1832 |         return alpha_t+gamma*(alpha-w_s_ls.dot(errs))
1833 | 


--------------------------------------------------------------------------------
/utils_SPCI.py:
--------------------------------------------------------------------------------
  1 | # from statsmodels.regression.quantile_regression import QuantReg
  2 | # from sklearn.linear_model import QuantileRegressor
  3 | import numpy as np
  4 | import math
  5 | import pandas as pd
  6 | 
  7 | 
  8 | #### From utils_EnbPI ####
  9 | def adjust_alpha_t(alpha_t, alpha, errs, gamma=0.005, method='simple'):
 10 |     if method == 'simple':
 11 |         # Eq. (2) of Adaptive CI
 12 |         return alpha_t+gamma*(alpha-errs[-1])
 13 |     else:
 14 |         # Eq. (3) of Adaptive CI with particular w_s as given
 15 |         t = len(errs)
 16 |         errs = np.array(errs)
 17 |         w_s_ls = np.array([0.95**(t-i) for i in range(t)]
 18 |                           )  # Furtherest to Most recent
 19 |         return alpha_t+gamma*(alpha-w_s_ls.dot(errs))
 20 | 
 21 | 
 22 | def ave_cov_width(df, Y):
 23 |     coverage_res = ((np.array(df['lower']) <= Y) & (
 24 |         np.array(df['upper']) >= Y)).mean()
 25 |     print(f'Average Coverage is {coverage_res}')
 26 |     width_res = (df['upper'] - df['lower']).mean()
 27 |     print(f'Average Width is {width_res}')
 28 |     return [coverage_res, width_res]
 29 | 
 30 | #### Miscellaneous ####
 31 | 
 32 | 
 33 | window_size = 300
 34 | 
 35 | 
 36 | def rolling_avg(x, window=window_size):
 37 |     return np.convolve(x, np.ones(window)/window)[(window-1):-window]
 38 | 
 39 | 
 40 | def dict_to_latex(dict, train_ls):
 41 |     DF = pd.DataFrame.from_dict(np.vstack(dict.values()))
 42 |     keys = list(dict.keys())
 43 |     index = np.array([[f'{key} coverage', f'{key} width']
 44 |                      for key in keys]).flatten()
 45 |     DF.index = index
 46 |     DF.columns = train_ls
 47 |     print(DF)
 48 |     print(DF.round(2).to_latex())
 49 | 
 50 | 
 51 | def make_NP_df(X, Y):
 52 |     Xnames = [f'X{a}' for a in np.arange(X.shape[1]).astype(str)]
 53 |     full_names = ['ds']+Xnames+['y']
 54 |     date_tmp = pd.date_range(
 55 |         start='1/1/2018', periods=len(Y)).astype(str)  # Artificial
 56 |     df_tmp = pd.DataFrame(np.c_[date_tmp, X, Y], columns=full_names)
 57 |     return df_tmp, Xnames
 58 | 
 59 | 
 60 | def generate_bootstrap_samples(n, m, B):
 61 |     '''
 62 |       Return: B-by-m matrix, where row b gives the indices for b-th bootstrap sample
 63 |     '''
 64 |     samples_idx = np.zeros((B, m), dtype=int)
 65 |     for b in range(B):
 66 |         sample_idx = np.random.choice(n, m)
 67 |         samples_idx[b, :] = sample_idx
 68 |     return(samples_idx)
 69 | 
 70 | 
 71 | def strided_app(a, L, S):  # Window len = L, Stride len/stepsize = S
 72 |     nrows = ((a.size - L) // S) + 1
 73 |     n = a.strides[0]
 74 |     return np.lib.stride_tricks.as_strided(a, shape=(nrows, L), strides=(S * n, n))
 75 | 
 76 | 
 77 | def binning(past_resid, alpha):
 78 |     '''
 79 |     Input:
 80 |         past residuals: evident
 81 |         alpha: signifance level
 82 |     Output:
 83 |         beta_hat_bin as argmin of the difference
 84 |     Description:
 85 |         Compute the beta^hat_bin from past_resid, by breaking [0,alpha] into bins (like 20). It is enough for small alpha
 86 |         number of bins are determined rather automatic, relative the size of whole domain
 87 |     '''
 88 |     bins = 5  # For computation, can just reduce it to like 10 or 5 in real data
 89 |     beta_ls = np.linspace(start=0, stop=alpha, num=bins)
 90 |     width = np.zeros(bins)
 91 |     for i in range(bins):
 92 |         width[i] = np.percentile(past_resid, math.ceil(100 * (1 - alpha + beta_ls[i]))) - \
 93 |             np.percentile(past_resid, math.ceil(100 * beta_ls[i]))
 94 |     i_star = np.argmin(width)
 95 |     return beta_ls[i_star]
 96 | 
 97 | 
 98 | def binning_use_RF_quantile_regr(quantile_regr, Xtrain, Ytrain, feature, beta_ls, sample_weight=None):
 99 |     # API ref: https://sklearn-quantile.readthedocs.io/en/latest/generated/sklearn_quantile.RandomForestQuantileRegressor.html
100 |     feature = feature.reshape(1, -1)
101 |     low_high_pred = quantile_regr.fit(Xtrain, Ytrain,sample_weight).predict(feature)
102 |     num_mid = int(len(low_high_pred)/2)
103 |     low_pred, high_pred = low_high_pred[:num_mid], low_high_pred[num_mid:]
104 |     width = (high_pred-low_pred).flatten()
105 |     i_star = np.argmin(width)
106 |     wid_left, wid_right = low_pred[i_star], high_pred[i_star]
107 |     return i_star, beta_ls[i_star], wid_left, wid_right
108 | 
109 | 
110 | def merge_table_mean_std(table_result, colnames=None):
111 |     M, N = table_result.shape[0], int(table_result.shape[1]/2)
112 |     table = np.zeros((M, N), dtype=object)
113 |     idx = table_result.index
114 |     for i in range(M):
115 |         for j in range(N):
116 |             table[i,
117 |                   j] = f'{table_result.iloc[i,2*j]} ({table_result.iloc[i,2*j+1]})'
118 |     colnames = np.array([[f'{name} coverage', f'{name} width']
119 |                         for name in colnames]).flatten()
120 |     return pd.DataFrame(table, index=idx, columns=colnames)
121 | 
122 | 
123 | # def binning_use_linear_quantile_regr(residX, residY, alpha):
124 | #     # bins = 5
125 | #     # beta_ls = np.linspace(start=1e-5, stop=alpha-1e-5, num=bins)
126 | #     bins = 1
127 | #     beta_ls = [alpha/2]  # No search, as this is too slow.
128 | #     width = np.zeros(bins)
129 | #     width_left = np.zeros(bins)
130 | #     width_right = np.zeros(bins)
131 | #     for i in range(bins):
132 | #         feature = residX[-1]
133 | #         '''
134 | #             Sklearn class
135 | #             See scipy: https://docs.scipy.org/doc/scipy/reference/optimize.linprog-interior-point.html#optimize-linprog-interior-point
136 | #             for a list of option. "solver_options" are given as "options" therein
137 | #
138 | #             NOTE, we CANNOT afford many iterations, as this is VERY COSTLY (about 4 sec per point for this loop below even for 10 iterations...)
139 | #             Even just 1 iter, stll like 2 sec
140 | #
141 | #             See sklearn for which solver to use:
142 | #             https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.QuantileRegressor.html#sklearn.linear_model.QuantileRegressor
143 | #
144 | #             BUT solver = 'highs' is claimed to be fast but actually does not work
145 | #         '''
146 | #         solver = 'interior-point'
147 | #         sol_options = {'maxiter': 10}
148 | #         reg_low = QuantileRegressor(
149 | #             quantile=beta_ls[i], solver=solver, solver_options=sol_options)
150 | #         reg_high = QuantileRegressor(
151 | #             quantile=1 - alpha + beta_ls[i], solver=solver, solver_options=sol_options)
152 | #         reg_low.fit(residX[:-1], residY)
153 | #         reg_high.fit(residX[:-1], residY)
154 | #         width_left[i] = reg_low.predict(feature.reshape(1, -1))
155 | #         width_right[i] = reg_high.predict(feature.reshape(1, -1))
156 | #         # ############################
157 | #         # # Statsmodel class
158 | #         # '''
159 | #         #     https://www.statsmodels.org/dev/generated/statsmodels.regression.quantile_regression.QuantReg.html?highlight=quantreg
160 | #         #     Actually, still not fast....
161 | #         #     Hence, removed this "Optimizer width", but width can then be wider than necessary
162 | #         # '''
163 | #         # mod = QuantReg(residY, residX[:-1], max_iter=1)
164 | #         # reg_low = mod.fit(q=beta_ls[i])
165 | #         # reg_high = mod.fit(q=1-alpha+beta_ls[i])
166 | #         # width_left[i] = mod.predict(reg_low.params, feature)
167 | #         # width_right[i] = mod.predict(reg_high.params, feature)
168 | #         width[i] = width_right[i] - width_left[i]
169 | #     i_star = np.argmin(width)
170 | #     return width_left[i_star], width_right[i_star]
171 | 
172 | 
173 | #######
174 | 


--------------------------------------------------------------------------------
/visualize.py:
--------------------------------------------------------------------------------
  1 | import utils_SPCI as utils
  2 | import calendar
  3 | import matplotlib.pyplot as plt
  4 | import seaborn as sns
  5 | from statsmodels.tsa.stattools import pacf
  6 | import numpy as np
  7 | import data
  8 | import pandas as pd
  9 | import warnings
 10 | import torch
 11 | import pickle
 12 | import pdb
 13 | from sklearn.ensemble import RandomForestRegressor
 14 | import SPCI_class as SPCI
 15 | warnings.filterwarnings("ignore")
 16 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 17 | titlesize = 22
 18 | plt.rcParams.update({'axes.labelsize': titlesize, 'axes.titlesize': titlesize,
 19 |                     'legend.fontsize': titlesize, 'xtick.labelsize': titlesize, 'ytick.labelsize': titlesize})
 20 | 
 21 | 
 22 | def detach_torch(input):
 23 |     return input.cpu().detach().numpy()
 24 | 
 25 | 
 26 | def CI_on_Ytest(results_EnbPI_SPCI, Ytest, train_size, mtd='SPCI', dataname='solar'):
 27 |     if mtd == 'SPCI':
 28 |         PIs = results_EnbPI_SPCI.PIs_SPCI
 29 |     else:
 30 |         PIs = results_EnbPI_SPCI.PIs_EnbPI
 31 |     stride = results_EnbPI_SPCI.stride
 32 |     stride_save = '' if stride == 1 else f'_{stride}'
 33 |     fig, ax = plt.subplots(figsize=(10, 3))
 34 |     xaxes = range(train_size, train_size + len(Ytest))
 35 |     ax.scatter(xaxes, Ytest, color='black', s=3)
 36 |     ax.fill_between(xaxes, PIs['upper'],
 37 |                     PIs['lower'], alpha=0.25, color='blue')
 38 |     width = (np.array(PIs['upper']) - np.array(PIs['lower'])).mean()
 39 |     cov = ((np.array(PIs['lower']) <= Ytest) & (
 40 |         np.array(PIs['upper']) >= Ytest)).mean()
 41 |     ax.set_xlabel('Prediction Time Index')
 42 |     ax.set_title(mtd + r' $C_{\alpha}(X_t)$ around $Y$'
 43 |                  + f', coverage {cov:.2f}, width {width:.2f}')
 44 |     fig.savefig(f'{mtd}_Interval_on_Ytest{stride_save}_{dataname}.png', dpi=300,
 45 |                 bbox_inches='tight',
 46 |                 pad_inches=0)
 47 |     return fig
 48 | 
 49 | 
 50 | def plot_burn_in(PIs_ls, Ytest, window_size, savename, use_NeuralProphet=False):
 51 |     window_size_dict = {'electric': 100, 'solar': 100, 'window': 50}
 52 |     window_size = window_size_dict['solar']
 53 |     PIs_EnbPI, PIs_SPCI, PIs_AdaptiveCI, PI_nexCP_WLS = PIs_ls[:4]
 54 |     if use_NeuralProphet:
 55 |         PIs_SPCINeuralProphet = PIs_ls[-1]
 56 |     fig, ax = plt.subplots(figsize=(12, 5))
 57 |     first = (np.array(PIs_EnbPI['upper'])
 58 |              - np.array(PIs_EnbPI['lower']))[:window_size]
 59 |     first_cov = ((np.array(PIs_EnbPI['lower']) <= Ytest)
 60 |                  & (np.array(PIs_EnbPI['upper']) >= Ytest))[:window_size]
 61 |     ax.plot(
 62 |         first, label=f'EnbPI: {first.mean():.2f} & {first_cov.mean():.2f}', color='black')
 63 |     second = (np.array(PIs_SPCI['upper'])
 64 |               - np.array(PIs_SPCI['lower']))[:window_size]
 65 |     second_cov = ((np.array(PIs_SPCI['lower']) <= Ytest)
 66 |                   & (np.array(PIs_SPCI['upper']) >= Ytest))[:window_size]
 67 |     ax.plot(
 68 |         second, label=f'SPCI: {second.mean():.2f} & {second_cov.mean():.2f}', color='orange')
 69 |     if use_NeuralProphet:
 70 |         second_NP = (np.array(PIs_SPCINeuralProphet['upper'])
 71 |                      - np.array(PIs_SPCINeuralProphet['lower']))[:window_size]
 72 |         second_NP_cov = ((np.array(PIs_SPCINeuralProphet['lower']) <= Ytest)
 73 |                          & (np.array(PIs_SPCINeuralProphet['upper']) >= Ytest))[:window_size]
 74 |         ax.plot(
 75 |             second_NP, label=f'SPCI-NeuralProphet: {second_NP.mean():.2f} & {second_NP_cov.mean():.2f}', color='yellow')
 76 |     third = (np.array(PIs_AdaptiveCI['upper'])
 77 |              - np.array(PIs_AdaptiveCI['lower']))[:window_size]
 78 |     third_cov = ((np.array(PIs_AdaptiveCI['lower']) <= Ytest)
 79 |                  & (np.array(PIs_AdaptiveCI['upper']) >= Ytest))[:window_size]
 80 |     ax.plot(
 81 |         third, label=f'AdaptiveCI: {third.mean():.2f} & {third_cov.mean():.2f}', color='gray', linewidth=0.75)
 82 |     fourth = (np.array(PI_nexCP_WLS[:, 1])
 83 |               - np.array(PI_nexCP_WLS[:, 0]))[:window_size]
 84 |     fourth_cov = ((PI_nexCP_WLS[:, 0] <= Ytest)
 85 |                   & (PI_nexCP_WLS[:, 1] >= Ytest))[:window_size]
 86 |     ax.plot(
 87 |         fourth, label=f'Nex-CP WLS: {fourth.mean():.2f} & {fourth_cov.mean():.2f}', color='magenta')
 88 |     ax.set_xlabel('Burn-in Period')
 89 |     ax.set_ylabel('Width')
 90 |     # ax.legend(title='Method: Ave Width in burn-in', title_fontsize=17,
 91 |     #           loc='upper center', ncol=1, bbox_to_anchor=(1.4, 0.45))
 92 |     ax.legend(title='Method: Ave Width & Coverage in burn-in', title_fontsize=22,
 93 |               loc='lower center', ncol=2, bbox_to_anchor=(0.475, -0.63))
 94 |     plt.savefig(f'Brun_in_plot_{savename}.png', dpi=300,
 95 |                 bbox_inches='tight',
 96 |                 pad_inches=0)
 97 | 
 98 | 
 99 | wind_loc = 0
100 | 
101 | 
102 | def plot_rolling(alpha, train_frac, non_stat_solar=True, dsets=['wind', 'solar', 'electric']):
103 |     if 'simulate' in dsets[0]:
104 |         make_plot = False
105 |         methods = ['SPCI', 'EnbPI']
106 |     else:
107 |         make_plot = True
108 |         methods = ['SPCI', 'EnbPI', 'AdaptiveCI', 'NEXCP']
109 |         colors = ['black', 'orange', 'blue', 'magenta']
110 |     window_size_dict = {'electric': 100, 'solar': 100, 'wind': 50}
111 |     full_cov_width_table = np.zeros(
112 |         (len(methods), len(dsets) * 2 * 2), dtype=object)
113 |     for i, data_name in enumerate(dsets):
114 |         if make_plot:
115 |             window_size = window_size_dict[data_name]
116 |             fig, ax = plt.subplots(1, 2, figsize=(20, 4), sharex=True)
117 |         for j, name in enumerate(methods):
118 |             print(f'{name} on {data_name}')
119 |             if make_plot:
120 |                 dloader = data.real_data_loader()
121 |                 univariate, filter_zero, non_stat_solar = False, False, True
122 |                 solar_args = [univariate, filter_zero, non_stat_solar]
123 |                 wind_args = [wind_loc]
124 |                 X_full, Y_full = dloader.get_data(
125 |                     data_name, solar_args, wind_args)
126 |             else:
127 |                 simul_name_dict = {1: 'simulation_state_space',
128 |                                    2: 'simulate_nonstationary', 3: 'simulate_heteroskedastic'}
129 |                 simul_type = 2+i
130 |                 data_name = simul_name_dict[simul_type]
131 |                 simul_loader = data.simulate_data_loader()
132 |                 Data_dict = simul_loader.get_simul_data(simul_type)
133 |                 X_full, Y_full = Data_dict['X'].to(
134 |                     device), Data_dict['Y'].to(device)
135 |                 X_full, Y_full = detach_torch(X_full), detach_torch(Y_full)
136 |             N = len(Y_full)
137 |             N0 = int(train_frac * N)
138 |             Y_test = Y_full[N0:]
139 |             with open(f'{name}_{data_name}_train_frac_{np.round(train_frac,2)}_alpha_{alpha}.p', 'rb') as fp:
140 |                 dict_rolling = pickle.load(fp)
141 |             num_trials = len(dict_rolling.keys())
142 |             cov_ls, width_ls = [], []
143 |             for itrial in range(num_trials):
144 |                 PI = dict_rolling[f'Itrial{itrial}']
145 |                 cov_stat = ((np.array(PI['lower']) <= Y_test)
146 |                             & (np.array(PI['upper']) >= Y_test))
147 |                 width_stat = ((np.array(PI['upper']) - np.array(PI['lower'])))
148 |                 cov_ls.append(cov_stat)
149 |                 width_ls.append(width_stat)
150 |             covs = [np.mean(c) for c in cov_ls]
151 |             widths = [np.mean(w) for w in width_ls]
152 |             full_cov_width_table[j, i * 4] = f'{np.mean(covs):.2f}'
153 |             full_cov_width_table[j, i * 4 + 1] = f'{np.std(covs):.2e}'
154 |             full_cov_width_table[j, i * 4 + 2] = f'{np.mean(widths):.2f}'
155 |             full_cov_width_table[j, i * 4 + 3] = f'{np.std(widths):.2e}'
156 |             if make_plot:
157 |                 cov_rolling = [utils.rolling_avg(
158 |                     cov, window=window_size) for cov in cov_ls]
159 |                 cov_rolling_mean, cov_rolling_std = np.mean(
160 |                     cov_rolling, 0), np.std(cov_rolling, 0)
161 |                 width_rolling = [utils.rolling_avg(
162 |                     width, window=window_size) for width in width_ls]
163 |                 width_rolling_mean, width_rolling_std = np.mean(
164 |                     width_rolling, 0), np.std(width_rolling, 0)
165 |                 # Plot
166 |                 if j == 0:
167 |                     ax[0].axhline(y=1 - alpha, linestyle='--', color='gray')
168 |                 xaxis = np.arange(N0 + window_size, N)
169 |                 ax[0].plot(xaxis, cov_rolling_mean,
170 |                            color=colors[j], label=name)
171 |                 ax[0].fill_between(xaxis, cov_rolling_mean - cov_rolling_std,
172 |                                    cov_rolling_mean + cov_rolling_std, color=colors[j], alpha=0.3)
173 |                 ax[1].plot(xaxis, width_rolling_mean, color=colors[j])
174 |                 ax[1].fill_between(xaxis, width_rolling_mean - width_rolling_std,
175 |                                    width_rolling_mean + width_rolling_std, color=colors[j], alpha=0.3)
176 |         if make_plot:
177 |             ax[0].set_xlabel('Data index')
178 |             ax[0].set_ylim([1 - 4 * alpha, 1])
179 |             ax[0].set_ylabel('Rolling coverage')
180 |             ax[0].legend(ncol=2, loc='lower center')
181 |             ax[1].set_ylabel('Rolling width')
182 |             ax[1].set_xlabel('Data index')
183 |             fig.tight_layout()
184 |             plt.savefig(f'Rolling_comparison_{data_name}.png', dpi=300,
185 |                         bbox_inches='tight',
186 |                         pad_inches=0)
187 |             plt.show()
188 |             plt.close()
189 |     dsets = np.array([[f'{dname} cov mean', f'{dname} cov std',
190 |                        f'{dname} width mean', f'{dname} width std']
191 |                       for dname in dsets]).flatten()
192 |     full_cov_width_table = pd.DataFrame(
193 |         full_cov_width_table, index=methods, columns=dsets)
194 |     return full_cov_width_table
195 | 
196 | 
197 | def residual_histogram_pacf_cond_cov():
198 |     # NOTE: one major difference from before is that the training data ONLY come from certain hours
199 |     dloader = data.real_data_loader()
200 |     Y_full, X_full, _ = dloader.get_non_stationary_solar(
201 |         univariate=False, max_N=8760)
202 |     idx_choose = []
203 |     hours = [7, 8, 15, 16, 17]  # Actual hours - 1
204 |     stride = len(hours)
205 |     for h in hours:
206 |         idx = np.arange(365) * 24 + h
207 |         idx_choose.append(idx)
208 |     Y_full = Y_full[np.concatenate(idx_choose)]
209 |     X_full = X_full[np.concatenate(idx_choose)]
210 |     Y_full, X_full = torch.from_numpy(Y_full).float().to(
211 |         device), torch.from_numpy(X_full).float().to(device)
212 |     train_length = 183
213 |     train_frac = train_length / 365
214 |     N = int(X_full.shape[0] * train_frac)
215 |     X_train, X_predict, Y_train, Y_predict = X_full[:
216 |                                                     N], X_full[N:], Y_full[:N], Y_full[N:]
217 |     fit_func = RandomForestRegressor(n_estimators=20, criterion='mse',
218 |                                      bootstrap=False, max_depth=2, n_jobs=-1)
219 |     EnbPI = SPCI.SPCI_and_EnbPI(
220 |         X_train, X_predict, Y_train, Y_predict, fit_func=fit_func)
221 |     EnbPI.fit_bootstrap_models_online(B=50, fit_sigmaX=False)
222 |     # Plot residual and pacf
223 |     plot_resid_and_pacf(EnbPI)
224 |     for use_SPCI in [False, True]:
225 |         mtd = 'SPCI' if use_SPCI else 'EnbPI'
226 |         print(f'################ Using {mtd} ################')
227 |         alpha = 0.1
228 |         smallT = not use_SPCI
229 |         past_window = 300
230 |         EnbPI.compute_PIs_Ensemble_online(
231 |             alpha, smallT=smallT, past_window=past_window, stride=stride, use_SPCI=use_SPCI)
232 |         # Plot cond coverage
233 |         titles = ['8', '9', '16', '17']
234 |         fig, ax = plt.subplots(1, 4, figsize=(4 * 8, 4), sharex=True,
235 |                                sharey=True, constrained_layout=True)
236 |         PIs = EnbPI.PIs_Ensemble
237 |         Y_pred = EnbPI.Ensemble_pred_interval_centers.cpu().numpy()
238 |         Y_true = EnbPI.Y_predict.cpu().numpy()
239 |         titlesize = 28
240 |         for h in range(4):
241 |             current_figure = ax[h]
242 |             plot_length = 365 - train_length
243 |             idx = np.arange(plot_length) * len(hours) + h
244 |             PIs_h = PIs.iloc[idx, :]
245 |             Y_pred_h = Y_pred[idx]
246 |             Y_true_h = Y_true[idx]
247 |             mean_cov = ((Y_true_h >= PIs_h['lower']) & (
248 |                 Y_true_h <= PIs_h['upper'])).mean()
249 |             mean_width = (PIs_h['upper'] - PIs_h['lower']).mean()
250 |             mean_cov, mean_width = np.round(
251 |                 mean_cov, 2), np.round(mean_width, 2)
252 |             x_axis = np.arange(plot_length)
253 |             current_figure.plot(Y_pred_h, color='red', linewidth=0.7)
254 |             current_figure.scatter(
255 |                 x_axis, Y_true_h, marker='.', s=4, color='black')
256 |             xticks = np.linspace(0, plot_length, 3).astype(int)  #
257 |             xtick_labels = [calendar.month_name[int(i / 31) + int(train_length / 30) + 1]
258 |                             for i in xticks]  # Get months, start from April
259 |             current_figure.set_xticks(xticks)
260 |             current_figure.set_xticklabels(xtick_labels, fontsize=titlesize)
261 |             current_figure.set_title(
262 |                 f'At {titles[h]}:00 \n Coverage: {mean_cov} & Width: {mean_width}')
263 |             current_figure.tick_params(
264 |                 axis='x', rotation=15, labelsize=titlesize)
265 |             lower_vals = np.maximum(0, PIs_h['lower']).to_numpy()
266 |             upper_vals = np.maximum(0, PIs_h['upper']).to_numpy()
267 |             current_figure.fill_between(
268 |                 x_axis, lower_vals, upper_vals, alpha=0.3)
269 |         cq = 'CondQuantile' if use_SPCI else 'NoCondQuantile'
270 |         plt.savefig(f'Cond_coverage_{cq}.png', dpi=300,
271 |                     bbox_inches='tight',
272 |                     pad_inches=0)
273 |         plt.show()
274 |         plt.close()
275 |         plt.close()
276 | 
277 | 
278 | def plot_resid_and_pacf(EnbPI):
279 |     # Plot residual and pacf given trained model
280 |     fig, ax = plt.subplots(1, 2, figsize=(10, 4))
281 |     N = len(EnbPI.X_train)
282 |     resid_rest = EnbPI.Ensemble_online_resid[:N]
283 |     low, up = np.percentile(
284 |         resid_rest, 4), np.percentile(resid_rest, 95)
285 |     resid_rest = resid_rest[(resid_rest >= low) & (
286 |         resid_rest < up)]
287 |     sns.histplot(resid_rest, bins=15, kde=True, ax=ax[0])
288 |     ax[0].set_xticks([int(resid_rest.min()), 0, int(resid_rest.max())])
289 |     ax[0].set_title(
290 |         r'Histogram of $\{\hat{\epsilon}_t\}_{t=1}^T$', fontsize=24)
291 |     ax[0].set_ylabel('')
292 |     ax[0].yaxis.set_ticks([])
293 |     ax[1].plot(pacf(EnbPI.Ensemble_online_resid),
294 |                marker='o', markersize=4)
295 |     ax[1].set_title("PACF", fontsize=24)
296 |     ax[1].grid()
297 |     plt.savefig('Resid_histogram_and_PACF.png', dpi=300,
298 |                 bbox_inches='tight',
299 |                 pad_inches=0)
300 |     plt.show()
301 |     plt.close()
302 | 
303 | ##################
304 | 


--------------------------------------------------------------------------------