├── Chapter6-Case-Study ├── SL-10 │ ├── cf12.xls │ ├── fwc10.xls │ ├── fwe10.xls │ ├── rl10.xls │ ├── normal1.xls │ ├── normal cf.xls │ ├── normal r.xls │ ├── Condenser_Fouling_Fault_Data.xlsx │ ├── Refrigerant_Leak_Fault_Data.xlsx │ ├── Reduced_Condenser_Water_Flow_Fault_Data.xlsx │ ├── Reduced_Evaporator_Water_Flow_Fault_Data.xlsx │ ├── Refrigerant_Leak_RF_Dynamic_Threshold.ipynb │ ├── Condenser_Foul_SVM_Dynamic_Threshold.ipynb │ └── Condenser_Foul_RF_Dynamic_Threshold.ipynb ├── SL-20 │ ├── cf20.xls │ ├── fwc20.xls │ ├── fwe20.xls │ ├── rl20.xls │ ├── normal1.xls │ ├── normal cf.xls │ ├── normal r.xls │ ├── Condenser_Fouling_Fault_Data.xlsx │ ├── Refrigerant_Leak_Fault_Data.xlsx │ ├── Reduced_Condenser_Water_Flow_Fault_Data.xlsx │ ├── Reduced_Evaporator_Water_Flow_Fault_Data.xlsx │ ├── Refrigerant_Leak_SVM_Dynamic_Threshold.ipynb │ ├── Refrigerant_Leak_ERF_Dynamic_Threshold.ipynb │ ├── Refrigerant_Leak_RF_Dynamic_Threshold.ipynb │ └── Condenser_Foul_SVM_Dynamic_Threshold.ipynb └── SL-30 │ ├── cf30.xls │ ├── fwc30.xls │ ├── fwe30.xls │ ├── rl30.xls │ ├── normal1.xls │ ├── normal cf.xls │ ├── normal r.xls │ ├── Condenser_Fouling_Fault_Data.xlsx │ ├── Refrigerant_Leak_Fault_Data.xlsx │ ├── Reduced_Condenser_Water_Flow_Fault_Data.xlsx │ ├── Reduced_Evaporator_Water_Flow_Fault_Data.xlsx │ ├── Refrigerant_Leak_SVM_Dynamic_Threshold.ipynb │ ├── Refrigerant_Leak_ERF_Dynamic_Threshold.ipynb │ ├── Refrigerant_Leak_RF_Dynamic_Threshold.ipynb │ └── Condenser_Foul_SVM_Dynamic_Threshold.ipynb ├── Chapter4-Detection-of-Faults ├── Climate_Data.xls ├── EnergyData_D1.xlsx ├── EnergyData_D2.xlsx ├── EnergyData_D3.xlsx └── EnergyData_D4.xlsx ├── Chapter5-Threshold-Comparison ├── Climate_Data.xls ├── EnergyData_D1.xlsx ├── EnergyData_D2.xlsx ├── EnergyData_D3.xlsx └── ~$Feature_Scores.xlsx ├── Chapter5-EnergyModel-Comparison ├── Climate_Data.xls ├── EnergyData_D1.xlsx ├── EnergyData_D2.xlsx ├── EnergyData_D3.xlsx ├── Energy_Modeling_ERF_D1.ipynb ├── Energy_Modeling_ERF_D2.ipynb ├── Energy_Modeling_ERF_D3.ipynb ├── Energy_Modeling_RF_D1.ipynb ├── Energy_Modeling_RF_D2.ipynb ├── Energy_Modeling_RF_D3.ipynb ├── Energy_Modeling_SVM_D1.ipynb ├── Energy_Modeling_SVM_D2.ipynb └── Energy_Modeling_SVM_D3.ipynb └── README.md /Chapter6-Case-Study/SL-10/cf12.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-10/cf12.xls -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-10/fwc10.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-10/fwc10.xls -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-10/fwe10.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-10/fwe10.xls -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-10/rl10.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-10/rl10.xls -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-20/cf20.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-20/cf20.xls -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-20/fwc20.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-20/fwc20.xls -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-20/fwe20.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-20/fwe20.xls -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-20/rl20.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-20/rl20.xls -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-30/cf30.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-30/cf30.xls -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-30/fwc30.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-30/fwc30.xls -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-30/fwe30.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-30/fwe30.xls -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-30/rl30.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-30/rl30.xls -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-10/normal1.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-10/normal1.xls -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-20/normal1.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-20/normal1.xls -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-30/normal1.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-30/normal1.xls -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-10/normal cf.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-10/normal cf.xls -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-10/normal r.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-10/normal r.xls -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-20/normal cf.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-20/normal cf.xls -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-20/normal r.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-20/normal r.xls -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-30/normal cf.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-30/normal cf.xls -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-30/normal r.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-30/normal r.xls -------------------------------------------------------------------------------- /Chapter4-Detection-of-Faults/Climate_Data.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter4-Detection-of-Faults/Climate_Data.xls -------------------------------------------------------------------------------- /Chapter4-Detection-of-Faults/EnergyData_D1.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter4-Detection-of-Faults/EnergyData_D1.xlsx -------------------------------------------------------------------------------- /Chapter4-Detection-of-Faults/EnergyData_D2.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter4-Detection-of-Faults/EnergyData_D2.xlsx -------------------------------------------------------------------------------- /Chapter4-Detection-of-Faults/EnergyData_D3.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter4-Detection-of-Faults/EnergyData_D3.xlsx -------------------------------------------------------------------------------- /Chapter4-Detection-of-Faults/EnergyData_D4.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter4-Detection-of-Faults/EnergyData_D4.xlsx -------------------------------------------------------------------------------- /Chapter5-Threshold-Comparison/Climate_Data.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter5-Threshold-Comparison/Climate_Data.xls -------------------------------------------------------------------------------- /Chapter5-EnergyModel-Comparison/Climate_Data.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter5-EnergyModel-Comparison/Climate_Data.xls -------------------------------------------------------------------------------- /Chapter5-EnergyModel-Comparison/EnergyData_D1.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter5-EnergyModel-Comparison/EnergyData_D1.xlsx -------------------------------------------------------------------------------- /Chapter5-EnergyModel-Comparison/EnergyData_D2.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter5-EnergyModel-Comparison/EnergyData_D2.xlsx -------------------------------------------------------------------------------- /Chapter5-EnergyModel-Comparison/EnergyData_D3.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter5-EnergyModel-Comparison/EnergyData_D3.xlsx -------------------------------------------------------------------------------- /Chapter5-Threshold-Comparison/EnergyData_D1.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter5-Threshold-Comparison/EnergyData_D1.xlsx -------------------------------------------------------------------------------- /Chapter5-Threshold-Comparison/EnergyData_D2.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter5-Threshold-Comparison/EnergyData_D2.xlsx -------------------------------------------------------------------------------- /Chapter5-Threshold-Comparison/EnergyData_D3.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter5-Threshold-Comparison/EnergyData_D3.xlsx -------------------------------------------------------------------------------- /Chapter5-Threshold-Comparison/~$Feature_Scores.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter5-Threshold-Comparison/~$Feature_Scores.xlsx -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-10/Condenser_Fouling_Fault_Data.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-10/Condenser_Fouling_Fault_Data.xlsx -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-10/Refrigerant_Leak_Fault_Data.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-10/Refrigerant_Leak_Fault_Data.xlsx -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-20/Condenser_Fouling_Fault_Data.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-20/Condenser_Fouling_Fault_Data.xlsx -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-20/Refrigerant_Leak_Fault_Data.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-20/Refrigerant_Leak_Fault_Data.xlsx -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-30/Condenser_Fouling_Fault_Data.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-30/Condenser_Fouling_Fault_Data.xlsx -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-30/Refrigerant_Leak_Fault_Data.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-30/Refrigerant_Leak_Fault_Data.xlsx -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-10/Reduced_Condenser_Water_Flow_Fault_Data.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-10/Reduced_Condenser_Water_Flow_Fault_Data.xlsx -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-20/Reduced_Condenser_Water_Flow_Fault_Data.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-20/Reduced_Condenser_Water_Flow_Fault_Data.xlsx -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-30/Reduced_Condenser_Water_Flow_Fault_Data.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-30/Reduced_Condenser_Water_Flow_Fault_Data.xlsx -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Fault-Detection-HVAC 2 | Python Source code and datasets used in my doctoral dissertation - Detection of faults in HVAC systems using tree-based ensemble models and dynamic thresholds 3 | -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-10/Reduced_Evaporator_Water_Flow_Fault_Data.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-10/Reduced_Evaporator_Water_Flow_Fault_Data.xlsx -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-20/Reduced_Evaporator_Water_Flow_Fault_Data.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-20/Reduced_Evaporator_Water_Flow_Fault_Data.xlsx -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-30/Reduced_Evaporator_Water_Flow_Fault_Data.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-30/Reduced_Evaporator_Water_Flow_Fault_Data.xlsx -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-20/Refrigerant_Leak_SVM_Dynamic_Threshold.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "from sklearn.model_selection import GridSearchCV, KFold, train_test_split\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "import seaborn as sns\n", 16 | "sns.set(color_codes=True)\n", 17 | "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer, adjusted_rand_score, \\\n", 18 | " accuracy_score, f1_score, confusion_matrix, classification_report, roc_auc_score, recall_score\n", 19 | "from time import time\n", 20 | "from sklearn.preprocessing import MinMaxScaler\n", 21 | "import scipy.stats as st\n", 22 | "from sklearn.feature_selection import RFE, RFECV, SelectKBest, mutual_info_regression\n", 23 | "from sklearn.svm import SVR\n", 24 | "from sklearn.pipeline import Pipeline\n", 25 | "import pprint as pp\n", 26 | "%matplotlib inline" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": { 33 | "collapsed": false 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "Chiller_Data = pd.read_excel('Refrigerant_Leak_Fault_Data.xlsx')" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": { 44 | "collapsed": true 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "Chiller_Data = Chiller_Data.loc[Chiller_Data['kW'] != 1.682000e-45]\n", 49 | "Chiller_Data.reset_index(drop=True, inplace=True)" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": { 56 | "collapsed": false 57 | }, 58 | "outputs": [], 59 | "source": [ 60 | "Chiller_Data['Target_EPS'] = (Chiller_Data['TRC_sub'])/(Chiller_Data['TRC']-Chiller_Data['TCI'])\n", 61 | "Chiller_Data['Target_LMTD'] = (Chiller_Data['TCO']-Chiller_Data['TCI'])/np.log((Chiller_Data['TRC']-Chiller_Data['TCI'])/(Chiller_Data['TRC']-Chiller_Data['TCO']))\n", 62 | "Chiller_Data['Lag1'] = (Chiller_Data['Target_EPS'].shift(1))\n", 63 | "Chiller_Data['Lag2'] = (Chiller_Data['Target_LMTD'].shift(1))\n", 64 | "Chiller_Data.dropna(axis=0,inplace=True)\n", 65 | "#Time_data = Chiller_Data['Time (minutes)']" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": { 72 | "collapsed": true 73 | }, 74 | "outputs": [], 75 | "source": [ 76 | "y = Chiller_Data[['Target_EPS','Target_LMTD']].as_matrix()\n", 77 | "True_Labels = Chiller_Data['Label'].as_matrix()\n", 78 | "Chiller_Data.drop(['Target_EPS','Target_LMTD','Label','Time (minutes)'], axis=1, inplace=True)\n", 79 | "X = Chiller_Data.as_matrix()\n", 80 | "Feature_Names = list(Chiller_Data)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": { 87 | "collapsed": true 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "#################################################################################################\n", 92 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.55, shuffle=False)\n", 93 | "TL_train, TL_Test = train_test_split(True_Labels, test_size=0.55, shuffle=False)\n", 94 | "#DT_train, DT_Test = train_test_split(Time_data, test_size=0.55, shuffle=False)\n", 95 | "#################################################################################################" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": { 102 | "collapsed": false 103 | }, 104 | "outputs": [], 105 | "source": [ 106 | "def calc_dyn_threshold(A_EPS, P_EPS, A_LMTD, P_LMTD, I, N):\n", 107 | " # Control false alarm rates by tuning I and N. eg. increase I or N to reduce false alarms\n", 108 | " threshold_EPS = np.zeros(I-1)\n", 109 | " threshold_EPS[0:(I-1)] = P_EPS[0:(I-1)]\n", 110 | " threshold_LMTD = np.zeros(I-1)\n", 111 | " threshold_LMTD[0:(I-1)] = P_LMTD[0:(I-1)]\n", 112 | " labels = np.zeros(I-1)\n", 113 | " for k in np.arange(I,len(P_EPS)+1):\n", 114 | " mu_EPS = np.mean(P_EPS[(k-I):k])\n", 115 | " sigma_EPS = np.std(P_EPS[(k-I):k])\n", 116 | " T_EPS = mu_EPS - N*sigma_EPS\n", 117 | " threshold_EPS = np.append(threshold_EPS,T_EPS)\n", 118 | " mu_LMTD = np.mean(P_LMTD[(k-I):k])\n", 119 | " sigma_LMTD = np.std(P_LMTD[(k-I):k])\n", 120 | " T_LMTD = mu_LMTD - N*sigma_LMTD\n", 121 | " threshold_LMTD = np.append(threshold_LMTD,T_LMTD)\n", 122 | " \n", 123 | " if (A_EPS[k-1] < threshold_EPS[k-1] or A_LMTD[k-1] < threshold_LMTD[k-1]) :\n", 124 | " labels = np.append(labels,1)\n", 125 | " else:\n", 126 | " labels = np.append(labels,0)\n", 127 | " return labels, threshold_EPS, threshold_LMTD" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": { 134 | "collapsed": false 135 | }, 136 | "outputs": [], 137 | "source": [ 138 | "t0 = time()\n", 139 | "np.random.seed(7)\n", 140 | "########################################################################################\n", 141 | "# Regression\n", 142 | "kf = KFold(n_splits=10, shuffle=True, random_state=7)\n", 143 | "scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n", 144 | "Y_Test_Pred_scaled = np.zeros((len(y_test),2))\n", 145 | "\n", 146 | "scaler = MinMaxScaler()\n", 147 | "scaler.fit(y_train)\n", 148 | "y_train_scaled = scaler.transform(y_train)\n", 149 | "\n", 150 | "estimators = []\n", 151 | "estimators.append(('standardize', MinMaxScaler()))\n", 152 | "estimators.append(('FS', SelectKBest(mutual_info_regression)))\n", 153 | "estimators.append(('SVM', SVR()))\n", 154 | "pipe = Pipeline(estimators)\n", 155 | " \n", 156 | "p_grid = dict(FS__k = [8, 16],\n", 157 | " SVM__gamma = np.logspace(-3, 0, 4),\n", 158 | " SVM__C = np.logspace(0, 3, 4))\n", 159 | " \n", 160 | "model = GridSearchCV(estimator = pipe, param_grid = p_grid, scoring = scoring_param, cv = kf, n_jobs=-1)\n", 161 | "model.fit(X_train, y_train_scaled[:,0])\n", 162 | " \n", 163 | "params = model.best_params_\n", 164 | "print(\"Best best k: %s Best gamma: %f Best C: %s\" % (params['FS__k'], params['SVM__gamma'], params['SVM__C']))\n", 165 | " \n", 166 | "Y_Test_Pred_scaled[:,0] = model.predict(X_test)\n", 167 | "\n", 168 | "model = GridSearchCV(estimator = pipe, param_grid = p_grid, scoring = scoring_param, cv = kf, n_jobs=-1)\n", 169 | "model.fit(X_train, y_train_scaled[:,1])\n", 170 | " \n", 171 | "params = model.best_params_\n", 172 | "print(\"Best best k: %s Best gamma: %f Best C: %s\" % (params['FS__k'], params['SVM__gamma'], params['SVM__C']))\n", 173 | " \n", 174 | "Y_Test_Pred_scaled[:,1] = model.predict(X_test)\n", 175 | "Y_Test_Pred = scaler.inverse_transform(Y_Test_Pred_scaled)\n", 176 | "\n", 177 | "P_EPS = Y_Test_Pred[:,0]\n", 178 | "P_LMTD = Y_Test_Pred[:,1]\n", 179 | " \n", 180 | "Labels, Threshold_EPS, Threshold_LMTD = calc_dyn_threshold(y_test[:,0], P_EPS, y_test[:,1], P_LMTD, 2, 2)\n", 181 | "Temp = pd.DataFrame(data={'Labels':TL_Test, 'Pred_Labels': Labels})\n", 182 | "\n", 183 | "print(\"########################################################################################\")\n", 184 | "print(\"Confusion Matrix - testing:\")\n", 185 | "print(confusion_matrix(Temp['Labels'], Temp['Pred_Labels']))\n", 186 | "tn, fp, fn, tp = confusion_matrix(Temp['Labels'], Temp['Pred_Labels']).ravel()\n", 187 | "print(\"True Negative, False Positive, False Negative, True Positive {}.\".format([tn, fp, fn, tp]))\n", 188 | "print(\"False positive means false alarms\")\n", 189 | "print(\"False Negative means missed faults\")\n", 190 | "print(\"########################################################################################\")\n", 191 | "print(\"Classification Report - testing:\")\n", 192 | "print(classification_report(Temp['Labels'], Temp['Pred_Labels'], target_names=['Normal', 'Fault']))\n", 193 | "print(\"########################################################################################\")\n", 194 | "print(\"Accuracy - testing: %0.3f\" % accuracy_score(Temp['Labels'], Temp['Pred_Labels']))\n", 195 | "print(\"########################################################################################\")\n", 196 | "print(\"ROC AUC score - testing: %0.3f\" % roc_auc_score(Temp['Labels'], Temp['Pred_Labels']))\n", 197 | "print(\"########################################################################################\")\n", 198 | "########################################################################################\n", 199 | " \n", 200 | "t1 = time()\n", 201 | "print('Time taken for this trial %f' %(t1-t0))" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": { 208 | "collapsed": true 209 | }, 210 | "outputs": [], 211 | "source": [] 212 | } 213 | ], 214 | "metadata": { 215 | "anaconda-cloud": {}, 216 | "kernelspec": { 217 | "display_name": "Python [Root]", 218 | "language": "python", 219 | "name": "Python [Root]" 220 | }, 221 | "language_info": { 222 | "codemirror_mode": { 223 | "name": "ipython", 224 | "version": 3 225 | }, 226 | "file_extension": ".py", 227 | "mimetype": "text/x-python", 228 | "name": "python", 229 | "nbconvert_exporter": "python", 230 | "pygments_lexer": "ipython3", 231 | "version": "3.5.4" 232 | } 233 | }, 234 | "nbformat": 4, 235 | "nbformat_minor": 1 236 | } 237 | -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-30/Refrigerant_Leak_SVM_Dynamic_Threshold.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "from sklearn.model_selection import GridSearchCV, KFold, train_test_split\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "import seaborn as sns\n", 16 | "sns.set(color_codes=True)\n", 17 | "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer, adjusted_rand_score, \\\n", 18 | " accuracy_score, f1_score, confusion_matrix, classification_report, roc_auc_score, recall_score\n", 19 | "from time import time\n", 20 | "from sklearn.preprocessing import MinMaxScaler\n", 21 | "import scipy.stats as st\n", 22 | "from sklearn.feature_selection import RFE, RFECV, SelectKBest, mutual_info_regression\n", 23 | "from sklearn.svm import SVR\n", 24 | "from sklearn.pipeline import Pipeline\n", 25 | "import pprint as pp\n", 26 | "%matplotlib inline" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": { 33 | "collapsed": false 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "Chiller_Data = pd.read_excel('Refrigerant_Leak_Fault_Data.xlsx')" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": { 44 | "collapsed": true 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "Chiller_Data = Chiller_Data.loc[Chiller_Data['kW'] != 1.682000e-45]\n", 49 | "Chiller_Data.reset_index(drop=True, inplace=True)" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": { 56 | "collapsed": false 57 | }, 58 | "outputs": [], 59 | "source": [ 60 | "Chiller_Data['Target_EPS'] = (Chiller_Data['TRC_sub'])/(Chiller_Data['TRC']-Chiller_Data['TCI'])\n", 61 | "Chiller_Data['Target_LMTD'] = (Chiller_Data['TCO']-Chiller_Data['TCI'])/np.log((Chiller_Data['TRC']-Chiller_Data['TCI'])/(Chiller_Data['TRC']-Chiller_Data['TCO']))\n", 62 | "Chiller_Data['Lag1'] = (Chiller_Data['Target_EPS'].shift(1))\n", 63 | "Chiller_Data['Lag2'] = (Chiller_Data['Target_LMTD'].shift(1))\n", 64 | "Chiller_Data.dropna(axis=0,inplace=True)\n", 65 | "#Time_data = Chiller_Data['Time (minutes)']" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": { 72 | "collapsed": true 73 | }, 74 | "outputs": [], 75 | "source": [ 76 | "y = Chiller_Data[['Target_EPS','Target_LMTD']].as_matrix()\n", 77 | "True_Labels = Chiller_Data['Label'].as_matrix()\n", 78 | "Chiller_Data.drop(['Target_EPS','Target_LMTD','Label','Time (minutes)'], axis=1, inplace=True)\n", 79 | "X = Chiller_Data.as_matrix()\n", 80 | "Feature_Names = list(Chiller_Data)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": { 87 | "collapsed": true 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "#################################################################################################\n", 92 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.55, shuffle=False)\n", 93 | "TL_train, TL_Test = train_test_split(True_Labels, test_size=0.55, shuffle=False)\n", 94 | "#DT_train, DT_Test = train_test_split(Time_data, test_size=0.55, shuffle=False)\n", 95 | "#################################################################################################" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": { 102 | "collapsed": false 103 | }, 104 | "outputs": [], 105 | "source": [ 106 | "def calc_dyn_threshold(A_EPS, P_EPS, A_LMTD, P_LMTD, I, N):\n", 107 | " # Control false alarm rates by tuning I and N. eg. increase I or N to reduce false alarms\n", 108 | " threshold_EPS = np.zeros(I-1)\n", 109 | " threshold_EPS[0:(I-1)] = P_EPS[0:(I-1)]\n", 110 | " threshold_LMTD = np.zeros(I-1)\n", 111 | " threshold_LMTD[0:(I-1)] = P_LMTD[0:(I-1)]\n", 112 | " labels = np.zeros(I-1)\n", 113 | " for k in np.arange(I,len(P_EPS)+1):\n", 114 | " mu_EPS = np.mean(P_EPS[(k-I):k])\n", 115 | " sigma_EPS = np.std(P_EPS[(k-I):k])\n", 116 | " T_EPS = mu_EPS - N*sigma_EPS\n", 117 | " threshold_EPS = np.append(threshold_EPS,T_EPS)\n", 118 | " mu_LMTD = np.mean(P_LMTD[(k-I):k])\n", 119 | " sigma_LMTD = np.std(P_LMTD[(k-I):k])\n", 120 | " T_LMTD = mu_LMTD - N*sigma_LMTD\n", 121 | " threshold_LMTD = np.append(threshold_LMTD,T_LMTD)\n", 122 | " \n", 123 | " if (A_EPS[k-1] < threshold_EPS[k-1] or A_LMTD[k-1] < threshold_LMTD[k-1]) :\n", 124 | " labels = np.append(labels,1)\n", 125 | " else:\n", 126 | " labels = np.append(labels,0)\n", 127 | " return labels, threshold_EPS, threshold_LMTD" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": { 134 | "collapsed": false 135 | }, 136 | "outputs": [], 137 | "source": [ 138 | "t0 = time()\n", 139 | "np.random.seed(7)\n", 140 | "########################################################################################\n", 141 | "# Regression\n", 142 | "kf = KFold(n_splits=10, shuffle=True, random_state=7)\n", 143 | "scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n", 144 | "Y_Test_Pred_scaled = np.zeros((len(y_test),2))\n", 145 | "\n", 146 | "scaler = MinMaxScaler()\n", 147 | "scaler.fit(y_train)\n", 148 | "y_train_scaled = scaler.transform(y_train)\n", 149 | "\n", 150 | "estimators = []\n", 151 | "estimators.append(('standardize', MinMaxScaler()))\n", 152 | "estimators.append(('FS', SelectKBest(mutual_info_regression)))\n", 153 | "estimators.append(('SVM', SVR()))\n", 154 | "pipe = Pipeline(estimators)\n", 155 | " \n", 156 | "p_grid = dict(FS__k = [8, 16],\n", 157 | " SVM__gamma = np.logspace(-3, 0, 4),\n", 158 | " SVM__C = np.logspace(0, 3, 4))\n", 159 | " \n", 160 | "model = GridSearchCV(estimator = pipe, param_grid = p_grid, scoring = scoring_param, cv = kf, n_jobs=-1)\n", 161 | "model.fit(X_train, y_train_scaled[:,0])\n", 162 | " \n", 163 | "params = model.best_params_\n", 164 | "print(\"Best best k: %s Best gamma: %f Best C: %s\" % (params['FS__k'], params['SVM__gamma'], params['SVM__C']))\n", 165 | " \n", 166 | "Y_Test_Pred_scaled[:,0] = model.predict(X_test)\n", 167 | "\n", 168 | "model = GridSearchCV(estimator = pipe, param_grid = p_grid, scoring = scoring_param, cv = kf, n_jobs=-1)\n", 169 | "model.fit(X_train, y_train_scaled[:,1])\n", 170 | " \n", 171 | "params = model.best_params_\n", 172 | "print(\"Best best k: %s Best gamma: %f Best C: %s\" % (params['FS__k'], params['SVM__gamma'], params['SVM__C']))\n", 173 | " \n", 174 | "Y_Test_Pred_scaled[:,1] = model.predict(X_test)\n", 175 | "Y_Test_Pred = scaler.inverse_transform(Y_Test_Pred_scaled)\n", 176 | "\n", 177 | "P_EPS = Y_Test_Pred[:,0]\n", 178 | "P_LMTD = Y_Test_Pred[:,1]\n", 179 | " \n", 180 | "Labels, Threshold_EPS, Threshold_LMTD = calc_dyn_threshold(y_test[:,0], P_EPS, y_test[:,1], P_LMTD, 2, 2)\n", 181 | "Temp = pd.DataFrame(data={'Labels':TL_Test, 'Pred_Labels': Labels})\n", 182 | "\n", 183 | "print(\"########################################################################################\")\n", 184 | "print(\"Confusion Matrix - testing:\")\n", 185 | "print(confusion_matrix(Temp['Labels'], Temp['Pred_Labels']))\n", 186 | "tn, fp, fn, tp = confusion_matrix(Temp['Labels'], Temp['Pred_Labels']).ravel()\n", 187 | "print(\"True Negative, False Positive, False Negative, True Positive {}.\".format([tn, fp, fn, tp]))\n", 188 | "print(\"False positive means false alarms\")\n", 189 | "print(\"False Negative means missed faults\")\n", 190 | "print(\"########################################################################################\")\n", 191 | "print(\"Classification Report - testing:\")\n", 192 | "print(classification_report(Temp['Labels'], Temp['Pred_Labels'], target_names=['Normal', 'Fault']))\n", 193 | "print(\"########################################################################################\")\n", 194 | "print(\"Accuracy - testing: %0.3f\" % accuracy_score(Temp['Labels'], Temp['Pred_Labels']))\n", 195 | "print(\"########################################################################################\")\n", 196 | "print(\"ROC AUC score - testing: %0.3f\" % roc_auc_score(Temp['Labels'], Temp['Pred_Labels']))\n", 197 | "print(\"########################################################################################\")\n", 198 | "########################################################################################\n", 199 | " \n", 200 | "t1 = time()\n", 201 | "print('Time taken for this trial %f' %(t1-t0))" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": { 208 | "collapsed": true 209 | }, 210 | "outputs": [], 211 | "source": [] 212 | } 213 | ], 214 | "metadata": { 215 | "anaconda-cloud": {}, 216 | "kernelspec": { 217 | "display_name": "Python [Root]", 218 | "language": "python", 219 | "name": "Python [Root]" 220 | }, 221 | "language_info": { 222 | "codemirror_mode": { 223 | "name": "ipython", 224 | "version": 3 225 | }, 226 | "file_extension": ".py", 227 | "mimetype": "text/x-python", 228 | "name": "python", 229 | "nbconvert_exporter": "python", 230 | "pygments_lexer": "ipython3", 231 | "version": "3.5.4" 232 | } 233 | }, 234 | "nbformat": 4, 235 | "nbformat_minor": 1 236 | } 237 | -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-20/Refrigerant_Leak_ERF_Dynamic_Threshold.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, TimeSeriesSplit, KFold, RepeatedKFold, \\\n", 14 | " train_test_split\n", 15 | "import matplotlib.pyplot as plt\n", 16 | "import seaborn as sns\n", 17 | "sns.set(color_codes=True)\n", 18 | "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer, adjusted_rand_score, \\\n", 19 | " accuracy_score, f1_score, confusion_matrix, classification_report, roc_auc_score, recall_score\n", 20 | "from time import time\n", 21 | "from sklearn.preprocessing import MinMaxScaler\n", 22 | "from sklearn.preprocessing import quantile_transform\n", 23 | "import scipy.stats as st\n", 24 | "from sklearn.feature_selection import RFE, RFECV, SelectFromModel\n", 25 | "from xgboost import XGBRegressor\n", 26 | "from sklearn.ensemble import ExtraTreesRegressor\n", 27 | "from sklearn.pipeline import Pipeline\n", 28 | "from sklearn.multioutput import MultiOutputRegressor\n", 29 | "import pprint as pp\n", 30 | "import datetime\n", 31 | "%matplotlib inline" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": { 38 | "collapsed": false 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "Chiller_Data = pd.read_excel('Refrigerant_Leak_Fault_Data.xlsx')" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": { 49 | "collapsed": false 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "Chiller_Data = Chiller_Data.loc[Chiller_Data['kW'] != 1.682000e-45]\n", 54 | "Chiller_Data.reset_index(drop=True, inplace=True)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": { 61 | "collapsed": false 62 | }, 63 | "outputs": [], 64 | "source": [ 65 | "Chiller_Data['Target_EPS'] = (Chiller_Data['TRC_sub'])/(Chiller_Data['TRC']-Chiller_Data['TCI'])\n", 66 | "Chiller_Data['Target_LMTD'] = (Chiller_Data['TCO']-Chiller_Data['TCI'])/np.log((Chiller_Data['TRC']-Chiller_Data['TCI'])/(Chiller_Data['TRC']-Chiller_Data['TCO']))\n", 67 | "Chiller_Data['Lag1'] = (Chiller_Data['Target_EPS'].shift(1))\n", 68 | "Chiller_Data['Lag2'] = (Chiller_Data['Target_LMTD'].shift(1))\n", 69 | "Chiller_Data.dropna(axis=0,inplace=True)\n", 70 | "#Time_data = Chiller_Data['Time (minutes)']" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": { 77 | "collapsed": true 78 | }, 79 | "outputs": [], 80 | "source": [ 81 | "y = Chiller_Data[['Target_EPS','Target_LMTD']].as_matrix()\n", 82 | "True_Labels = Chiller_Data['Label'].as_matrix()\n", 83 | "Chiller_Data.drop(['Target_EPS','Target_LMTD','Label','Time (minutes)'], axis=1, inplace=True)\n", 84 | "X = Chiller_Data.as_matrix()\n", 85 | "Feature_Names = list(Chiller_Data)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": { 92 | "collapsed": false 93 | }, 94 | "outputs": [], 95 | "source": [ 96 | "#################################################################################################\n", 97 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.55, shuffle=False)\n", 98 | "TL_train, TL_Test = train_test_split(True_Labels, test_size=0.55, shuffle=False)\n", 99 | "#DT_train, DT_Test = train_test_split(Time_data, test_size=0.55, shuffle=False)\n", 100 | "#################################################################################################" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": { 107 | "collapsed": true 108 | }, 109 | "outputs": [], 110 | "source": [ 111 | "def calc_dyn_threshold(A_EPS, P_EPS, A_LMTD, P_LMTD, I, N):\n", 112 | " # Control false alarm rates by tuning I and N. eg. increase I or N to reduce false alarms\n", 113 | " threshold_EPS = np.zeros(I-1)\n", 114 | " threshold_EPS[0:(I-1)] = P_EPS[0:(I-1)]\n", 115 | " threshold_LMTD = np.zeros(I-1)\n", 116 | " threshold_LMTD[0:(I-1)] = P_LMTD[0:(I-1)]\n", 117 | " labels = np.zeros(I-1)\n", 118 | " for k in np.arange(I,len(P_EPS)+1):\n", 119 | " mu_EPS = np.mean(P_EPS[(k-I):k])\n", 120 | " sigma_EPS = np.std(P_EPS[(k-I):k])\n", 121 | " T_EPS = mu_EPS - N*sigma_EPS\n", 122 | " threshold_EPS = np.append(threshold_EPS,T_EPS)\n", 123 | " mu_LMTD = np.mean(P_LMTD[(k-I):k])\n", 124 | " sigma_LMTD = np.std(P_LMTD[(k-I):k])\n", 125 | " T_LMTD = mu_LMTD - N*sigma_LMTD\n", 126 | " threshold_LMTD = np.append(threshold_LMTD,T_LMTD)\n", 127 | " \n", 128 | " if (A_EPS[k-1] < threshold_EPS[k-1] or A_LMTD[k-1] < threshold_LMTD[k-1]) :\n", 129 | " labels = np.append(labels,1)\n", 130 | " else:\n", 131 | " labels = np.append(labels,0)\n", 132 | " return labels, threshold_EPS, threshold_LMTD" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": { 139 | "collapsed": false 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "t0 = time()\n", 144 | "np.random.seed(7)\n", 145 | "########################################################################################\n", 146 | "# Regression\n", 147 | "kf = KFold(n_splits=10, shuffle=True, random_state=7)\n", 148 | "scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n", 149 | "\n", 150 | "rfecv = RFECV(estimator=ExtraTreesRegressor(n_jobs=-1), step=1, cv=kf, scoring=scoring_param, n_jobs=-1)\n", 151 | "FS_model = rfecv.fit(X_train, y_train[:,0])\n", 152 | "\n", 153 | "ranks = FS_model.ranking_\n", 154 | "FN =[]\n", 155 | "for i in range(len(ranks)):\n", 156 | " if ranks[i] == 1:\n", 157 | " FN.append(Feature_Names[i])\n", 158 | "print(FN)\n", 159 | "\n", 160 | "X = Chiller_Data[FN].as_matrix()\n", 161 | "X_train, X_test = train_test_split(X, test_size=0.55, shuffle=False)\n", 162 | "\n", 163 | "NE = [int(i) for i in np.linspace(100,1000,num=10)]\n", 164 | "p_grid = dict()\n", 165 | "p_grid = dict(n_estimators = NE)\n", 166 | "\n", 167 | "model = GridSearchCV(estimator = ExtraTreesRegressor(n_jobs=-1), param_grid = p_grid, scoring = scoring_param, cv = kf, \n", 168 | " n_jobs=-1)\n", 169 | "model.fit(X_train, y_train[:,0])\n", 170 | " \n", 171 | "params = model.best_params_\n", 172 | "print(\"Best Est: %s\" % (params['n_estimators']))\n", 173 | " \n", 174 | "P_EPS = model.predict(X_test)\n", 175 | "\n", 176 | "######################################################################################################\n", 177 | "\n", 178 | "rfecv = RFECV(estimator=ExtraTreesRegressor(n_jobs=-1), step=1, cv=kf, scoring=scoring_param, n_jobs=-1)\n", 179 | "FS_model = rfecv.fit(X_train, y_train[:,1])\n", 180 | "\n", 181 | "ranks = FS_model.ranking_\n", 182 | "FN =[]\n", 183 | "for i in range(len(ranks)):\n", 184 | " if ranks[i] == 1:\n", 185 | " FN.append(Feature_Names[i])\n", 186 | "print(FN)\n", 187 | "\n", 188 | "X = Chiller_Data[FN].as_matrix()\n", 189 | "X_train, X_test = train_test_split(X, test_size=0.55, shuffle=False)\n", 190 | "\n", 191 | "NE = [int(i) for i in np.linspace(100,1000,num=10)]\n", 192 | "p_grid = dict()\n", 193 | "p_grid = dict(n_estimators = NE)\n", 194 | "\n", 195 | "model = GridSearchCV(estimator = ExtraTreesRegressor(n_jobs=-1), param_grid = p_grid, scoring = scoring_param, cv = kf, \n", 196 | " n_jobs=-1)\n", 197 | "model.fit(X_train, y_train[:,1])\n", 198 | " \n", 199 | "params = model.best_params_\n", 200 | "print(\"Best Est: %s\" % (params['n_estimators']))\n", 201 | " \n", 202 | "P_LMTD = model.predict(X_test)\n", 203 | "\n", 204 | "\n", 205 | "Labels, Threshold_EPS, Threshold_LMTD = calc_dyn_threshold(y_test[:,0], P_EPS, y_test[:,1], P_LMTD, 2, 2)\n", 206 | "Temp = pd.DataFrame(data={'Labels':TL_Test, 'Pred_Labels': Labels})\n", 207 | "\n", 208 | "print(\"########################################################################################\")\n", 209 | "print(\"Confusion Matrix - testing:\")\n", 210 | "print(confusion_matrix(Temp['Labels'], Temp['Pred_Labels']))\n", 211 | "tn, fp, fn, tp = confusion_matrix(Temp['Labels'], Temp['Pred_Labels']).ravel()\n", 212 | "print(\"True Negative, False Positive, False Negative, True Positive {}.\".format([tn, fp, fn, tp]))\n", 213 | "print(\"False positive means false alarms\")\n", 214 | "print(\"False Negative means missed faults\")\n", 215 | "print(\"########################################################################################\")\n", 216 | "print(\"Classification Report - testing:\")\n", 217 | "print(classification_report(Temp['Labels'], Temp['Pred_Labels'], target_names=['Normal', 'Fault']))\n", 218 | "print(\"########################################################################################\")\n", 219 | "print(\"Accuracy - testing: %0.3f\" % accuracy_score(Temp['Labels'], Temp['Pred_Labels']))\n", 220 | "print(\"########################################################################################\")\n", 221 | "print(\"ROC AUC score - testing: %0.3f\" % roc_auc_score(Temp['Labels'], Temp['Pred_Labels']))\n", 222 | "print(\"########################################################################################\")\n", 223 | "########################################################################################\n", 224 | " \n", 225 | "t1 = time()\n", 226 | "print('Time taken for this trial %f' %(t1-t0))" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": null, 232 | "metadata": { 233 | "collapsed": true 234 | }, 235 | "outputs": [], 236 | "source": [] 237 | } 238 | ], 239 | "metadata": { 240 | "anaconda-cloud": {}, 241 | "kernelspec": { 242 | "display_name": "Python [Root]", 243 | "language": "python", 244 | "name": "Python [Root]" 245 | }, 246 | "language_info": { 247 | "codemirror_mode": { 248 | "name": "ipython", 249 | "version": 3 250 | }, 251 | "file_extension": ".py", 252 | "mimetype": "text/x-python", 253 | "name": "python", 254 | "nbconvert_exporter": "python", 255 | "pygments_lexer": "ipython3", 256 | "version": "3.5.4" 257 | } 258 | }, 259 | "nbformat": 4, 260 | "nbformat_minor": 1 261 | } 262 | -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-30/Refrigerant_Leak_ERF_Dynamic_Threshold.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, TimeSeriesSplit, KFold, RepeatedKFold, \\\n", 14 | " train_test_split\n", 15 | "import matplotlib.pyplot as plt\n", 16 | "import seaborn as sns\n", 17 | "sns.set(color_codes=True)\n", 18 | "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer, adjusted_rand_score, \\\n", 19 | " accuracy_score, f1_score, confusion_matrix, classification_report, roc_auc_score, recall_score\n", 20 | "from time import time\n", 21 | "from sklearn.preprocessing import MinMaxScaler\n", 22 | "from sklearn.preprocessing import quantile_transform\n", 23 | "import scipy.stats as st\n", 24 | "from sklearn.feature_selection import RFE, RFECV, SelectFromModel\n", 25 | "from xgboost import XGBRegressor\n", 26 | "from sklearn.ensemble import ExtraTreesRegressor\n", 27 | "from sklearn.pipeline import Pipeline\n", 28 | "from sklearn.multioutput import MultiOutputRegressor\n", 29 | "import pprint as pp\n", 30 | "import datetime\n", 31 | "%matplotlib inline" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": { 38 | "collapsed": false 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "Chiller_Data = pd.read_excel('Refrigerant_Leak_Fault_Data.xlsx')" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": { 49 | "collapsed": false 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "Chiller_Data = Chiller_Data.loc[Chiller_Data['kW'] != 1.682000e-45]\n", 54 | "Chiller_Data.reset_index(drop=True, inplace=True)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": { 61 | "collapsed": false 62 | }, 63 | "outputs": [], 64 | "source": [ 65 | "Chiller_Data['Target_EPS'] = (Chiller_Data['TRC_sub'])/(Chiller_Data['TRC']-Chiller_Data['TCI'])\n", 66 | "Chiller_Data['Target_LMTD'] = (Chiller_Data['TCO']-Chiller_Data['TCI'])/np.log((Chiller_Data['TRC']-Chiller_Data['TCI'])/(Chiller_Data['TRC']-Chiller_Data['TCO']))\n", 67 | "Chiller_Data['Lag1'] = (Chiller_Data['Target_EPS'].shift(1))\n", 68 | "Chiller_Data['Lag2'] = (Chiller_Data['Target_LMTD'].shift(1))\n", 69 | "Chiller_Data.dropna(axis=0,inplace=True)\n", 70 | "#Time_data = Chiller_Data['Time (minutes)']" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": { 77 | "collapsed": true 78 | }, 79 | "outputs": [], 80 | "source": [ 81 | "y = Chiller_Data[['Target_EPS','Target_LMTD']].as_matrix()\n", 82 | "True_Labels = Chiller_Data['Label'].as_matrix()\n", 83 | "Chiller_Data.drop(['Target_EPS','Target_LMTD','Label','Time (minutes)'], axis=1, inplace=True)\n", 84 | "X = Chiller_Data.as_matrix()\n", 85 | "Feature_Names = list(Chiller_Data)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": { 92 | "collapsed": false 93 | }, 94 | "outputs": [], 95 | "source": [ 96 | "#################################################################################################\n", 97 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.55, shuffle=False)\n", 98 | "TL_train, TL_Test = train_test_split(True_Labels, test_size=0.55, shuffle=False)\n", 99 | "#DT_train, DT_Test = train_test_split(Time_data, test_size=0.55, shuffle=False)\n", 100 | "#################################################################################################" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": { 107 | "collapsed": true 108 | }, 109 | "outputs": [], 110 | "source": [ 111 | "def calc_dyn_threshold(A_EPS, P_EPS, A_LMTD, P_LMTD, I, N):\n", 112 | " # Control false alarm rates by tuning I and N. eg. increase I or N to reduce false alarms\n", 113 | " threshold_EPS = np.zeros(I-1)\n", 114 | " threshold_EPS[0:(I-1)] = P_EPS[0:(I-1)]\n", 115 | " threshold_LMTD = np.zeros(I-1)\n", 116 | " threshold_LMTD[0:(I-1)] = P_LMTD[0:(I-1)]\n", 117 | " labels = np.zeros(I-1)\n", 118 | " for k in np.arange(I,len(P_EPS)+1):\n", 119 | " mu_EPS = np.mean(P_EPS[(k-I):k])\n", 120 | " sigma_EPS = np.std(P_EPS[(k-I):k])\n", 121 | " T_EPS = mu_EPS - N*sigma_EPS\n", 122 | " threshold_EPS = np.append(threshold_EPS,T_EPS)\n", 123 | " mu_LMTD = np.mean(P_LMTD[(k-I):k])\n", 124 | " sigma_LMTD = np.std(P_LMTD[(k-I):k])\n", 125 | " T_LMTD = mu_LMTD - N*sigma_LMTD\n", 126 | " threshold_LMTD = np.append(threshold_LMTD,T_LMTD)\n", 127 | " \n", 128 | " if (A_EPS[k-1] < threshold_EPS[k-1] or A_LMTD[k-1] < threshold_LMTD[k-1]) :\n", 129 | " labels = np.append(labels,1)\n", 130 | " else:\n", 131 | " labels = np.append(labels,0)\n", 132 | " return labels, threshold_EPS, threshold_LMTD" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": { 139 | "collapsed": false 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "t0 = time()\n", 144 | "np.random.seed(7)\n", 145 | "########################################################################################\n", 146 | "# Regression\n", 147 | "kf = KFold(n_splits=10, shuffle=True, random_state=7)\n", 148 | "scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n", 149 | "\n", 150 | "rfecv = RFECV(estimator=ExtraTreesRegressor(n_jobs=-1), step=1, cv=kf, scoring=scoring_param, n_jobs=-1)\n", 151 | "FS_model = rfecv.fit(X_train, y_train[:,0])\n", 152 | "\n", 153 | "ranks = FS_model.ranking_\n", 154 | "FN =[]\n", 155 | "for i in range(len(ranks)):\n", 156 | " if ranks[i] == 1:\n", 157 | " FN.append(Feature_Names[i])\n", 158 | "print(FN)\n", 159 | "\n", 160 | "X = Chiller_Data[FN].as_matrix()\n", 161 | "X_train, X_test = train_test_split(X, test_size=0.55, shuffle=False)\n", 162 | "\n", 163 | "NE = [int(i) for i in np.linspace(100,1000,num=10)]\n", 164 | "p_grid = dict()\n", 165 | "p_grid = dict(n_estimators = NE)\n", 166 | "\n", 167 | "model = GridSearchCV(estimator = ExtraTreesRegressor(n_jobs=-1), param_grid = p_grid, scoring = scoring_param, cv = kf, \n", 168 | " n_jobs=-1)\n", 169 | "model.fit(X_train, y_train[:,0])\n", 170 | " \n", 171 | "params = model.best_params_\n", 172 | "print(\"Best Est: %s\" % (params['n_estimators']))\n", 173 | " \n", 174 | "P_EPS = model.predict(X_test)\n", 175 | "\n", 176 | "######################################################################################################\n", 177 | "\n", 178 | "rfecv = RFECV(estimator=ExtraTreesRegressor(n_jobs=-1), step=1, cv=kf, scoring=scoring_param, n_jobs=-1)\n", 179 | "FS_model = rfecv.fit(X_train, y_train[:,1])\n", 180 | "\n", 181 | "ranks = FS_model.ranking_\n", 182 | "FN =[]\n", 183 | "for i in range(len(ranks)):\n", 184 | " if ranks[i] == 1:\n", 185 | " FN.append(Feature_Names[i])\n", 186 | "print(FN)\n", 187 | "\n", 188 | "X = Chiller_Data[FN].as_matrix()\n", 189 | "X_train, X_test = train_test_split(X, test_size=0.55, shuffle=False)\n", 190 | "\n", 191 | "NE = [int(i) for i in np.linspace(100,1000,num=10)]\n", 192 | "p_grid = dict()\n", 193 | "p_grid = dict(n_estimators = NE)\n", 194 | "\n", 195 | "model = GridSearchCV(estimator = ExtraTreesRegressor(n_jobs=-1), param_grid = p_grid, scoring = scoring_param, cv = kf, \n", 196 | " n_jobs=-1)\n", 197 | "model.fit(X_train, y_train[:,1])\n", 198 | " \n", 199 | "params = model.best_params_\n", 200 | "print(\"Best Est: %s\" % (params['n_estimators']))\n", 201 | " \n", 202 | "P_LMTD = model.predict(X_test)\n", 203 | "\n", 204 | "\n", 205 | "Labels, Threshold_EPS, Threshold_LMTD = calc_dyn_threshold(y_test[:,0], P_EPS, y_test[:,1], P_LMTD, 2, 2)\n", 206 | "Temp = pd.DataFrame(data={'Labels':TL_Test, 'Pred_Labels': Labels})\n", 207 | "\n", 208 | "print(\"########################################################################################\")\n", 209 | "print(\"Confusion Matrix - testing:\")\n", 210 | "print(confusion_matrix(Temp['Labels'], Temp['Pred_Labels']))\n", 211 | "tn, fp, fn, tp = confusion_matrix(Temp['Labels'], Temp['Pred_Labels']).ravel()\n", 212 | "print(\"True Negative, False Positive, False Negative, True Positive {}.\".format([tn, fp, fn, tp]))\n", 213 | "print(\"False positive means false alarms\")\n", 214 | "print(\"False Negative means missed faults\")\n", 215 | "print(\"########################################################################################\")\n", 216 | "print(\"Classification Report - testing:\")\n", 217 | "print(classification_report(Temp['Labels'], Temp['Pred_Labels'], target_names=['Normal', 'Fault']))\n", 218 | "print(\"########################################################################################\")\n", 219 | "print(\"Accuracy - testing: %0.3f\" % accuracy_score(Temp['Labels'], Temp['Pred_Labels']))\n", 220 | "print(\"########################################################################################\")\n", 221 | "print(\"ROC AUC score - testing: %0.3f\" % roc_auc_score(Temp['Labels'], Temp['Pred_Labels']))\n", 222 | "print(\"########################################################################################\")\n", 223 | "########################################################################################\n", 224 | " \n", 225 | "t1 = time()\n", 226 | "print('Time taken for this trial %f' %(t1-t0))" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": null, 232 | "metadata": { 233 | "collapsed": true 234 | }, 235 | "outputs": [], 236 | "source": [] 237 | } 238 | ], 239 | "metadata": { 240 | "anaconda-cloud": {}, 241 | "kernelspec": { 242 | "display_name": "Python [Root]", 243 | "language": "python", 244 | "name": "Python [Root]" 245 | }, 246 | "language_info": { 247 | "codemirror_mode": { 248 | "name": "ipython", 249 | "version": 3 250 | }, 251 | "file_extension": ".py", 252 | "mimetype": "text/x-python", 253 | "name": "python", 254 | "nbconvert_exporter": "python", 255 | "pygments_lexer": "ipython3", 256 | "version": "3.5.4" 257 | } 258 | }, 259 | "nbformat": 4, 260 | "nbformat_minor": 1 261 | } 262 | -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-10/Refrigerant_Leak_RF_Dynamic_Threshold.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, TimeSeriesSplit, KFold, RepeatedKFold, \\\n", 14 | " train_test_split\n", 15 | "import matplotlib.pyplot as plt\n", 16 | "import seaborn as sns\n", 17 | "sns.set(color_codes=True)\n", 18 | "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer, adjusted_rand_score, \\\n", 19 | " accuracy_score, f1_score, confusion_matrix, classification_report, roc_auc_score, recall_score\n", 20 | "from time import time\n", 21 | "from sklearn.preprocessing import MinMaxScaler\n", 22 | "from sklearn.preprocessing import quantile_transform\n", 23 | "import scipy.stats as st\n", 24 | "from sklearn.feature_selection import RFE, RFECV, SelectFromModel\n", 25 | "from xgboost import XGBRegressor\n", 26 | "from sklearn.ensemble import RandomForestRegressor\n", 27 | "from sklearn.pipeline import Pipeline\n", 28 | "from sklearn.multioutput import MultiOutputRegressor\n", 29 | "import pprint as pp\n", 30 | "import datetime\n", 31 | "%matplotlib inline" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": { 38 | "collapsed": false 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "Chiller_Data = pd.read_excel('Refrigerant_Leak_Fault_Data.xlsx')" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": { 49 | "collapsed": false 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "Chiller_Data = Chiller_Data.loc[Chiller_Data['kW'] != 1.682000e-45]\n", 54 | "Chiller_Data.reset_index(drop=True, inplace=True)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": { 61 | "collapsed": false 62 | }, 63 | "outputs": [], 64 | "source": [ 65 | "Chiller_Data['Target_EPS'] = (Chiller_Data['TRC_sub'])/(Chiller_Data['TRC']-Chiller_Data['TCI'])\n", 66 | "Chiller_Data['Target_LMTD'] = (Chiller_Data['TCO']-Chiller_Data['TCI'])/np.log((Chiller_Data['TRC']-Chiller_Data['TCI'])/(Chiller_Data['TRC']-Chiller_Data['TCO']))\n", 67 | "Chiller_Data['Lag1'] = (Chiller_Data['Target_EPS'].shift(1))\n", 68 | "Chiller_Data['Lag2'] = (Chiller_Data['Target_LMTD'].shift(1))\n", 69 | "Chiller_Data.dropna(axis=0,inplace=True)\n", 70 | "#Time_data = Chiller_Data['Time (minutes)']" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": { 77 | "collapsed": true 78 | }, 79 | "outputs": [], 80 | "source": [ 81 | "y = Chiller_Data[['Target_EPS','Target_LMTD']].as_matrix()\n", 82 | "True_Labels = Chiller_Data['Label'].as_matrix()\n", 83 | "Chiller_Data.drop(['Target_EPS','Target_LMTD','Label','Time (minutes)'], axis=1, inplace=True)\n", 84 | "X = Chiller_Data.as_matrix()\n", 85 | "Feature_Names = list(Chiller_Data)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": { 92 | "collapsed": false 93 | }, 94 | "outputs": [], 95 | "source": [ 96 | "#################################################################################################\n", 97 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.55, shuffle=False)\n", 98 | "TL_train, TL_Test = train_test_split(True_Labels, test_size=0.55, shuffle=False)\n", 99 | "#DT_train, DT_Test = train_test_split(Time_data, test_size=0.55, shuffle=False)\n", 100 | "#################################################################################################" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": { 107 | "collapsed": true 108 | }, 109 | "outputs": [], 110 | "source": [ 111 | "def calc_dyn_threshold(A_EPS, P_EPS, A_LMTD, P_LMTD, I, N):\n", 112 | " # Control false alarm rates by tuning I and N. eg. increase I or N to reduce false alarms\n", 113 | " threshold_EPS = np.zeros(I-1)\n", 114 | " threshold_EPS[0:(I-1)] = P_EPS[0:(I-1)]\n", 115 | " threshold_LMTD = np.zeros(I-1)\n", 116 | " threshold_LMTD[0:(I-1)] = P_LMTD[0:(I-1)]\n", 117 | " labels = np.zeros(I-1)\n", 118 | " for k in np.arange(I,len(P_EPS)+1):\n", 119 | " mu_EPS = np.mean(P_EPS[(k-I):k])\n", 120 | " sigma_EPS = np.std(P_EPS[(k-I):k])\n", 121 | " T_EPS = mu_EPS - N*sigma_EPS\n", 122 | " threshold_EPS = np.append(threshold_EPS,T_EPS)\n", 123 | " mu_LMTD = np.mean(P_LMTD[(k-I):k])\n", 124 | " sigma_LMTD = np.std(P_LMTD[(k-I):k])\n", 125 | " T_LMTD = mu_LMTD - N*sigma_LMTD\n", 126 | " threshold_LMTD = np.append(threshold_LMTD,T_LMTD)\n", 127 | " \n", 128 | " if (A_EPS[k-1] < threshold_EPS[k-1] or A_LMTD[k-1] < threshold_LMTD[k-1]) :\n", 129 | " labels = np.append(labels,1)\n", 130 | " else:\n", 131 | " labels = np.append(labels,0)\n", 132 | " return labels, threshold_EPS, threshold_LMTD" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": { 139 | "collapsed": false 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "t0 = time()\n", 144 | "np.random.seed(7)\n", 145 | "########################################################################################\n", 146 | "# Regression\n", 147 | "kf = KFold(n_splits=10, shuffle=True, random_state=7)\n", 148 | "scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n", 149 | "\n", 150 | "rfecv = RFECV(estimator=RandomForestRegressor(n_jobs=-1), step=1, cv=kf, scoring=scoring_param, n_jobs=-1)\n", 151 | "FS_model = rfecv.fit(X_train, y_train[:,0])\n", 152 | "\n", 153 | "ranks = FS_model.ranking_\n", 154 | "FN =[]\n", 155 | "for i in range(len(ranks)):\n", 156 | " if ranks[i] == 1:\n", 157 | " FN.append(Feature_Names[i])\n", 158 | "print(FN)\n", 159 | "\n", 160 | "X = Chiller_Data[FN].as_matrix()\n", 161 | "X_train, X_test = train_test_split(X, test_size=0.55, shuffle=False)\n", 162 | "\n", 163 | "NE = [int(i) for i in np.linspace(100,1000,num=10)]\n", 164 | "p_grid = dict()\n", 165 | "p_grid = dict(n_estimators = NE)\n", 166 | "\n", 167 | "model = GridSearchCV(estimator = RandomForestRegressor(n_jobs=-1), param_grid = p_grid, scoring = scoring_param, cv = kf, \n", 168 | " n_jobs=-1)\n", 169 | "model.fit(X_train, y_train[:,0])\n", 170 | " \n", 171 | "params = model.best_params_\n", 172 | "print(\"Best Est: %s\" % (params['n_estimators']))\n", 173 | " \n", 174 | "P_EPS = model.predict(X_test)\n", 175 | "\n", 176 | "######################################################################################################\n", 177 | "\n", 178 | "rfecv = RFECV(estimator=RandomForestRegressor(n_jobs=-1), step=1, cv=kf, scoring=scoring_param, n_jobs=-1)\n", 179 | "FS_model = rfecv.fit(X_train, y_train[:,1])\n", 180 | "\n", 181 | "ranks = FS_model.ranking_\n", 182 | "FN =[]\n", 183 | "for i in range(len(ranks)):\n", 184 | " if ranks[i] == 1:\n", 185 | " FN.append(Feature_Names[i])\n", 186 | "print(FN)\n", 187 | "\n", 188 | "X = Chiller_Data[FN].as_matrix()\n", 189 | "X_train, X_test = train_test_split(X, test_size=0.55, shuffle=False)\n", 190 | "\n", 191 | "NE = [int(i) for i in np.linspace(100,1000,num=10)]\n", 192 | "p_grid = dict()\n", 193 | "p_grid = dict(n_estimators = NE)\n", 194 | "\n", 195 | "model = GridSearchCV(estimator = RandomForestRegressor(n_jobs=-1), param_grid = p_grid, scoring = scoring_param, cv = kf, \n", 196 | " n_jobs=-1)\n", 197 | "model.fit(X_train, y_train[:,1])\n", 198 | " \n", 199 | "params = model.best_params_\n", 200 | "print(\"Best Est: %s\" % (params['n_estimators']))\n", 201 | " \n", 202 | "P_LMTD = model.predict(X_test)\n", 203 | "\n", 204 | "\n", 205 | "Labels, Threshold_EPS, Threshold_LMTD = calc_dyn_threshold(y_test[:,0], P_EPS, y_test[:,1], P_LMTD, 2, 2)\n", 206 | "Temp = pd.DataFrame(data={'Labels':TL_Test, 'Pred_Labels': Labels})\n", 207 | "\n", 208 | "print(\"########################################################################################\")\n", 209 | "print(\"Confusion Matrix - testing:\")\n", 210 | "print(confusion_matrix(Temp['Labels'], Temp['Pred_Labels']))\n", 211 | "tn, fp, fn, tp = confusion_matrix(Temp['Labels'], Temp['Pred_Labels']).ravel()\n", 212 | "print(\"True Negative, False Positive, False Negative, True Positive {}.\".format([tn, fp, fn, tp]))\n", 213 | "print(\"False positive means false alarms\")\n", 214 | "print(\"False Negative means missed faults\")\n", 215 | "print(\"########################################################################################\")\n", 216 | "print(\"Classification Report - testing:\")\n", 217 | "print(classification_report(Temp['Labels'], Temp['Pred_Labels'], target_names=['Normal', 'Fault']))\n", 218 | "print(\"########################################################################################\")\n", 219 | "print(\"Accuracy - testing: %0.3f\" % accuracy_score(Temp['Labels'], Temp['Pred_Labels']))\n", 220 | "print(\"########################################################################################\")\n", 221 | "print(\"ROC AUC score - testing: %0.3f\" % roc_auc_score(Temp['Labels'], Temp['Pred_Labels']))\n", 222 | "print(\"########################################################################################\")\n", 223 | "########################################################################################\n", 224 | " \n", 225 | "t1 = time()\n", 226 | "print('Time taken for this trial %f' %(t1-t0))" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": null, 232 | "metadata": { 233 | "collapsed": true 234 | }, 235 | "outputs": [], 236 | "source": [] 237 | } 238 | ], 239 | "metadata": { 240 | "anaconda-cloud": {}, 241 | "kernelspec": { 242 | "display_name": "Python [Root]", 243 | "language": "python", 244 | "name": "Python [Root]" 245 | }, 246 | "language_info": { 247 | "codemirror_mode": { 248 | "name": "ipython", 249 | "version": 3 250 | }, 251 | "file_extension": ".py", 252 | "mimetype": "text/x-python", 253 | "name": "python", 254 | "nbconvert_exporter": "python", 255 | "pygments_lexer": "ipython3", 256 | "version": "3.5.4" 257 | } 258 | }, 259 | "nbformat": 4, 260 | "nbformat_minor": 1 261 | } 262 | -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-20/Refrigerant_Leak_RF_Dynamic_Threshold.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, TimeSeriesSplit, KFold, RepeatedKFold, \\\n", 14 | " train_test_split\n", 15 | "import matplotlib.pyplot as plt\n", 16 | "import seaborn as sns\n", 17 | "sns.set(color_codes=True)\n", 18 | "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer, adjusted_rand_score, \\\n", 19 | " accuracy_score, f1_score, confusion_matrix, classification_report, roc_auc_score, recall_score\n", 20 | "from time import time\n", 21 | "from sklearn.preprocessing import MinMaxScaler\n", 22 | "from sklearn.preprocessing import quantile_transform\n", 23 | "import scipy.stats as st\n", 24 | "from sklearn.feature_selection import RFE, RFECV, SelectFromModel\n", 25 | "from xgboost import XGBRegressor\n", 26 | "from sklearn.ensemble import RandomForestRegressor\n", 27 | "from sklearn.pipeline import Pipeline\n", 28 | "from sklearn.multioutput import MultiOutputRegressor\n", 29 | "import pprint as pp\n", 30 | "import datetime\n", 31 | "%matplotlib inline" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": { 38 | "collapsed": false 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "Chiller_Data = pd.read_excel('Refrigerant_Leak_Fault_Data.xlsx')" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": { 49 | "collapsed": false 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "Chiller_Data = Chiller_Data.loc[Chiller_Data['kW'] != 1.682000e-45]\n", 54 | "Chiller_Data.reset_index(drop=True, inplace=True)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": { 61 | "collapsed": false 62 | }, 63 | "outputs": [], 64 | "source": [ 65 | "Chiller_Data['Target_EPS'] = (Chiller_Data['TRC_sub'])/(Chiller_Data['TRC']-Chiller_Data['TCI'])\n", 66 | "Chiller_Data['Target_LMTD'] = (Chiller_Data['TCO']-Chiller_Data['TCI'])/np.log((Chiller_Data['TRC']-Chiller_Data['TCI'])/(Chiller_Data['TRC']-Chiller_Data['TCO']))\n", 67 | "Chiller_Data['Lag1'] = (Chiller_Data['Target_EPS'].shift(1))\n", 68 | "Chiller_Data['Lag2'] = (Chiller_Data['Target_LMTD'].shift(1))\n", 69 | "Chiller_Data.dropna(axis=0,inplace=True)\n", 70 | "#Time_data = Chiller_Data['Time (minutes)']" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": { 77 | "collapsed": true 78 | }, 79 | "outputs": [], 80 | "source": [ 81 | "y = Chiller_Data[['Target_EPS','Target_LMTD']].as_matrix()\n", 82 | "True_Labels = Chiller_Data['Label'].as_matrix()\n", 83 | "Chiller_Data.drop(['Target_EPS','Target_LMTD','Label','Time (minutes)'], axis=1, inplace=True)\n", 84 | "X = Chiller_Data.as_matrix()\n", 85 | "Feature_Names = list(Chiller_Data)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": { 92 | "collapsed": false 93 | }, 94 | "outputs": [], 95 | "source": [ 96 | "#################################################################################################\n", 97 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.55, shuffle=False)\n", 98 | "TL_train, TL_Test = train_test_split(True_Labels, test_size=0.55, shuffle=False)\n", 99 | "#DT_train, DT_Test = train_test_split(Time_data, test_size=0.55, shuffle=False)\n", 100 | "#################################################################################################" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": { 107 | "collapsed": true 108 | }, 109 | "outputs": [], 110 | "source": [ 111 | "def calc_dyn_threshold(A_EPS, P_EPS, A_LMTD, P_LMTD, I, N):\n", 112 | " # Control false alarm rates by tuning I and N. eg. increase I or N to reduce false alarms\n", 113 | " threshold_EPS = np.zeros(I-1)\n", 114 | " threshold_EPS[0:(I-1)] = P_EPS[0:(I-1)]\n", 115 | " threshold_LMTD = np.zeros(I-1)\n", 116 | " threshold_LMTD[0:(I-1)] = P_LMTD[0:(I-1)]\n", 117 | " labels = np.zeros(I-1)\n", 118 | " for k in np.arange(I,len(P_EPS)+1):\n", 119 | " mu_EPS = np.mean(P_EPS[(k-I):k])\n", 120 | " sigma_EPS = np.std(P_EPS[(k-I):k])\n", 121 | " T_EPS = mu_EPS - N*sigma_EPS\n", 122 | " threshold_EPS = np.append(threshold_EPS,T_EPS)\n", 123 | " mu_LMTD = np.mean(P_LMTD[(k-I):k])\n", 124 | " sigma_LMTD = np.std(P_LMTD[(k-I):k])\n", 125 | " T_LMTD = mu_LMTD - N*sigma_LMTD\n", 126 | " threshold_LMTD = np.append(threshold_LMTD,T_LMTD)\n", 127 | " \n", 128 | " if (A_EPS[k-1] < threshold_EPS[k-1] or A_LMTD[k-1] < threshold_LMTD[k-1]) :\n", 129 | " labels = np.append(labels,1)\n", 130 | " else:\n", 131 | " labels = np.append(labels,0)\n", 132 | " return labels, threshold_EPS, threshold_LMTD" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": { 139 | "collapsed": false 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "t0 = time()\n", 144 | "np.random.seed(7)\n", 145 | "########################################################################################\n", 146 | "# Regression\n", 147 | "kf = KFold(n_splits=10, shuffle=True, random_state=7)\n", 148 | "scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n", 149 | "\n", 150 | "rfecv = RFECV(estimator=RandomForestRegressor(n_jobs=-1), step=1, cv=kf, scoring=scoring_param, n_jobs=-1)\n", 151 | "FS_model = rfecv.fit(X_train, y_train[:,0])\n", 152 | "\n", 153 | "ranks = FS_model.ranking_\n", 154 | "FN =[]\n", 155 | "for i in range(len(ranks)):\n", 156 | " if ranks[i] == 1:\n", 157 | " FN.append(Feature_Names[i])\n", 158 | "print(FN)\n", 159 | "\n", 160 | "X = Chiller_Data[FN].as_matrix()\n", 161 | "X_train, X_test = train_test_split(X, test_size=0.55, shuffle=False)\n", 162 | "\n", 163 | "NE = [int(i) for i in np.linspace(100,1000,num=10)]\n", 164 | "p_grid = dict()\n", 165 | "p_grid = dict(n_estimators = NE)\n", 166 | "\n", 167 | "model = GridSearchCV(estimator = RandomForestRegressor(n_jobs=-1), param_grid = p_grid, scoring = scoring_param, cv = kf, \n", 168 | " n_jobs=-1)\n", 169 | "model.fit(X_train, y_train[:,0])\n", 170 | " \n", 171 | "params = model.best_params_\n", 172 | "print(\"Best Est: %s\" % (params['n_estimators']))\n", 173 | " \n", 174 | "P_EPS = model.predict(X_test)\n", 175 | "\n", 176 | "######################################################################################################\n", 177 | "\n", 178 | "rfecv = RFECV(estimator=RandomForestRegressor(n_jobs=-1), step=1, cv=kf, scoring=scoring_param, n_jobs=-1)\n", 179 | "FS_model = rfecv.fit(X_train, y_train[:,1])\n", 180 | "\n", 181 | "ranks = FS_model.ranking_\n", 182 | "FN =[]\n", 183 | "for i in range(len(ranks)):\n", 184 | " if ranks[i] == 1:\n", 185 | " FN.append(Feature_Names[i])\n", 186 | "print(FN)\n", 187 | "\n", 188 | "X = Chiller_Data[FN].as_matrix()\n", 189 | "X_train, X_test = train_test_split(X, test_size=0.55, shuffle=False)\n", 190 | "\n", 191 | "NE = [int(i) for i in np.linspace(100,1000,num=10)]\n", 192 | "p_grid = dict()\n", 193 | "p_grid = dict(n_estimators = NE)\n", 194 | "\n", 195 | "model = GridSearchCV(estimator = RandomForestRegressor(n_jobs=-1), param_grid = p_grid, scoring = scoring_param, cv = kf, \n", 196 | " n_jobs=-1)\n", 197 | "model.fit(X_train, y_train[:,1])\n", 198 | " \n", 199 | "params = model.best_params_\n", 200 | "print(\"Best Est: %s\" % (params['n_estimators']))\n", 201 | " \n", 202 | "P_LMTD = model.predict(X_test)\n", 203 | "\n", 204 | "\n", 205 | "Labels, Threshold_EPS, Threshold_LMTD = calc_dyn_threshold(y_test[:,0], P_EPS, y_test[:,1], P_LMTD, 2, 2)\n", 206 | "Temp = pd.DataFrame(data={'Labels':TL_Test, 'Pred_Labels': Labels})\n", 207 | "\n", 208 | "print(\"########################################################################################\")\n", 209 | "print(\"Confusion Matrix - testing:\")\n", 210 | "print(confusion_matrix(Temp['Labels'], Temp['Pred_Labels']))\n", 211 | "tn, fp, fn, tp = confusion_matrix(Temp['Labels'], Temp['Pred_Labels']).ravel()\n", 212 | "print(\"True Negative, False Positive, False Negative, True Positive {}.\".format([tn, fp, fn, tp]))\n", 213 | "print(\"False positive means false alarms\")\n", 214 | "print(\"False Negative means missed faults\")\n", 215 | "print(\"########################################################################################\")\n", 216 | "print(\"Classification Report - testing:\")\n", 217 | "print(classification_report(Temp['Labels'], Temp['Pred_Labels'], target_names=['Normal', 'Fault']))\n", 218 | "print(\"########################################################################################\")\n", 219 | "print(\"Accuracy - testing: %0.3f\" % accuracy_score(Temp['Labels'], Temp['Pred_Labels']))\n", 220 | "print(\"########################################################################################\")\n", 221 | "print(\"ROC AUC score - testing: %0.3f\" % roc_auc_score(Temp['Labels'], Temp['Pred_Labels']))\n", 222 | "print(\"########################################################################################\")\n", 223 | "########################################################################################\n", 224 | " \n", 225 | "t1 = time()\n", 226 | "print('Time taken for this trial %f' %(t1-t0))" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": null, 232 | "metadata": { 233 | "collapsed": true 234 | }, 235 | "outputs": [], 236 | "source": [] 237 | } 238 | ], 239 | "metadata": { 240 | "anaconda-cloud": {}, 241 | "kernelspec": { 242 | "display_name": "Python [Root]", 243 | "language": "python", 244 | "name": "Python [Root]" 245 | }, 246 | "language_info": { 247 | "codemirror_mode": { 248 | "name": "ipython", 249 | "version": 3 250 | }, 251 | "file_extension": ".py", 252 | "mimetype": "text/x-python", 253 | "name": "python", 254 | "nbconvert_exporter": "python", 255 | "pygments_lexer": "ipython3", 256 | "version": "3.5.4" 257 | } 258 | }, 259 | "nbformat": 4, 260 | "nbformat_minor": 1 261 | } 262 | -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-30/Refrigerant_Leak_RF_Dynamic_Threshold.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, TimeSeriesSplit, KFold, RepeatedKFold, \\\n", 14 | " train_test_split\n", 15 | "import matplotlib.pyplot as plt\n", 16 | "import seaborn as sns\n", 17 | "sns.set(color_codes=True)\n", 18 | "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer, adjusted_rand_score, \\\n", 19 | " accuracy_score, f1_score, confusion_matrix, classification_report, roc_auc_score, recall_score\n", 20 | "from time import time\n", 21 | "from sklearn.preprocessing import MinMaxScaler\n", 22 | "from sklearn.preprocessing import quantile_transform\n", 23 | "import scipy.stats as st\n", 24 | "from sklearn.feature_selection import RFE, RFECV, SelectFromModel\n", 25 | "from xgboost import XGBRegressor\n", 26 | "from sklearn.ensemble import RandomForestRegressor\n", 27 | "from sklearn.pipeline import Pipeline\n", 28 | "from sklearn.multioutput import MultiOutputRegressor\n", 29 | "import pprint as pp\n", 30 | "import datetime\n", 31 | "%matplotlib inline" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": { 38 | "collapsed": false 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "Chiller_Data = pd.read_excel('Refrigerant_Leak_Fault_Data.xlsx')" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": { 49 | "collapsed": false 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "Chiller_Data = Chiller_Data.loc[Chiller_Data['kW'] != 1.682000e-45]\n", 54 | "Chiller_Data.reset_index(drop=True, inplace=True)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": { 61 | "collapsed": false 62 | }, 63 | "outputs": [], 64 | "source": [ 65 | "Chiller_Data['Target_EPS'] = (Chiller_Data['TRC_sub'])/(Chiller_Data['TRC']-Chiller_Data['TCI'])\n", 66 | "Chiller_Data['Target_LMTD'] = (Chiller_Data['TCO']-Chiller_Data['TCI'])/np.log((Chiller_Data['TRC']-Chiller_Data['TCI'])/(Chiller_Data['TRC']-Chiller_Data['TCO']))\n", 67 | "Chiller_Data['Lag1'] = (Chiller_Data['Target_EPS'].shift(1))\n", 68 | "Chiller_Data['Lag2'] = (Chiller_Data['Target_LMTD'].shift(1))\n", 69 | "Chiller_Data.dropna(axis=0,inplace=True)\n", 70 | "#Time_data = Chiller_Data['Time (minutes)']" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": { 77 | "collapsed": true 78 | }, 79 | "outputs": [], 80 | "source": [ 81 | "y = Chiller_Data[['Target_EPS','Target_LMTD']].as_matrix()\n", 82 | "True_Labels = Chiller_Data['Label'].as_matrix()\n", 83 | "Chiller_Data.drop(['Target_EPS','Target_LMTD','Label','Time (minutes)'], axis=1, inplace=True)\n", 84 | "X = Chiller_Data.as_matrix()\n", 85 | "Feature_Names = list(Chiller_Data)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": { 92 | "collapsed": false 93 | }, 94 | "outputs": [], 95 | "source": [ 96 | "#################################################################################################\n", 97 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.55, shuffle=False)\n", 98 | "TL_train, TL_Test = train_test_split(True_Labels, test_size=0.55, shuffle=False)\n", 99 | "#DT_train, DT_Test = train_test_split(Time_data, test_size=0.55, shuffle=False)\n", 100 | "#################################################################################################" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": { 107 | "collapsed": true 108 | }, 109 | "outputs": [], 110 | "source": [ 111 | "def calc_dyn_threshold(A_EPS, P_EPS, A_LMTD, P_LMTD, I, N):\n", 112 | " # Control false alarm rates by tuning I and N. eg. increase I or N to reduce false alarms\n", 113 | " threshold_EPS = np.zeros(I-1)\n", 114 | " threshold_EPS[0:(I-1)] = P_EPS[0:(I-1)]\n", 115 | " threshold_LMTD = np.zeros(I-1)\n", 116 | " threshold_LMTD[0:(I-1)] = P_LMTD[0:(I-1)]\n", 117 | " labels = np.zeros(I-1)\n", 118 | " for k in np.arange(I,len(P_EPS)+1):\n", 119 | " mu_EPS = np.mean(P_EPS[(k-I):k])\n", 120 | " sigma_EPS = np.std(P_EPS[(k-I):k])\n", 121 | " T_EPS = mu_EPS - N*sigma_EPS\n", 122 | " threshold_EPS = np.append(threshold_EPS,T_EPS)\n", 123 | " mu_LMTD = np.mean(P_LMTD[(k-I):k])\n", 124 | " sigma_LMTD = np.std(P_LMTD[(k-I):k])\n", 125 | " T_LMTD = mu_LMTD - N*sigma_LMTD\n", 126 | " threshold_LMTD = np.append(threshold_LMTD,T_LMTD)\n", 127 | " \n", 128 | " if (A_EPS[k-1] < threshold_EPS[k-1] or A_LMTD[k-1] < threshold_LMTD[k-1]) :\n", 129 | " labels = np.append(labels,1)\n", 130 | " else:\n", 131 | " labels = np.append(labels,0)\n", 132 | " return labels, threshold_EPS, threshold_LMTD" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": { 139 | "collapsed": false 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "t0 = time()\n", 144 | "np.random.seed(7)\n", 145 | "########################################################################################\n", 146 | "# Regression\n", 147 | "kf = KFold(n_splits=10, shuffle=True, random_state=7)\n", 148 | "scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n", 149 | "\n", 150 | "rfecv = RFECV(estimator=RandomForestRegressor(n_jobs=-1), step=1, cv=kf, scoring=scoring_param, n_jobs=-1)\n", 151 | "FS_model = rfecv.fit(X_train, y_train[:,0])\n", 152 | "\n", 153 | "ranks = FS_model.ranking_\n", 154 | "FN =[]\n", 155 | "for i in range(len(ranks)):\n", 156 | " if ranks[i] == 1:\n", 157 | " FN.append(Feature_Names[i])\n", 158 | "print(FN)\n", 159 | "\n", 160 | "X = Chiller_Data[FN].as_matrix()\n", 161 | "X_train, X_test = train_test_split(X, test_size=0.55, shuffle=False)\n", 162 | "\n", 163 | "NE = [int(i) for i in np.linspace(100,1000,num=10)]\n", 164 | "p_grid = dict()\n", 165 | "p_grid = dict(n_estimators = NE)\n", 166 | "\n", 167 | "model = GridSearchCV(estimator = RandomForestRegressor(n_jobs=-1), param_grid = p_grid, scoring = scoring_param, cv = kf, \n", 168 | " n_jobs=-1)\n", 169 | "model.fit(X_train, y_train[:,0])\n", 170 | " \n", 171 | "params = model.best_params_\n", 172 | "print(\"Best Est: %s\" % (params['n_estimators']))\n", 173 | " \n", 174 | "P_EPS = model.predict(X_test)\n", 175 | "\n", 176 | "######################################################################################################\n", 177 | "\n", 178 | "rfecv = RFECV(estimator=RandomForestRegressor(n_jobs=-1), step=1, cv=kf, scoring=scoring_param, n_jobs=-1)\n", 179 | "FS_model = rfecv.fit(X_train, y_train[:,1])\n", 180 | "\n", 181 | "ranks = FS_model.ranking_\n", 182 | "FN =[]\n", 183 | "for i in range(len(ranks)):\n", 184 | " if ranks[i] == 1:\n", 185 | " FN.append(Feature_Names[i])\n", 186 | "print(FN)\n", 187 | "\n", 188 | "X = Chiller_Data[FN].as_matrix()\n", 189 | "X_train, X_test = train_test_split(X, test_size=0.55, shuffle=False)\n", 190 | "\n", 191 | "NE = [int(i) for i in np.linspace(100,1000,num=10)]\n", 192 | "p_grid = dict()\n", 193 | "p_grid = dict(n_estimators = NE)\n", 194 | "\n", 195 | "model = GridSearchCV(estimator = RandomForestRegressor(n_jobs=-1), param_grid = p_grid, scoring = scoring_param, cv = kf, \n", 196 | " n_jobs=-1)\n", 197 | "model.fit(X_train, y_train[:,1])\n", 198 | " \n", 199 | "params = model.best_params_\n", 200 | "print(\"Best Est: %s\" % (params['n_estimators']))\n", 201 | " \n", 202 | "P_LMTD = model.predict(X_test)\n", 203 | "\n", 204 | "\n", 205 | "Labels, Threshold_EPS, Threshold_LMTD = calc_dyn_threshold(y_test[:,0], P_EPS, y_test[:,1], P_LMTD, 2, 2)\n", 206 | "Temp = pd.DataFrame(data={'Labels':TL_Test, 'Pred_Labels': Labels})\n", 207 | "\n", 208 | "print(\"########################################################################################\")\n", 209 | "print(\"Confusion Matrix - testing:\")\n", 210 | "print(confusion_matrix(Temp['Labels'], Temp['Pred_Labels']))\n", 211 | "tn, fp, fn, tp = confusion_matrix(Temp['Labels'], Temp['Pred_Labels']).ravel()\n", 212 | "print(\"True Negative, False Positive, False Negative, True Positive {}.\".format([tn, fp, fn, tp]))\n", 213 | "print(\"False positive means false alarms\")\n", 214 | "print(\"False Negative means missed faults\")\n", 215 | "print(\"########################################################################################\")\n", 216 | "print(\"Classification Report - testing:\")\n", 217 | "print(classification_report(Temp['Labels'], Temp['Pred_Labels'], target_names=['Normal', 'Fault']))\n", 218 | "print(\"########################################################################################\")\n", 219 | "print(\"Accuracy - testing: %0.3f\" % accuracy_score(Temp['Labels'], Temp['Pred_Labels']))\n", 220 | "print(\"########################################################################################\")\n", 221 | "print(\"ROC AUC score - testing: %0.3f\" % roc_auc_score(Temp['Labels'], Temp['Pred_Labels']))\n", 222 | "print(\"########################################################################################\")\n", 223 | "########################################################################################\n", 224 | " \n", 225 | "t1 = time()\n", 226 | "print('Time taken for this trial %f' %(t1-t0))" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": null, 232 | "metadata": { 233 | "collapsed": true 234 | }, 235 | "outputs": [], 236 | "source": [] 237 | } 238 | ], 239 | "metadata": { 240 | "anaconda-cloud": {}, 241 | "kernelspec": { 242 | "display_name": "Python [Root]", 243 | "language": "python", 244 | "name": "Python [Root]" 245 | }, 246 | "language_info": { 247 | "codemirror_mode": { 248 | "name": "ipython", 249 | "version": 3 250 | }, 251 | "file_extension": ".py", 252 | "mimetype": "text/x-python", 253 | "name": "python", 254 | "nbconvert_exporter": "python", 255 | "pygments_lexer": "ipython3", 256 | "version": "3.5.4" 257 | } 258 | }, 259 | "nbformat": 4, 260 | "nbformat_minor": 1 261 | } 262 | -------------------------------------------------------------------------------- /Chapter5-EnergyModel-Comparison/Energy_Modeling_ERF_D1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "from sklearn.model_selection import GridSearchCV, TimeSeriesSplit, KFold, RepeatedKFold, train_test_split\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "import seaborn as sns\n", 16 | "sns.set(color_codes=True)\n", 17 | "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer\n", 18 | "from time import time\n", 19 | "from sklearn.preprocessing import MinMaxScaler\n", 20 | "from sklearn.preprocessing import quantile_transform\n", 21 | "import scipy.stats as st\n", 22 | "from sklearn.feature_selection import RFE, RFECV, SelectFromModel\n", 23 | "from sklearn.ensemble import ExtraTreesRegressor\n", 24 | "from sklearn.decomposition import PCA\n", 25 | "from sklearn.pipeline import Pipeline\n", 26 | "import pprint as pp\n", 27 | "import datetime\n", 28 | "%matplotlib inline" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": { 35 | "collapsed": false 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "Climate_Data = pd.read_excel('Climate_Data.xls')\n", 40 | "#######################################################################################################################\n", 41 | "Energy_Data_mean = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).mean()\n", 42 | "Energy_Data_mean = Energy_Data_mean[['Day of Week', 'Is Holiday', 'Daylight Savings', 'DHI', 'DNI', 'Dew Point', \n", 43 | " 'Temperature', 'Relative Humidity']]\n", 44 | "Energy_Data_mean.columns = ['Day_of_Week', 'Is_Holiday', 'Daylight_Savings', 'DHI_AVG', 'DNI_AVG', 'Dew Point_AVG', \n", 45 | " 'Temperature_AVG', 'Relative Humidity_AVG']\n", 46 | "#######################################################################################################################\n", 47 | "Energy_Data_sum = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).sum()\n", 48 | "Energy_Data_sum = Energy_Data_sum[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n", 49 | "Energy_Data_sum.columns = ['DHI_SUM', 'DNI_SUM', 'Dew Point_SUM', 'Temperature_SUM', 'Relative Humidity_SUM']\n", 50 | "#######################################################################################################################\n", 51 | "Energy_Data_max = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).max()\n", 52 | "Energy_Data_max = Energy_Data_max[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n", 53 | "Energy_Data_max.columns = ['DHI_MAX', 'DNI_MAX', 'Dew Point_MAX', 'Temperature_MAX', 'Relative Humidity_MAX']\n", 54 | "#######################################################################################################################\n", 55 | "Energy_Data_std = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).std()\n", 56 | "Energy_Data_std = Energy_Data_std[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n", 57 | "Energy_Data_std.columns = ['DHI_STD', 'DNI_STD', 'Dew Point_STD', 'Temperature_STD', 'Relative Humidity_STD']\n", 58 | "#######################################################################################################################\n", 59 | "Energy_Data_min = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).min()\n", 60 | "Energy_Data_min = Energy_Data_min[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n", 61 | "Energy_Data_min.columns = ['DHI_MIN', 'DNI_MIN', 'Dew Point_MIN', 'Temperature_MIN', 'Relative Humidity_MIN']\n", 62 | "#######################################################################################################################\n", 63 | "Energy_Data = pd.concat([Energy_Data_mean, Energy_Data_sum, Energy_Data_max, Energy_Data_std, Energy_Data_min], axis=1)\n", 64 | "Energy_Data.reset_index(inplace=True)\n", 65 | "Energy_Data['Energy_Consumption'] = pd.read_excel('EnergyData_D1.xlsx')\n", 66 | "#######################################################################################################################\n", 67 | "Energy_Data['Lag1'] = (Energy_Data['Energy_Consumption'].shift(1))\n", 68 | "Energy_Data.dropna(axis=0,inplace=True)\n", 69 | "#######################################################################################################################\n", 70 | "Energy_Data['Date_Time'] = pd.to_datetime(pd.DataFrame({'year': Energy_Data['Year'],'month': Energy_Data['Month'] + 1,\n", 71 | " 'day': Energy_Data['Day of Month']}))" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": { 78 | "collapsed": true 79 | }, 80 | "outputs": [], 81 | "source": [ 82 | "Feature_Names = ['Month','Day_of_Week', 'Is_Holiday', 'Daylight_Savings', 'DHI_AVG', 'DNI_AVG', 'Dew Point_AVG', \n", 83 | " 'Temperature_AVG', 'Relative Humidity_AVG', 'DHI_SUM', 'DNI_SUM', 'Dew Point_SUM', 'Temperature_SUM', \n", 84 | " 'Relative Humidity_SUM', 'DHI_MAX', 'DNI_MAX', 'Dew Point_MAX', 'Temperature_MAX', \n", 85 | " 'Relative Humidity_MAX', 'DHI_STD', 'DNI_STD', 'Dew Point_STD', 'Temperature_STD', \n", 86 | " 'Relative Humidity_STD', 'DHI_MIN', 'DNI_MIN', 'Dew Point_MIN', 'Temperature_MIN', \n", 87 | " 'Relative Humidity_MIN', 'Lag1']\n", 88 | "\n", 89 | "X = Energy_Data[Feature_Names].as_matrix()\n", 90 | "y = Energy_Data['Energy_Consumption'].as_matrix()\n", 91 | "date_time = Energy_Data['Date_Time']" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": { 98 | "collapsed": true 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "#################################################################################################\n", 103 | "# To test anomaly detector\n", 104 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, shuffle=False)\n", 105 | "DT_train, DT_Test = train_test_split(date_time, test_size=0.5, shuffle=False)\n", 106 | "#################################################################################################" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": { 113 | "collapsed": true 114 | }, 115 | "outputs": [], 116 | "source": [ 117 | "def energymodel_RF():\n", 118 | " t0 = time()\n", 119 | " np.random.seed(7)\n", 120 | " ########################################################################################\n", 121 | " # Regression\n", 122 | " kf = KFold(n_splits=5, shuffle=True)\n", 123 | " scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n", 124 | " \n", 125 | " rfecv = RFECV(estimator=ExtraTreesRegressor(n_jobs=-1), step=1, cv=kf, scoring=scoring_param)\n", 126 | " FS_model = rfecv.fit(X_train, y_train)\n", 127 | " \n", 128 | " ranks = FS_model.ranking_\n", 129 | " FN =[]\n", 130 | " for i in range(len(ranks)):\n", 131 | " if ranks[i] == 1:\n", 132 | " FN.append(Feature_Names[i]) \n", 133 | " print(FN)\n", 134 | " \n", 135 | " X = Energy_Data[FN].as_matrix()\n", 136 | " X_train_transformed, X_test_transformed = train_test_split(X, test_size=0.5, shuffle=False)\n", 137 | " \n", 138 | " p_grid = dict()\n", 139 | " p_grid = dict(n_estimators = [int(i) for i in np.linspace(100,1000,num=10)])\n", 140 | " \n", 141 | " model = GridSearchCV(estimator = ExtraTreesRegressor(n_jobs=-1), \n", 142 | " param_grid = p_grid, scoring = scoring_param, cv = kf, n_jobs=-1)\n", 143 | " model.fit(X_train_transformed, y_train)\n", 144 | " \n", 145 | " params = model.best_params_\n", 146 | " print(\"Best Est: %s\" % (params['n_estimators']))\n", 147 | " \n", 148 | " Y_Test_Pred = model.predict(X_test_transformed)\n", 149 | " \n", 150 | " rmse = np.sqrt(mean_squared_error(y_test,Y_Test_Pred))\n", 151 | " data_range = y_test.max() - y_test.min()\n", 152 | " NRMSE = (rmse/data_range) * 100.0\n", 153 | " RSQ = r2_score(y_test,Y_Test_Pred)\n", 154 | " print(\"Normalized RMSE: %0.3f\" % NRMSE)\n", 155 | " print(\"R-squared: %0.3f\" % RSQ)\n", 156 | " \n", 157 | " fig = plt.figure(figsize=(30,20))\n", 158 | " ax = fig.add_subplot(1, 1, 1)\n", 159 | " plt.scatter(y_test, Y_Test_Pred, c=\"g\", s=200, alpha=0.5)\n", 160 | " plt.xlabel(\"Target [J]\", fontsize=40)\n", 161 | " plt.ylabel(\"Predictions [J]\", fontsize=40)\n", 162 | " plt.xticks(fontsize=25)\n", 163 | " plt.yticks(fontsize=25)\n", 164 | " plt.savefig('Scatter-Target-vs-Pred-ET-D1')\n", 165 | " \n", 166 | " fig = plt.figure(figsize=(30,20))\n", 167 | " ax = fig.add_subplot(1, 1, 1)\n", 168 | " plt.plot(DT_Test.dt.to_pydatetime(), y_test, 'k.', lw=5, markersize=20, label=u'Observations')\n", 169 | " plt.plot(DT_Test.dt.to_pydatetime(), Y_Test_Pred, 'r-', lw=5, label=u'Prediction')\n", 170 | " plt.xlabel('Date Time',fontsize=40)\n", 171 | " plt.ylabel('Energy Consumption - Facility [J]',fontsize=40)\n", 172 | " plt.xticks(fontsize=25)\n", 173 | " plt.yticks(fontsize=25)\n", 174 | " plt.legend(loc='best',fontsize=30)\n", 175 | " plt.savefig('Plot-Target-vs-Pred-ET-D1')\n", 176 | " \n", 177 | " t1 = time()\n", 178 | " print('Time taken for this trial %f' %(t1-t0))\n", 179 | " \n", 180 | " return model, y_test, Y_Test_Pred" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": null, 186 | "metadata": { 187 | "collapsed": false 188 | }, 189 | "outputs": [], 190 | "source": [ 191 | "energymodel_RF()" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": { 198 | "collapsed": true 199 | }, 200 | "outputs": [], 201 | "source": [] 202 | } 203 | ], 204 | "metadata": { 205 | "anaconda-cloud": {}, 206 | "kernelspec": { 207 | "display_name": "Python [Root]", 208 | "language": "python", 209 | "name": "Python [Root]" 210 | }, 211 | "language_info": { 212 | "codemirror_mode": { 213 | "name": "ipython", 214 | "version": 3 215 | }, 216 | "file_extension": ".py", 217 | "mimetype": "text/x-python", 218 | "name": "python", 219 | "nbconvert_exporter": "python", 220 | "pygments_lexer": "ipython3", 221 | "version": "3.5.4" 222 | } 223 | }, 224 | "nbformat": 4, 225 | "nbformat_minor": 1 226 | } 227 | -------------------------------------------------------------------------------- /Chapter5-EnergyModel-Comparison/Energy_Modeling_ERF_D2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "from sklearn.model_selection import GridSearchCV, TimeSeriesSplit, KFold, RepeatedKFold, train_test_split\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "import seaborn as sns\n", 16 | "sns.set(color_codes=True)\n", 17 | "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer\n", 18 | "from time import time\n", 19 | "from sklearn.preprocessing import MinMaxScaler\n", 20 | "from sklearn.preprocessing import quantile_transform\n", 21 | "import scipy.stats as st\n", 22 | "from sklearn.feature_selection import RFE, RFECV, SelectFromModel\n", 23 | "from sklearn.ensemble import ExtraTreesRegressor\n", 24 | "from sklearn.decomposition import PCA\n", 25 | "from sklearn.pipeline import Pipeline\n", 26 | "import pprint as pp\n", 27 | "import datetime\n", 28 | "%matplotlib inline" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": { 35 | "collapsed": false 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "Climate_Data = pd.read_excel('Climate_Data.xls')\n", 40 | "#######################################################################################################################\n", 41 | "Energy_Data_mean = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).mean()\n", 42 | "Energy_Data_mean = Energy_Data_mean[['Day of Week', 'Is Holiday', 'Daylight Savings', 'DHI', 'DNI', 'Dew Point', \n", 43 | " 'Temperature', 'Relative Humidity']]\n", 44 | "Energy_Data_mean.columns = ['Day_of_Week', 'Is_Holiday', 'Daylight_Savings', 'DHI_AVG', 'DNI_AVG', 'Dew Point_AVG', \n", 45 | " 'Temperature_AVG', 'Relative Humidity_AVG']\n", 46 | "#######################################################################################################################\n", 47 | "Energy_Data_sum = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).sum()\n", 48 | "Energy_Data_sum = Energy_Data_sum[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n", 49 | "Energy_Data_sum.columns = ['DHI_SUM', 'DNI_SUM', 'Dew Point_SUM', 'Temperature_SUM', 'Relative Humidity_SUM']\n", 50 | "#######################################################################################################################\n", 51 | "Energy_Data_max = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).max()\n", 52 | "Energy_Data_max = Energy_Data_max[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n", 53 | "Energy_Data_max.columns = ['DHI_MAX', 'DNI_MAX', 'Dew Point_MAX', 'Temperature_MAX', 'Relative Humidity_MAX']\n", 54 | "#######################################################################################################################\n", 55 | "Energy_Data_std = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).std()\n", 56 | "Energy_Data_std = Energy_Data_std[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n", 57 | "Energy_Data_std.columns = ['DHI_STD', 'DNI_STD', 'Dew Point_STD', 'Temperature_STD', 'Relative Humidity_STD']\n", 58 | "#######################################################################################################################\n", 59 | "Energy_Data_min = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).min()\n", 60 | "Energy_Data_min = Energy_Data_min[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n", 61 | "Energy_Data_min.columns = ['DHI_MIN', 'DNI_MIN', 'Dew Point_MIN', 'Temperature_MIN', 'Relative Humidity_MIN']\n", 62 | "#######################################################################################################################\n", 63 | "Energy_Data = pd.concat([Energy_Data_mean, Energy_Data_sum, Energy_Data_max, Energy_Data_std, Energy_Data_min], axis=1)\n", 64 | "Energy_Data.reset_index(inplace=True)\n", 65 | "Energy_Data['Energy_Consumption'] = pd.read_excel('EnergyData_D2.xlsx')\n", 66 | "#######################################################################################################################\n", 67 | "Energy_Data['Lag1'] = (Energy_Data['Energy_Consumption'].shift(1))\n", 68 | "Energy_Data.dropna(axis=0,inplace=True)\n", 69 | "#######################################################################################################################\n", 70 | "Energy_Data['Date_Time'] = pd.to_datetime(pd.DataFrame({'year': Energy_Data['Year'],'month': Energy_Data['Month'] + 1,\n", 71 | " 'day': Energy_Data['Day of Month']}))" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": { 78 | "collapsed": true 79 | }, 80 | "outputs": [], 81 | "source": [ 82 | "Feature_Names = ['Month','Day_of_Week', 'Is_Holiday', 'Daylight_Savings', 'DHI_AVG', 'DNI_AVG', 'Dew Point_AVG', \n", 83 | " 'Temperature_AVG', 'Relative Humidity_AVG', 'DHI_SUM', 'DNI_SUM', 'Dew Point_SUM', 'Temperature_SUM', \n", 84 | " 'Relative Humidity_SUM', 'DHI_MAX', 'DNI_MAX', 'Dew Point_MAX', 'Temperature_MAX', \n", 85 | " 'Relative Humidity_MAX', 'DHI_STD', 'DNI_STD', 'Dew Point_STD', 'Temperature_STD', \n", 86 | " 'Relative Humidity_STD', 'DHI_MIN', 'DNI_MIN', 'Dew Point_MIN', 'Temperature_MIN', \n", 87 | " 'Relative Humidity_MIN', 'Lag1']\n", 88 | "\n", 89 | "X = Energy_Data[Feature_Names].as_matrix()\n", 90 | "y = Energy_Data['Energy_Consumption'].as_matrix()\n", 91 | "date_time = Energy_Data['Date_Time']" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": { 98 | "collapsed": true 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "#################################################################################################\n", 103 | "# To test anomaly detector\n", 104 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, shuffle=False)\n", 105 | "DT_train, DT_Test = train_test_split(date_time, test_size=0.5, shuffle=False)\n", 106 | "#################################################################################################" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": { 113 | "collapsed": true 114 | }, 115 | "outputs": [], 116 | "source": [ 117 | "def energymodel_RF():\n", 118 | " t0 = time()\n", 119 | " np.random.seed(7)\n", 120 | " ########################################################################################\n", 121 | " # Regression\n", 122 | " kf = KFold(n_splits=5, shuffle=True)\n", 123 | " scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n", 124 | " \n", 125 | " rfecv = RFECV(estimator=ExtraTreesRegressor(n_jobs=-1), step=1, cv=kf, scoring=scoring_param)\n", 126 | " FS_model = rfecv.fit(X_train, y_train)\n", 127 | " \n", 128 | " ranks = FS_model.ranking_\n", 129 | " FN =[]\n", 130 | " for i in range(len(ranks)):\n", 131 | " if ranks[i] == 1:\n", 132 | " FN.append(Feature_Names[i]) \n", 133 | " print(FN)\n", 134 | " \n", 135 | " X = Energy_Data[FN].as_matrix()\n", 136 | " X_train_transformed, X_test_transformed = train_test_split(X, test_size=0.5, shuffle=False)\n", 137 | " \n", 138 | " p_grid = dict()\n", 139 | " p_grid = dict(n_estimators = [int(i) for i in np.linspace(100,1000,num=10)])\n", 140 | " \n", 141 | " model = GridSearchCV(estimator = ExtraTreesRegressor(n_jobs=-1), \n", 142 | " param_grid = p_grid, scoring = scoring_param, cv = kf, n_jobs=-1)\n", 143 | " model.fit(X_train_transformed, y_train)\n", 144 | " \n", 145 | " params = model.best_params_\n", 146 | " print(\"Best Est: %s\" % (params['n_estimators']))\n", 147 | " \n", 148 | " Y_Test_Pred = model.predict(X_test_transformed)\n", 149 | " \n", 150 | " rmse = np.sqrt(mean_squared_error(y_test,Y_Test_Pred))\n", 151 | " data_range = y_test.max() - y_test.min()\n", 152 | " NRMSE = (rmse/data_range) * 100.0\n", 153 | " RSQ = r2_score(y_test,Y_Test_Pred)\n", 154 | " print(\"Normalized RMSE: %0.3f\" % NRMSE)\n", 155 | " print(\"R-squared: %0.3f\" % RSQ)\n", 156 | " \n", 157 | " fig = plt.figure(figsize=(30,20))\n", 158 | " ax = fig.add_subplot(1, 1, 1)\n", 159 | " plt.scatter(y_test, Y_Test_Pred, c=\"g\", s=200, alpha=0.5)\n", 160 | " plt.xlabel(\"Target [J]\", fontsize=40)\n", 161 | " plt.ylabel(\"Predictions [J]\", fontsize=40)\n", 162 | " plt.xticks(fontsize=25)\n", 163 | " plt.yticks(fontsize=25)\n", 164 | " plt.savefig('Scatter-Target-vs-Pred-ET-D2')\n", 165 | " \n", 166 | " fig = plt.figure(figsize=(30,20))\n", 167 | " ax = fig.add_subplot(1, 1, 1)\n", 168 | " plt.plot(DT_Test.dt.to_pydatetime(), y_test, 'k.', lw=5, markersize=20, label=u'Observations')\n", 169 | " plt.plot(DT_Test.dt.to_pydatetime(), Y_Test_Pred, 'r-', lw=5, label=u'Prediction')\n", 170 | " plt.xlabel('Date Time',fontsize=40)\n", 171 | " plt.ylabel('Energy Consumption - Facility [J]',fontsize=40)\n", 172 | " plt.xticks(fontsize=25)\n", 173 | " plt.yticks(fontsize=25)\n", 174 | " plt.legend(loc='best',fontsize=30)\n", 175 | " plt.savefig('Plot-Target-vs-Pred-ET-D2')\n", 176 | " \n", 177 | " t1 = time()\n", 178 | " print('Time taken for this trial %f' %(t1-t0))\n", 179 | " \n", 180 | " return model, y_test, Y_Test_Pred" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": null, 186 | "metadata": { 187 | "collapsed": false 188 | }, 189 | "outputs": [], 190 | "source": [ 191 | "energymodel_RF()" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": { 198 | "collapsed": true 199 | }, 200 | "outputs": [], 201 | "source": [] 202 | } 203 | ], 204 | "metadata": { 205 | "anaconda-cloud": {}, 206 | "kernelspec": { 207 | "display_name": "Python [Root]", 208 | "language": "python", 209 | "name": "Python [Root]" 210 | }, 211 | "language_info": { 212 | "codemirror_mode": { 213 | "name": "ipython", 214 | "version": 3 215 | }, 216 | "file_extension": ".py", 217 | "mimetype": "text/x-python", 218 | "name": "python", 219 | "nbconvert_exporter": "python", 220 | "pygments_lexer": "ipython3", 221 | "version": "3.5.4" 222 | } 223 | }, 224 | "nbformat": 4, 225 | "nbformat_minor": 1 226 | } 227 | -------------------------------------------------------------------------------- /Chapter5-EnergyModel-Comparison/Energy_Modeling_ERF_D3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "from sklearn.model_selection import GridSearchCV, TimeSeriesSplit, KFold, RepeatedKFold, train_test_split\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "import seaborn as sns\n", 16 | "sns.set(color_codes=True)\n", 17 | "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer\n", 18 | "from time import time\n", 19 | "from sklearn.preprocessing import MinMaxScaler\n", 20 | "from sklearn.preprocessing import quantile_transform\n", 21 | "import scipy.stats as st\n", 22 | "from sklearn.feature_selection import RFE, RFECV, SelectFromModel\n", 23 | "from sklearn.ensemble import ExtraTreesRegressor\n", 24 | "from sklearn.decomposition import PCA\n", 25 | "from sklearn.pipeline import Pipeline\n", 26 | "import pprint as pp\n", 27 | "import datetime\n", 28 | "%matplotlib inline" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": { 35 | "collapsed": false 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "Climate_Data = pd.read_excel('Climate_Data.xls')\n", 40 | "#######################################################################################################################\n", 41 | "Energy_Data_mean = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).mean()\n", 42 | "Energy_Data_mean = Energy_Data_mean[['Day of Week', 'Is Holiday', 'Daylight Savings', 'DHI', 'DNI', 'Dew Point', \n", 43 | " 'Temperature', 'Relative Humidity']]\n", 44 | "Energy_Data_mean.columns = ['Day_of_Week', 'Is_Holiday', 'Daylight_Savings', 'DHI_AVG', 'DNI_AVG', 'Dew Point_AVG', \n", 45 | " 'Temperature_AVG', 'Relative Humidity_AVG']\n", 46 | "#######################################################################################################################\n", 47 | "Energy_Data_sum = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).sum()\n", 48 | "Energy_Data_sum = Energy_Data_sum[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n", 49 | "Energy_Data_sum.columns = ['DHI_SUM', 'DNI_SUM', 'Dew Point_SUM', 'Temperature_SUM', 'Relative Humidity_SUM']\n", 50 | "#######################################################################################################################\n", 51 | "Energy_Data_max = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).max()\n", 52 | "Energy_Data_max = Energy_Data_max[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n", 53 | "Energy_Data_max.columns = ['DHI_MAX', 'DNI_MAX', 'Dew Point_MAX', 'Temperature_MAX', 'Relative Humidity_MAX']\n", 54 | "#######################################################################################################################\n", 55 | "Energy_Data_std = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).std()\n", 56 | "Energy_Data_std = Energy_Data_std[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n", 57 | "Energy_Data_std.columns = ['DHI_STD', 'DNI_STD', 'Dew Point_STD', 'Temperature_STD', 'Relative Humidity_STD']\n", 58 | "#######################################################################################################################\n", 59 | "Energy_Data_min = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).min()\n", 60 | "Energy_Data_min = Energy_Data_min[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n", 61 | "Energy_Data_min.columns = ['DHI_MIN', 'DNI_MIN', 'Dew Point_MIN', 'Temperature_MIN', 'Relative Humidity_MIN']\n", 62 | "#######################################################################################################################\n", 63 | "Energy_Data = pd.concat([Energy_Data_mean, Energy_Data_sum, Energy_Data_max, Energy_Data_std, Energy_Data_min], axis=1)\n", 64 | "Energy_Data.reset_index(inplace=True)\n", 65 | "Energy_Data['Energy_Consumption'] = pd.read_excel('EnergyData_D3.xlsx')\n", 66 | "#######################################################################################################################\n", 67 | "Energy_Data['Lag1'] = (Energy_Data['Energy_Consumption'].shift(1))\n", 68 | "Energy_Data.dropna(axis=0,inplace=True)\n", 69 | "#######################################################################################################################\n", 70 | "Energy_Data['Date_Time'] = pd.to_datetime(pd.DataFrame({'year': Energy_Data['Year'],'month': Energy_Data['Month'] + 1,\n", 71 | " 'day': Energy_Data['Day of Month']}))" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": { 78 | "collapsed": true 79 | }, 80 | "outputs": [], 81 | "source": [ 82 | "Feature_Names = ['Month','Day_of_Week', 'Is_Holiday', 'Daylight_Savings', 'DHI_AVG', 'DNI_AVG', 'Dew Point_AVG', \n", 83 | " 'Temperature_AVG', 'Relative Humidity_AVG', 'DHI_SUM', 'DNI_SUM', 'Dew Point_SUM', 'Temperature_SUM', \n", 84 | " 'Relative Humidity_SUM', 'DHI_MAX', 'DNI_MAX', 'Dew Point_MAX', 'Temperature_MAX', \n", 85 | " 'Relative Humidity_MAX', 'DHI_STD', 'DNI_STD', 'Dew Point_STD', 'Temperature_STD', \n", 86 | " 'Relative Humidity_STD', 'DHI_MIN', 'DNI_MIN', 'Dew Point_MIN', 'Temperature_MIN', \n", 87 | " 'Relative Humidity_MIN', 'Lag1']\n", 88 | "\n", 89 | "X = Energy_Data[Feature_Names].as_matrix()\n", 90 | "y = Energy_Data['Energy_Consumption'].as_matrix()\n", 91 | "date_time = Energy_Data['Date_Time']" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": { 98 | "collapsed": true 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "#################################################################################################\n", 103 | "# To test anomaly detector\n", 104 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, shuffle=False)\n", 105 | "DT_train, DT_Test = train_test_split(date_time, test_size=0.5, shuffle=False)\n", 106 | "#################################################################################################" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": { 113 | "collapsed": true 114 | }, 115 | "outputs": [], 116 | "source": [ 117 | "def energymodel_RF():\n", 118 | " t0 = time()\n", 119 | " np.random.seed(7)\n", 120 | " ########################################################################################\n", 121 | " # Regression\n", 122 | " kf = KFold(n_splits=5, shuffle=True)\n", 123 | " scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n", 124 | " \n", 125 | " rfecv = RFECV(estimator=ExtraTreesRegressor(n_jobs=-1), step=1, cv=kf, scoring=scoring_param)\n", 126 | " FS_model = rfecv.fit(X_train, y_train)\n", 127 | " \n", 128 | " ranks = FS_model.ranking_\n", 129 | " FN =[]\n", 130 | " for i in range(len(ranks)):\n", 131 | " if ranks[i] == 1:\n", 132 | " FN.append(Feature_Names[i]) \n", 133 | " print(FN)\n", 134 | " \n", 135 | " X = Energy_Data[FN].as_matrix()\n", 136 | " X_train_transformed, X_test_transformed = train_test_split(X, test_size=0.5, shuffle=False)\n", 137 | " \n", 138 | " p_grid = dict()\n", 139 | " p_grid = dict(n_estimators = [int(i) for i in np.linspace(100,1000,num=10)])\n", 140 | " \n", 141 | " model = GridSearchCV(estimator = ExtraTreesRegressor(n_jobs=-1), \n", 142 | " param_grid = p_grid, scoring = scoring_param, cv = kf, n_jobs=-1)\n", 143 | " model.fit(X_train_transformed, y_train)\n", 144 | " \n", 145 | " params = model.best_params_\n", 146 | " print(\"Best Est: %s\" % (params['n_estimators']))\n", 147 | " \n", 148 | " Y_Test_Pred = model.predict(X_test_transformed)\n", 149 | " \n", 150 | " rmse = np.sqrt(mean_squared_error(y_test,Y_Test_Pred))\n", 151 | " data_range = y_test.max() - y_test.min()\n", 152 | " NRMSE = (rmse/data_range) * 100.0\n", 153 | " RSQ = r2_score(y_test,Y_Test_Pred)\n", 154 | " print(\"Normalized RMSE: %0.3f\" % NRMSE)\n", 155 | " print(\"R-squared: %0.3f\" % RSQ)\n", 156 | " \n", 157 | " fig = plt.figure(figsize=(30,20))\n", 158 | " ax = fig.add_subplot(1, 1, 1)\n", 159 | " plt.scatter(y_test, Y_Test_Pred, c=\"g\", s=200, alpha=0.5)\n", 160 | " plt.xlabel(\"Target [J]\", fontsize=40)\n", 161 | " plt.ylabel(\"Predictions [J]\", fontsize=40)\n", 162 | " plt.xticks(fontsize=25)\n", 163 | " plt.yticks(fontsize=25)\n", 164 | " plt.savefig('Scatter-Target-vs-Pred-ET-D3')\n", 165 | " \n", 166 | " fig = plt.figure(figsize=(30,20))\n", 167 | " ax = fig.add_subplot(1, 1, 1)\n", 168 | " plt.plot(DT_Test.dt.to_pydatetime(), y_test, 'k.', lw=5, markersize=20, label=u'Observations')\n", 169 | " plt.plot(DT_Test.dt.to_pydatetime(), Y_Test_Pred, 'r-', lw=5, label=u'Prediction')\n", 170 | " plt.xlabel('Date Time',fontsize=40)\n", 171 | " plt.ylabel('Energy Consumption - Facility [J]',fontsize=40)\n", 172 | " plt.xticks(fontsize=25)\n", 173 | " plt.yticks(fontsize=25)\n", 174 | " plt.legend(loc='best',fontsize=30)\n", 175 | " plt.savefig('Plot-Target-vs-Pred-ET-D3')\n", 176 | " \n", 177 | " t1 = time()\n", 178 | " print('Time taken for this trial %f' %(t1-t0))\n", 179 | " \n", 180 | " return model, y_test, Y_Test_Pred" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": null, 186 | "metadata": { 187 | "collapsed": false 188 | }, 189 | "outputs": [], 190 | "source": [ 191 | "energymodel_RF()" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": { 198 | "collapsed": true 199 | }, 200 | "outputs": [], 201 | "source": [] 202 | } 203 | ], 204 | "metadata": { 205 | "anaconda-cloud": {}, 206 | "kernelspec": { 207 | "display_name": "Python [Root]", 208 | "language": "python", 209 | "name": "Python [Root]" 210 | }, 211 | "language_info": { 212 | "codemirror_mode": { 213 | "name": "ipython", 214 | "version": 3 215 | }, 216 | "file_extension": ".py", 217 | "mimetype": "text/x-python", 218 | "name": "python", 219 | "nbconvert_exporter": "python", 220 | "pygments_lexer": "ipython3", 221 | "version": "3.5.4" 222 | } 223 | }, 224 | "nbformat": 4, 225 | "nbformat_minor": 1 226 | } 227 | -------------------------------------------------------------------------------- /Chapter5-EnergyModel-Comparison/Energy_Modeling_RF_D1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "from sklearn.model_selection import GridSearchCV, TimeSeriesSplit, KFold, RepeatedKFold, train_test_split\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "import seaborn as sns\n", 16 | "sns.set(color_codes=True)\n", 17 | "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer\n", 18 | "from time import time\n", 19 | "from sklearn.preprocessing import MinMaxScaler\n", 20 | "from sklearn.preprocessing import quantile_transform\n", 21 | "import scipy.stats as st\n", 22 | "from sklearn.feature_selection import RFE, RFECV, SelectFromModel\n", 23 | "from sklearn.ensemble import RandomForestRegressor\n", 24 | "from sklearn.decomposition import PCA\n", 25 | "from sklearn.pipeline import Pipeline\n", 26 | "import pprint as pp\n", 27 | "import datetime\n", 28 | "%matplotlib inline" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": { 35 | "collapsed": false 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "Climate_Data = pd.read_excel('Climate_Data.xls')\n", 40 | "#######################################################################################################################\n", 41 | "Energy_Data_mean = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).mean()\n", 42 | "Energy_Data_mean = Energy_Data_mean[['Day of Week', 'Is Holiday', 'Daylight Savings', 'DHI', 'DNI', 'Dew Point', \n", 43 | " 'Temperature', 'Relative Humidity']]\n", 44 | "Energy_Data_mean.columns = ['Day_of_Week', 'Is_Holiday', 'Daylight_Savings', 'DHI_AVG', 'DNI_AVG', 'Dew Point_AVG', \n", 45 | " 'Temperature_AVG', 'Relative Humidity_AVG']\n", 46 | "#######################################################################################################################\n", 47 | "Energy_Data_sum = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).sum()\n", 48 | "Energy_Data_sum = Energy_Data_sum[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n", 49 | "Energy_Data_sum.columns = ['DHI_SUM', 'DNI_SUM', 'Dew Point_SUM', 'Temperature_SUM', 'Relative Humidity_SUM']\n", 50 | "#######################################################################################################################\n", 51 | "Energy_Data_max = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).max()\n", 52 | "Energy_Data_max = Energy_Data_max[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n", 53 | "Energy_Data_max.columns = ['DHI_MAX', 'DNI_MAX', 'Dew Point_MAX', 'Temperature_MAX', 'Relative Humidity_MAX']\n", 54 | "#######################################################################################################################\n", 55 | "Energy_Data_std = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).std()\n", 56 | "Energy_Data_std = Energy_Data_std[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n", 57 | "Energy_Data_std.columns = ['DHI_STD', 'DNI_STD', 'Dew Point_STD', 'Temperature_STD', 'Relative Humidity_STD']\n", 58 | "#######################################################################################################################\n", 59 | "Energy_Data_min = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).min()\n", 60 | "Energy_Data_min = Energy_Data_min[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n", 61 | "Energy_Data_min.columns = ['DHI_MIN', 'DNI_MIN', 'Dew Point_MIN', 'Temperature_MIN', 'Relative Humidity_MIN']\n", 62 | "#######################################################################################################################\n", 63 | "Energy_Data = pd.concat([Energy_Data_mean, Energy_Data_sum, Energy_Data_max, Energy_Data_std, Energy_Data_min], axis=1)\n", 64 | "Energy_Data.reset_index(inplace=True)\n", 65 | "Energy_Data['Energy_Consumption'] = pd.read_excel('EnergyData_D1.xlsx')\n", 66 | "#######################################################################################################################\n", 67 | "Energy_Data['Lag1'] = (Energy_Data['Energy_Consumption'].shift(1))\n", 68 | "Energy_Data.dropna(axis=0,inplace=True)\n", 69 | "#######################################################################################################################\n", 70 | "Energy_Data['Date_Time'] = pd.to_datetime(pd.DataFrame({'year': Energy_Data['Year'],'month': Energy_Data['Month'] + 1,\n", 71 | " 'day': Energy_Data['Day of Month']}))" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": { 78 | "collapsed": true 79 | }, 80 | "outputs": [], 81 | "source": [ 82 | "Feature_Names = ['Month','Day_of_Week', 'Is_Holiday', 'Daylight_Savings', 'DHI_AVG', 'DNI_AVG', 'Dew Point_AVG', \n", 83 | " 'Temperature_AVG', 'Relative Humidity_AVG', 'DHI_SUM', 'DNI_SUM', 'Dew Point_SUM', 'Temperature_SUM', \n", 84 | " 'Relative Humidity_SUM', 'DHI_MAX', 'DNI_MAX', 'Dew Point_MAX', 'Temperature_MAX', \n", 85 | " 'Relative Humidity_MAX', 'DHI_STD', 'DNI_STD', 'Dew Point_STD', 'Temperature_STD', \n", 86 | " 'Relative Humidity_STD', 'DHI_MIN', 'DNI_MIN', 'Dew Point_MIN', 'Temperature_MIN', \n", 87 | " 'Relative Humidity_MIN', 'Lag1']\n", 88 | "\n", 89 | "X = Energy_Data[Feature_Names].as_matrix()\n", 90 | "y = Energy_Data['Energy_Consumption'].as_matrix()\n", 91 | "date_time = Energy_Data['Date_Time']" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": { 98 | "collapsed": true 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "#################################################################################################\n", 103 | "# To test anomaly detector\n", 104 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, shuffle=False)\n", 105 | "DT_train, DT_Test = train_test_split(date_time, test_size=0.5, shuffle=False)\n", 106 | "#################################################################################################" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": { 113 | "collapsed": true 114 | }, 115 | "outputs": [], 116 | "source": [ 117 | "def energymodel_RF():\n", 118 | " t0 = time()\n", 119 | " np.random.seed(7)\n", 120 | " ########################################################################################\n", 121 | " # Regression\n", 122 | " kf = KFold(n_splits=5, shuffle=True)\n", 123 | " scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n", 124 | " \n", 125 | " rfecv = RFECV(estimator=RandomForestRegressor(n_jobs=-1), step=1, cv=kf, scoring=scoring_param)\n", 126 | " FS_model = rfecv.fit(X_train, y_train)\n", 127 | " \n", 128 | " ranks = FS_model.ranking_\n", 129 | " FN =[]\n", 130 | " for i in range(len(ranks)):\n", 131 | " if ranks[i] == 1:\n", 132 | " FN.append(Feature_Names[i]) \n", 133 | " print(FN)\n", 134 | " \n", 135 | " X = Energy_Data[FN].as_matrix()\n", 136 | " X_train_transformed, X_test_transformed = train_test_split(X, test_size=0.5, shuffle=False)\n", 137 | " \n", 138 | " p_grid = dict()\n", 139 | " p_grid = dict(n_estimators = [int(i) for i in np.linspace(100,1000,num=10)])\n", 140 | " \n", 141 | " model = GridSearchCV(estimator = RandomForestRegressor(n_jobs=-1), \n", 142 | " param_grid = p_grid, scoring = scoring_param, cv = kf, n_jobs=-1)\n", 143 | " model.fit(X_train_transformed, y_train)\n", 144 | " \n", 145 | " params = model.best_params_\n", 146 | " print(\"Best Est: %s\" % (params['n_estimators']))\n", 147 | " \n", 148 | " Y_Test_Pred = model.predict(X_test_transformed)\n", 149 | " \n", 150 | " rmse = np.sqrt(mean_squared_error(y_test,Y_Test_Pred))\n", 151 | " data_range = y_test.max() - y_test.min()\n", 152 | " NRMSE = (rmse/data_range) * 100.0\n", 153 | " RSQ = r2_score(y_test,Y_Test_Pred)\n", 154 | " print(\"Normalized RMSE: %0.3f\" % NRMSE)\n", 155 | " print(\"R-squared: %0.3f\" % RSQ)\n", 156 | " \n", 157 | " fig = plt.figure(figsize=(30,20))\n", 158 | " ax = fig.add_subplot(1, 1, 1)\n", 159 | " plt.scatter(y_test, Y_Test_Pred, c=\"g\", s=200, alpha=0.5)\n", 160 | " plt.xlabel(\"Target [J]\", fontsize=40)\n", 161 | " plt.ylabel(\"Predictions [J]\", fontsize=40)\n", 162 | " plt.xticks(fontsize=25)\n", 163 | " plt.yticks(fontsize=25)\n", 164 | " plt.savefig('Scatter-Target-vs-Pred-RF-D1')\n", 165 | " \n", 166 | " fig = plt.figure(figsize=(30,20))\n", 167 | " ax = fig.add_subplot(1, 1, 1)\n", 168 | " plt.plot(DT_Test.dt.to_pydatetime(), y_test, 'k.', lw=5, markersize=20, label=u'Observations')\n", 169 | " plt.plot(DT_Test.dt.to_pydatetime(), Y_Test_Pred, 'r-', lw=5, label=u'Prediction')\n", 170 | " plt.xlabel('Date Time',fontsize=40)\n", 171 | " plt.ylabel('Energy Consumption - Facility [J]',fontsize=40)\n", 172 | " plt.xticks(fontsize=25)\n", 173 | " plt.yticks(fontsize=25)\n", 174 | " plt.legend(loc='best',fontsize=30)\n", 175 | " plt.savefig('Plot-Target-vs-Pred-RF-D1')\n", 176 | " \n", 177 | " t1 = time()\n", 178 | " print('Time taken for this trial %f' %(t1-t0))\n", 179 | " \n", 180 | " return model, y_test, Y_Test_Pred" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": null, 186 | "metadata": { 187 | "collapsed": false 188 | }, 189 | "outputs": [], 190 | "source": [ 191 | "energymodel_RF()" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": { 198 | "collapsed": true 199 | }, 200 | "outputs": [], 201 | "source": [] 202 | } 203 | ], 204 | "metadata": { 205 | "anaconda-cloud": {}, 206 | "kernelspec": { 207 | "display_name": "Python [Root]", 208 | "language": "python", 209 | "name": "Python [Root]" 210 | }, 211 | "language_info": { 212 | "codemirror_mode": { 213 | "name": "ipython", 214 | "version": 3 215 | }, 216 | "file_extension": ".py", 217 | "mimetype": "text/x-python", 218 | "name": "python", 219 | "nbconvert_exporter": "python", 220 | "pygments_lexer": "ipython3", 221 | "version": "3.5.4" 222 | } 223 | }, 224 | "nbformat": 4, 225 | "nbformat_minor": 1 226 | } 227 | -------------------------------------------------------------------------------- /Chapter5-EnergyModel-Comparison/Energy_Modeling_RF_D2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "from sklearn.model_selection import GridSearchCV, TimeSeriesSplit, KFold, RepeatedKFold, train_test_split\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "import seaborn as sns\n", 16 | "sns.set(color_codes=True)\n", 17 | "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer\n", 18 | "from time import time\n", 19 | "from sklearn.preprocessing import MinMaxScaler\n", 20 | "from sklearn.preprocessing import quantile_transform\n", 21 | "import scipy.stats as st\n", 22 | "from sklearn.feature_selection import RFE, RFECV, SelectFromModel\n", 23 | "from sklearn.ensemble import RandomForestRegressor\n", 24 | "from sklearn.decomposition import PCA\n", 25 | "from sklearn.pipeline import Pipeline\n", 26 | "import pprint as pp\n", 27 | "import datetime\n", 28 | "%matplotlib inline" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": { 35 | "collapsed": false 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "Climate_Data = pd.read_excel('Climate_Data.xls')\n", 40 | "#######################################################################################################################\n", 41 | "Energy_Data_mean = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).mean()\n", 42 | "Energy_Data_mean = Energy_Data_mean[['Day of Week', 'Is Holiday', 'Daylight Savings', 'DHI', 'DNI', 'Dew Point', \n", 43 | " 'Temperature', 'Relative Humidity']]\n", 44 | "Energy_Data_mean.columns = ['Day_of_Week', 'Is_Holiday', 'Daylight_Savings', 'DHI_AVG', 'DNI_AVG', 'Dew Point_AVG', \n", 45 | " 'Temperature_AVG', 'Relative Humidity_AVG']\n", 46 | "#######################################################################################################################\n", 47 | "Energy_Data_sum = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).sum()\n", 48 | "Energy_Data_sum = Energy_Data_sum[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n", 49 | "Energy_Data_sum.columns = ['DHI_SUM', 'DNI_SUM', 'Dew Point_SUM', 'Temperature_SUM', 'Relative Humidity_SUM']\n", 50 | "#######################################################################################################################\n", 51 | "Energy_Data_max = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).max()\n", 52 | "Energy_Data_max = Energy_Data_max[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n", 53 | "Energy_Data_max.columns = ['DHI_MAX', 'DNI_MAX', 'Dew Point_MAX', 'Temperature_MAX', 'Relative Humidity_MAX']\n", 54 | "#######################################################################################################################\n", 55 | "Energy_Data_std = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).std()\n", 56 | "Energy_Data_std = Energy_Data_std[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n", 57 | "Energy_Data_std.columns = ['DHI_STD', 'DNI_STD', 'Dew Point_STD', 'Temperature_STD', 'Relative Humidity_STD']\n", 58 | "#######################################################################################################################\n", 59 | "Energy_Data_min = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).min()\n", 60 | "Energy_Data_min = Energy_Data_min[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n", 61 | "Energy_Data_min.columns = ['DHI_MIN', 'DNI_MIN', 'Dew Point_MIN', 'Temperature_MIN', 'Relative Humidity_MIN']\n", 62 | "#######################################################################################################################\n", 63 | "Energy_Data = pd.concat([Energy_Data_mean, Energy_Data_sum, Energy_Data_max, Energy_Data_std, Energy_Data_min], axis=1)\n", 64 | "Energy_Data.reset_index(inplace=True)\n", 65 | "Energy_Data['Energy_Consumption'] = pd.read_excel('EnergyData_D2.xlsx')\n", 66 | "#######################################################################################################################\n", 67 | "Energy_Data['Lag1'] = (Energy_Data['Energy_Consumption'].shift(1))\n", 68 | "Energy_Data.dropna(axis=0,inplace=True)\n", 69 | "#######################################################################################################################\n", 70 | "Energy_Data['Date_Time'] = pd.to_datetime(pd.DataFrame({'year': Energy_Data['Year'],'month': Energy_Data['Month'] + 1,\n", 71 | " 'day': Energy_Data['Day of Month']}))" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": { 78 | "collapsed": true 79 | }, 80 | "outputs": [], 81 | "source": [ 82 | "Feature_Names = ['Month','Day_of_Week', 'Is_Holiday', 'Daylight_Savings', 'DHI_AVG', 'DNI_AVG', 'Dew Point_AVG', \n", 83 | " 'Temperature_AVG', 'Relative Humidity_AVG', 'DHI_SUM', 'DNI_SUM', 'Dew Point_SUM', 'Temperature_SUM', \n", 84 | " 'Relative Humidity_SUM', 'DHI_MAX', 'DNI_MAX', 'Dew Point_MAX', 'Temperature_MAX', \n", 85 | " 'Relative Humidity_MAX', 'DHI_STD', 'DNI_STD', 'Dew Point_STD', 'Temperature_STD', \n", 86 | " 'Relative Humidity_STD', 'DHI_MIN', 'DNI_MIN', 'Dew Point_MIN', 'Temperature_MIN', \n", 87 | " 'Relative Humidity_MIN', 'Lag1']\n", 88 | "\n", 89 | "X = Energy_Data[Feature_Names].as_matrix()\n", 90 | "y = Energy_Data['Energy_Consumption'].as_matrix()\n", 91 | "date_time = Energy_Data['Date_Time']" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": { 98 | "collapsed": true 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "#################################################################################################\n", 103 | "# To test anomaly detector\n", 104 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, shuffle=False)\n", 105 | "DT_train, DT_Test = train_test_split(date_time, test_size=0.5, shuffle=False)\n", 106 | "#################################################################################################" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": { 113 | "collapsed": true 114 | }, 115 | "outputs": [], 116 | "source": [ 117 | "def energymodel_RF():\n", 118 | " t0 = time()\n", 119 | " np.random.seed(7)\n", 120 | " ########################################################################################\n", 121 | " # Regression\n", 122 | " kf = KFold(n_splits=5, shuffle=True)\n", 123 | " scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n", 124 | " \n", 125 | " rfecv = RFECV(estimator=RandomForestRegressor(n_jobs=-1), step=1, cv=kf, scoring=scoring_param)\n", 126 | " FS_model = rfecv.fit(X_train, y_train)\n", 127 | " \n", 128 | " ranks = FS_model.ranking_\n", 129 | " FN =[]\n", 130 | " for i in range(len(ranks)):\n", 131 | " if ranks[i] == 1:\n", 132 | " FN.append(Feature_Names[i]) \n", 133 | " print(FN)\n", 134 | " \n", 135 | " X = Energy_Data[FN].as_matrix()\n", 136 | " X_train_transformed, X_test_transformed = train_test_split(X, test_size=0.5, shuffle=False)\n", 137 | " \n", 138 | " p_grid = dict()\n", 139 | " p_grid = dict(n_estimators = [int(i) for i in np.linspace(100,1000,num=10)])\n", 140 | " \n", 141 | " model = GridSearchCV(estimator = RandomForestRegressor(n_jobs=-1), \n", 142 | " param_grid = p_grid, scoring = scoring_param, cv = kf, n_jobs=-1)\n", 143 | " model.fit(X_train_transformed, y_train)\n", 144 | " \n", 145 | " params = model.best_params_\n", 146 | " print(\"Best Est: %s\" % (params['n_estimators']))\n", 147 | " \n", 148 | " Y_Test_Pred = model.predict(X_test_transformed)\n", 149 | " \n", 150 | " rmse = np.sqrt(mean_squared_error(y_test,Y_Test_Pred))\n", 151 | " data_range = y_test.max() - y_test.min()\n", 152 | " NRMSE = (rmse/data_range) * 100.0\n", 153 | " RSQ = r2_score(y_test,Y_Test_Pred)\n", 154 | " print(\"Normalized RMSE: %0.3f\" % NRMSE)\n", 155 | " print(\"R-squared: %0.3f\" % RSQ)\n", 156 | " \n", 157 | " fig = plt.figure(figsize=(30,20))\n", 158 | " ax = fig.add_subplot(1, 1, 1)\n", 159 | " plt.scatter(y_test, Y_Test_Pred, c=\"g\", s=200, alpha=0.5)\n", 160 | " plt.xlabel(\"Target [J]\", fontsize=40)\n", 161 | " plt.ylabel(\"Predictions [J]\", fontsize=40)\n", 162 | " plt.xticks(fontsize=25)\n", 163 | " plt.yticks(fontsize=25)\n", 164 | " plt.savefig('Scatter-Target-vs-Pred-RF-D2')\n", 165 | " \n", 166 | " fig = plt.figure(figsize=(30,20))\n", 167 | " ax = fig.add_subplot(1, 1, 1)\n", 168 | " plt.plot(DT_Test.dt.to_pydatetime(), y_test, 'k.', lw=5, markersize=20, label=u'Observations')\n", 169 | " plt.plot(DT_Test.dt.to_pydatetime(), Y_Test_Pred, 'r-', lw=5, label=u'Prediction')\n", 170 | " plt.xlabel('Date Time',fontsize=40)\n", 171 | " plt.ylabel('Energy Consumption - Facility [J]',fontsize=40)\n", 172 | " plt.xticks(fontsize=25)\n", 173 | " plt.yticks(fontsize=25)\n", 174 | " plt.legend(loc='best',fontsize=30)\n", 175 | " plt.savefig('Plot-Target-vs-Pred-RF-D2')\n", 176 | " \n", 177 | " t1 = time()\n", 178 | " print('Time taken for this trial %f' %(t1-t0))\n", 179 | " \n", 180 | " return model, y_test, Y_Test_Pred" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": null, 186 | "metadata": { 187 | "collapsed": false 188 | }, 189 | "outputs": [], 190 | "source": [ 191 | "energymodel_RF()" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": { 198 | "collapsed": true 199 | }, 200 | "outputs": [], 201 | "source": [] 202 | } 203 | ], 204 | "metadata": { 205 | "anaconda-cloud": {}, 206 | "kernelspec": { 207 | "display_name": "Python [Root]", 208 | "language": "python", 209 | "name": "Python [Root]" 210 | }, 211 | "language_info": { 212 | "codemirror_mode": { 213 | "name": "ipython", 214 | "version": 3 215 | }, 216 | "file_extension": ".py", 217 | "mimetype": "text/x-python", 218 | "name": "python", 219 | "nbconvert_exporter": "python", 220 | "pygments_lexer": "ipython3", 221 | "version": "3.5.4" 222 | } 223 | }, 224 | "nbformat": 4, 225 | "nbformat_minor": 1 226 | } 227 | -------------------------------------------------------------------------------- /Chapter5-EnergyModel-Comparison/Energy_Modeling_RF_D3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "from sklearn.model_selection import GridSearchCV, TimeSeriesSplit, KFold, RepeatedKFold, train_test_split\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "import seaborn as sns\n", 16 | "sns.set(color_codes=True)\n", 17 | "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer\n", 18 | "from time import time\n", 19 | "from sklearn.preprocessing import MinMaxScaler\n", 20 | "from sklearn.preprocessing import quantile_transform\n", 21 | "import scipy.stats as st\n", 22 | "from sklearn.feature_selection import RFE, RFECV, SelectFromModel\n", 23 | "from sklearn.ensemble import RandomForestRegressor\n", 24 | "from sklearn.decomposition import PCA\n", 25 | "from sklearn.pipeline import Pipeline\n", 26 | "import pprint as pp\n", 27 | "import datetime\n", 28 | "%matplotlib inline" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": { 35 | "collapsed": false 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "Climate_Data = pd.read_excel('Climate_Data.xls')\n", 40 | "#######################################################################################################################\n", 41 | "Energy_Data_mean = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).mean()\n", 42 | "Energy_Data_mean = Energy_Data_mean[['Day of Week', 'Is Holiday', 'Daylight Savings', 'DHI', 'DNI', 'Dew Point', \n", 43 | " 'Temperature', 'Relative Humidity']]\n", 44 | "Energy_Data_mean.columns = ['Day_of_Week', 'Is_Holiday', 'Daylight_Savings', 'DHI_AVG', 'DNI_AVG', 'Dew Point_AVG', \n", 45 | " 'Temperature_AVG', 'Relative Humidity_AVG']\n", 46 | "#######################################################################################################################\n", 47 | "Energy_Data_sum = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).sum()\n", 48 | "Energy_Data_sum = Energy_Data_sum[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n", 49 | "Energy_Data_sum.columns = ['DHI_SUM', 'DNI_SUM', 'Dew Point_SUM', 'Temperature_SUM', 'Relative Humidity_SUM']\n", 50 | "#######################################################################################################################\n", 51 | "Energy_Data_max = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).max()\n", 52 | "Energy_Data_max = Energy_Data_max[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n", 53 | "Energy_Data_max.columns = ['DHI_MAX', 'DNI_MAX', 'Dew Point_MAX', 'Temperature_MAX', 'Relative Humidity_MAX']\n", 54 | "#######################################################################################################################\n", 55 | "Energy_Data_std = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).std()\n", 56 | "Energy_Data_std = Energy_Data_std[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n", 57 | "Energy_Data_std.columns = ['DHI_STD', 'DNI_STD', 'Dew Point_STD', 'Temperature_STD', 'Relative Humidity_STD']\n", 58 | "#######################################################################################################################\n", 59 | "Energy_Data_min = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).min()\n", 60 | "Energy_Data_min = Energy_Data_min[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n", 61 | "Energy_Data_min.columns = ['DHI_MIN', 'DNI_MIN', 'Dew Point_MIN', 'Temperature_MIN', 'Relative Humidity_MIN']\n", 62 | "#######################################################################################################################\n", 63 | "Energy_Data = pd.concat([Energy_Data_mean, Energy_Data_sum, Energy_Data_max, Energy_Data_std, Energy_Data_min], axis=1)\n", 64 | "Energy_Data.reset_index(inplace=True)\n", 65 | "Energy_Data['Energy_Consumption'] = pd.read_excel('EnergyData_D3.xlsx')\n", 66 | "#######################################################################################################################\n", 67 | "Energy_Data['Lag1'] = (Energy_Data['Energy_Consumption'].shift(1))\n", 68 | "Energy_Data.dropna(axis=0,inplace=True)\n", 69 | "#######################################################################################################################\n", 70 | "Energy_Data['Date_Time'] = pd.to_datetime(pd.DataFrame({'year': Energy_Data['Year'],'month': Energy_Data['Month'] + 1,\n", 71 | " 'day': Energy_Data['Day of Month']}))" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": { 78 | "collapsed": true 79 | }, 80 | "outputs": [], 81 | "source": [ 82 | "Feature_Names = ['Month','Day_of_Week', 'Is_Holiday', 'Daylight_Savings', 'DHI_AVG', 'DNI_AVG', 'Dew Point_AVG', \n", 83 | " 'Temperature_AVG', 'Relative Humidity_AVG', 'DHI_SUM', 'DNI_SUM', 'Dew Point_SUM', 'Temperature_SUM', \n", 84 | " 'Relative Humidity_SUM', 'DHI_MAX', 'DNI_MAX', 'Dew Point_MAX', 'Temperature_MAX', \n", 85 | " 'Relative Humidity_MAX', 'DHI_STD', 'DNI_STD', 'Dew Point_STD', 'Temperature_STD', \n", 86 | " 'Relative Humidity_STD', 'DHI_MIN', 'DNI_MIN', 'Dew Point_MIN', 'Temperature_MIN', \n", 87 | " 'Relative Humidity_MIN', 'Lag1']\n", 88 | "\n", 89 | "X = Energy_Data[Feature_Names].as_matrix()\n", 90 | "y = Energy_Data['Energy_Consumption'].as_matrix()\n", 91 | "date_time = Energy_Data['Date_Time']" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": { 98 | "collapsed": true 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "#################################################################################################\n", 103 | "# To test anomaly detector\n", 104 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, shuffle=False)\n", 105 | "DT_train, DT_Test = train_test_split(date_time, test_size=0.5, shuffle=False)\n", 106 | "#################################################################################################" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": { 113 | "collapsed": true 114 | }, 115 | "outputs": [], 116 | "source": [ 117 | "def energymodel_RF():\n", 118 | " t0 = time()\n", 119 | " np.random.seed(7)\n", 120 | " ########################################################################################\n", 121 | " # Regression\n", 122 | " kf = KFold(n_splits=5, shuffle=True)\n", 123 | " scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n", 124 | " \n", 125 | " rfecv = RFECV(estimator=RandomForestRegressor(n_jobs=-1), step=1, cv=kf, scoring=scoring_param)\n", 126 | " FS_model = rfecv.fit(X_train, y_train)\n", 127 | " \n", 128 | " ranks = FS_model.ranking_\n", 129 | " FN =[]\n", 130 | " for i in range(len(ranks)):\n", 131 | " if ranks[i] == 1:\n", 132 | " FN.append(Feature_Names[i]) \n", 133 | " print(FN)\n", 134 | " \n", 135 | " X = Energy_Data[FN].as_matrix()\n", 136 | " X_train_transformed, X_test_transformed = train_test_split(X, test_size=0.5, shuffle=False)\n", 137 | " \n", 138 | " p_grid = dict()\n", 139 | " p_grid = dict(n_estimators = [int(i) for i in np.linspace(100,1000,num=10)])\n", 140 | " \n", 141 | " model = GridSearchCV(estimator = RandomForestRegressor(n_jobs=-1), \n", 142 | " param_grid = p_grid, scoring = scoring_param, cv = kf, n_jobs=-1)\n", 143 | " model.fit(X_train_transformed, y_train)\n", 144 | " \n", 145 | " params = model.best_params_\n", 146 | " print(\"Best Est: %s\" % (params['n_estimators']))\n", 147 | " \n", 148 | " Y_Test_Pred = model.predict(X_test_transformed)\n", 149 | " \n", 150 | " rmse = np.sqrt(mean_squared_error(y_test,Y_Test_Pred))\n", 151 | " data_range = y_test.max() - y_test.min()\n", 152 | " NRMSE = (rmse/data_range) * 100.0\n", 153 | " RSQ = r2_score(y_test,Y_Test_Pred)\n", 154 | " print(\"Normalized RMSE: %0.3f\" % NRMSE)\n", 155 | " print(\"R-squared: %0.3f\" % RSQ)\n", 156 | " \n", 157 | " fig = plt.figure(figsize=(30,20))\n", 158 | " ax = fig.add_subplot(1, 1, 1)\n", 159 | " plt.scatter(y_test, Y_Test_Pred, c=\"g\", s=200, alpha=0.5)\n", 160 | " plt.xlabel(\"Target [J]\", fontsize=40)\n", 161 | " plt.ylabel(\"Predictions [J]\", fontsize=40)\n", 162 | " plt.xticks(fontsize=25)\n", 163 | " plt.yticks(fontsize=25)\n", 164 | " plt.savefig('Scatter-Target-vs-Pred-RF-D3')\n", 165 | " \n", 166 | " fig = plt.figure(figsize=(30,20))\n", 167 | " ax = fig.add_subplot(1, 1, 1)\n", 168 | " plt.plot(DT_Test.dt.to_pydatetime(), y_test, 'k.', lw=5, markersize=20, label=u'Observations')\n", 169 | " plt.plot(DT_Test.dt.to_pydatetime(), Y_Test_Pred, 'r-', lw=5, label=u'Prediction')\n", 170 | " plt.xlabel('Date Time',fontsize=40)\n", 171 | " plt.ylabel('Energy Consumption - Facility [J]',fontsize=40)\n", 172 | " plt.xticks(fontsize=25)\n", 173 | " plt.yticks(fontsize=25)\n", 174 | " plt.legend(loc='best',fontsize=30)\n", 175 | " plt.savefig('Plot-Target-vs-Pred-RF-D3')\n", 176 | " \n", 177 | " t1 = time()\n", 178 | " print('Time taken for this trial %f' %(t1-t0))\n", 179 | " \n", 180 | " return model, y_test, Y_Test_Pred" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": null, 186 | "metadata": { 187 | "collapsed": false 188 | }, 189 | "outputs": [], 190 | "source": [ 191 | "energymodel_RF()" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": { 198 | "collapsed": true 199 | }, 200 | "outputs": [], 201 | "source": [] 202 | } 203 | ], 204 | "metadata": { 205 | "anaconda-cloud": {}, 206 | "kernelspec": { 207 | "display_name": "Python [Root]", 208 | "language": "python", 209 | "name": "Python [Root]" 210 | }, 211 | "language_info": { 212 | "codemirror_mode": { 213 | "name": "ipython", 214 | "version": 3 215 | }, 216 | "file_extension": ".py", 217 | "mimetype": "text/x-python", 218 | "name": "python", 219 | "nbconvert_exporter": "python", 220 | "pygments_lexer": "ipython3", 221 | "version": "3.5.4" 222 | } 223 | }, 224 | "nbformat": 4, 225 | "nbformat_minor": 1 226 | } 227 | -------------------------------------------------------------------------------- /Chapter5-EnergyModel-Comparison/Energy_Modeling_SVM_D1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "from sklearn.model_selection import GridSearchCV, TimeSeriesSplit, KFold, RepeatedKFold, train_test_split\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "import seaborn as sns\n", 16 | "sns.set(color_codes=True)\n", 17 | "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer\n", 18 | "from time import time\n", 19 | "from sklearn.preprocessing import MinMaxScaler\n", 20 | "from sklearn.preprocessing import quantile_transform\n", 21 | "import scipy.stats as st\n", 22 | "from sklearn.feature_selection import RFE, RFECV, SelectFromModel, mutual_info_regression, SelectKBest\n", 23 | "from xgboost import XGBRegressor\n", 24 | "from sklearn.svm import SVR\n", 25 | "from sklearn.decomposition import PCA\n", 26 | "from sklearn.pipeline import Pipeline\n", 27 | "import pprint as pp\n", 28 | "import datetime\n", 29 | "%matplotlib inline" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": { 36 | "collapsed": false 37 | }, 38 | "outputs": [], 39 | "source": [ 40 | "Climate_Data = pd.read_excel('Climate_Data.xls')\n", 41 | "#######################################################################################################################\n", 42 | "Energy_Data_mean = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).mean()\n", 43 | "Energy_Data_mean = Energy_Data_mean[['Day of Week', 'Is Holiday', 'Daylight Savings', 'DHI', 'DNI', 'Dew Point', \n", 44 | " 'Temperature', 'Relative Humidity']]\n", 45 | "Energy_Data_mean.columns = ['Day_of_Week', 'Is_Holiday', 'Daylight_Savings', 'DHI_AVG', 'DNI_AVG', 'Dew Point_AVG', \n", 46 | " 'Temperature_AVG', 'Relative Humidity_AVG']\n", 47 | "#######################################################################################################################\n", 48 | "Energy_Data_sum = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).sum()\n", 49 | "Energy_Data_sum = Energy_Data_sum[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n", 50 | "Energy_Data_sum.columns = ['DHI_SUM', 'DNI_SUM', 'Dew Point_SUM', 'Temperature_SUM', 'Relative Humidity_SUM']\n", 51 | "#######################################################################################################################\n", 52 | "Energy_Data_max = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).max()\n", 53 | "Energy_Data_max = Energy_Data_max[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n", 54 | "Energy_Data_max.columns = ['DHI_MAX', 'DNI_MAX', 'Dew Point_MAX', 'Temperature_MAX', 'Relative Humidity_MAX']\n", 55 | "#######################################################################################################################\n", 56 | "Energy_Data_std = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).std()\n", 57 | "Energy_Data_std = Energy_Data_std[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n", 58 | "Energy_Data_std.columns = ['DHI_STD', 'DNI_STD', 'Dew Point_STD', 'Temperature_STD', 'Relative Humidity_STD']\n", 59 | "#######################################################################################################################\n", 60 | "Energy_Data_min = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).min()\n", 61 | "Energy_Data_min = Energy_Data_min[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n", 62 | "Energy_Data_min.columns = ['DHI_MIN', 'DNI_MIN', 'Dew Point_MIN', 'Temperature_MIN', 'Relative Humidity_MIN']\n", 63 | "#######################################################################################################################\n", 64 | "Energy_Data = pd.concat([Energy_Data_mean, Energy_Data_sum, Energy_Data_max, Energy_Data_std, Energy_Data_min], axis=1)\n", 65 | "Energy_Data.reset_index(inplace=True)\n", 66 | "Energy_Data['Energy_Consumption'] = pd.read_excel('EnergyData_D1.xlsx')\n", 67 | "#######################################################################################################################\n", 68 | "Energy_Data['Lag1'] = (Energy_Data['Energy_Consumption'].shift(1))\n", 69 | "Energy_Data.dropna(axis=0,inplace=True)\n", 70 | "#######################################################################################################################\n", 71 | "Energy_Data['Date_Time'] = pd.to_datetime(pd.DataFrame({'year': Energy_Data['Year'],'month': Energy_Data['Month'] + 1,\n", 72 | " 'day': Energy_Data['Day of Month']}))" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": { 79 | "collapsed": true 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "Feature_Names = ['Month','Day_of_Week', 'Is_Holiday', 'Daylight_Savings', 'DHI_AVG', 'DNI_AVG', 'Dew Point_AVG', \n", 84 | " 'Temperature_AVG', 'Relative Humidity_AVG', 'DHI_SUM', 'DNI_SUM', 'Dew Point_SUM', 'Temperature_SUM', \n", 85 | " 'Relative Humidity_SUM', 'DHI_MAX', 'DNI_MAX', 'Dew Point_MAX', 'Temperature_MAX', \n", 86 | " 'Relative Humidity_MAX', 'DHI_STD', 'DNI_STD', 'Dew Point_STD', 'Temperature_STD', \n", 87 | " 'Relative Humidity_STD', 'DHI_MIN', 'DNI_MIN', 'Dew Point_MIN', 'Temperature_MIN', \n", 88 | " 'Relative Humidity_MIN', 'Lag1']\n", 89 | "\n", 90 | "X = Energy_Data[Feature_Names].as_matrix()\n", 91 | "y = Energy_Data['Energy_Consumption'].as_matrix()\n", 92 | "date_time = Energy_Data['Date_Time']" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": { 99 | "collapsed": true 100 | }, 101 | "outputs": [], 102 | "source": [ 103 | "#################################################################################################\n", 104 | "# To test anomaly detector\n", 105 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, shuffle=False)\n", 106 | "DT_train, DT_Test = train_test_split(date_time, test_size=0.5, shuffle=False)\n", 107 | "#################################################################################################" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": { 114 | "collapsed": false 115 | }, 116 | "outputs": [], 117 | "source": [ 118 | "def energymodel_SVM():\n", 119 | " t0 = time()\n", 120 | " np.random.seed(7)\n", 121 | " ########################################################################################\n", 122 | " # Regression\n", 123 | " kf = KFold(n_splits=5, shuffle=True)\n", 124 | " scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n", 125 | " \n", 126 | " estimators = []\n", 127 | " estimators.append(('standardize', MinMaxScaler()))\n", 128 | " estimators.append(('FS', SelectKBest(mutual_info_regression)))\n", 129 | " estimators.append(('SVM', SVR()))\n", 130 | " pipe = Pipeline(estimators)\n", 131 | " \n", 132 | " y_train_scaled = (y_train - y_train.min())/(y_train.max() - y_train.min())\n", 133 | " \n", 134 | " p_grid = dict(FS__k = [int(i) for i in np.arange(1,len(Feature_Names)+1,1)],\n", 135 | " SVM__gamma = np.logspace(-3, 0, 4),\n", 136 | " SVM__C = np.logspace(0, 3, 4))\n", 137 | " \n", 138 | " model = GridSearchCV(estimator = pipe, param_grid = p_grid, scoring = scoring_param, cv = kf, n_jobs=-1)\n", 139 | " model.fit(X_train, y_train_scaled)\n", 140 | " \n", 141 | " params = model.best_params_\n", 142 | " print(\"Best best k: %s Best gamma: %f Best C: %s\" % (params['FS__k'], params['SVM__gamma'], params['SVM__C']))\n", 143 | " \n", 144 | " \n", 145 | " Y_Test_Pred_scaled = model.predict(X_test)\n", 146 | " Y_Test_Pred = (Y_Test_Pred_scaled*(y_train.max()-y_train.min()))+y_train.min()\n", 147 | " \n", 148 | " rmse = np.sqrt(mean_squared_error(y_test,Y_Test_Pred))\n", 149 | " data_range = y_test.max() - y_test.min()\n", 150 | " NRMSE = (rmse/data_range) * 100.0\n", 151 | " RSQ = r2_score(y_test,Y_Test_Pred)\n", 152 | " print(\"Normalized RMSE: %0.3f\" % NRMSE)\n", 153 | " print(\"R-squared: %0.3f\" % RSQ)\n", 154 | " \n", 155 | " fig = plt.figure(figsize=(30,20))\n", 156 | " ax = fig.add_subplot(1, 1, 1)\n", 157 | " plt.scatter(y_test, Y_Test_Pred, c=\"g\", s=200, alpha=0.5)\n", 158 | " plt.xlabel(\"Target [J]\", fontsize=40)\n", 159 | " plt.ylabel(\"Predictions [J]\", fontsize=40)\n", 160 | " plt.xticks(fontsize=25)\n", 161 | " plt.yticks(fontsize=25)\n", 162 | " plt.savefig('Scatter-Target-vs-Pred-SVM-D1')\n", 163 | " \n", 164 | " fig = plt.figure(figsize=(30,20))\n", 165 | " ax = fig.add_subplot(1, 1, 1)\n", 166 | " plt.plot(DT_Test.dt.to_pydatetime(), y_test, 'k.', lw=5, markersize=20, label=u'Observations')\n", 167 | " plt.plot(DT_Test.dt.to_pydatetime(), Y_Test_Pred, 'r-', lw=5, label=u'Prediction')\n", 168 | " plt.xlabel('Date Time',fontsize=40)\n", 169 | " plt.ylabel('Energy Consumption - Facility [J]',fontsize=40)\n", 170 | " plt.xticks(fontsize=25)\n", 171 | " plt.yticks(fontsize=25)\n", 172 | " plt.legend(loc='best',fontsize=30)\n", 173 | " plt.savefig('Plot-Target-vs-Pred-SVM-D1')\n", 174 | " \n", 175 | " t1 = time()\n", 176 | " print('Time taken for this trial %f' %(t1-t0))\n", 177 | " \n", 178 | " return model, y_test, Y_Test_Pred" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "metadata": { 185 | "collapsed": false 186 | }, 187 | "outputs": [], 188 | "source": [ 189 | "energymodel_SVM()" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": { 196 | "collapsed": true 197 | }, 198 | "outputs": [], 199 | "source": [] 200 | } 201 | ], 202 | "metadata": { 203 | "anaconda-cloud": {}, 204 | "kernelspec": { 205 | "display_name": "Python [Root]", 206 | "language": "python", 207 | "name": "Python [Root]" 208 | }, 209 | "language_info": { 210 | "codemirror_mode": { 211 | "name": "ipython", 212 | "version": 3 213 | }, 214 | "file_extension": ".py", 215 | "mimetype": "text/x-python", 216 | "name": "python", 217 | "nbconvert_exporter": "python", 218 | "pygments_lexer": "ipython3", 219 | "version": "3.5.4" 220 | } 221 | }, 222 | "nbformat": 4, 223 | "nbformat_minor": 1 224 | } 225 | -------------------------------------------------------------------------------- /Chapter5-EnergyModel-Comparison/Energy_Modeling_SVM_D2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "from sklearn.model_selection import GridSearchCV, TimeSeriesSplit, KFold, RepeatedKFold, train_test_split\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "import seaborn as sns\n", 16 | "sns.set(color_codes=True)\n", 17 | "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer\n", 18 | "from time import time\n", 19 | "from sklearn.preprocessing import MinMaxScaler\n", 20 | "from sklearn.preprocessing import quantile_transform\n", 21 | "import scipy.stats as st\n", 22 | "from sklearn.feature_selection import RFE, RFECV, SelectFromModel, mutual_info_regression, SelectKBest\n", 23 | "from xgboost import XGBRegressor\n", 24 | "from sklearn.svm import SVR\n", 25 | "from sklearn.decomposition import PCA\n", 26 | "from sklearn.pipeline import Pipeline\n", 27 | "import pprint as pp\n", 28 | "import datetime\n", 29 | "%matplotlib inline" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": { 36 | "collapsed": false 37 | }, 38 | "outputs": [], 39 | "source": [ 40 | "Climate_Data = pd.read_excel('Climate_Data.xls')\n", 41 | "#######################################################################################################################\n", 42 | "Energy_Data_mean = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).mean()\n", 43 | "Energy_Data_mean = Energy_Data_mean[['Day of Week', 'Is Holiday', 'Daylight Savings', 'DHI', 'DNI', 'Dew Point', \n", 44 | " 'Temperature', 'Relative Humidity']]\n", 45 | "Energy_Data_mean.columns = ['Day_of_Week', 'Is_Holiday', 'Daylight_Savings', 'DHI_AVG', 'DNI_AVG', 'Dew Point_AVG', \n", 46 | " 'Temperature_AVG', 'Relative Humidity_AVG']\n", 47 | "#######################################################################################################################\n", 48 | "Energy_Data_sum = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).sum()\n", 49 | "Energy_Data_sum = Energy_Data_sum[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n", 50 | "Energy_Data_sum.columns = ['DHI_SUM', 'DNI_SUM', 'Dew Point_SUM', 'Temperature_SUM', 'Relative Humidity_SUM']\n", 51 | "#######################################################################################################################\n", 52 | "Energy_Data_max = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).max()\n", 53 | "Energy_Data_max = Energy_Data_max[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n", 54 | "Energy_Data_max.columns = ['DHI_MAX', 'DNI_MAX', 'Dew Point_MAX', 'Temperature_MAX', 'Relative Humidity_MAX']\n", 55 | "#######################################################################################################################\n", 56 | "Energy_Data_std = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).std()\n", 57 | "Energy_Data_std = Energy_Data_std[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n", 58 | "Energy_Data_std.columns = ['DHI_STD', 'DNI_STD', 'Dew Point_STD', 'Temperature_STD', 'Relative Humidity_STD']\n", 59 | "#######################################################################################################################\n", 60 | "Energy_Data_min = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).min()\n", 61 | "Energy_Data_min = Energy_Data_min[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n", 62 | "Energy_Data_min.columns = ['DHI_MIN', 'DNI_MIN', 'Dew Point_MIN', 'Temperature_MIN', 'Relative Humidity_MIN']\n", 63 | "#######################################################################################################################\n", 64 | "Energy_Data = pd.concat([Energy_Data_mean, Energy_Data_sum, Energy_Data_max, Energy_Data_std, Energy_Data_min], axis=1)\n", 65 | "Energy_Data.reset_index(inplace=True)\n", 66 | "Energy_Data['Energy_Consumption'] = pd.read_excel('EnergyData_D2.xlsx')\n", 67 | "#######################################################################################################################\n", 68 | "Energy_Data['Lag1'] = (Energy_Data['Energy_Consumption'].shift(1))\n", 69 | "Energy_Data.dropna(axis=0,inplace=True)\n", 70 | "#######################################################################################################################\n", 71 | "Energy_Data['Date_Time'] = pd.to_datetime(pd.DataFrame({'year': Energy_Data['Year'],'month': Energy_Data['Month'] + 1,\n", 72 | " 'day': Energy_Data['Day of Month']}))" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": { 79 | "collapsed": true 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "Feature_Names = ['Month','Day_of_Week', 'Is_Holiday', 'Daylight_Savings', 'DHI_AVG', 'DNI_AVG', 'Dew Point_AVG', \n", 84 | " 'Temperature_AVG', 'Relative Humidity_AVG', 'DHI_SUM', 'DNI_SUM', 'Dew Point_SUM', 'Temperature_SUM', \n", 85 | " 'Relative Humidity_SUM', 'DHI_MAX', 'DNI_MAX', 'Dew Point_MAX', 'Temperature_MAX', \n", 86 | " 'Relative Humidity_MAX', 'DHI_STD', 'DNI_STD', 'Dew Point_STD', 'Temperature_STD', \n", 87 | " 'Relative Humidity_STD', 'DHI_MIN', 'DNI_MIN', 'Dew Point_MIN', 'Temperature_MIN', \n", 88 | " 'Relative Humidity_MIN', 'Lag1']\n", 89 | "\n", 90 | "X = Energy_Data[Feature_Names].as_matrix()\n", 91 | "y = Energy_Data['Energy_Consumption'].as_matrix()\n", 92 | "date_time = Energy_Data['Date_Time']" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": { 99 | "collapsed": true 100 | }, 101 | "outputs": [], 102 | "source": [ 103 | "#################################################################################################\n", 104 | "# To test anomaly detector\n", 105 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, shuffle=False)\n", 106 | "DT_train, DT_Test = train_test_split(date_time, test_size=0.5, shuffle=False)\n", 107 | "#################################################################################################" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": { 114 | "collapsed": false 115 | }, 116 | "outputs": [], 117 | "source": [ 118 | "def energymodel_SVM():\n", 119 | " t0 = time()\n", 120 | " np.random.seed(7)\n", 121 | " ########################################################################################\n", 122 | " # Regression\n", 123 | " kf = KFold(n_splits=5, shuffle=True)\n", 124 | " scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n", 125 | " \n", 126 | " estimators = []\n", 127 | " estimators.append(('standardize', MinMaxScaler()))\n", 128 | " estimators.append(('FS', SelectKBest(mutual_info_regression)))\n", 129 | " estimators.append(('SVM', SVR()))\n", 130 | " pipe = Pipeline(estimators)\n", 131 | " \n", 132 | " y_train_scaled = (y_train - y_train.min())/(y_train.max() - y_train.min())\n", 133 | " \n", 134 | " p_grid = dict(FS__k = [int(i) for i in np.arange(1,len(Feature_Names)+1,1)],\n", 135 | " SVM__gamma = np.logspace(-3, 0, 4),\n", 136 | " SVM__C = np.logspace(0, 3, 4))\n", 137 | " \n", 138 | " model = GridSearchCV(estimator = pipe, param_grid = p_grid, scoring = scoring_param, cv = kf, n_jobs=-1)\n", 139 | " model.fit(X_train, y_train_scaled)\n", 140 | " \n", 141 | " params = model.best_params_\n", 142 | " print(\"Best best k: %s Best gamma: %f Best C: %s\" % (params['FS__k'], params['SVM__gamma'], params['SVM__C']))\n", 143 | " \n", 144 | " \n", 145 | " Y_Test_Pred_scaled = model.predict(X_test)\n", 146 | " Y_Test_Pred = (Y_Test_Pred_scaled*(y_train.max()-y_train.min()))+y_train.min()\n", 147 | " \n", 148 | " rmse = np.sqrt(mean_squared_error(y_test,Y_Test_Pred))\n", 149 | " data_range = y_test.max() - y_test.min()\n", 150 | " NRMSE = (rmse/data_range) * 100.0\n", 151 | " RSQ = r2_score(y_test,Y_Test_Pred)\n", 152 | " print(\"Normalized RMSE: %0.3f\" % NRMSE)\n", 153 | " print(\"R-squared: %0.3f\" % RSQ)\n", 154 | " \n", 155 | " fig = plt.figure(figsize=(30,20))\n", 156 | " ax = fig.add_subplot(1, 1, 1)\n", 157 | " plt.scatter(y_test, Y_Test_Pred, c=\"g\", s=200, alpha=0.5)\n", 158 | " plt.xlabel(\"Target [J]\", fontsize=40)\n", 159 | " plt.ylabel(\"Predictions [J]\", fontsize=40)\n", 160 | " plt.xticks(fontsize=25)\n", 161 | " plt.yticks(fontsize=25)\n", 162 | " plt.savefig('Scatter-Target-vs-Pred-SVM-D2')\n", 163 | " \n", 164 | " fig = plt.figure(figsize=(30,20))\n", 165 | " ax = fig.add_subplot(1, 1, 1)\n", 166 | " plt.plot(DT_Test.dt.to_pydatetime(), y_test, 'k.', lw=5, markersize=20, label=u'Observations')\n", 167 | " plt.plot(DT_Test.dt.to_pydatetime(), Y_Test_Pred, 'r-', lw=5, label=u'Prediction')\n", 168 | " plt.xlabel('Date Time',fontsize=40)\n", 169 | " plt.ylabel('Energy Consumption - Facility [J]',fontsize=40)\n", 170 | " plt.xticks(fontsize=25)\n", 171 | " plt.yticks(fontsize=25)\n", 172 | " plt.legend(loc='best',fontsize=30)\n", 173 | " plt.savefig('Plot-Target-vs-Pred-SVM-D2')\n", 174 | " \n", 175 | " t1 = time()\n", 176 | " print('Time taken for this trial %f' %(t1-t0))\n", 177 | " \n", 178 | " return model, y_test, Y_Test_Pred" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "metadata": { 185 | "collapsed": false 186 | }, 187 | "outputs": [], 188 | "source": [ 189 | "energymodel_SVM()" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": { 196 | "collapsed": true 197 | }, 198 | "outputs": [], 199 | "source": [] 200 | } 201 | ], 202 | "metadata": { 203 | "anaconda-cloud": {}, 204 | "kernelspec": { 205 | "display_name": "Python [Root]", 206 | "language": "python", 207 | "name": "Python [Root]" 208 | }, 209 | "language_info": { 210 | "codemirror_mode": { 211 | "name": "ipython", 212 | "version": 3 213 | }, 214 | "file_extension": ".py", 215 | "mimetype": "text/x-python", 216 | "name": "python", 217 | "nbconvert_exporter": "python", 218 | "pygments_lexer": "ipython3", 219 | "version": "3.5.4" 220 | } 221 | }, 222 | "nbformat": 4, 223 | "nbformat_minor": 1 224 | } 225 | -------------------------------------------------------------------------------- /Chapter5-EnergyModel-Comparison/Energy_Modeling_SVM_D3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "from sklearn.model_selection import GridSearchCV, TimeSeriesSplit, KFold, RepeatedKFold, train_test_split\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "import seaborn as sns\n", 16 | "sns.set(color_codes=True)\n", 17 | "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer\n", 18 | "from time import time\n", 19 | "from sklearn.preprocessing import MinMaxScaler\n", 20 | "from sklearn.preprocessing import quantile_transform\n", 21 | "import scipy.stats as st\n", 22 | "from sklearn.feature_selection import RFE, RFECV, SelectFromModel, mutual_info_regression, SelectKBest\n", 23 | "from xgboost import XGBRegressor\n", 24 | "from sklearn.svm import SVR\n", 25 | "from sklearn.decomposition import PCA\n", 26 | "from sklearn.pipeline import Pipeline\n", 27 | "import pprint as pp\n", 28 | "import datetime\n", 29 | "%matplotlib inline" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": { 36 | "collapsed": false 37 | }, 38 | "outputs": [], 39 | "source": [ 40 | "Climate_Data = pd.read_excel('Climate_Data.xls')\n", 41 | "#######################################################################################################################\n", 42 | "Energy_Data_mean = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).mean()\n", 43 | "Energy_Data_mean = Energy_Data_mean[['Day of Week', 'Is Holiday', 'Daylight Savings', 'DHI', 'DNI', 'Dew Point', \n", 44 | " 'Temperature', 'Relative Humidity']]\n", 45 | "Energy_Data_mean.columns = ['Day_of_Week', 'Is_Holiday', 'Daylight_Savings', 'DHI_AVG', 'DNI_AVG', 'Dew Point_AVG', \n", 46 | " 'Temperature_AVG', 'Relative Humidity_AVG']\n", 47 | "#######################################################################################################################\n", 48 | "Energy_Data_sum = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).sum()\n", 49 | "Energy_Data_sum = Energy_Data_sum[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n", 50 | "Energy_Data_sum.columns = ['DHI_SUM', 'DNI_SUM', 'Dew Point_SUM', 'Temperature_SUM', 'Relative Humidity_SUM']\n", 51 | "#######################################################################################################################\n", 52 | "Energy_Data_max = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).max()\n", 53 | "Energy_Data_max = Energy_Data_max[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n", 54 | "Energy_Data_max.columns = ['DHI_MAX', 'DNI_MAX', 'Dew Point_MAX', 'Temperature_MAX', 'Relative Humidity_MAX']\n", 55 | "#######################################################################################################################\n", 56 | "Energy_Data_std = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).std()\n", 57 | "Energy_Data_std = Energy_Data_std[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n", 58 | "Energy_Data_std.columns = ['DHI_STD', 'DNI_STD', 'Dew Point_STD', 'Temperature_STD', 'Relative Humidity_STD']\n", 59 | "#######################################################################################################################\n", 60 | "Energy_Data_min = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).min()\n", 61 | "Energy_Data_min = Energy_Data_min[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n", 62 | "Energy_Data_min.columns = ['DHI_MIN', 'DNI_MIN', 'Dew Point_MIN', 'Temperature_MIN', 'Relative Humidity_MIN']\n", 63 | "#######################################################################################################################\n", 64 | "Energy_Data = pd.concat([Energy_Data_mean, Energy_Data_sum, Energy_Data_max, Energy_Data_std, Energy_Data_min], axis=1)\n", 65 | "Energy_Data.reset_index(inplace=True)\n", 66 | "Energy_Data['Energy_Consumption'] = pd.read_excel('EnergyData_D3.xlsx')\n", 67 | "#######################################################################################################################\n", 68 | "Energy_Data['Lag1'] = (Energy_Data['Energy_Consumption'].shift(1))\n", 69 | "Energy_Data.dropna(axis=0,inplace=True)\n", 70 | "#######################################################################################################################\n", 71 | "Energy_Data['Date_Time'] = pd.to_datetime(pd.DataFrame({'year': Energy_Data['Year'],'month': Energy_Data['Month'] + 1,\n", 72 | " 'day': Energy_Data['Day of Month']}))" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": { 79 | "collapsed": true 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "Feature_Names = ['Month','Day_of_Week', 'Is_Holiday', 'Daylight_Savings', 'DHI_AVG', 'DNI_AVG', 'Dew Point_AVG', \n", 84 | " 'Temperature_AVG', 'Relative Humidity_AVG', 'DHI_SUM', 'DNI_SUM', 'Dew Point_SUM', 'Temperature_SUM', \n", 85 | " 'Relative Humidity_SUM', 'DHI_MAX', 'DNI_MAX', 'Dew Point_MAX', 'Temperature_MAX', \n", 86 | " 'Relative Humidity_MAX', 'DHI_STD', 'DNI_STD', 'Dew Point_STD', 'Temperature_STD', \n", 87 | " 'Relative Humidity_STD', 'DHI_MIN', 'DNI_MIN', 'Dew Point_MIN', 'Temperature_MIN', \n", 88 | " 'Relative Humidity_MIN', 'Lag1']\n", 89 | "\n", 90 | "X = Energy_Data[Feature_Names].as_matrix()\n", 91 | "y = Energy_Data['Energy_Consumption'].as_matrix()\n", 92 | "date_time = Energy_Data['Date_Time']" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": { 99 | "collapsed": true 100 | }, 101 | "outputs": [], 102 | "source": [ 103 | "#################################################################################################\n", 104 | "# To test anomaly detector\n", 105 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, shuffle=False)\n", 106 | "DT_train, DT_Test = train_test_split(date_time, test_size=0.5, shuffle=False)\n", 107 | "#################################################################################################" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": { 114 | "collapsed": false 115 | }, 116 | "outputs": [], 117 | "source": [ 118 | "def energymodel_SVM():\n", 119 | " t0 = time()\n", 120 | " np.random.seed(7)\n", 121 | " ########################################################################################\n", 122 | " # Regression\n", 123 | " kf = KFold(n_splits=5, shuffle=True)\n", 124 | " scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n", 125 | " \n", 126 | " estimators = []\n", 127 | " estimators.append(('standardize', MinMaxScaler()))\n", 128 | " estimators.append(('FS', SelectKBest(mutual_info_regression)))\n", 129 | " estimators.append(('SVM', SVR()))\n", 130 | " pipe = Pipeline(estimators)\n", 131 | " \n", 132 | " y_train_scaled = (y_train - y_train.min())/(y_train.max() - y_train.min())\n", 133 | " \n", 134 | " p_grid = dict(FS__k = [int(i) for i in np.arange(1,len(Feature_Names)+1,1)],\n", 135 | " SVM__gamma = np.logspace(-3, 0, 4),\n", 136 | " SVM__C = np.logspace(0, 3, 4))\n", 137 | " \n", 138 | " model = GridSearchCV(estimator = pipe, param_grid = p_grid, scoring = scoring_param, cv = kf, n_jobs=-1)\n", 139 | " model.fit(X_train, y_train_scaled)\n", 140 | " \n", 141 | " params = model.best_params_\n", 142 | " print(\"Best best k: %s Best gamma: %f Best C: %s\" % (params['FS__k'], params['SVM__gamma'], params['SVM__C']))\n", 143 | " \n", 144 | " \n", 145 | " Y_Test_Pred_scaled = model.predict(X_test)\n", 146 | " Y_Test_Pred = (Y_Test_Pred_scaled*(y_train.max()-y_train.min()))+y_train.min()\n", 147 | " \n", 148 | " rmse = np.sqrt(mean_squared_error(y_test,Y_Test_Pred))\n", 149 | " data_range = y_test.max() - y_test.min()\n", 150 | " NRMSE = (rmse/data_range) * 100.0\n", 151 | " RSQ = r2_score(y_test,Y_Test_Pred)\n", 152 | " print(\"Normalized RMSE: %0.3f\" % NRMSE)\n", 153 | " print(\"R-squared: %0.3f\" % RSQ)\n", 154 | " \n", 155 | " fig = plt.figure(figsize=(30,20))\n", 156 | " ax = fig.add_subplot(1, 1, 1)\n", 157 | " plt.scatter(y_test, Y_Test_Pred, c=\"g\", s=200, alpha=0.5)\n", 158 | " plt.xlabel(\"Target [J]\", fontsize=40)\n", 159 | " plt.ylabel(\"Predictions [J]\", fontsize=40)\n", 160 | " plt.xticks(fontsize=25)\n", 161 | " plt.yticks(fontsize=25)\n", 162 | " plt.savefig('Scatter-Target-vs-Pred-SVM-D3')\n", 163 | " \n", 164 | " fig = plt.figure(figsize=(30,20))\n", 165 | " ax = fig.add_subplot(1, 1, 1)\n", 166 | " plt.plot(DT_Test.dt.to_pydatetime(), y_test, 'k.', lw=5, markersize=20, label=u'Observations')\n", 167 | " plt.plot(DT_Test.dt.to_pydatetime(), Y_Test_Pred, 'r-', lw=5, label=u'Prediction')\n", 168 | " plt.xlabel('Date Time',fontsize=40)\n", 169 | " plt.ylabel('Energy Consumption - Facility [J]',fontsize=40)\n", 170 | " plt.xticks(fontsize=25)\n", 171 | " plt.yticks(fontsize=25)\n", 172 | " plt.legend(loc='best',fontsize=30)\n", 173 | " plt.savefig('Plot-Target-vs-Pred-SVM-D3')\n", 174 | " \n", 175 | " t1 = time()\n", 176 | " print('Time taken for this trial %f' %(t1-t0))\n", 177 | " \n", 178 | " return model, y_test, Y_Test_Pred" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "metadata": { 185 | "collapsed": false 186 | }, 187 | "outputs": [], 188 | "source": [ 189 | "energymodel_SVM()" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": { 196 | "collapsed": true 197 | }, 198 | "outputs": [], 199 | "source": [] 200 | } 201 | ], 202 | "metadata": { 203 | "anaconda-cloud": {}, 204 | "kernelspec": { 205 | "display_name": "Python [Root]", 206 | "language": "python", 207 | "name": "Python [Root]" 208 | }, 209 | "language_info": { 210 | "codemirror_mode": { 211 | "name": "ipython", 212 | "version": 3 213 | }, 214 | "file_extension": ".py", 215 | "mimetype": "text/x-python", 216 | "name": "python", 217 | "nbconvert_exporter": "python", 218 | "pygments_lexer": "ipython3", 219 | "version": "3.5.4" 220 | } 221 | }, 222 | "nbformat": 4, 223 | "nbformat_minor": 1 224 | } 225 | -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-10/Condenser_Foul_SVM_Dynamic_Threshold.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "from sklearn.model_selection import GridSearchCV, KFold, train_test_split\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "import seaborn as sns\n", 16 | "sns.set(color_codes=True)\n", 17 | "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer, adjusted_rand_score, \\\n", 18 | " accuracy_score, f1_score, confusion_matrix, classification_report, roc_auc_score, recall_score\n", 19 | "from time import time\n", 20 | "from sklearn.preprocessing import MinMaxScaler\n", 21 | "import scipy.stats as st\n", 22 | "from sklearn.feature_selection import RFE, RFECV, SelectKBest, mutual_info_regression\n", 23 | "from sklearn.svm import SVR\n", 24 | "from sklearn.pipeline import Pipeline\n", 25 | "import pprint as pp\n", 26 | "%matplotlib inline" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": { 33 | "collapsed": false 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "Chiller_Data = pd.read_excel('Condenser_Fouling_Fault_Data.xlsx')" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": { 44 | "collapsed": true 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "Chiller_Data = Chiller_Data.loc[Chiller_Data['kW'] != 1.682000e-45]\n", 49 | "Chiller_Data.reset_index(drop=True, inplace=True)" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": { 56 | "collapsed": false 57 | }, 58 | "outputs": [], 59 | "source": [ 60 | "Chiller_Data['Target'] = (Chiller_Data['TRC_sub'])/(Chiller_Data['TRC']-Chiller_Data['TCI'])\n", 61 | "Chiller_Data['Lag1'] = (Chiller_Data['Target'].shift(1))\n", 62 | "Chiller_Data.dropna(axis=0,inplace=True)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": { 69 | "collapsed": true 70 | }, 71 | "outputs": [], 72 | "source": [ 73 | "y = Chiller_Data['Target'].as_matrix()\n", 74 | "True_Labels = Chiller_Data['Label'].as_matrix()\n", 75 | "Chiller_Data.drop(['Target','Label','Time (minutes)'], axis=1, inplace=True)\n", 76 | "#Feature_Names = ['Lag1','TEI','TEO','TCI','TCO','kW','FWC','FWE','TEA','TCA','TRE','PRE','TRC','PRC','TRC_sub','T_suc',\n", 77 | "# 'Tsh_suc','TR_dis','Tsh_dis','P_lift','TO_sump','TO_feed','PO_feed','TWCD','TWED']\n", 78 | "Feature_Names = list(Chiller_Data)\n", 79 | "X = Chiller_Data[Feature_Names].as_matrix()" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": { 86 | "collapsed": true 87 | }, 88 | "outputs": [], 89 | "source": [ 90 | "#################################################################################################\n", 91 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.55, shuffle=False)\n", 92 | "TL_train, TL_Test = train_test_split(True_Labels, test_size=0.55, shuffle=False)\n", 93 | "#################################################################################################" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": { 100 | "collapsed": false 101 | }, 102 | "outputs": [], 103 | "source": [ 104 | "def calc_dyn_threshold(A, P, I, N):\n", 105 | " # Control false alarm rates by tuning I and N. eg. increase I or N to reduce false alarms\n", 106 | " threshold = np.zeros(I-1)\n", 107 | " threshold[0:(I-1)] = P[0:(I-1)]\n", 108 | " labels = np.zeros(I-1)\n", 109 | " for k in np.arange(I,len(P)+1):\n", 110 | " mu = np.mean(P[(k-I):k])\n", 111 | " sigma = np.std(P[(k-I):k])\n", 112 | " T = mu - N*sigma\n", 113 | " threshold = np.append(threshold,T)\n", 114 | " if (A[k-1] < threshold[k-1]) :\n", 115 | " labels = np.append(labels,1)\n", 116 | " else:\n", 117 | " labels = np.append(labels,0)\n", 118 | " return labels, threshold" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": { 125 | "collapsed": false 126 | }, 127 | "outputs": [], 128 | "source": [ 129 | "t0 = time()\n", 130 | "np.random.seed(7)\n", 131 | "########################################################################################\n", 132 | "# Regression\n", 133 | "kf = KFold(n_splits=10, shuffle=True, random_state=7)\n", 134 | "scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n", 135 | "\n", 136 | "estimators = []\n", 137 | "estimators.append(('standardize', MinMaxScaler()))\n", 138 | "estimators.append(('FS', SelectKBest(mutual_info_regression)))\n", 139 | "estimators.append(('SVM', SVR()))\n", 140 | "pipe = Pipeline(estimators)\n", 141 | " \n", 142 | "y_train_scaled = (y_train - y_train.min())/(y_train.max() - y_train.min())\n", 143 | " \n", 144 | "p_grid = dict(FS__k = [8, 16],\n", 145 | " SVM__gamma = np.logspace(-3, 0, 4),\n", 146 | " SVM__C = np.logspace(0, 3, 4))\n", 147 | " \n", 148 | "model = GridSearchCV(estimator = pipe, param_grid = p_grid, scoring = scoring_param, cv = kf, n_jobs=-1)\n", 149 | "model.fit(X_train, y_train_scaled)\n", 150 | " \n", 151 | "params = model.best_params_\n", 152 | "print(\"Best best k: %s Best gamma: %f Best C: %s\" % (params['FS__k'], params['SVM__gamma'], params['SVM__C']))\n", 153 | " \n", 154 | "Y_Test_Pred_scaled = model.predict(X_test)\n", 155 | "Y_Test_Predicted = (Y_Test_Pred_scaled*(y_train.max()-y_train.min()))+y_train.min()\n", 156 | " \n", 157 | "rmse = np.sqrt(mean_squared_error(y_test,Y_Test_Predicted))\n", 158 | "data_range = y_test.max() - y_test.min()\n", 159 | "NRMSE = (rmse/data_range) * 100.0\n", 160 | "RSQ = r2_score(y_test,Y_Test_Predicted)\n", 161 | "print(\"Normalized RMSE: %0.3f\" % NRMSE)\n", 162 | "print(\"R-squared: %0.3f\" % RSQ)\n", 163 | "\n", 164 | "Labels, Threshold = calc_dyn_threshold(y_test, Y_Test_Predicted, 2, 2)\n", 165 | "Temp = pd.DataFrame(data={'Actual': y_test, 'Predicted':Y_Test_Predicted, 'Labels':TL_Test, \n", 166 | " 'Threshold':Threshold, 'Pred_Labels': Labels})\n", 167 | "\n", 168 | "print(\"########################################################################################\")\n", 169 | "print(\"Confusion Matrix - testing:\")\n", 170 | "print(confusion_matrix(Temp['Labels'], Temp['Pred_Labels']))\n", 171 | "tn, fp, fn, tp = confusion_matrix(Temp['Labels'], Temp['Pred_Labels']).ravel()\n", 172 | "print(\"True Negative, False Positive, False Negative, True Positive {}.\".format([tn, fp, fn, tp]))\n", 173 | "print(\"False positive means false alarms\")\n", 174 | "print(\"False Negative means missed faults\")\n", 175 | "print(\"########################################################################################\")\n", 176 | "print(\"Classification Report - testing:\")\n", 177 | "print(classification_report(Temp['Labels'], Temp['Pred_Labels'], target_names=['Normal', 'Fault']))\n", 178 | "print(\"########################################################################################\")\n", 179 | "print(\"Accuracy - testing: %0.3f\" % accuracy_score(Temp['Labels'], Temp['Pred_Labels']))\n", 180 | "print(\"########################################################################################\")\n", 181 | "print(\"ROC AUC score - testing: %0.3f\" % roc_auc_score(Temp['Labels'], Temp['Pred_Labels']))\n", 182 | "print(\"########################################################################################\")\n", 183 | "########################################################################################\n", 184 | "\n", 185 | "fig = plt.figure(figsize=(25,20))\n", 186 | "ax = fig.add_subplot(1, 1, 1)\n", 187 | "Data_0 = Temp.loc[Temp['Labels'][Temp['Labels']==0].index]\n", 188 | "Data_1 = Temp.loc[Temp['Labels'][Temp['Labels']==1].index]\n", 189 | "ax.scatter(list(Data_0.index), Data_0['Actual'], c=plt.cm.coolwarm(0.), s=200,\n", 190 | " edgecolors='y', marker='o', label=u'Actual normal data')\n", 191 | "ax.scatter(list(Data_1.index), Data_1['Actual'], c=plt.cm.coolwarm(1.), s=200, \n", 192 | " edgecolors='y', marker='^', label=u'Actual fault data')\n", 193 | "plt.plot(list(Temp.index), Temp['Predicted'], 'c-*', lw = 4, ms = 5, label=u'XGBoost Prediction')\n", 194 | "plt.xlabel('Data index',fontsize=30)\n", 195 | "plt.ylabel('Heat exchanger efficiency of the sub-cooling section',fontsize=30)\n", 196 | "plt.xticks(fontsize=30)\n", 197 | "plt.yticks(fontsize=30)\n", 198 | "plt.legend(loc='best',fontsize=30)\n", 199 | "plt.savefig('M0-Cond-Foul-Actual-Labels-Predictions')\n", 200 | "\n", 201 | "fig = plt.figure(figsize=(25,20))\n", 202 | "ax = fig.add_subplot(1, 1, 1)\n", 203 | "Data_0 = Temp.loc[Temp['Pred_Labels'][Temp['Pred_Labels']==0].index]\n", 204 | "Data_1 = Temp.loc[Temp['Pred_Labels'][Temp['Pred_Labels']==1].index]\n", 205 | "ax.scatter(list(Data_0.index), Data_0['Actual'], c=plt.cm.coolwarm(0.), s=200, \n", 206 | " edgecolors='y', marker='o', label=u'Predicted normal data')\n", 207 | "ax.scatter(list(Data_1.index), Data_1['Actual'], c=plt.cm.coolwarm(1.), s=200,\n", 208 | " edgecolors='y', marker='^', label=u'Predicted fault data')\n", 209 | "plt.plot(list(Temp.index), Temp['Predicted'], 'c-*', lw = 4, ms = 5, label=u'XGBoost Prediction')\n", 210 | "plt.plot(list(Temp.index), Temp['Threshold'], 'k--', lw = 4, label=u'Dynamic threshold')\n", 211 | "plt.xlabel('Data index',fontsize=30)\n", 212 | "plt.ylabel('Heat exchanger efficiency of the sub-cooling section',fontsize=30)\n", 213 | "plt.xticks(fontsize=30)\n", 214 | "plt.yticks(fontsize=30)\n", 215 | "plt.legend(loc='best',fontsize=30)\n", 216 | "plt.savefig('M0-Cond-Foul-SVM-Dynamic-Threshold-Predicted-Labels')\n", 217 | "\n", 218 | "t1 = time()\n", 219 | "print('Time taken for this trial %f' %(t1-t0))" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": { 226 | "collapsed": true 227 | }, 228 | "outputs": [], 229 | "source": [] 230 | } 231 | ], 232 | "metadata": { 233 | "anaconda-cloud": {}, 234 | "kernelspec": { 235 | "display_name": "Python [Root]", 236 | "language": "python", 237 | "name": "Python [Root]" 238 | }, 239 | "language_info": { 240 | "codemirror_mode": { 241 | "name": "ipython", 242 | "version": 3 243 | }, 244 | "file_extension": ".py", 245 | "mimetype": "text/x-python", 246 | "name": "python", 247 | "nbconvert_exporter": "python", 248 | "pygments_lexer": "ipython3", 249 | "version": "3.5.4" 250 | } 251 | }, 252 | "nbformat": 4, 253 | "nbformat_minor": 1 254 | } 255 | -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-20/Condenser_Foul_SVM_Dynamic_Threshold.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "from sklearn.model_selection import GridSearchCV, KFold, train_test_split\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "import seaborn as sns\n", 16 | "sns.set(color_codes=True)\n", 17 | "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer, adjusted_rand_score, \\\n", 18 | " accuracy_score, f1_score, confusion_matrix, classification_report, roc_auc_score, recall_score\n", 19 | "from time import time\n", 20 | "from sklearn.preprocessing import MinMaxScaler\n", 21 | "import scipy.stats as st\n", 22 | "from sklearn.feature_selection import RFE, RFECV, SelectKBest, mutual_info_regression\n", 23 | "from sklearn.svm import SVR\n", 24 | "from sklearn.pipeline import Pipeline\n", 25 | "import pprint as pp\n", 26 | "%matplotlib inline" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": { 33 | "collapsed": false 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "Chiller_Data = pd.read_excel('Condenser_Fouling_Fault_Data.xlsx')" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": { 44 | "collapsed": true 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "Chiller_Data = Chiller_Data.loc[Chiller_Data['kW'] != 1.682000e-45]\n", 49 | "Chiller_Data.reset_index(drop=True, inplace=True)" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": { 56 | "collapsed": false 57 | }, 58 | "outputs": [], 59 | "source": [ 60 | "Chiller_Data['Target'] = (Chiller_Data['TRC_sub'])/(Chiller_Data['TRC']-Chiller_Data['TCI'])\n", 61 | "Chiller_Data['Lag1'] = (Chiller_Data['Target'].shift(1))\n", 62 | "Chiller_Data.dropna(axis=0,inplace=True)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": { 69 | "collapsed": true 70 | }, 71 | "outputs": [], 72 | "source": [ 73 | "y = Chiller_Data['Target'].as_matrix()\n", 74 | "True_Labels = Chiller_Data['Label'].as_matrix()\n", 75 | "Chiller_Data.drop(['Target','Label','Time (minutes)'], axis=1, inplace=True)\n", 76 | "#Feature_Names = ['Lag1','TEI','TEO','TCI','TCO','kW','FWC','FWE','TEA','TCA','TRE','PRE','TRC','PRC','TRC_sub','T_suc',\n", 77 | "# 'Tsh_suc','TR_dis','Tsh_dis','P_lift','TO_sump','TO_feed','PO_feed','TWCD','TWED']\n", 78 | "Feature_Names = list(Chiller_Data)\n", 79 | "X = Chiller_Data[Feature_Names].as_matrix()" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": { 86 | "collapsed": true 87 | }, 88 | "outputs": [], 89 | "source": [ 90 | "#################################################################################################\n", 91 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.55, shuffle=False)\n", 92 | "TL_train, TL_Test = train_test_split(True_Labels, test_size=0.55, shuffle=False)\n", 93 | "#################################################################################################" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": { 100 | "collapsed": false 101 | }, 102 | "outputs": [], 103 | "source": [ 104 | "def calc_dyn_threshold(A, P, I, N):\n", 105 | " # Control false alarm rates by tuning I and N. eg. increase I or N to reduce false alarms\n", 106 | " threshold = np.zeros(I-1)\n", 107 | " threshold[0:(I-1)] = P[0:(I-1)]\n", 108 | " labels = np.zeros(I-1)\n", 109 | " for k in np.arange(I,len(P)+1):\n", 110 | " mu = np.mean(P[(k-I):k])\n", 111 | " sigma = np.std(P[(k-I):k])\n", 112 | " T = mu - N*sigma\n", 113 | " threshold = np.append(threshold,T)\n", 114 | " if (A[k-1] < threshold[k-1]) :\n", 115 | " labels = np.append(labels,1)\n", 116 | " else:\n", 117 | " labels = np.append(labels,0)\n", 118 | " return labels, threshold" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": { 125 | "collapsed": false 126 | }, 127 | "outputs": [], 128 | "source": [ 129 | "t0 = time()\n", 130 | "np.random.seed(7)\n", 131 | "########################################################################################\n", 132 | "# Regression\n", 133 | "kf = KFold(n_splits=10, shuffle=True, random_state=7)\n", 134 | "scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n", 135 | "\n", 136 | "estimators = []\n", 137 | "estimators.append(('standardize', MinMaxScaler()))\n", 138 | "estimators.append(('FS', SelectKBest(mutual_info_regression)))\n", 139 | "estimators.append(('SVM', SVR()))\n", 140 | "pipe = Pipeline(estimators)\n", 141 | " \n", 142 | "y_train_scaled = (y_train - y_train.min())/(y_train.max() - y_train.min())\n", 143 | " \n", 144 | "p_grid = dict(FS__k = [8, 16],\n", 145 | " SVM__gamma = np.logspace(-3, 0, 4),\n", 146 | " SVM__C = np.logspace(0, 3, 4))\n", 147 | " \n", 148 | "model = GridSearchCV(estimator = pipe, param_grid = p_grid, scoring = scoring_param, cv = kf, n_jobs=-1)\n", 149 | "model.fit(X_train, y_train_scaled)\n", 150 | " \n", 151 | "params = model.best_params_\n", 152 | "print(\"Best best k: %s Best gamma: %f Best C: %s\" % (params['FS__k'], params['SVM__gamma'], params['SVM__C']))\n", 153 | " \n", 154 | "Y_Test_Pred_scaled = model.predict(X_test)\n", 155 | "Y_Test_Predicted = (Y_Test_Pred_scaled*(y_train.max()-y_train.min()))+y_train.min()\n", 156 | " \n", 157 | "rmse = np.sqrt(mean_squared_error(y_test,Y_Test_Predicted))\n", 158 | "data_range = y_test.max() - y_test.min()\n", 159 | "NRMSE = (rmse/data_range) * 100.0\n", 160 | "RSQ = r2_score(y_test,Y_Test_Predicted)\n", 161 | "print(\"Normalized RMSE: %0.3f\" % NRMSE)\n", 162 | "print(\"R-squared: %0.3f\" % RSQ)\n", 163 | "\n", 164 | "Labels, Threshold = calc_dyn_threshold(y_test, Y_Test_Predicted, 2, 2)\n", 165 | "Temp = pd.DataFrame(data={'Actual': y_test, 'Predicted':Y_Test_Predicted, 'Labels':TL_Test, \n", 166 | " 'Threshold':Threshold, 'Pred_Labels': Labels})\n", 167 | "\n", 168 | "print(\"########################################################################################\")\n", 169 | "print(\"Confusion Matrix - testing:\")\n", 170 | "print(confusion_matrix(Temp['Labels'], Temp['Pred_Labels']))\n", 171 | "tn, fp, fn, tp = confusion_matrix(Temp['Labels'], Temp['Pred_Labels']).ravel()\n", 172 | "print(\"True Negative, False Positive, False Negative, True Positive {}.\".format([tn, fp, fn, tp]))\n", 173 | "print(\"False positive means false alarms\")\n", 174 | "print(\"False Negative means missed faults\")\n", 175 | "print(\"########################################################################################\")\n", 176 | "print(\"Classification Report - testing:\")\n", 177 | "print(classification_report(Temp['Labels'], Temp['Pred_Labels'], target_names=['Normal', 'Fault']))\n", 178 | "print(\"########################################################################################\")\n", 179 | "print(\"Accuracy - testing: %0.3f\" % accuracy_score(Temp['Labels'], Temp['Pred_Labels']))\n", 180 | "print(\"########################################################################################\")\n", 181 | "print(\"ROC AUC score - testing: %0.3f\" % roc_auc_score(Temp['Labels'], Temp['Pred_Labels']))\n", 182 | "print(\"########################################################################################\")\n", 183 | "########################################################################################\n", 184 | "\n", 185 | "fig = plt.figure(figsize=(25,20))\n", 186 | "ax = fig.add_subplot(1, 1, 1)\n", 187 | "Data_0 = Temp.loc[Temp['Labels'][Temp['Labels']==0].index]\n", 188 | "Data_1 = Temp.loc[Temp['Labels'][Temp['Labels']==1].index]\n", 189 | "ax.scatter(list(Data_0.index), Data_0['Actual'], c=plt.cm.coolwarm(0.), s=200,\n", 190 | " edgecolors='y', marker='o', label=u'Actual normal data')\n", 191 | "ax.scatter(list(Data_1.index), Data_1['Actual'], c=plt.cm.coolwarm(1.), s=200, \n", 192 | " edgecolors='y', marker='^', label=u'Actual fault data')\n", 193 | "plt.plot(list(Temp.index), Temp['Predicted'], 'c-*', lw = 4, ms = 5, label=u'XGBoost Prediction')\n", 194 | "plt.xlabel('Data index',fontsize=30)\n", 195 | "plt.ylabel('Heat exchanger efficiency of the sub-cooling section',fontsize=30)\n", 196 | "plt.xticks(fontsize=30)\n", 197 | "plt.yticks(fontsize=30)\n", 198 | "plt.legend(loc='best',fontsize=30)\n", 199 | "plt.savefig('M0-Cond-Foul-Actual-Labels-Predictions')\n", 200 | "\n", 201 | "fig = plt.figure(figsize=(25,20))\n", 202 | "ax = fig.add_subplot(1, 1, 1)\n", 203 | "Data_0 = Temp.loc[Temp['Pred_Labels'][Temp['Pred_Labels']==0].index]\n", 204 | "Data_1 = Temp.loc[Temp['Pred_Labels'][Temp['Pred_Labels']==1].index]\n", 205 | "ax.scatter(list(Data_0.index), Data_0['Actual'], c=plt.cm.coolwarm(0.), s=200, \n", 206 | " edgecolors='y', marker='o', label=u'Predicted normal data')\n", 207 | "ax.scatter(list(Data_1.index), Data_1['Actual'], c=plt.cm.coolwarm(1.), s=200,\n", 208 | " edgecolors='y', marker='^', label=u'Predicted fault data')\n", 209 | "plt.plot(list(Temp.index), Temp['Predicted'], 'c-*', lw = 4, ms = 5, label=u'XGBoost Prediction')\n", 210 | "plt.plot(list(Temp.index), Temp['Threshold'], 'k--', lw = 4, label=u'Dynamic threshold')\n", 211 | "plt.xlabel('Data index',fontsize=30)\n", 212 | "plt.ylabel('Heat exchanger efficiency of the sub-cooling section',fontsize=30)\n", 213 | "plt.xticks(fontsize=30)\n", 214 | "plt.yticks(fontsize=30)\n", 215 | "plt.legend(loc='best',fontsize=30)\n", 216 | "plt.savefig('M0-Cond-Foul-SVM-Dynamic-Threshold-Predicted-Labels')\n", 217 | "\n", 218 | "t1 = time()\n", 219 | "print('Time taken for this trial %f' %(t1-t0))" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": { 226 | "collapsed": true 227 | }, 228 | "outputs": [], 229 | "source": [] 230 | } 231 | ], 232 | "metadata": { 233 | "anaconda-cloud": {}, 234 | "kernelspec": { 235 | "display_name": "Python [Root]", 236 | "language": "python", 237 | "name": "Python [Root]" 238 | }, 239 | "language_info": { 240 | "codemirror_mode": { 241 | "name": "ipython", 242 | "version": 3 243 | }, 244 | "file_extension": ".py", 245 | "mimetype": "text/x-python", 246 | "name": "python", 247 | "nbconvert_exporter": "python", 248 | "pygments_lexer": "ipython3", 249 | "version": "3.5.4" 250 | } 251 | }, 252 | "nbformat": 4, 253 | "nbformat_minor": 1 254 | } 255 | -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-30/Condenser_Foul_SVM_Dynamic_Threshold.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "from sklearn.model_selection import GridSearchCV, KFold, train_test_split\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "import seaborn as sns\n", 16 | "sns.set(color_codes=True)\n", 17 | "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer, adjusted_rand_score, \\\n", 18 | " accuracy_score, f1_score, confusion_matrix, classification_report, roc_auc_score, recall_score\n", 19 | "from time import time\n", 20 | "from sklearn.preprocessing import MinMaxScaler\n", 21 | "import scipy.stats as st\n", 22 | "from sklearn.feature_selection import RFE, RFECV, SelectKBest, mutual_info_regression\n", 23 | "from sklearn.svm import SVR\n", 24 | "from sklearn.pipeline import Pipeline\n", 25 | "import pprint as pp\n", 26 | "%matplotlib inline" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": { 33 | "collapsed": false 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "Chiller_Data = pd.read_excel('Condenser_Fouling_Fault_Data.xlsx')" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": { 44 | "collapsed": true 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "Chiller_Data = Chiller_Data.loc[Chiller_Data['kW'] != 1.682000e-45]\n", 49 | "Chiller_Data.reset_index(drop=True, inplace=True)" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": { 56 | "collapsed": false 57 | }, 58 | "outputs": [], 59 | "source": [ 60 | "Chiller_Data['Target'] = (Chiller_Data['TRC_sub'])/(Chiller_Data['TRC']-Chiller_Data['TCI'])\n", 61 | "Chiller_Data['Lag1'] = (Chiller_Data['Target'].shift(1))\n", 62 | "Chiller_Data.dropna(axis=0,inplace=True)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": { 69 | "collapsed": true 70 | }, 71 | "outputs": [], 72 | "source": [ 73 | "y = Chiller_Data['Target'].as_matrix()\n", 74 | "True_Labels = Chiller_Data['Label'].as_matrix()\n", 75 | "Chiller_Data.drop(['Target','Label','Time (minutes)'], axis=1, inplace=True)\n", 76 | "#Feature_Names = ['Lag1','TEI','TEO','TCI','TCO','kW','FWC','FWE','TEA','TCA','TRE','PRE','TRC','PRC','TRC_sub','T_suc',\n", 77 | "# 'Tsh_suc','TR_dis','Tsh_dis','P_lift','TO_sump','TO_feed','PO_feed','TWCD','TWED']\n", 78 | "Feature_Names = list(Chiller_Data)\n", 79 | "X = Chiller_Data[Feature_Names].as_matrix()" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": { 86 | "collapsed": true 87 | }, 88 | "outputs": [], 89 | "source": [ 90 | "#################################################################################################\n", 91 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.55, shuffle=False)\n", 92 | "TL_train, TL_Test = train_test_split(True_Labels, test_size=0.55, shuffle=False)\n", 93 | "#################################################################################################" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": { 100 | "collapsed": false 101 | }, 102 | "outputs": [], 103 | "source": [ 104 | "def calc_dyn_threshold(A, P, I, N):\n", 105 | " # Control false alarm rates by tuning I and N. eg. increase I or N to reduce false alarms\n", 106 | " threshold = np.zeros(I-1)\n", 107 | " threshold[0:(I-1)] = P[0:(I-1)]\n", 108 | " labels = np.zeros(I-1)\n", 109 | " for k in np.arange(I,len(P)+1):\n", 110 | " mu = np.mean(P[(k-I):k])\n", 111 | " sigma = np.std(P[(k-I):k])\n", 112 | " T = mu - N*sigma\n", 113 | " threshold = np.append(threshold,T)\n", 114 | " if (A[k-1] < threshold[k-1]) :\n", 115 | " labels = np.append(labels,1)\n", 116 | " else:\n", 117 | " labels = np.append(labels,0)\n", 118 | " return labels, threshold" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": { 125 | "collapsed": false 126 | }, 127 | "outputs": [], 128 | "source": [ 129 | "t0 = time()\n", 130 | "np.random.seed(7)\n", 131 | "########################################################################################\n", 132 | "# Regression\n", 133 | "kf = KFold(n_splits=10, shuffle=True, random_state=7)\n", 134 | "scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n", 135 | "\n", 136 | "estimators = []\n", 137 | "estimators.append(('standardize', MinMaxScaler()))\n", 138 | "estimators.append(('FS', SelectKBest(mutual_info_regression)))\n", 139 | "estimators.append(('SVM', SVR()))\n", 140 | "pipe = Pipeline(estimators)\n", 141 | " \n", 142 | "y_train_scaled = (y_train - y_train.min())/(y_train.max() - y_train.min())\n", 143 | " \n", 144 | "p_grid = dict(FS__k = [8, 16],\n", 145 | " SVM__gamma = np.logspace(-3, 0, 4),\n", 146 | " SVM__C = np.logspace(0, 3, 4))\n", 147 | " \n", 148 | "model = GridSearchCV(estimator = pipe, param_grid = p_grid, scoring = scoring_param, cv = kf, n_jobs=-1)\n", 149 | "model.fit(X_train, y_train_scaled)\n", 150 | " \n", 151 | "params = model.best_params_\n", 152 | "print(\"Best best k: %s Best gamma: %f Best C: %s\" % (params['FS__k'], params['SVM__gamma'], params['SVM__C']))\n", 153 | " \n", 154 | "Y_Test_Pred_scaled = model.predict(X_test)\n", 155 | "Y_Test_Predicted = (Y_Test_Pred_scaled*(y_train.max()-y_train.min()))+y_train.min()\n", 156 | " \n", 157 | "rmse = np.sqrt(mean_squared_error(y_test,Y_Test_Predicted))\n", 158 | "data_range = y_test.max() - y_test.min()\n", 159 | "NRMSE = (rmse/data_range) * 100.0\n", 160 | "RSQ = r2_score(y_test,Y_Test_Predicted)\n", 161 | "print(\"Normalized RMSE: %0.3f\" % NRMSE)\n", 162 | "print(\"R-squared: %0.3f\" % RSQ)\n", 163 | "\n", 164 | "Labels, Threshold = calc_dyn_threshold(y_test, Y_Test_Predicted, 2, 2)\n", 165 | "Temp = pd.DataFrame(data={'Actual': y_test, 'Predicted':Y_Test_Predicted, 'Labels':TL_Test, \n", 166 | " 'Threshold':Threshold, 'Pred_Labels': Labels})\n", 167 | "\n", 168 | "print(\"########################################################################################\")\n", 169 | "print(\"Confusion Matrix - testing:\")\n", 170 | "print(confusion_matrix(Temp['Labels'], Temp['Pred_Labels']))\n", 171 | "tn, fp, fn, tp = confusion_matrix(Temp['Labels'], Temp['Pred_Labels']).ravel()\n", 172 | "print(\"True Negative, False Positive, False Negative, True Positive {}.\".format([tn, fp, fn, tp]))\n", 173 | "print(\"False positive means false alarms\")\n", 174 | "print(\"False Negative means missed faults\")\n", 175 | "print(\"########################################################################################\")\n", 176 | "print(\"Classification Report - testing:\")\n", 177 | "print(classification_report(Temp['Labels'], Temp['Pred_Labels'], target_names=['Normal', 'Fault']))\n", 178 | "print(\"########################################################################################\")\n", 179 | "print(\"Accuracy - testing: %0.3f\" % accuracy_score(Temp['Labels'], Temp['Pred_Labels']))\n", 180 | "print(\"########################################################################################\")\n", 181 | "print(\"ROC AUC score - testing: %0.3f\" % roc_auc_score(Temp['Labels'], Temp['Pred_Labels']))\n", 182 | "print(\"########################################################################################\")\n", 183 | "########################################################################################\n", 184 | "\n", 185 | "fig = plt.figure(figsize=(25,20))\n", 186 | "ax = fig.add_subplot(1, 1, 1)\n", 187 | "Data_0 = Temp.loc[Temp['Labels'][Temp['Labels']==0].index]\n", 188 | "Data_1 = Temp.loc[Temp['Labels'][Temp['Labels']==1].index]\n", 189 | "ax.scatter(list(Data_0.index), Data_0['Actual'], c=plt.cm.coolwarm(0.), s=200,\n", 190 | " edgecolors='y', marker='o', label=u'Actual normal data')\n", 191 | "ax.scatter(list(Data_1.index), Data_1['Actual'], c=plt.cm.coolwarm(1.), s=200, \n", 192 | " edgecolors='y', marker='^', label=u'Actual fault data')\n", 193 | "plt.plot(list(Temp.index), Temp['Predicted'], 'c-*', lw = 4, ms = 5, label=u'XGBoost Prediction')\n", 194 | "plt.xlabel('Data index',fontsize=30)\n", 195 | "plt.ylabel('Heat exchanger efficiency of the sub-cooling section',fontsize=30)\n", 196 | "plt.xticks(fontsize=30)\n", 197 | "plt.yticks(fontsize=30)\n", 198 | "plt.legend(loc='best',fontsize=30)\n", 199 | "plt.savefig('M0-Cond-Foul-Actual-Labels-Predictions')\n", 200 | "\n", 201 | "fig = plt.figure(figsize=(25,20))\n", 202 | "ax = fig.add_subplot(1, 1, 1)\n", 203 | "Data_0 = Temp.loc[Temp['Pred_Labels'][Temp['Pred_Labels']==0].index]\n", 204 | "Data_1 = Temp.loc[Temp['Pred_Labels'][Temp['Pred_Labels']==1].index]\n", 205 | "ax.scatter(list(Data_0.index), Data_0['Actual'], c=plt.cm.coolwarm(0.), s=200, \n", 206 | " edgecolors='y', marker='o', label=u'Predicted normal data')\n", 207 | "ax.scatter(list(Data_1.index), Data_1['Actual'], c=plt.cm.coolwarm(1.), s=200,\n", 208 | " edgecolors='y', marker='^', label=u'Predicted fault data')\n", 209 | "plt.plot(list(Temp.index), Temp['Predicted'], 'c-*', lw = 4, ms = 5, label=u'XGBoost Prediction')\n", 210 | "plt.plot(list(Temp.index), Temp['Threshold'], 'k--', lw = 4, label=u'Dynamic threshold')\n", 211 | "plt.xlabel('Data index',fontsize=30)\n", 212 | "plt.ylabel('Heat exchanger efficiency of the sub-cooling section',fontsize=30)\n", 213 | "plt.xticks(fontsize=30)\n", 214 | "plt.yticks(fontsize=30)\n", 215 | "plt.legend(loc='best',fontsize=30)\n", 216 | "plt.savefig('M0-Cond-Foul-SVM-Dynamic-Threshold-Predicted-Labels')\n", 217 | "\n", 218 | "t1 = time()\n", 219 | "print('Time taken for this trial %f' %(t1-t0))" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": { 226 | "collapsed": true 227 | }, 228 | "outputs": [], 229 | "source": [] 230 | } 231 | ], 232 | "metadata": { 233 | "anaconda-cloud": {}, 234 | "kernelspec": { 235 | "display_name": "Python [Root]", 236 | "language": "python", 237 | "name": "Python [Root]" 238 | }, 239 | "language_info": { 240 | "codemirror_mode": { 241 | "name": "ipython", 242 | "version": 3 243 | }, 244 | "file_extension": ".py", 245 | "mimetype": "text/x-python", 246 | "name": "python", 247 | "nbconvert_exporter": "python", 248 | "pygments_lexer": "ipython3", 249 | "version": "3.5.4" 250 | } 251 | }, 252 | "nbformat": 4, 253 | "nbformat_minor": 1 254 | } 255 | -------------------------------------------------------------------------------- /Chapter6-Case-Study/SL-10/Condenser_Foul_RF_Dynamic_Threshold.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "from sklearn.model_selection import GridSearchCV, KFold, train_test_split\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "import seaborn as sns\n", 16 | "sns.set(color_codes=True)\n", 17 | "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer, adjusted_rand_score, \\\n", 18 | " accuracy_score, f1_score, confusion_matrix, classification_report, roc_auc_score, recall_score\n", 19 | "from time import time\n", 20 | "import scipy.stats as st\n", 21 | "from sklearn.feature_selection import RFE, RFECV\n", 22 | "from sklearn.ensemble import RandomForestRegressor\n", 23 | "from sklearn.pipeline import Pipeline\n", 24 | "import pprint as pp\n", 25 | "%matplotlib inline" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": { 32 | "collapsed": false 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "Chiller_Data = pd.read_excel('Condenser_Fouling_Fault_Data.xlsx')" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": { 43 | "collapsed": false 44 | }, 45 | "outputs": [], 46 | "source": [ 47 | "Chiller_Data = Chiller_Data.loc[Chiller_Data['kW'] != 1.682000e-45]\n", 48 | "Chiller_Data.reset_index(drop=True, inplace=True)" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": { 55 | "collapsed": true 56 | }, 57 | "outputs": [], 58 | "source": [ 59 | "Chiller_Data['Target'] = (Chiller_Data['TRC_sub'])/(Chiller_Data['TRC']-Chiller_Data['TCI'])\n", 60 | "Chiller_Data['Lag1'] = (Chiller_Data['Target'].shift(1))\n", 61 | "Chiller_Data.dropna(axis=0,inplace=True)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": { 68 | "collapsed": true 69 | }, 70 | "outputs": [], 71 | "source": [ 72 | "y = Chiller_Data['Target'].as_matrix()\n", 73 | "True_Labels = Chiller_Data['Label'].as_matrix()\n", 74 | "Chiller_Data.drop(['Target','Label','Time (minutes)'], axis=1, inplace=True)\n", 75 | "#Feature_Names = ['Lag1','TEI','TEO','TCI','TCO','kW','FWC','FWE','TEA','TCA','TRE','PRE','TRC','PRC','TRC_sub','T_suc',\n", 76 | "# 'Tsh_suc','TR_dis','Tsh_dis','P_lift','TO_sump','TO_feed','PO_feed','TWCD','TWED']\n", 77 | "Feature_Names = list(Chiller_Data)\n", 78 | "X = Chiller_Data[Feature_Names].as_matrix()" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": { 85 | "collapsed": false 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "#################################################################################################\n", 90 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.55, shuffle=False)\n", 91 | "TL_train, TL_Test = train_test_split(True_Labels, test_size=0.55, shuffle=False)\n", 92 | "#################################################################################################" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": { 99 | "collapsed": true 100 | }, 101 | "outputs": [], 102 | "source": [ 103 | "def calc_dyn_threshold(A, P, I, N):\n", 104 | " # Control false alarm rates by tuning I and N. eg. increase I or N to reduce false alarms\n", 105 | " threshold = np.zeros(I-1)\n", 106 | " threshold[0:(I-1)] = P[0:(I-1)]\n", 107 | " labels = np.zeros(I-1)\n", 108 | " for k in np.arange(I,len(P)+1):\n", 109 | " mu = np.mean(P[(k-I):k])\n", 110 | " sigma = np.std(P[(k-I):k])\n", 111 | " T = mu - N*sigma\n", 112 | " threshold = np.append(threshold,T)\n", 113 | " if (A[k-1] < threshold[k-1]) :\n", 114 | " labels = np.append(labels,1)\n", 115 | " else:\n", 116 | " labels = np.append(labels,0)\n", 117 | " return labels, threshold" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": { 124 | "collapsed": false 125 | }, 126 | "outputs": [], 127 | "source": [ 128 | "t0 = time()\n", 129 | "np.random.seed(7)\n", 130 | "########################################################################################\n", 131 | "# Regression\n", 132 | "kf = KFold(n_splits=10, shuffle=True, random_state=7)\n", 133 | "scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n", 134 | "\n", 135 | "rfecv = RFECV(estimator=RandomForestRegressor(n_jobs=-1), step=1, cv=kf, scoring=scoring_param, n_jobs=-1)\n", 136 | "FS_model = rfecv.fit(X_train, y_train)\n", 137 | "\n", 138 | "ranks = FS_model.ranking_\n", 139 | "FN =[]\n", 140 | "for i in range(len(ranks)):\n", 141 | " if ranks[i] == 1:\n", 142 | " FN.append(Feature_Names[i])\n", 143 | "print(FN)\n", 144 | "\n", 145 | "X = Chiller_Data[FN].as_matrix()\n", 146 | "X_train, X_test = train_test_split(X, test_size=0.55, shuffle=False)\n", 147 | "\n", 148 | "NE = [int(i) for i in np.linspace(100,1000,num=10)]\n", 149 | "p_grid = dict()\n", 150 | "p_grid = dict(n_estimators = NE)\n", 151 | "\n", 152 | "model = GridSearchCV(estimator = RandomForestRegressor(n_jobs=-1), param_grid = p_grid, scoring = scoring_param, \n", 153 | " cv = kf, n_jobs=-1)\n", 154 | "model.fit(X_train, y_train)\n", 155 | " \n", 156 | "params = model.best_params_\n", 157 | "print(\"Best Est: %s\" % (params['n_estimators']))\n", 158 | " \n", 159 | "Y_Test_Predicted = model.predict(X_test)\n", 160 | " \n", 161 | "rmse = np.sqrt(mean_squared_error(y_test,Y_Test_Predicted))\n", 162 | "data_range = y_test.max() - y_test.min()\n", 163 | "NRMSE = (rmse/data_range) * 100.0\n", 164 | "RSQ = r2_score(y_test,Y_Test_Predicted)\n", 165 | "print(\"Normalized RMSE: %0.3f\" % NRMSE)\n", 166 | "print(\"R-squared: %0.3f\" % RSQ)\n", 167 | "\n", 168 | "Labels, Threshold = calc_dyn_threshold(y_test, Y_Test_Predicted, 2, 2)\n", 169 | "Temp = pd.DataFrame(data={'Actual': y_test, 'Predicted':Y_Test_Predicted, 'Labels':TL_Test, \n", 170 | " 'Threshold':Threshold, 'Pred_Labels': Labels})\n", 171 | "\n", 172 | "print(\"########################################################################################\")\n", 173 | "print(\"Confusion Matrix - testing:\")\n", 174 | "print(confusion_matrix(Temp['Labels'], Temp['Pred_Labels']))\n", 175 | "tn, fp, fn, tp = confusion_matrix(Temp['Labels'], Temp['Pred_Labels']).ravel()\n", 176 | "print(\"True Negative, False Positive, False Negative, True Positive {}.\".format([tn, fp, fn, tp]))\n", 177 | "print(\"False positive means false alarms\")\n", 178 | "print(\"False Negative means missed faults\")\n", 179 | "print(\"########################################################################################\")\n", 180 | "print(\"Classification Report - testing:\")\n", 181 | "print(classification_report(Temp['Labels'], Temp['Pred_Labels'], target_names=['Normal', 'Fault']))\n", 182 | "print(\"########################################################################################\")\n", 183 | "print(\"Accuracy - testing: %0.3f\" % accuracy_score(Temp['Labels'], Temp['Pred_Labels']))\n", 184 | "print(\"########################################################################################\")\n", 185 | "print(\"ROC AUC score - testing: %0.3f\" % roc_auc_score(Temp['Labels'], Temp['Pred_Labels']))\n", 186 | "print(\"########################################################################################\")\n", 187 | "########################################################################################\n", 188 | "\n", 189 | "fig = plt.figure(figsize=(25,20))\n", 190 | "ax = fig.add_subplot(1, 1, 1)\n", 191 | "Data_0 = Temp.loc[Temp['Labels'][Temp['Labels']==0].index]\n", 192 | "Data_1 = Temp.loc[Temp['Labels'][Temp['Labels']==1].index]\n", 193 | "ax.scatter(list(Data_0.index), Data_0['Actual'], c=plt.cm.coolwarm(0.), s=200,\n", 194 | " edgecolors='y', marker='o', label=u'Actual normal data')\n", 195 | "ax.scatter(list(Data_1.index), Data_1['Actual'], c=plt.cm.coolwarm(1.), s=200, \n", 196 | " edgecolors='y', marker='^', label=u'Actual fault data')\n", 197 | "plt.plot(list(Temp.index), Temp['Predicted'], 'c-*', lw = 4, ms = 5, label=u'XGBoost Prediction')\n", 198 | "plt.xlabel('Data index',fontsize=30)\n", 199 | "plt.ylabel('Heat exchanger efficiency of the sub-cooling section',fontsize=30)\n", 200 | "plt.xticks(fontsize=30)\n", 201 | "plt.yticks(fontsize=30)\n", 202 | "plt.legend(loc='best',fontsize=30)\n", 203 | "plt.savefig('M2-Cond-Foul-Actual-Labels-Predictions')\n", 204 | "\n", 205 | "fig = plt.figure(figsize=(25,20))\n", 206 | "ax = fig.add_subplot(1, 1, 1)\n", 207 | "Data_0 = Temp.loc[Temp['Pred_Labels'][Temp['Pred_Labels']==0].index]\n", 208 | "Data_1 = Temp.loc[Temp['Pred_Labels'][Temp['Pred_Labels']==1].index]\n", 209 | "ax.scatter(list(Data_0.index), Data_0['Actual'], c=plt.cm.coolwarm(0.), s=200, \n", 210 | " edgecolors='y', marker='o', label=u'Predicted normal data')\n", 211 | "ax.scatter(list(Data_1.index), Data_1['Actual'], c=plt.cm.coolwarm(1.), s=200,\n", 212 | " edgecolors='y', marker='^', label=u'Predicted fault data')\n", 213 | "plt.plot(list(Temp.index), Temp['Predicted'], 'c-*', lw = 4, ms = 5, label=u'XGBoost Prediction')\n", 214 | "plt.plot(list(Temp.index), Temp['Threshold'], 'k--', lw = 4, label=u'Dynamic threshold')\n", 215 | "plt.xlabel('Data index',fontsize=30)\n", 216 | "plt.ylabel('Heat exchanger efficiency of the sub-cooling section',fontsize=30)\n", 217 | "plt.xticks(fontsize=30)\n", 218 | "plt.yticks(fontsize=30)\n", 219 | "plt.legend(loc='best',fontsize=30)\n", 220 | "plt.savefig('M2-Cond-Foul-RF-Dynamic-Threshold-Predicted-Labels')\n", 221 | "\n", 222 | "print(FN,(model.best_estimator_.feature_importances_))\n", 223 | "\n", 224 | "t1 = time()\n", 225 | "print('Time taken for this trial %f' %(t1-t0))" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": null, 231 | "metadata": { 232 | "collapsed": true 233 | }, 234 | "outputs": [], 235 | "source": [] 236 | } 237 | ], 238 | "metadata": { 239 | "anaconda-cloud": {}, 240 | "kernelspec": { 241 | "display_name": "Python [Root]", 242 | "language": "python", 243 | "name": "Python [Root]" 244 | }, 245 | "language_info": { 246 | "codemirror_mode": { 247 | "name": "ipython", 248 | "version": 3 249 | }, 250 | "file_extension": ".py", 251 | "mimetype": "text/x-python", 252 | "name": "python", 253 | "nbconvert_exporter": "python", 254 | "pygments_lexer": "ipython3", 255 | "version": "3.5.4" 256 | } 257 | }, 258 | "nbformat": 4, 259 | "nbformat_minor": 1 260 | } 261 | --------------------------------------------------------------------------------