├── Chapter6-Case-Study
    ├── SL-10
    │   ├── cf12.xls
    │   ├── fwc10.xls
    │   ├── fwe10.xls
    │   ├── rl10.xls
    │   ├── normal1.xls
    │   ├── normal cf.xls
    │   ├── normal r.xls
    │   ├── Condenser_Fouling_Fault_Data.xlsx
    │   ├── Refrigerant_Leak_Fault_Data.xlsx
    │   ├── Reduced_Condenser_Water_Flow_Fault_Data.xlsx
    │   ├── Reduced_Evaporator_Water_Flow_Fault_Data.xlsx
    │   ├── Refrigerant_Leak_RF_Dynamic_Threshold.ipynb
    │   ├── Condenser_Foul_SVM_Dynamic_Threshold.ipynb
    │   └── Condenser_Foul_RF_Dynamic_Threshold.ipynb
    ├── SL-20
    │   ├── cf20.xls
    │   ├── fwc20.xls
    │   ├── fwe20.xls
    │   ├── rl20.xls
    │   ├── normal1.xls
    │   ├── normal cf.xls
    │   ├── normal r.xls
    │   ├── Condenser_Fouling_Fault_Data.xlsx
    │   ├── Refrigerant_Leak_Fault_Data.xlsx
    │   ├── Reduced_Condenser_Water_Flow_Fault_Data.xlsx
    │   ├── Reduced_Evaporator_Water_Flow_Fault_Data.xlsx
    │   ├── Refrigerant_Leak_SVM_Dynamic_Threshold.ipynb
    │   ├── Refrigerant_Leak_ERF_Dynamic_Threshold.ipynb
    │   ├── Refrigerant_Leak_RF_Dynamic_Threshold.ipynb
    │   └── Condenser_Foul_SVM_Dynamic_Threshold.ipynb
    └── SL-30
        ├── cf30.xls
        ├── fwc30.xls
        ├── fwe30.xls
        ├── rl30.xls
        ├── normal1.xls
        ├── normal cf.xls
        ├── normal r.xls
        ├── Condenser_Fouling_Fault_Data.xlsx
        ├── Refrigerant_Leak_Fault_Data.xlsx
        ├── Reduced_Condenser_Water_Flow_Fault_Data.xlsx
        ├── Reduced_Evaporator_Water_Flow_Fault_Data.xlsx
        ├── Refrigerant_Leak_SVM_Dynamic_Threshold.ipynb
        ├── Refrigerant_Leak_ERF_Dynamic_Threshold.ipynb
        ├── Refrigerant_Leak_RF_Dynamic_Threshold.ipynb
        └── Condenser_Foul_SVM_Dynamic_Threshold.ipynb
├── Chapter4-Detection-of-Faults
    ├── Climate_Data.xls
    ├── EnergyData_D1.xlsx
    ├── EnergyData_D2.xlsx
    ├── EnergyData_D3.xlsx
    └── EnergyData_D4.xlsx
├── Chapter5-Threshold-Comparison
    ├── Climate_Data.xls
    ├── EnergyData_D1.xlsx
    ├── EnergyData_D2.xlsx
    ├── EnergyData_D3.xlsx
    └── ~$Feature_Scores.xlsx
├── Chapter5-EnergyModel-Comparison
    ├── Climate_Data.xls
    ├── EnergyData_D1.xlsx
    ├── EnergyData_D2.xlsx
    ├── EnergyData_D3.xlsx
    ├── Energy_Modeling_ERF_D1.ipynb
    ├── Energy_Modeling_ERF_D2.ipynb
    ├── Energy_Modeling_ERF_D3.ipynb
    ├── Energy_Modeling_RF_D1.ipynb
    ├── Energy_Modeling_RF_D2.ipynb
    ├── Energy_Modeling_RF_D3.ipynb
    ├── Energy_Modeling_SVM_D1.ipynb
    ├── Energy_Modeling_SVM_D2.ipynb
    └── Energy_Modeling_SVM_D3.ipynb
└── README.md


/Chapter6-Case-Study/SL-10/cf12.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-10/cf12.xls


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-10/fwc10.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-10/fwc10.xls


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-10/fwe10.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-10/fwe10.xls


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-10/rl10.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-10/rl10.xls


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-20/cf20.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-20/cf20.xls


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-20/fwc20.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-20/fwc20.xls


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-20/fwe20.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-20/fwe20.xls


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-20/rl20.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-20/rl20.xls


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-30/cf30.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-30/cf30.xls


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-30/fwc30.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-30/fwc30.xls


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-30/fwe30.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-30/fwe30.xls


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-30/rl30.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-30/rl30.xls


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-10/normal1.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-10/normal1.xls


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-20/normal1.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-20/normal1.xls


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-30/normal1.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-30/normal1.xls


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-10/normal cf.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-10/normal cf.xls


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-10/normal r.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-10/normal r.xls


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-20/normal cf.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-20/normal cf.xls


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-20/normal r.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-20/normal r.xls


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-30/normal cf.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-30/normal cf.xls


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-30/normal r.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-30/normal r.xls


--------------------------------------------------------------------------------
/Chapter4-Detection-of-Faults/Climate_Data.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter4-Detection-of-Faults/Climate_Data.xls


--------------------------------------------------------------------------------
/Chapter4-Detection-of-Faults/EnergyData_D1.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter4-Detection-of-Faults/EnergyData_D1.xlsx


--------------------------------------------------------------------------------
/Chapter4-Detection-of-Faults/EnergyData_D2.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter4-Detection-of-Faults/EnergyData_D2.xlsx


--------------------------------------------------------------------------------
/Chapter4-Detection-of-Faults/EnergyData_D3.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter4-Detection-of-Faults/EnergyData_D3.xlsx


--------------------------------------------------------------------------------
/Chapter4-Detection-of-Faults/EnergyData_D4.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter4-Detection-of-Faults/EnergyData_D4.xlsx


--------------------------------------------------------------------------------
/Chapter5-Threshold-Comparison/Climate_Data.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter5-Threshold-Comparison/Climate_Data.xls


--------------------------------------------------------------------------------
/Chapter5-EnergyModel-Comparison/Climate_Data.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter5-EnergyModel-Comparison/Climate_Data.xls


--------------------------------------------------------------------------------
/Chapter5-EnergyModel-Comparison/EnergyData_D1.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter5-EnergyModel-Comparison/EnergyData_D1.xlsx


--------------------------------------------------------------------------------
/Chapter5-EnergyModel-Comparison/EnergyData_D2.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter5-EnergyModel-Comparison/EnergyData_D2.xlsx


--------------------------------------------------------------------------------
/Chapter5-EnergyModel-Comparison/EnergyData_D3.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter5-EnergyModel-Comparison/EnergyData_D3.xlsx


--------------------------------------------------------------------------------
/Chapter5-Threshold-Comparison/EnergyData_D1.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter5-Threshold-Comparison/EnergyData_D1.xlsx


--------------------------------------------------------------------------------
/Chapter5-Threshold-Comparison/EnergyData_D2.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter5-Threshold-Comparison/EnergyData_D2.xlsx


--------------------------------------------------------------------------------
/Chapter5-Threshold-Comparison/EnergyData_D3.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter5-Threshold-Comparison/EnergyData_D3.xlsx


--------------------------------------------------------------------------------
/Chapter5-Threshold-Comparison/~$Feature_Scores.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter5-Threshold-Comparison/~$Feature_Scores.xlsx


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-10/Condenser_Fouling_Fault_Data.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-10/Condenser_Fouling_Fault_Data.xlsx


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-10/Refrigerant_Leak_Fault_Data.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-10/Refrigerant_Leak_Fault_Data.xlsx


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-20/Condenser_Fouling_Fault_Data.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-20/Condenser_Fouling_Fault_Data.xlsx


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-20/Refrigerant_Leak_Fault_Data.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-20/Refrigerant_Leak_Fault_Data.xlsx


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-30/Condenser_Fouling_Fault_Data.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-30/Condenser_Fouling_Fault_Data.xlsx


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-30/Refrigerant_Leak_Fault_Data.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-30/Refrigerant_Leak_Fault_Data.xlsx


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-10/Reduced_Condenser_Water_Flow_Fault_Data.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-10/Reduced_Condenser_Water_Flow_Fault_Data.xlsx


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-20/Reduced_Condenser_Water_Flow_Fault_Data.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-20/Reduced_Condenser_Water_Flow_Fault_Data.xlsx


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-30/Reduced_Condenser_Water_Flow_Fault_Data.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-30/Reduced_Condenser_Water_Flow_Fault_Data.xlsx


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Fault-Detection-HVAC
2 | Python source code and datasets used in my doctoral dissertation, *Detection of faults in HVAC systems using tree-based ensemble models and dynamic thresholds*.
3 | 


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-10/Reduced_Evaporator_Water_Flow_Fault_Data.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-10/Reduced_Evaporator_Water_Flow_Fault_Data.xlsx


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-20/Reduced_Evaporator_Water_Flow_Fault_Data.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-20/Reduced_Evaporator_Water_Flow_Fault_Data.xlsx


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-30/Reduced_Evaporator_Water_Flow_Fault_Data.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DC-777/Fault-Detection-HVAC/HEAD/Chapter6-Case-Study/SL-30/Reduced_Evaporator_Water_Flow_Fault_Data.xlsx


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-20/Refrigerant_Leak_SVM_Dynamic_Threshold.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": false
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import numpy as np\n",
 12 |     "import pandas as pd\n",
 13 |     "from sklearn.model_selection import GridSearchCV, KFold, train_test_split\n",
 14 |     "import matplotlib.pyplot as plt\n",
 15 |     "import seaborn as sns\n",
 16 |     "sns.set(color_codes=True)\n",
 17 |     "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer, adjusted_rand_score, \\\n",
 18 |     "                    accuracy_score, f1_score, confusion_matrix, classification_report, roc_auc_score, recall_score\n",
 19 |     "from time import time\n",
 20 |     "from sklearn.preprocessing import MinMaxScaler\n",
 21 |     "import scipy.stats as st\n",
 22 |     "from sklearn.feature_selection import RFE, RFECV, SelectKBest, mutual_info_regression\n",
 23 |     "from sklearn.svm import SVR\n",
 24 |     "from sklearn.pipeline import Pipeline\n",
 25 |     "import pprint as pp\n",
 26 |     "%matplotlib inline"
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "code",
 31 |    "execution_count": null,
 32 |    "metadata": {
 33 |     "collapsed": false
 34 |    },
 35 |    "outputs": [],
 36 |    "source": [
 37 |     "Chiller_Data = pd.read_excel('Refrigerant_Leak_Fault_Data.xlsx')"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "code",
 42 |    "execution_count": null,
 43 |    "metadata": {
 44 |     "collapsed": true
 45 |    },
 46 |    "outputs": [],
 47 |    "source": [
 48 |     "Chiller_Data = Chiller_Data.loc[Chiller_Data['kW'] != 1.682000e-45]\n",
 49 |     "Chiller_Data.reset_index(drop=True, inplace=True)"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "code",
 54 |    "execution_count": null,
 55 |    "metadata": {
 56 |     "collapsed": false
 57 |    },
 58 |    "outputs": [],
 59 |    "source": [
 60 |     "Chiller_Data['Target_EPS'] = (Chiller_Data['TRC_sub'])/(Chiller_Data['TRC']-Chiller_Data['TCI'])\n",
 61 |     "Chiller_Data['Target_LMTD'] = (Chiller_Data['TCO']-Chiller_Data['TCI'])/np.log((Chiller_Data['TRC']-Chiller_Data['TCI'])/(Chiller_Data['TRC']-Chiller_Data['TCO']))\n",
 62 |     "Chiller_Data['Lag1'] = (Chiller_Data['Target_EPS'].shift(1))\n",
 63 |     "Chiller_Data['Lag2'] = (Chiller_Data['Target_LMTD'].shift(1))\n",
 64 |     "Chiller_Data.dropna(axis=0,inplace=True)\n",
 65 |     "#Time_data = Chiller_Data['Time (minutes)']"
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "code",
 70 |    "execution_count": null,
 71 |    "metadata": {
 72 |     "collapsed": true
 73 |    },
 74 |    "outputs": [],
 75 |    "source": [
 76 |     "y = Chiller_Data[['Target_EPS','Target_LMTD']].as_matrix()\n",
 77 |     "True_Labels = Chiller_Data['Label'].as_matrix()\n",
 78 |     "Chiller_Data.drop(['Target_EPS','Target_LMTD','Label','Time (minutes)'], axis=1, inplace=True)\n",
 79 |     "X = Chiller_Data.as_matrix()\n",
 80 |     "Feature_Names = list(Chiller_Data)"
 81 |    ]
 82 |   },
 83 |   {
 84 |    "cell_type": "code",
 85 |    "execution_count": null,
 86 |    "metadata": {
 87 |     "collapsed": true
 88 |    },
 89 |    "outputs": [],
 90 |    "source": [
 91 |     "#################################################################################################\n",
 92 |     "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.55, shuffle=False)\n",
 93 |     "TL_train, TL_Test = train_test_split(True_Labels, test_size=0.55, shuffle=False)\n",
 94 |     "#DT_train, DT_Test = train_test_split(Time_data, test_size=0.55, shuffle=False)\n",
 95 |     "#################################################################################################"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": null,
101 |    "metadata": {
102 |     "collapsed": false
103 |    },
104 |    "outputs": [],
105 |    "source": [
106 |     "def calc_dyn_threshold(A_EPS, P_EPS, A_LMTD, P_LMTD, I, N):\n",
107 |     "    # Tune I (rolling-window length) and N (number of standard deviations below the window mean) to control the false-alarm rate, e.g. increase I or N to reduce false alarms.\n",
108 |     "    threshold_EPS = np.zeros(I-1)\n",
109 |     "    threshold_EPS[0:(I-1)] = P_EPS[0:(I-1)]\n",
110 |     "    threshold_LMTD = np.zeros(I-1)\n",
111 |     "    threshold_LMTD[0:(I-1)] = P_LMTD[0:(I-1)]\n",
112 |     "    labels = np.zeros(I-1)\n",
113 |     "    for k in np.arange(I,len(P_EPS)+1):\n",
114 |     "        mu_EPS = np.mean(P_EPS[(k-I):k])\n",
115 |     "        sigma_EPS = np.std(P_EPS[(k-I):k])\n",
116 |     "        T_EPS = mu_EPS - N*sigma_EPS\n",
117 |     "        threshold_EPS = np.append(threshold_EPS,T_EPS)\n",
118 |     "        mu_LMTD = np.mean(P_LMTD[(k-I):k])\n",
119 |     "        sigma_LMTD = np.std(P_LMTD[(k-I):k])\n",
120 |     "        T_LMTD = mu_LMTD - N*sigma_LMTD\n",
121 |     "        threshold_LMTD = np.append(threshold_LMTD,T_LMTD)\n",
122 |     "        \n",
123 |     "        if (A_EPS[k-1] < threshold_EPS[k-1] or A_LMTD[k-1] < threshold_LMTD[k-1]) :\n",
124 |     "            labels = np.append(labels,1)\n",
125 |     "        else:\n",
126 |     "            labels = np.append(labels,0)\n",
127 |     "    return labels, threshold_EPS, threshold_LMTD"
128 |    ]
129 |   },
130 |   {
131 |    "cell_type": "code",
132 |    "execution_count": null,
133 |    "metadata": {
134 |     "collapsed": false
135 |    },
136 |    "outputs": [],
137 |    "source": [
138 |     "t0 = time()\n",
139 |     "np.random.seed(7)\n",
140 |     "########################################################################################\n",
141 |     "# Regression\n",
142 |     "kf = KFold(n_splits=10, shuffle=True, random_state=7)\n",
143 |     "scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n",
144 |     "Y_Test_Pred_scaled = np.zeros((len(y_test),2))\n",
145 |     "\n",
146 |     "scaler = MinMaxScaler()\n",
147 |     "scaler.fit(y_train)\n",
148 |     "y_train_scaled = scaler.transform(y_train)\n",
149 |     "\n",
150 |     "estimators = []\n",
151 |     "estimators.append(('standardize', MinMaxScaler()))\n",
152 |     "estimators.append(('FS', SelectKBest(mutual_info_regression)))\n",
153 |     "estimators.append(('SVM', SVR()))\n",
154 |     "pipe = Pipeline(estimators)\n",
155 |     "    \n",
156 |     "p_grid = dict(FS__k = [8, 16],\n",
157 |     "              SVM__gamma = np.logspace(-3, 0, 4),\n",
158 |     "              SVM__C = np.logspace(0, 3, 4))\n",
159 |     "    \n",
160 |     "model = GridSearchCV(estimator = pipe, param_grid = p_grid, scoring = scoring_param, cv = kf, n_jobs=-1)\n",
161 |     "model.fit(X_train, y_train_scaled[:,0])\n",
162 |     "    \n",
163 |     "params = model.best_params_\n",
164 |     "print(\"Best best k: %s Best gamma: %f Best C: %s\" % (params['FS__k'], params['SVM__gamma'], params['SVM__C']))\n",
165 |     "    \n",
166 |     "Y_Test_Pred_scaled[:,0] = model.predict(X_test)\n",
167 |     "\n",
168 |     "model = GridSearchCV(estimator = pipe, param_grid = p_grid, scoring = scoring_param, cv = kf, n_jobs=-1)\n",
169 |     "model.fit(X_train, y_train_scaled[:,1])\n",
170 |     "    \n",
171 |     "params = model.best_params_\n",
172 |     "print(\"Best best k: %s Best gamma: %f Best C: %s\" % (params['FS__k'], params['SVM__gamma'], params['SVM__C']))\n",
173 |     "    \n",
174 |     "Y_Test_Pred_scaled[:,1] = model.predict(X_test)\n",
175 |     "Y_Test_Pred = scaler.inverse_transform(Y_Test_Pred_scaled)\n",
176 |     "\n",
177 |     "P_EPS = Y_Test_Pred[:,0]\n",
178 |     "P_LMTD = Y_Test_Pred[:,1]\n",
179 |     "    \n",
180 |     "Labels, Threshold_EPS, Threshold_LMTD = calc_dyn_threshold(y_test[:,0], P_EPS, y_test[:,1], P_LMTD, 2, 2)\n",
181 |     "Temp = pd.DataFrame(data={'Labels':TL_Test, 'Pred_Labels': Labels})\n",
182 |     "\n",
183 |     "print(\"########################################################################################\")\n",
184 |     "print(\"Confusion Matrix - testing:\")\n",
185 |     "print(confusion_matrix(Temp['Labels'], Temp['Pred_Labels']))\n",
186 |     "tn, fp, fn, tp = confusion_matrix(Temp['Labels'], Temp['Pred_Labels']).ravel()\n",
187 |     "print(\"True Negative, False Positive, False Negative, True Positive {}.\".format([tn, fp, fn, tp]))\n",
188 |     "print(\"False positive means false alarms\")\n",
189 |     "print(\"False Negative means missed faults\")\n",
190 |     "print(\"########################################################################################\")\n",
191 |     "print(\"Classification Report - testing:\")\n",
192 |     "print(classification_report(Temp['Labels'], Temp['Pred_Labels'], target_names=['Normal', 'Fault']))\n",
193 |     "print(\"########################################################################################\")\n",
194 |     "print(\"Accuracy - testing: %0.3f\" % accuracy_score(Temp['Labels'], Temp['Pred_Labels']))\n",
195 |     "print(\"########################################################################################\")\n",
196 |     "print(\"ROC AUC score - testing: %0.3f\" % roc_auc_score(Temp['Labels'], Temp['Pred_Labels']))\n",
197 |     "print(\"########################################################################################\")\n",
198 |     "########################################################################################\n",
199 |     "    \n",
200 |     "t1 = time()\n",
201 |     "print('Time taken for this trial %f' %(t1-t0))"
202 |    ]
203 |   },
204 |   {
205 |    "cell_type": "code",
206 |    "execution_count": null,
207 |    "metadata": {
208 |     "collapsed": true
209 |    },
210 |    "outputs": [],
211 |    "source": []
212 |   }
213 |  ],
214 |  "metadata": {
215 |   "anaconda-cloud": {},
216 |   "kernelspec": {
217 |    "display_name": "Python [Root]",
218 |    "language": "python",
219 |    "name": "Python [Root]"
220 |   },
221 |   "language_info": {
222 |    "codemirror_mode": {
223 |     "name": "ipython",
224 |     "version": 3
225 |    },
226 |    "file_extension": ".py",
227 |    "mimetype": "text/x-python",
228 |    "name": "python",
229 |    "nbconvert_exporter": "python",
230 |    "pygments_lexer": "ipython3",
231 |    "version": "3.5.4"
232 |   }
233 |  },
234 |  "nbformat": 4,
235 |  "nbformat_minor": 1
236 | }
237 | 


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-30/Refrigerant_Leak_SVM_Dynamic_Threshold.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": false
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import numpy as np\n",
 12 |     "import pandas as pd\n",
 13 |     "from sklearn.model_selection import GridSearchCV, KFold, train_test_split\n",
 14 |     "import matplotlib.pyplot as plt\n",
 15 |     "import seaborn as sns\n",
 16 |     "sns.set(color_codes=True)\n",
 17 |     "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer, adjusted_rand_score, \\\n",
 18 |     "                    accuracy_score, f1_score, confusion_matrix, classification_report, roc_auc_score, recall_score\n",
 19 |     "from time import time\n",
 20 |     "from sklearn.preprocessing import MinMaxScaler\n",
 21 |     "import scipy.stats as st\n",
 22 |     "from sklearn.feature_selection import RFE, RFECV, SelectKBest, mutual_info_regression\n",
 23 |     "from sklearn.svm import SVR\n",
 24 |     "from sklearn.pipeline import Pipeline\n",
 25 |     "import pprint as pp\n",
 26 |     "%matplotlib inline"
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "code",
 31 |    "execution_count": null,
 32 |    "metadata": {
 33 |     "collapsed": false
 34 |    },
 35 |    "outputs": [],
 36 |    "source": [
 37 |     "Chiller_Data = pd.read_excel('Refrigerant_Leak_Fault_Data.xlsx')"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "code",
 42 |    "execution_count": null,
 43 |    "metadata": {
 44 |     "collapsed": true
 45 |    },
 46 |    "outputs": [],
 47 |    "source": [
 48 |     "Chiller_Data = Chiller_Data.loc[Chiller_Data['kW'] != 1.682000e-45]\n",
 49 |     "Chiller_Data.reset_index(drop=True, inplace=True)"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "code",
 54 |    "execution_count": null,
 55 |    "metadata": {
 56 |     "collapsed": false
 57 |    },
 58 |    "outputs": [],
 59 |    "source": [
 60 |     "Chiller_Data['Target_EPS'] = (Chiller_Data['TRC_sub'])/(Chiller_Data['TRC']-Chiller_Data['TCI'])\n",
 61 |     "Chiller_Data['Target_LMTD'] = (Chiller_Data['TCO']-Chiller_Data['TCI'])/np.log((Chiller_Data['TRC']-Chiller_Data['TCI'])/(Chiller_Data['TRC']-Chiller_Data['TCO']))\n",
 62 |     "Chiller_Data['Lag1'] = (Chiller_Data['Target_EPS'].shift(1))\n",
 63 |     "Chiller_Data['Lag2'] = (Chiller_Data['Target_LMTD'].shift(1))\n",
 64 |     "Chiller_Data.dropna(axis=0,inplace=True)\n",
 65 |     "#Time_data = Chiller_Data['Time (minutes)']"
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "code",
 70 |    "execution_count": null,
 71 |    "metadata": {
 72 |     "collapsed": true
 73 |    },
 74 |    "outputs": [],
 75 |    "source": [
 76 |     "y = Chiller_Data[['Target_EPS','Target_LMTD']].values  # regression targets; .values replaces as_matrix(), which pandas 1.0 removed\n",
 77 |     "True_Labels = Chiller_Data['Label'].values  # fault labels, later compared with the threshold-based predictions\n",
 78 |     "Chiller_Data.drop(['Target_EPS','Target_LMTD','Label','Time (minutes)'], axis=1, inplace=True)\n",
 79 |     "X = Chiller_Data.values  # every remaining column is used as a model input\n",
 80 |     "Feature_Names = list(Chiller_Data)  # column names in the same order as the columns of X"
 81 |    ]
 82 |   },
 83 |   {
 84 |    "cell_type": "code",
 85 |    "execution_count": null,
 86 |    "metadata": {
 87 |     "collapsed": true
 88 |    },
 89 |    "outputs": [],
 90 |    "source": [
 91 |     "#################################################################################################\n",
 92 |     "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.55, shuffle=False)\n",
 93 |     "TL_train, TL_Test = train_test_split(True_Labels, test_size=0.55, shuffle=False)\n",
 94 |     "#DT_train, DT_Test = train_test_split(Time_data, test_size=0.55, shuffle=False)\n",
 95 |     "#################################################################################################"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": null,
101 |    "metadata": {
102 |     "collapsed": false
103 |    },
104 |    "outputs": [],
105 |    "source": [
106 |     "def calc_dyn_threshold(A_EPS, P_EPS, A_LMTD, P_LMTD, I, N):\n",
107 |     "    # Flag sample k-1 as a fault (1) when either actual value (A_*) lies below mean - N*std of the I\n",
108 |     "    # predictions (P_*) ending at k-1. Returns (labels, threshold_EPS, threshold_LMTD) as float arrays.\n",
109 |     "    # Control false alarm rates by tuning I and N, e.g. increase I or N to reduce false alarms.\n",
110 |     "    if I < 1:\n",
111 |     "        raise ValueError('I must be at least 1')\n",
112 |     "    P_EPS = np.asarray(P_EPS, dtype=float)\n",
113 |     "    P_LMTD = np.asarray(P_LMTD, dtype=float)\n",
114 |     "    # Warm-up: the first I-1 samples have no full window, so their threshold is the prediction itself\n",
115 |     "    # and their label stays 0. Outputs are preallocated; np.append in the loop copied them every step.\n",
116 |     "    threshold_EPS = P_EPS.copy()\n",
117 |     "    threshold_LMTD = P_LMTD.copy()\n",
118 |     "    labels = np.zeros(len(P_EPS))\n",
119 |     "    for k in range(I, len(P_EPS)+1):\n",
120 |     "        win_EPS = P_EPS[(k-I):k]\n",
121 |     "        threshold_EPS[k-1] = np.mean(win_EPS) - N*np.std(win_EPS)\n",
122 |     "        win_LMTD = P_LMTD[(k-I):k]\n",
123 |     "        threshold_LMTD[k-1] = np.mean(win_LMTD) - N*np.std(win_LMTD)\n",
124 |     "        \n",
125 |     "        if (A_EPS[k-1] < threshold_EPS[k-1] or A_LMTD[k-1] < threshold_LMTD[k-1]) :\n",
126 |     "            labels[k-1] = 1\n",
127 |     "    return labels, threshold_EPS, threshold_LMTD"
128 |    ]
129 |   },
130 |   {
131 |    "cell_type": "code",
132 |    "execution_count": null,
133 |    "metadata": {
134 |     "collapsed": false
135 |    },
136 |    "outputs": [],
137 |    "source": [
138 |     "t0 = time()\n",
139 |     "np.random.seed(7)\n",
140 |     "########################################################################################\n",
141 |     "# Regression\n",
142 |     "kf = KFold(n_splits=10, shuffle=True, random_state=7)\n",
143 |     "scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n",
144 |     "Y_Test_Pred_scaled = np.zeros((len(y_test),2))\n",
145 |     "\n",
146 |     "scaler = MinMaxScaler()\n",
147 |     "scaler.fit(y_train)\n",
148 |     "y_train_scaled = scaler.transform(y_train)\n",
149 |     "\n",
150 |     "estimators = []\n",
151 |     "estimators.append(('standardize', MinMaxScaler()))\n",
152 |     "estimators.append(('FS', SelectKBest(mutual_info_regression)))\n",
153 |     "estimators.append(('SVM', SVR()))\n",
154 |     "pipe = Pipeline(estimators)\n",
155 |     "    \n",
156 |     "p_grid = dict(FS__k = [8, 16],\n",
157 |     "              SVM__gamma = np.logspace(-3, 0, 4),\n",
158 |     "              SVM__C = np.logspace(0, 3, 4))\n",
159 |     "    \n",
160 |     "model = GridSearchCV(estimator = pipe, param_grid = p_grid, scoring = scoring_param, cv = kf, n_jobs=-1)\n",
161 |     "model.fit(X_train, y_train_scaled[:,0])  # column 0 = Target_EPS, scaled by the MinMaxScaler fitted on y_train\n",
162 |     "    \n",
163 |     "params = model.best_params_\n",
164 |     "print(\"Best k: %s Best gamma: %f Best C: %s\" % (params['FS__k'], params['SVM__gamma'], params['SVM__C']))\n",
165 |     "    \n",
166 |     "Y_Test_Pred_scaled[:,0] = model.predict(X_test)\n",
167 |     "\n",
168 |     "model = GridSearchCV(estimator = pipe, param_grid = p_grid, scoring = scoring_param, cv = kf, n_jobs=-1)\n",
169 |     "model.fit(X_train, y_train_scaled[:,1])  # column 1 = Target_LMTD, same pipeline and grid as above\n",
170 |     "    \n",
171 |     "params = model.best_params_\n",
172 |     "print(\"Best k: %s Best gamma: %f Best C: %s\" % (params['FS__k'], params['SVM__gamma'], params['SVM__C']))\n",
173 |     "    \n",
174 |     "Y_Test_Pred_scaled[:,1] = model.predict(X_test)\n",
175 |     "Y_Test_Pred = scaler.inverse_transform(Y_Test_Pred_scaled)\n",
176 |     "\n",
177 |     "P_EPS = Y_Test_Pred[:,0]\n",
178 |     "P_LMTD = Y_Test_Pred[:,1]\n",
179 |     "    \n",
180 |     "Labels, Threshold_EPS, Threshold_LMTD = calc_dyn_threshold(y_test[:,0], P_EPS, y_test[:,1], P_LMTD, 2, 2)\n",
181 |     "Temp = pd.DataFrame(data={'Labels':TL_Test, 'Pred_Labels': Labels})\n",
182 |     "\n",
183 |     "print(\"########################################################################################\")\n",
184 |     "print(\"Confusion Matrix - testing:\")\n",
185 |     "print(confusion_matrix(Temp['Labels'], Temp['Pred_Labels']))\n",
186 |     "tn, fp, fn, tp = confusion_matrix(Temp['Labels'], Temp['Pred_Labels']).ravel()\n",
187 |     "print(\"True Negative, False Positive, False Negative, True Positive {}.\".format([tn, fp, fn, tp]))\n",
188 |     "print(\"False positive means false alarms\")\n",
189 |     "print(\"False Negative means missed faults\")\n",
190 |     "print(\"########################################################################################\")\n",
191 |     "print(\"Classification Report - testing:\")\n",
192 |     "print(classification_report(Temp['Labels'], Temp['Pred_Labels'], target_names=['Normal', 'Fault']))\n",
193 |     "print(\"########################################################################################\")\n",
194 |     "print(\"Accuracy - testing: %0.3f\" % accuracy_score(Temp['Labels'], Temp['Pred_Labels']))\n",
195 |     "print(\"########################################################################################\")\n",
196 |     "print(\"ROC AUC score - testing: %0.3f\" % roc_auc_score(Temp['Labels'], Temp['Pred_Labels']))\n",
197 |     "print(\"########################################################################################\")\n",
198 |     "########################################################################################\n",
199 |     "    \n",
200 |     "t1 = time()\n",
201 |     "print('Time taken for this trial %f' %(t1-t0))"
202 |    ]
203 |   },
204 |   {
205 |    "cell_type": "code",
206 |    "execution_count": null,
207 |    "metadata": {
208 |     "collapsed": true
209 |    },
210 |    "outputs": [],
211 |    "source": []
212 |   }
213 |  ],
214 |  "metadata": {
215 |   "anaconda-cloud": {},
216 |   "kernelspec": {
217 |    "display_name": "Python [Root]",
218 |    "language": "python",
219 |    "name": "Python [Root]"
220 |   },
221 |   "language_info": {
222 |    "codemirror_mode": {
223 |     "name": "ipython",
224 |     "version": 3
225 |    },
226 |    "file_extension": ".py",
227 |    "mimetype": "text/x-python",
228 |    "name": "python",
229 |    "nbconvert_exporter": "python",
230 |    "pygments_lexer": "ipython3",
231 |    "version": "3.5.4"
232 |   }
233 |  },
234 |  "nbformat": 4,
235 |  "nbformat_minor": 1
236 | }
237 | 


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-20/Refrigerant_Leak_ERF_Dynamic_Threshold.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": false
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import numpy as np\n",
 12 |     "import pandas as pd\n",
 13 |     "from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, TimeSeriesSplit, KFold, RepeatedKFold, \\\n",
 14 |     "                                    train_test_split\n",
 15 |     "import matplotlib.pyplot as plt\n",
 16 |     "import seaborn as sns\n",
 17 |     "sns.set(color_codes=True)\n",
 18 |     "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer, adjusted_rand_score, \\\n",
 19 |     "                    accuracy_score, f1_score, confusion_matrix, classification_report, roc_auc_score, recall_score\n",
 20 |     "from time import time\n",
 21 |     "from sklearn.preprocessing import MinMaxScaler\n",
 22 |     "from sklearn.preprocessing import quantile_transform\n",
 23 |     "import scipy.stats as st\n",
 24 |     "from sklearn.feature_selection import RFE, RFECV, SelectFromModel\n",
 25 |     "from xgboost import XGBRegressor\n",
 26 |     "from sklearn.ensemble import ExtraTreesRegressor\n",
 27 |     "from sklearn.pipeline import Pipeline\n",
 28 |     "from sklearn.multioutput import MultiOutputRegressor\n",
 29 |     "import pprint as pp\n",
 30 |     "import datetime\n",
 31 |     "%matplotlib inline"
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "code",
 36 |    "execution_count": null,
 37 |    "metadata": {
 38 |     "collapsed": false
 39 |    },
 40 |    "outputs": [],
 41 |    "source": [
 42 |     "Chiller_Data = pd.read_excel('Refrigerant_Leak_Fault_Data.xlsx')"
 43 |    ]
 44 |   },
 45 |   {
 46 |    "cell_type": "code",
 47 |    "execution_count": null,
 48 |    "metadata": {
 49 |     "collapsed": false
 50 |    },
 51 |    "outputs": [],
 52 |    "source": [
 53 |     "Chiller_Data = Chiller_Data.loc[Chiller_Data['kW'] != 1.682000e-45]\n",
 54 |     "Chiller_Data.reset_index(drop=True, inplace=True)"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "code",
 59 |    "execution_count": null,
 60 |    "metadata": {
 61 |     "collapsed": false
 62 |    },
 63 |    "outputs": [],
 64 |    "source": [
 65 |     "Chiller_Data['Target_EPS'] = (Chiller_Data['TRC_sub'])/(Chiller_Data['TRC']-Chiller_Data['TCI'])\n",
 66 |     "Chiller_Data['Target_LMTD'] = (Chiller_Data['TCO']-Chiller_Data['TCI'])/np.log((Chiller_Data['TRC']-Chiller_Data['TCI'])/(Chiller_Data['TRC']-Chiller_Data['TCO']))\n",
 67 |     "Chiller_Data['Lag1'] = (Chiller_Data['Target_EPS'].shift(1))\n",
 68 |     "Chiller_Data['Lag2'] = (Chiller_Data['Target_LMTD'].shift(1))\n",
 69 |     "Chiller_Data.dropna(axis=0,inplace=True)\n",
 70 |     "#Time_data = Chiller_Data['Time (minutes)']"
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "code",
 75 |    "execution_count": null,
 76 |    "metadata": {
 77 |     "collapsed": true
 78 |    },
 79 |    "outputs": [],
 80 |    "source": [
 81 |     "y = Chiller_Data[['Target_EPS','Target_LMTD']].values  # regression targets; .values replaces as_matrix(), which pandas 1.0 removed\n",
 82 |     "True_Labels = Chiller_Data['Label'].values  # fault labels, later compared with the threshold-based predictions\n",
 83 |     "Chiller_Data.drop(['Target_EPS','Target_LMTD','Label','Time (minutes)'], axis=1, inplace=True)\n",
 84 |     "X = Chiller_Data.values  # every remaining column is used as a model input\n",
 85 |     "Feature_Names = list(Chiller_Data)  # column names in the same order as the columns of X"
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "code",
 90 |    "execution_count": null,
 91 |    "metadata": {
 92 |     "collapsed": false
 93 |    },
 94 |    "outputs": [],
 95 |    "source": [
 96 |     "#################################################################################################\n",
 97 |     "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.55, shuffle=False)\n",
 98 |     "TL_train, TL_Test = train_test_split(True_Labels, test_size=0.55, shuffle=False)\n",
 99 |     "#DT_train, DT_Test = train_test_split(Time_data, test_size=0.55, shuffle=False)\n",
100 |     "#################################################################################################"
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "code",
105 |    "execution_count": null,
106 |    "metadata": {
107 |     "collapsed": true
108 |    },
109 |    "outputs": [],
110 |    "source": [
111 |     "def calc_dyn_threshold(A_EPS, P_EPS, A_LMTD, P_LMTD, I, N):\n",
112 |     "    # Flag sample k-1 as a fault (1) when either actual value (A_*) lies below mean - N*std of the I\n",
113 |     "    # predictions (P_*) ending at k-1. Returns (labels, threshold_EPS, threshold_LMTD) as float arrays.\n",
114 |     "    # Control false alarm rates by tuning I and N, e.g. increase I or N to reduce false alarms.\n",
115 |     "    if I < 1:\n",
116 |     "        raise ValueError('I must be at least 1')\n",
117 |     "    P_EPS = np.asarray(P_EPS, dtype=float)\n",
118 |     "    P_LMTD = np.asarray(P_LMTD, dtype=float)\n",
119 |     "    # Warm-up: the first I-1 samples have no full window, so their threshold is the prediction itself\n",
120 |     "    # and their label stays 0. Outputs are preallocated; np.append in the loop copied them every step.\n",
121 |     "    threshold_EPS = P_EPS.copy()\n",
122 |     "    threshold_LMTD = P_LMTD.copy()\n",
123 |     "    labels = np.zeros(len(P_EPS))\n",
124 |     "    for k in range(I, len(P_EPS)+1):\n",
125 |     "        win_EPS = P_EPS[(k-I):k]\n",
126 |     "        threshold_EPS[k-1] = np.mean(win_EPS) - N*np.std(win_EPS)\n",
127 |     "        win_LMTD = P_LMTD[(k-I):k]\n",
128 |     "        threshold_LMTD[k-1] = np.mean(win_LMTD) - N*np.std(win_LMTD)\n",
129 |     "        \n",
130 |     "        if (A_EPS[k-1] < threshold_EPS[k-1] or A_LMTD[k-1] < threshold_LMTD[k-1]) :\n",
131 |     "            labels[k-1] = 1\n",
132 |     "    return labels, threshold_EPS, threshold_LMTD"
133 |    ]
134 |   },
135 |   {
136 |    "cell_type": "code",
137 |    "execution_count": null,
138 |    "metadata": {
139 |     "collapsed": false
140 |    },
141 |    "outputs": [],
142 |    "source": [
143 |     "t0 = time()\n",
144 |     "np.random.seed(7)\n",
145 |     "########################################################################################\n",
146 |     "# Regression\n",
147 |     "kf = KFold(n_splits=10, shuffle=True, random_state=7)\n",
148 |     "scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n",
149 |     "\n",
150 |     "rfecv = RFECV(estimator=ExtraTreesRegressor(n_jobs=-1), step=1, cv=kf, scoring=scoring_param, n_jobs=-1)\n",
151 |     "FS_model = rfecv.fit(X_train, y_train[:,0])\n",
152 |     "\n",
153 |     "# ranking_ == 1 marks the features RFECV kept; ranks[i] belongs to Feature_Names[i] while X_train holds all columns\n",
154 |     "ranks = FS_model.ranking_\n",
155 |     "FN = [Feature_Names[i] for i in range(len(ranks)) if ranks[i] == 1]\n",
156 |     "print(FN)\n",
157 |     "\n",
158 |     "# Store the EPS subset under its own names: overwriting X_train here made the LMTD selection below run on the\n",
159 |     "# reduced matrix, so its ranks were paired with the wrong entries of Feature_Names.\n",
160 |     "X_EPS = Chiller_Data[FN].values  # .values replaces as_matrix(), which pandas 1.0 removed\n",
161 |     "X_train_EPS, X_test_EPS = train_test_split(X_EPS, test_size=0.55, shuffle=False)\n",
162 |     "\n",
163 |     "NE = [int(i) for i in np.linspace(100,1000,num=10)]\n",
164 |     "# Only the number of trees is tuned (100, 200, ..., 1000)\n",
165 |     "p_grid = dict(n_estimators = NE)\n",
166 |     "\n",
167 |     "model = GridSearchCV(estimator = ExtraTreesRegressor(n_jobs=-1), param_grid = p_grid, scoring = scoring_param, cv = kf, \n",
168 |     "                     n_jobs=-1)\n",
169 |     "model.fit(X_train_EPS, y_train[:,0])\n",
170 |     "    \n",
171 |     "params = model.best_params_\n",
172 |     "print(\"Best Est: %s\" % (params['n_estimators']))\n",
173 |     "    \n",
174 |     "P_EPS = model.predict(X_test_EPS)\n",
175 |     "\n",
176 |     "######################################################################################################\n",
177 |     "\n",
178 |     "# LMTD target: repeat feature selection and tuning. X_train is still the full feature matrix because the\n",
179 |     "# EPS subset above lives in X_train_EPS / X_test_EPS.\n",
180 |     "rfecv = RFECV(estimator=ExtraTreesRegressor(n_jobs=-1), step=1, cv=kf, scoring=scoring_param, n_jobs=-1)\n",
181 |     "FS_model = rfecv.fit(X_train, y_train[:,1])\n",
182 |     "\n",
183 |     "# X_train kept every column, so ranks[i] again belongs to Feature_Names[i]\n",
184 |     "ranks = FS_model.ranking_\n",
185 |     "FN = [Feature_Names[i] for i in range(len(ranks)) if ranks[i] == 1]\n",
186 |     "print(FN)\n",
187 |     "\n",
188 |     "X_LMTD = Chiller_Data[FN].values\n",
189 |     "X_train_LMTD, X_test_LMTD = train_test_split(X_LMTD, test_size=0.55, shuffle=False)\n",
190 |     "\n",
191 |     "NE = [int(i) for i in np.linspace(100,1000,num=10)]\n",
192 |     "# Only the number of trees is tuned (100, 200, ..., 1000)\n",
193 |     "p_grid = dict(n_estimators = NE)\n",
194 |     "\n",
195 |     "model = GridSearchCV(estimator = ExtraTreesRegressor(n_jobs=-1), param_grid = p_grid, scoring = scoring_param, cv = kf, \n",
196 |     "                     n_jobs=-1)\n",
197 |     "model.fit(X_train_LMTD, y_train[:,1])\n",
198 |     "    \n",
199 |     "params = model.best_params_\n",
200 |     "print(\"Best Est: %s\" % (params['n_estimators']))\n",
201 |     "    \n",
202 |     "P_LMTD = model.predict(X_test_LMTD)\n",
203 |     "\n",
204 |     "\n",
205 |     "Labels, Threshold_EPS, Threshold_LMTD = calc_dyn_threshold(y_test[:,0], P_EPS, y_test[:,1], P_LMTD, 2, 2)\n",
206 |     "Temp = pd.DataFrame(data={'Labels':TL_Test, 'Pred_Labels': Labels})\n",
207 |     "\n",
208 |     "print(\"########################################################################################\")\n",
209 |     "print(\"Confusion Matrix - testing:\")\n",
210 |     "print(confusion_matrix(Temp['Labels'], Temp['Pred_Labels']))\n",
211 |     "tn, fp, fn, tp = confusion_matrix(Temp['Labels'], Temp['Pred_Labels']).ravel()\n",
212 |     "print(\"True Negative, False Positive, False Negative, True Positive {}.\".format([tn, fp, fn, tp]))\n",
213 |     "print(\"False positive means false alarms\")\n",
214 |     "print(\"False Negative means missed faults\")\n",
215 |     "print(\"########################################################################################\")\n",
216 |     "print(\"Classification Report - testing:\")\n",
217 |     "print(classification_report(Temp['Labels'], Temp['Pred_Labels'], target_names=['Normal', 'Fault']))\n",
218 |     "print(\"########################################################################################\")\n",
219 |     "print(\"Accuracy - testing: %0.3f\" % accuracy_score(Temp['Labels'], Temp['Pred_Labels']))\n",
220 |     "print(\"########################################################################################\")\n",
221 |     "print(\"ROC AUC score - testing: %0.3f\" % roc_auc_score(Temp['Labels'], Temp['Pred_Labels']))\n",
222 |     "print(\"########################################################################################\")\n",
223 |     "########################################################################################\n",
224 |     "    \n",
225 |     "t1 = time()\n",
226 |     "print('Time taken for this trial %f' %(t1-t0))"
227 |    ]
228 |   },
229 |   {
230 |    "cell_type": "code",
231 |    "execution_count": null,
232 |    "metadata": {
233 |     "collapsed": true
234 |    },
235 |    "outputs": [],
236 |    "source": []
237 |   }
238 |  ],
239 |  "metadata": {
240 |   "anaconda-cloud": {},
241 |   "kernelspec": {
242 |    "display_name": "Python [Root]",
243 |    "language": "python",
244 |    "name": "Python [Root]"
245 |   },
246 |   "language_info": {
247 |    "codemirror_mode": {
248 |     "name": "ipython",
249 |     "version": 3
250 |    },
251 |    "file_extension": ".py",
252 |    "mimetype": "text/x-python",
253 |    "name": "python",
254 |    "nbconvert_exporter": "python",
255 |    "pygments_lexer": "ipython3",
256 |    "version": "3.5.4"
257 |   }
258 |  },
259 |  "nbformat": 4,
260 |  "nbformat_minor": 1
261 | }
262 | 


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-30/Refrigerant_Leak_ERF_Dynamic_Threshold.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": false
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import numpy as np\n",
 12 |     "import pandas as pd\n",
 13 |     "from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, TimeSeriesSplit, KFold, RepeatedKFold, \\\n",
 14 |     "                                    train_test_split\n",
 15 |     "import matplotlib.pyplot as plt\n",
 16 |     "import seaborn as sns\n",
 17 |     "sns.set(color_codes=True)\n",
 18 |     "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer, adjusted_rand_score, \\\n",
 19 |     "                    accuracy_score, f1_score, confusion_matrix, classification_report, roc_auc_score, recall_score\n",
 20 |     "from time import time\n",
 21 |     "from sklearn.preprocessing import MinMaxScaler\n",
 22 |     "from sklearn.preprocessing import quantile_transform\n",
 23 |     "import scipy.stats as st\n",
 24 |     "from sklearn.feature_selection import RFE, RFECV, SelectFromModel\n",
 25 |     "from xgboost import XGBRegressor\n",
 26 |     "from sklearn.ensemble import ExtraTreesRegressor\n",
 27 |     "from sklearn.pipeline import Pipeline\n",
 28 |     "from sklearn.multioutput import MultiOutputRegressor\n",
 29 |     "import pprint as pp\n",
 30 |     "import datetime\n",
 31 |     "%matplotlib inline"
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "code",
 36 |    "execution_count": null,
 37 |    "metadata": {
 38 |     "collapsed": false
 39 |    },
 40 |    "outputs": [],
 41 |    "source": [
 42 |     "Chiller_Data = pd.read_excel('Refrigerant_Leak_Fault_Data.xlsx')"
 43 |    ]
 44 |   },
 45 |   {
 46 |    "cell_type": "code",
 47 |    "execution_count": null,
 48 |    "metadata": {
 49 |     "collapsed": false
 50 |    },
 51 |    "outputs": [],
 52 |    "source": [
 53 |     "Chiller_Data = Chiller_Data.loc[Chiller_Data['kW'] != 1.682000e-45]\n",
 54 |     "Chiller_Data.reset_index(drop=True, inplace=True)"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "code",
 59 |    "execution_count": null,
 60 |    "metadata": {
 61 |     "collapsed": false
 62 |    },
 63 |    "outputs": [],
 64 |    "source": [
 65 |     "Chiller_Data['Target_EPS'] = (Chiller_Data['TRC_sub'])/(Chiller_Data['TRC']-Chiller_Data['TCI'])\n",
 66 |     "Chiller_Data['Target_LMTD'] = (Chiller_Data['TCO']-Chiller_Data['TCI'])/np.log((Chiller_Data['TRC']-Chiller_Data['TCI'])/(Chiller_Data['TRC']-Chiller_Data['TCO']))\n",
 67 |     "Chiller_Data['Lag1'] = (Chiller_Data['Target_EPS'].shift(1))\n",
 68 |     "Chiller_Data['Lag2'] = (Chiller_Data['Target_LMTD'].shift(1))\n",
 69 |     "Chiller_Data.dropna(axis=0,inplace=True)\n",
 70 |     "#Time_data = Chiller_Data['Time (minutes)']"
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "code",
 75 |    "execution_count": null,
 76 |    "metadata": {
 77 |     "collapsed": true
 78 |    },
 79 |    "outputs": [],
 80 |    "source": [
 81 |     "y = Chiller_Data[['Target_EPS','Target_LMTD']].values  # regression targets; .values replaces as_matrix(), which pandas 1.0 removed\n",
 82 |     "True_Labels = Chiller_Data['Label'].values  # fault labels, later compared with the threshold-based predictions\n",
 83 |     "Chiller_Data.drop(['Target_EPS','Target_LMTD','Label','Time (minutes)'], axis=1, inplace=True)\n",
 84 |     "X = Chiller_Data.values  # every remaining column is used as a model input\n",
 85 |     "Feature_Names = list(Chiller_Data)  # column names in the same order as the columns of X"
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "code",
 90 |    "execution_count": null,
 91 |    "metadata": {
 92 |     "collapsed": false
 93 |    },
 94 |    "outputs": [],
 95 |    "source": [
 96 |     "#################################################################################################\n",
 97 |     "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.55, shuffle=False)\n",
 98 |     "TL_train, TL_Test = train_test_split(True_Labels, test_size=0.55, shuffle=False)\n",
 99 |     "#DT_train, DT_Test = train_test_split(Time_data, test_size=0.55, shuffle=False)\n",
100 |     "#################################################################################################"
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "code",
105 |    "execution_count": null,
106 |    "metadata": {
107 |     "collapsed": true
108 |    },
109 |    "outputs": [],
110 |    "source": [
111 |     "def calc_dyn_threshold(A_EPS, P_EPS, A_LMTD, P_LMTD, I, N):\n",
112 |     "    # Flag sample k-1 as a fault (1) when either actual value (A_*) lies below mean - N*std of the I\n",
113 |     "    # predictions (P_*) ending at k-1. Returns (labels, threshold_EPS, threshold_LMTD) as float arrays.\n",
114 |     "    # Control false alarm rates by tuning I and N, e.g. increase I or N to reduce false alarms.\n",
115 |     "    if I < 1:\n",
116 |     "        raise ValueError('I must be at least 1')\n",
117 |     "    P_EPS = np.asarray(P_EPS, dtype=float)\n",
118 |     "    P_LMTD = np.asarray(P_LMTD, dtype=float)\n",
119 |     "    # Warm-up: the first I-1 samples have no full window, so their threshold is the prediction itself\n",
120 |     "    # and their label stays 0. Outputs are preallocated; np.append in the loop copied them every step.\n",
121 |     "    threshold_EPS = P_EPS.copy()\n",
122 |     "    threshold_LMTD = P_LMTD.copy()\n",
123 |     "    labels = np.zeros(len(P_EPS))\n",
124 |     "    for k in range(I, len(P_EPS)+1):\n",
125 |     "        win_EPS = P_EPS[(k-I):k]\n",
126 |     "        threshold_EPS[k-1] = np.mean(win_EPS) - N*np.std(win_EPS)\n",
127 |     "        win_LMTD = P_LMTD[(k-I):k]\n",
128 |     "        threshold_LMTD[k-1] = np.mean(win_LMTD) - N*np.std(win_LMTD)\n",
129 |     "        \n",
130 |     "        if (A_EPS[k-1] < threshold_EPS[k-1] or A_LMTD[k-1] < threshold_LMTD[k-1]) :\n",
131 |     "            labels[k-1] = 1\n",
132 |     "    return labels, threshold_EPS, threshold_LMTD"
133 |    ]
134 |   },
135 |   {
136 |    "cell_type": "code",
137 |    "execution_count": null,
138 |    "metadata": {
139 |     "collapsed": false
140 |    },
141 |    "outputs": [],
142 |    "source": [
143 |     "t0 = time()\n",
144 |     "np.random.seed(7)\n",
145 |     "########################################################################################\n",
146 |     "# Regression\n",
147 |     "kf = KFold(n_splits=10, shuffle=True, random_state=7)\n",
148 |     "scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n",
149 |     "\n",
150 |     "rfecv = RFECV(estimator=ExtraTreesRegressor(n_jobs=-1), step=1, cv=kf, scoring=scoring_param, n_jobs=-1)\n",
151 |     "FS_model = rfecv.fit(X_train, y_train[:,0])\n",
152 |     "\n",
153 |     "# ranking_ == 1 marks the features RFECV kept; ranks[i] belongs to Feature_Names[i] while X_train holds all columns\n",
154 |     "ranks = FS_model.ranking_\n",
155 |     "FN = [Feature_Names[i] for i in range(len(ranks)) if ranks[i] == 1]\n",
156 |     "print(FN)\n",
157 |     "\n",
158 |     "# Store the EPS subset under its own names: overwriting X_train here made the LMTD selection below run on the\n",
159 |     "# reduced matrix, so its ranks were paired with the wrong entries of Feature_Names.\n",
160 |     "X_EPS = Chiller_Data[FN].values  # .values replaces as_matrix(), which pandas 1.0 removed\n",
161 |     "X_train_EPS, X_test_EPS = train_test_split(X_EPS, test_size=0.55, shuffle=False)\n",
162 |     "\n",
163 |     "NE = [int(i) for i in np.linspace(100,1000,num=10)]\n",
164 |     "# Only the number of trees is tuned (100, 200, ..., 1000)\n",
165 |     "p_grid = dict(n_estimators = NE)\n",
166 |     "\n",
167 |     "model = GridSearchCV(estimator = ExtraTreesRegressor(n_jobs=-1), param_grid = p_grid, scoring = scoring_param, cv = kf, \n",
168 |     "                     n_jobs=-1)\n",
169 |     "model.fit(X_train_EPS, y_train[:,0])\n",
170 |     "    \n",
171 |     "params = model.best_params_\n",
172 |     "print(\"Best Est: %s\" % (params['n_estimators']))\n",
173 |     "    \n",
174 |     "P_EPS = model.predict(X_test_EPS)\n",
175 |     "\n",
176 |     "######################################################################################################\n",
177 |     "\n",
178 |     "# LMTD target: repeat feature selection and tuning. X_train is still the full feature matrix because the\n",
179 |     "# EPS subset above lives in X_train_EPS / X_test_EPS.\n",
180 |     "rfecv = RFECV(estimator=ExtraTreesRegressor(n_jobs=-1), step=1, cv=kf, scoring=scoring_param, n_jobs=-1)\n",
181 |     "FS_model = rfecv.fit(X_train, y_train[:,1])\n",
182 |     "\n",
183 |     "# X_train kept every column, so ranks[i] again belongs to Feature_Names[i]\n",
184 |     "ranks = FS_model.ranking_\n",
185 |     "FN = [Feature_Names[i] for i in range(len(ranks)) if ranks[i] == 1]\n",
186 |     "print(FN)\n",
187 |     "\n",
188 |     "X_LMTD = Chiller_Data[FN].values\n",
189 |     "X_train_LMTD, X_test_LMTD = train_test_split(X_LMTD, test_size=0.55, shuffle=False)\n",
190 |     "\n",
191 |     "NE = [int(i) for i in np.linspace(100,1000,num=10)]\n",
192 |     "# Only the number of trees is tuned (100, 200, ..., 1000)\n",
193 |     "p_grid = dict(n_estimators = NE)\n",
194 |     "\n",
195 |     "model = GridSearchCV(estimator = ExtraTreesRegressor(n_jobs=-1), param_grid = p_grid, scoring = scoring_param, cv = kf, \n",
196 |     "                     n_jobs=-1)\n",
197 |     "model.fit(X_train_LMTD, y_train[:,1])\n",
198 |     "    \n",
199 |     "params = model.best_params_\n",
200 |     "print(\"Best Est: %s\" % (params['n_estimators']))\n",
201 |     "    \n",
202 |     "P_LMTD = model.predict(X_test_LMTD)\n",
203 |     "\n",
204 |     "\n",
205 |     "Labels, Threshold_EPS, Threshold_LMTD = calc_dyn_threshold(y_test[:,0], P_EPS, y_test[:,1], P_LMTD, 2, 2)\n",
206 |     "Temp = pd.DataFrame(data={'Labels':TL_Test, 'Pred_Labels': Labels})\n",
207 |     "\n",
208 |     "print(\"########################################################################################\")\n",
209 |     "print(\"Confusion Matrix - testing:\")\n",
210 |     "print(confusion_matrix(Temp['Labels'], Temp['Pred_Labels']))\n",
211 |     "tn, fp, fn, tp = confusion_matrix(Temp['Labels'], Temp['Pred_Labels']).ravel()\n",
212 |     "print(\"True Negative, False Positive, False Negative, True Positive {}.\".format([tn, fp, fn, tp]))\n",
213 |     "print(\"False positive means false alarms\")\n",
214 |     "print(\"False Negative means missed faults\")\n",
215 |     "print(\"########################################################################################\")\n",
216 |     "print(\"Classification Report - testing:\")\n",
217 |     "print(classification_report(Temp['Labels'], Temp['Pred_Labels'], target_names=['Normal', 'Fault']))\n",
218 |     "print(\"########################################################################################\")\n",
219 |     "print(\"Accuracy - testing: %0.3f\" % accuracy_score(Temp['Labels'], Temp['Pred_Labels']))\n",
220 |     "print(\"########################################################################################\")\n",
221 |     "print(\"ROC AUC score - testing: %0.3f\" % roc_auc_score(Temp['Labels'], Temp['Pred_Labels']))\n",
222 |     "print(\"########################################################################################\")\n",
223 |     "########################################################################################\n",
224 |     "    \n",
225 |     "t1 = time()\n",
226 |     "print('Time taken for this trial %f' %(t1-t0))"
227 |    ]
228 |   },
229 |   {
230 |    "cell_type": "code",
231 |    "execution_count": null,
232 |    "metadata": {
233 |     "collapsed": true
234 |    },
235 |    "outputs": [],
236 |    "source": []
237 |   }
238 |  ],
239 |  "metadata": {
240 |   "anaconda-cloud": {},
241 |   "kernelspec": {
242 |    "display_name": "Python [Root]",
243 |    "language": "python",
244 |    "name": "Python [Root]"
245 |   },
246 |   "language_info": {
247 |    "codemirror_mode": {
248 |     "name": "ipython",
249 |     "version": 3
250 |    },
251 |    "file_extension": ".py",
252 |    "mimetype": "text/x-python",
253 |    "name": "python",
254 |    "nbconvert_exporter": "python",
255 |    "pygments_lexer": "ipython3",
256 |    "version": "3.5.4"
257 |   }
258 |  },
259 |  "nbformat": 4,
260 |  "nbformat_minor": 1
261 | }
262 | 


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-10/Refrigerant_Leak_RF_Dynamic_Threshold.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": false
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import numpy as np\n",
 12 |     "import pandas as pd\n",
 13 |     "from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, TimeSeriesSplit, KFold, RepeatedKFold, \\\n",
 14 |     "                                    train_test_split\n",
 15 |     "import matplotlib.pyplot as plt\n",
 16 |     "import seaborn as sns\n",
 17 |     "sns.set(color_codes=True)\n",
 18 |     "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer, adjusted_rand_score, \\\n",
 19 |     "                    accuracy_score, f1_score, confusion_matrix, classification_report, roc_auc_score, recall_score\n",
 20 |     "from time import time\n",
 21 |     "from sklearn.preprocessing import MinMaxScaler\n",
 22 |     "from sklearn.preprocessing import quantile_transform\n",
 23 |     "import scipy.stats as st\n",
 24 |     "from sklearn.feature_selection import RFE, RFECV, SelectFromModel\n",
 25 |     "from xgboost import XGBRegressor\n",
 26 |     "from sklearn.ensemble import RandomForestRegressor\n",
 27 |     "from sklearn.pipeline import Pipeline\n",
 28 |     "from sklearn.multioutput import MultiOutputRegressor\n",
 29 |     "import pprint as pp\n",
 30 |     "import datetime\n",
 31 |     "%matplotlib inline"
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "code",
 36 |    "execution_count": null,
 37 |    "metadata": {
 38 |     "collapsed": false
 39 |    },
 40 |    "outputs": [],
 41 |    "source": [
 42 |     "Chiller_Data = pd.read_excel('Refrigerant_Leak_Fault_Data.xlsx')"
 43 |    ]
 44 |   },
 45 |   {
 46 |    "cell_type": "code",
 47 |    "execution_count": null,
 48 |    "metadata": {
 49 |     "collapsed": false
 50 |    },
 51 |    "outputs": [],
 52 |    "source": [
 53 |     "Chiller_Data = Chiller_Data.loc[Chiller_Data['kW'] != 1.682000e-45]\n",
 54 |     "Chiller_Data.reset_index(drop=True, inplace=True)"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "code",
 59 |    "execution_count": null,
 60 |    "metadata": {
 61 |     "collapsed": false
 62 |    },
 63 |    "outputs": [],
 64 |    "source": [
 65 |     "Chiller_Data['Target_EPS'] = (Chiller_Data['TRC_sub'])/(Chiller_Data['TRC']-Chiller_Data['TCI'])\n",
 66 |     "Chiller_Data['Target_LMTD'] = (Chiller_Data['TCO']-Chiller_Data['TCI'])/np.log((Chiller_Data['TRC']-Chiller_Data['TCI'])/(Chiller_Data['TRC']-Chiller_Data['TCO']))\n",
 67 |     "Chiller_Data['Lag1'] = (Chiller_Data['Target_EPS'].shift(1))\n",
 68 |     "Chiller_Data['Lag2'] = (Chiller_Data['Target_LMTD'].shift(1))\n",
 69 |     "Chiller_Data.dropna(axis=0,inplace=True)\n",
 70 |     "#Time_data = Chiller_Data['Time (minutes)']"
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "code",
 75 |    "execution_count": null,
 76 |    "metadata": {
 77 |     "collapsed": true
 78 |    },
 79 |    "outputs": [],
 80 |    "source": [
 81 |     "y = Chiller_Data[['Target_EPS','Target_LMTD']].values  # regression targets; .values replaces as_matrix(), which was removed in pandas 1.0\n",
 82 |     "True_Labels = Chiller_Data['Label'].values  # labels from the 'Label' column, used only for scoring\n",
 83 |     "Chiller_Data.drop(['Target_EPS','Target_LMTD','Label','Time (minutes)'], axis=1, inplace=True)\n",
 84 |     "X = Chiller_Data.values  # every remaining column is a candidate feature\n",
 85 |     "Feature_Names = list(Chiller_Data)"
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "code",
 90 |    "execution_count": null,
 91 |    "metadata": {
 92 |     "collapsed": false
 93 |    },
 94 |    "outputs": [],
 95 |    "source": [
 96 |     "#################################################################################################\n",
 97 |     "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.55, shuffle=False)\n",
 98 |     "TL_train, TL_Test = train_test_split(True_Labels, test_size=0.55, shuffle=False)\n",
 99 |     "#DT_train, DT_Test = train_test_split(Time_data, test_size=0.55, shuffle=False)\n",
100 |     "#################################################################################################"
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "code",
105 |    "execution_count": null,
106 |    "metadata": {
107 |     "collapsed": true
108 |    },
109 |    "outputs": [],
110 |    "source": [
111 |     "def calc_dyn_threshold(A_EPS, P_EPS, A_LMTD, P_LMTD, I, N):\n",
112 |     "    # Flag samples whose actual value drops below (rolling mean - N * rolling std) of the last I predictions.\n",
113 |     "    # Control false alarm rates by tuning I and N, e.g. increase I or N to reduce false alarms.\n",
114 |     "    # Returns (labels, threshold_EPS, threshold_LMTD); a label of 1 marks a sample below either threshold.\n",
115 |     "    n = len(P_EPS)\n",
116 |     "    # Preallocate the outputs instead of calling np.append inside the loop (each append copies the array).\n",
117 |     "    # The first I-1 samples have no full window yet: threshold = prediction itself, label = 0.\n",
118 |     "    threshold_EPS = np.array(P_EPS, dtype=float)\n",
119 |     "    threshold_LMTD = np.array(P_LMTD, dtype=float)\n",
120 |     "    labels = np.zeros(n)\n",
121 |     "    for k in range(I, n+1):\n",
122 |     "        # Window of the I most recent predictions ending at sample k-1.\n",
123 |     "        window_EPS = P_EPS[(k-I):k]\n",
124 |     "        window_LMTD = P_LMTD[(k-I):k]\n",
125 |     "        threshold_EPS[k-1] = np.mean(window_EPS) - N*np.std(window_EPS)\n",
126 |     "        threshold_LMTD[k-1] = np.mean(window_LMTD) - N*np.std(window_LMTD)\n",
127 |     "        \n",
128 |     "        # A sample is flagged when either actual indicator is below its dynamic threshold.\n",
129 |     "        if (A_EPS[k-1] < threshold_EPS[k-1] or A_LMTD[k-1] < threshold_LMTD[k-1]) :\n",
130 |     "            labels[k-1] = 1\n",
131 |     "    # Samples that were never flagged keep their initial label of 0.\n",
132 |     "    return labels, threshold_EPS, threshold_LMTD"
133 |    ]
134 |   },
135 |   {
136 |    "cell_type": "code",
137 |    "execution_count": null,
138 |    "metadata": {
139 |     "collapsed": false
140 |    },
141 |    "outputs": [],
142 |    "source": [
143 |     "t0 = time()\n",
144 |     "np.random.seed(7)\n",
145 |     "########################################################################################\n",
146 |     "# Regression\n",
147 |     "kf = KFold(n_splits=10, shuffle=True, random_state=7)\n",
148 |     "scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n",
149 |     "\n",
150 |     "rfecv = RFECV(estimator=RandomForestRegressor(n_jobs=-1), step=1, cv=kf, scoring=scoring_param, n_jobs=-1)\n",
151 |     "FS_model = rfecv.fit(X_train, y_train[:,0])\n",
152 |     "\n",
153 |     "ranks = FS_model.ranking_  # RFECV assigns rank 1 to every selected feature\n",
154 |     "FN =[]\n",
155 |     "for i in range(len(ranks)):\n",
156 |     "    if ranks[i] == 1:\n",
157 |     "        FN.append(Feature_Names[i])\n",
158 |     "print(FN)\n",
159 |     "\n",
160 |     "X = Chiller_Data[FN].values  # .values replaces as_matrix(), which was removed in pandas 1.0\n",
161 |     "X_train, X_test = train_test_split(X, test_size=0.55, shuffle=False)\n",
162 |     "\n",
163 |     "NE = [int(i) for i in np.linspace(100,1000,num=10)]\n",
164 |     "p_grid = dict()\n",
165 |     "p_grid = dict(n_estimators = NE)\n",
166 |     "\n",
167 |     "model = GridSearchCV(estimator = RandomForestRegressor(n_jobs=-1), param_grid = p_grid, scoring = scoring_param, cv = kf, \n",
168 |     "                     n_jobs=-1)\n",
169 |     "model.fit(X_train, y_train[:,0])\n",
170 |     "    \n",
171 |     "params = model.best_params_\n",
172 |     "print(\"Best Est: %s\" % (params['n_estimators']))\n",
173 |     "    \n",
174 |     "P_EPS = model.predict(X_test)\n",
175 |     "\n",
176 |     "######################################################################################################\n",
177 |     "X_train, X_test = train_test_split(Chiller_Data[Feature_Names].values, test_size=0.55, shuffle=False)  # restore ALL candidate features: X_train was narrowed to the EPS-selected columns above, so ranking_ would no longer line up with Feature_Names\n",
178 |     "rfecv = RFECV(estimator=RandomForestRegressor(n_jobs=-1), step=1, cv=kf, scoring=scoring_param, n_jobs=-1)\n",
179 |     "FS_model = rfecv.fit(X_train, y_train[:,1])\n",
180 |     "\n",
181 |     "ranks = FS_model.ranking_  # one rank per column of the full feature matrix; rank 1 = selected\n",
182 |     "FN =[]\n",
183 |     "for i in range(len(ranks)):\n",
184 |     "    if ranks[i] == 1:\n",
185 |     "        FN.append(Feature_Names[i])\n",
186 |     "print(FN)\n",
187 |     "\n",
188 |     "X = Chiller_Data[FN].values  # .values replaces as_matrix(), which was removed in pandas 1.0\n",
189 |     "X_train, X_test = train_test_split(X, test_size=0.55, shuffle=False)\n",
190 |     "\n",
191 |     "NE = [int(i) for i in np.linspace(100,1000,num=10)]\n",
192 |     "p_grid = dict()\n",
193 |     "p_grid = dict(n_estimators = NE)\n",
194 |     "\n",
195 |     "model = GridSearchCV(estimator = RandomForestRegressor(n_jobs=-1), param_grid = p_grid, scoring = scoring_param, cv = kf, \n",
196 |     "                     n_jobs=-1)\n",
197 |     "model.fit(X_train, y_train[:,1])\n",
198 |     "    \n",
199 |     "params = model.best_params_\n",
200 |     "print(\"Best Est: %s\" % (params['n_estimators']))\n",
201 |     "    \n",
202 |     "P_LMTD = model.predict(X_test)\n",
203 |     "\n",
204 |     "\n",
205 |     "Labels, Threshold_EPS, Threshold_LMTD = calc_dyn_threshold(y_test[:,0], P_EPS, y_test[:,1], P_LMTD, 2, 2)\n",
206 |     "Temp = pd.DataFrame(data={'Labels':TL_Test, 'Pred_Labels': Labels})\n",
207 |     "\n",
208 |     "print(\"########################################################################################\")\n",
209 |     "print(\"Confusion Matrix - testing:\")\n",
210 |     "print(confusion_matrix(Temp['Labels'], Temp['Pred_Labels']))\n",
211 |     "tn, fp, fn, tp = confusion_matrix(Temp['Labels'], Temp['Pred_Labels']).ravel()\n",
212 |     "print(\"True Negative, False Positive, False Negative, True Positive {}.\".format([tn, fp, fn, tp]))\n",
213 |     "print(\"False positive means false alarms\")\n",
214 |     "print(\"False Negative means missed faults\")\n",
215 |     "print(\"########################################################################################\")\n",
216 |     "print(\"Classification Report - testing:\")\n",
217 |     "print(classification_report(Temp['Labels'], Temp['Pred_Labels'], target_names=['Normal', 'Fault']))\n",
218 |     "print(\"########################################################################################\")\n",
219 |     "print(\"Accuracy - testing: %0.3f\" % accuracy_score(Temp['Labels'], Temp['Pred_Labels']))\n",
220 |     "print(\"########################################################################################\")\n",
221 |     "print(\"ROC AUC score - testing: %0.3f\" % roc_auc_score(Temp['Labels'], Temp['Pred_Labels']))\n",
222 |     "print(\"########################################################################################\")\n",
223 |     "########################################################################################\n",
224 |     "    \n",
225 |     "t1 = time()\n",
226 |     "print('Time taken for this trial %f' %(t1-t0))"
227 |    ]
228 |   },
229 |   {
230 |    "cell_type": "code",
231 |    "execution_count": null,
232 |    "metadata": {
233 |     "collapsed": true
234 |    },
235 |    "outputs": [],
236 |    "source": []
237 |   }
238 |  ],
239 |  "metadata": {
240 |   "anaconda-cloud": {},
241 |   "kernelspec": {
242 |    "display_name": "Python [Root]",
243 |    "language": "python",
244 |    "name": "Python [Root]"
245 |   },
246 |   "language_info": {
247 |    "codemirror_mode": {
248 |     "name": "ipython",
249 |     "version": 3
250 |    },
251 |    "file_extension": ".py",
252 |    "mimetype": "text/x-python",
253 |    "name": "python",
254 |    "nbconvert_exporter": "python",
255 |    "pygments_lexer": "ipython3",
256 |    "version": "3.5.4"
257 |   }
258 |  },
259 |  "nbformat": 4,
260 |  "nbformat_minor": 1
261 | }
262 | 


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-20/Refrigerant_Leak_RF_Dynamic_Threshold.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": false
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import numpy as np\n",
 12 |     "import pandas as pd\n",
 13 |     "from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, TimeSeriesSplit, KFold, RepeatedKFold, \\\n",
 14 |     "                                    train_test_split\n",
 15 |     "import matplotlib.pyplot as plt\n",
 16 |     "import seaborn as sns\n",
 17 |     "sns.set(color_codes=True)\n",
 18 |     "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer, adjusted_rand_score, \\\n",
 19 |     "                    accuracy_score, f1_score, confusion_matrix, classification_report, roc_auc_score, recall_score\n",
 20 |     "from time import time\n",
 21 |     "from sklearn.preprocessing import MinMaxScaler\n",
 22 |     "from sklearn.preprocessing import quantile_transform\n",
 23 |     "import scipy.stats as st\n",
 24 |     "from sklearn.feature_selection import RFE, RFECV, SelectFromModel\n",
 25 |     "from xgboost import XGBRegressor\n",
 26 |     "from sklearn.ensemble import RandomForestRegressor\n",
 27 |     "from sklearn.pipeline import Pipeline\n",
 28 |     "from sklearn.multioutput import MultiOutputRegressor\n",
 29 |     "import pprint as pp\n",
 30 |     "import datetime\n",
 31 |     "%matplotlib inline"
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "code",
 36 |    "execution_count": null,
 37 |    "metadata": {
 38 |     "collapsed": false
 39 |    },
 40 |    "outputs": [],
 41 |    "source": [
 42 |     "Chiller_Data = pd.read_excel('Refrigerant_Leak_Fault_Data.xlsx')"
 43 |    ]
 44 |   },
 45 |   {
 46 |    "cell_type": "code",
 47 |    "execution_count": null,
 48 |    "metadata": {
 49 |     "collapsed": false
 50 |    },
 51 |    "outputs": [],
 52 |    "source": [
 53 |     "Chiller_Data = Chiller_Data.loc[Chiller_Data['kW'] != 1.682000e-45]\n",
 54 |     "Chiller_Data.reset_index(drop=True, inplace=True)"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "code",
 59 |    "execution_count": null,
 60 |    "metadata": {
 61 |     "collapsed": false
 62 |    },
 63 |    "outputs": [],
 64 |    "source": [
 65 |     "Chiller_Data['Target_EPS'] = (Chiller_Data['TRC_sub'])/(Chiller_Data['TRC']-Chiller_Data['TCI'])\n",
 66 |     "Chiller_Data['Target_LMTD'] = (Chiller_Data['TCO']-Chiller_Data['TCI'])/np.log((Chiller_Data['TRC']-Chiller_Data['TCI'])/(Chiller_Data['TRC']-Chiller_Data['TCO']))\n",
 67 |     "Chiller_Data['Lag1'] = (Chiller_Data['Target_EPS'].shift(1))\n",
 68 |     "Chiller_Data['Lag2'] = (Chiller_Data['Target_LMTD'].shift(1))\n",
 69 |     "Chiller_Data.dropna(axis=0,inplace=True)\n",
 70 |     "#Time_data = Chiller_Data['Time (minutes)']"
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "code",
 75 |    "execution_count": null,
 76 |    "metadata": {
 77 |     "collapsed": true
 78 |    },
 79 |    "outputs": [],
 80 |    "source": [
 81 |     "y = Chiller_Data[['Target_EPS','Target_LMTD']].values  # regression targets; .values replaces as_matrix(), which was removed in pandas 1.0\n",
 82 |     "True_Labels = Chiller_Data['Label'].values  # labels from the 'Label' column, used only for scoring\n",
 83 |     "Chiller_Data.drop(['Target_EPS','Target_LMTD','Label','Time (minutes)'], axis=1, inplace=True)\n",
 84 |     "X = Chiller_Data.values  # every remaining column is a candidate feature\n",
 85 |     "Feature_Names = list(Chiller_Data)"
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "code",
 90 |    "execution_count": null,
 91 |    "metadata": {
 92 |     "collapsed": false
 93 |    },
 94 |    "outputs": [],
 95 |    "source": [
 96 |     "#################################################################################################\n",
 97 |     "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.55, shuffle=False)\n",
 98 |     "TL_train, TL_Test = train_test_split(True_Labels, test_size=0.55, shuffle=False)\n",
 99 |     "#DT_train, DT_Test = train_test_split(Time_data, test_size=0.55, shuffle=False)\n",
100 |     "#################################################################################################"
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "code",
105 |    "execution_count": null,
106 |    "metadata": {
107 |     "collapsed": true
108 |    },
109 |    "outputs": [],
110 |    "source": [
111 |     "def calc_dyn_threshold(A_EPS, P_EPS, A_LMTD, P_LMTD, I, N):\n",
112 |     "    # Flag samples whose actual value drops below (rolling mean - N * rolling std) of the last I predictions.\n",
113 |     "    # Control false alarm rates by tuning I and N, e.g. increase I or N to reduce false alarms.\n",
114 |     "    # Returns (labels, threshold_EPS, threshold_LMTD); a label of 1 marks a sample below either threshold.\n",
115 |     "    n = len(P_EPS)\n",
116 |     "    # Preallocate the outputs instead of calling np.append inside the loop (each append copies the array).\n",
117 |     "    # The first I-1 samples have no full window yet: threshold = prediction itself, label = 0.\n",
118 |     "    threshold_EPS = np.array(P_EPS, dtype=float)\n",
119 |     "    threshold_LMTD = np.array(P_LMTD, dtype=float)\n",
120 |     "    labels = np.zeros(n)\n",
121 |     "    for k in range(I, n+1):\n",
122 |     "        # Window of the I most recent predictions ending at sample k-1.\n",
123 |     "        window_EPS = P_EPS[(k-I):k]\n",
124 |     "        window_LMTD = P_LMTD[(k-I):k]\n",
125 |     "        threshold_EPS[k-1] = np.mean(window_EPS) - N*np.std(window_EPS)\n",
126 |     "        threshold_LMTD[k-1] = np.mean(window_LMTD) - N*np.std(window_LMTD)\n",
127 |     "        \n",
128 |     "        # A sample is flagged when either actual indicator is below its dynamic threshold.\n",
129 |     "        if (A_EPS[k-1] < threshold_EPS[k-1] or A_LMTD[k-1] < threshold_LMTD[k-1]) :\n",
130 |     "            labels[k-1] = 1\n",
131 |     "    # Samples that were never flagged keep their initial label of 0.\n",
132 |     "    return labels, threshold_EPS, threshold_LMTD"
133 |    ]
134 |   },
135 |   {
136 |    "cell_type": "code",
137 |    "execution_count": null,
138 |    "metadata": {
139 |     "collapsed": false
140 |    },
141 |    "outputs": [],
142 |    "source": [
143 |     "t0 = time()\n",
144 |     "np.random.seed(7)\n",
145 |     "########################################################################################\n",
146 |     "# Regression\n",
147 |     "kf = KFold(n_splits=10, shuffle=True, random_state=7)\n",
148 |     "scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n",
149 |     "\n",
150 |     "rfecv = RFECV(estimator=RandomForestRegressor(n_jobs=-1), step=1, cv=kf, scoring=scoring_param, n_jobs=-1)\n",
151 |     "FS_model = rfecv.fit(X_train, y_train[:,0])\n",
152 |     "\n",
153 |     "ranks = FS_model.ranking_  # RFECV assigns rank 1 to every selected feature\n",
154 |     "FN =[]\n",
155 |     "for i in range(len(ranks)):\n",
156 |     "    if ranks[i] == 1:\n",
157 |     "        FN.append(Feature_Names[i])\n",
158 |     "print(FN)\n",
159 |     "\n",
160 |     "X = Chiller_Data[FN].values  # .values replaces as_matrix(), which was removed in pandas 1.0\n",
161 |     "X_train, X_test = train_test_split(X, test_size=0.55, shuffle=False)\n",
162 |     "\n",
163 |     "NE = [int(i) for i in np.linspace(100,1000,num=10)]\n",
164 |     "p_grid = dict()\n",
165 |     "p_grid = dict(n_estimators = NE)\n",
166 |     "\n",
167 |     "model = GridSearchCV(estimator = RandomForestRegressor(n_jobs=-1), param_grid = p_grid, scoring = scoring_param, cv = kf, \n",
168 |     "                     n_jobs=-1)\n",
169 |     "model.fit(X_train, y_train[:,0])\n",
170 |     "    \n",
171 |     "params = model.best_params_\n",
172 |     "print(\"Best Est: %s\" % (params['n_estimators']))\n",
173 |     "    \n",
174 |     "P_EPS = model.predict(X_test)\n",
175 |     "\n",
176 |     "######################################################################################################\n",
177 |     "X_train, X_test = train_test_split(Chiller_Data[Feature_Names].values, test_size=0.55, shuffle=False)  # restore ALL candidate features: X_train was narrowed to the EPS-selected columns above, so ranking_ would no longer line up with Feature_Names\n",
178 |     "rfecv = RFECV(estimator=RandomForestRegressor(n_jobs=-1), step=1, cv=kf, scoring=scoring_param, n_jobs=-1)\n",
179 |     "FS_model = rfecv.fit(X_train, y_train[:,1])\n",
180 |     "\n",
181 |     "ranks = FS_model.ranking_  # one rank per column of the full feature matrix; rank 1 = selected\n",
182 |     "FN =[]\n",
183 |     "for i in range(len(ranks)):\n",
184 |     "    if ranks[i] == 1:\n",
185 |     "        FN.append(Feature_Names[i])\n",
186 |     "print(FN)\n",
187 |     "\n",
188 |     "X = Chiller_Data[FN].values  # .values replaces as_matrix(), which was removed in pandas 1.0\n",
189 |     "X_train, X_test = train_test_split(X, test_size=0.55, shuffle=False)\n",
190 |     "\n",
191 |     "NE = [int(i) for i in np.linspace(100,1000,num=10)]\n",
192 |     "p_grid = dict()\n",
193 |     "p_grid = dict(n_estimators = NE)\n",
194 |     "\n",
195 |     "model = GridSearchCV(estimator = RandomForestRegressor(n_jobs=-1), param_grid = p_grid, scoring = scoring_param, cv = kf, \n",
196 |     "                     n_jobs=-1)\n",
197 |     "model.fit(X_train, y_train[:,1])\n",
198 |     "    \n",
199 |     "params = model.best_params_\n",
200 |     "print(\"Best Est: %s\" % (params['n_estimators']))\n",
201 |     "    \n",
202 |     "P_LMTD = model.predict(X_test)\n",
203 |     "\n",
204 |     "\n",
205 |     "Labels, Threshold_EPS, Threshold_LMTD = calc_dyn_threshold(y_test[:,0], P_EPS, y_test[:,1], P_LMTD, 2, 2)\n",
206 |     "Temp = pd.DataFrame(data={'Labels':TL_Test, 'Pred_Labels': Labels})\n",
207 |     "\n",
208 |     "print(\"########################################################################################\")\n",
209 |     "print(\"Confusion Matrix - testing:\")\n",
210 |     "print(confusion_matrix(Temp['Labels'], Temp['Pred_Labels']))\n",
211 |     "tn, fp, fn, tp = confusion_matrix(Temp['Labels'], Temp['Pred_Labels']).ravel()\n",
212 |     "print(\"True Negative, False Positive, False Negative, True Positive {}.\".format([tn, fp, fn, tp]))\n",
213 |     "print(\"False positive means false alarms\")\n",
214 |     "print(\"False Negative means missed faults\")\n",
215 |     "print(\"########################################################################################\")\n",
216 |     "print(\"Classification Report - testing:\")\n",
217 |     "print(classification_report(Temp['Labels'], Temp['Pred_Labels'], target_names=['Normal', 'Fault']))\n",
218 |     "print(\"########################################################################################\")\n",
219 |     "print(\"Accuracy - testing: %0.3f\" % accuracy_score(Temp['Labels'], Temp['Pred_Labels']))\n",
220 |     "print(\"########################################################################################\")\n",
221 |     "print(\"ROC AUC score - testing: %0.3f\" % roc_auc_score(Temp['Labels'], Temp['Pred_Labels']))\n",
222 |     "print(\"########################################################################################\")\n",
223 |     "########################################################################################\n",
224 |     "    \n",
225 |     "t1 = time()\n",
226 |     "print('Time taken for this trial %f' %(t1-t0))"
227 |    ]
228 |   },
229 |   {
230 |    "cell_type": "code",
231 |    "execution_count": null,
232 |    "metadata": {
233 |     "collapsed": true
234 |    },
235 |    "outputs": [],
236 |    "source": []
237 |   }
238 |  ],
239 |  "metadata": {
240 |   "anaconda-cloud": {},
241 |   "kernelspec": {
242 |    "display_name": "Python [Root]",
243 |    "language": "python",
244 |    "name": "Python [Root]"
245 |   },
246 |   "language_info": {
247 |    "codemirror_mode": {
248 |     "name": "ipython",
249 |     "version": 3
250 |    },
251 |    "file_extension": ".py",
252 |    "mimetype": "text/x-python",
253 |    "name": "python",
254 |    "nbconvert_exporter": "python",
255 |    "pygments_lexer": "ipython3",
256 |    "version": "3.5.4"
257 |   }
258 |  },
259 |  "nbformat": 4,
260 |  "nbformat_minor": 1
261 | }
262 | 


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-30/Refrigerant_Leak_RF_Dynamic_Threshold.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": false
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import numpy as np\n",
 12 |     "import pandas as pd\n",
 13 |     "from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, TimeSeriesSplit, KFold, RepeatedKFold, \\\n",
 14 |     "                                    train_test_split\n",
 15 |     "import matplotlib.pyplot as plt\n",
 16 |     "import seaborn as sns\n",
 17 |     "sns.set(color_codes=True)\n",
 18 |     "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer, adjusted_rand_score, \\\n",
 19 |     "                    accuracy_score, f1_score, confusion_matrix, classification_report, roc_auc_score, recall_score\n",
 20 |     "from time import time\n",
 21 |     "from sklearn.preprocessing import MinMaxScaler\n",
 22 |     "from sklearn.preprocessing import quantile_transform\n",
 23 |     "import scipy.stats as st\n",
 24 |     "from sklearn.feature_selection import RFE, RFECV, SelectFromModel\n",
 25 |     "from xgboost import XGBRegressor\n",
 26 |     "from sklearn.ensemble import RandomForestRegressor\n",
 27 |     "from sklearn.pipeline import Pipeline\n",
 28 |     "from sklearn.multioutput import MultiOutputRegressor\n",
 29 |     "import pprint as pp\n",
 30 |     "import datetime\n",
 31 |     "%matplotlib inline"
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "code",
 36 |    "execution_count": null,
 37 |    "metadata": {
 38 |     "collapsed": false
 39 |    },
 40 |    "outputs": [],
 41 |    "source": [
 42 |     "Chiller_Data = pd.read_excel('Refrigerant_Leak_Fault_Data.xlsx')"
 43 |    ]
 44 |   },
 45 |   {
 46 |    "cell_type": "code",
 47 |    "execution_count": null,
 48 |    "metadata": {
 49 |     "collapsed": false
 50 |    },
 51 |    "outputs": [],
 52 |    "source": [
 53 |     "Chiller_Data = Chiller_Data.loc[Chiller_Data['kW'] != 1.682000e-45]\n",
 54 |     "Chiller_Data.reset_index(drop=True, inplace=True)"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "code",
 59 |    "execution_count": null,
 60 |    "metadata": {
 61 |     "collapsed": false
 62 |    },
 63 |    "outputs": [],
 64 |    "source": [
 65 |     "Chiller_Data['Target_EPS'] = (Chiller_Data['TRC_sub'])/(Chiller_Data['TRC']-Chiller_Data['TCI'])\n",
 66 |     "Chiller_Data['Target_LMTD'] = (Chiller_Data['TCO']-Chiller_Data['TCI'])/np.log((Chiller_Data['TRC']-Chiller_Data['TCI'])/(Chiller_Data['TRC']-Chiller_Data['TCO']))\n",
 67 |     "Chiller_Data['Lag1'] = (Chiller_Data['Target_EPS'].shift(1))\n",
 68 |     "Chiller_Data['Lag2'] = (Chiller_Data['Target_LMTD'].shift(1))\n",
 69 |     "Chiller_Data.dropna(axis=0,inplace=True)\n",
 70 |     "#Time_data = Chiller_Data['Time (minutes)']"
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "code",
 75 |    "execution_count": null,
 76 |    "metadata": {
 77 |     "collapsed": true
 78 |    },
 79 |    "outputs": [],
 80 |    "source": [
 81 |     "y = Chiller_Data[['Target_EPS','Target_LMTD']].values  # regression targets; .values replaces as_matrix(), which was removed in pandas 1.0\n",
 82 |     "True_Labels = Chiller_Data['Label'].values  # labels from the 'Label' column, used only for scoring\n",
 83 |     "Chiller_Data.drop(['Target_EPS','Target_LMTD','Label','Time (minutes)'], axis=1, inplace=True)\n",
 84 |     "X = Chiller_Data.values  # every remaining column is a candidate feature\n",
 85 |     "Feature_Names = list(Chiller_Data)"
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "code",
 90 |    "execution_count": null,
 91 |    "metadata": {
 92 |     "collapsed": false
 93 |    },
 94 |    "outputs": [],
 95 |    "source": [
 96 |     "#################################################################################################\n",
 97 |     "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.55, shuffle=False)\n",
 98 |     "TL_train, TL_Test = train_test_split(True_Labels, test_size=0.55, shuffle=False)\n",
 99 |     "#DT_train, DT_Test = train_test_split(Time_data, test_size=0.55, shuffle=False)\n",
100 |     "#################################################################################################"
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "code",
105 |    "execution_count": null,
106 |    "metadata": {
107 |     "collapsed": true
108 |    },
109 |    "outputs": [],
110 |    "source": [
111 |     "def calc_dyn_threshold(A_EPS, P_EPS, A_LMTD, P_LMTD, I, N):\n",
112 |     "    # Flag samples whose actual value drops below (rolling mean - N * rolling std) of the last I predictions.\n",
113 |     "    # Control false alarm rates by tuning I and N, e.g. increase I or N to reduce false alarms.\n",
114 |     "    # Returns (labels, threshold_EPS, threshold_LMTD); a label of 1 marks a sample below either threshold.\n",
115 |     "    n = len(P_EPS)\n",
116 |     "    # Preallocate the outputs instead of calling np.append inside the loop (each append copies the array).\n",
117 |     "    # The first I-1 samples have no full window yet: threshold = prediction itself, label = 0.\n",
118 |     "    threshold_EPS = np.array(P_EPS, dtype=float)\n",
119 |     "    threshold_LMTD = np.array(P_LMTD, dtype=float)\n",
120 |     "    labels = np.zeros(n)\n",
121 |     "    for k in range(I, n+1):\n",
122 |     "        # Window of the I most recent predictions ending at sample k-1.\n",
123 |     "        window_EPS = P_EPS[(k-I):k]\n",
124 |     "        window_LMTD = P_LMTD[(k-I):k]\n",
125 |     "        threshold_EPS[k-1] = np.mean(window_EPS) - N*np.std(window_EPS)\n",
126 |     "        threshold_LMTD[k-1] = np.mean(window_LMTD) - N*np.std(window_LMTD)\n",
127 |     "        \n",
128 |     "        # A sample is flagged when either actual indicator is below its dynamic threshold.\n",
129 |     "        if (A_EPS[k-1] < threshold_EPS[k-1] or A_LMTD[k-1] < threshold_LMTD[k-1]) :\n",
130 |     "            labels[k-1] = 1\n",
131 |     "    # Samples that were never flagged keep their initial label of 0.\n",
132 |     "    return labels, threshold_EPS, threshold_LMTD"
133 |    ]
134 |   },
135 |   {
136 |    "cell_type": "code",
137 |    "execution_count": null,
138 |    "metadata": {
139 |     "collapsed": false
140 |    },
141 |    "outputs": [],
142 |    "source": [
143 |     "t0 = time()\n",
144 |     "np.random.seed(7)\n",
145 |     "########################################################################################\n",
146 |     "# Regression\n",
147 |     "kf = KFold(n_splits=10, shuffle=True, random_state=7)\n",
148 |     "scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n",
149 |     "\n",
150 |     "rfecv = RFECV(estimator=RandomForestRegressor(n_jobs=-1), step=1, cv=kf, scoring=scoring_param, n_jobs=-1)\n",
151 |     "FS_model = rfecv.fit(X_train, y_train[:,0])\n",
152 |     "\n",
153 |     "ranks = FS_model.ranking_  # RFECV assigns rank 1 to every selected feature\n",
154 |     "FN =[]\n",
155 |     "for i in range(len(ranks)):\n",
156 |     "    if ranks[i] == 1:\n",
157 |     "        FN.append(Feature_Names[i])\n",
158 |     "print(FN)\n",
159 |     "\n",
160 |     "X = Chiller_Data[FN].values  # .values replaces as_matrix(), which was removed in pandas 1.0\n",
161 |     "X_train, X_test = train_test_split(X, test_size=0.55, shuffle=False)\n",
162 |     "\n",
163 |     "NE = [int(i) for i in np.linspace(100,1000,num=10)]\n",
164 |     "p_grid = dict()\n",
165 |     "p_grid = dict(n_estimators = NE)\n",
166 |     "\n",
167 |     "model = GridSearchCV(estimator = RandomForestRegressor(n_jobs=-1), param_grid = p_grid, scoring = scoring_param, cv = kf, \n",
168 |     "                     n_jobs=-1)\n",
169 |     "model.fit(X_train, y_train[:,0])\n",
170 |     "    \n",
171 |     "params = model.best_params_\n",
172 |     "print(\"Best Est: %s\" % (params['n_estimators']))\n",
173 |     "    \n",
174 |     "P_EPS = model.predict(X_test)\n",
175 |     "\n",
176 |     "######################################################################################################\n",
177 |     "X_train, X_test = train_test_split(Chiller_Data[Feature_Names].values, test_size=0.55, shuffle=False)  # restore ALL candidate features: X_train was narrowed to the EPS-selected columns above, so ranking_ would no longer line up with Feature_Names\n",
178 |     "rfecv = RFECV(estimator=RandomForestRegressor(n_jobs=-1), step=1, cv=kf, scoring=scoring_param, n_jobs=-1)\n",
179 |     "FS_model = rfecv.fit(X_train, y_train[:,1])\n",
180 |     "\n",
181 |     "ranks = FS_model.ranking_  # one rank per column of the full feature matrix; rank 1 = selected\n",
182 |     "FN =[]\n",
183 |     "for i in range(len(ranks)):\n",
184 |     "    if ranks[i] == 1:\n",
185 |     "        FN.append(Feature_Names[i])\n",
186 |     "print(FN)\n",
187 |     "\n",
188 |     "X = Chiller_Data[FN].values  # .values replaces as_matrix(), which was removed in pandas 1.0\n",
189 |     "X_train, X_test = train_test_split(X, test_size=0.55, shuffle=False)\n",
190 |     "\n",
191 |     "NE = [int(i) for i in np.linspace(100,1000,num=10)]\n",
192 |     "p_grid = dict()\n",
193 |     "p_grid = dict(n_estimators = NE)\n",
194 |     "\n",
195 |     "model = GridSearchCV(estimator = RandomForestRegressor(n_jobs=-1), param_grid = p_grid, scoring = scoring_param, cv = kf, \n",
196 |     "                     n_jobs=-1)\n",
197 |     "model.fit(X_train, y_train[:,1])\n",
198 |     "    \n",
199 |     "params = model.best_params_\n",
200 |     "print(\"Best Est: %s\" % (params['n_estimators']))\n",
201 |     "    \n",
202 |     "P_LMTD = model.predict(X_test)\n",
203 |     "\n",
204 |     "\n",
205 |     "Labels, Threshold_EPS, Threshold_LMTD = calc_dyn_threshold(y_test[:,0], P_EPS, y_test[:,1], P_LMTD, 2, 2)\n",
206 |     "Temp = pd.DataFrame(data={'Labels':TL_Test, 'Pred_Labels': Labels})\n",
207 |     "\n",
208 |     "print(\"########################################################################################\")\n",
209 |     "print(\"Confusion Matrix - testing:\")\n",
210 |     "print(confusion_matrix(Temp['Labels'], Temp['Pred_Labels']))\n",
211 |     "tn, fp, fn, tp = confusion_matrix(Temp['Labels'], Temp['Pred_Labels']).ravel()\n",
212 |     "print(\"True Negative, False Positive, False Negative, True Positive {}.\".format([tn, fp, fn, tp]))\n",
213 |     "print(\"False positive means false alarms\")\n",
214 |     "print(\"False Negative means missed faults\")\n",
215 |     "print(\"########################################################################################\")\n",
216 |     "print(\"Classification Report - testing:\")\n",
217 |     "print(classification_report(Temp['Labels'], Temp['Pred_Labels'], target_names=['Normal', 'Fault']))\n",
218 |     "print(\"########################################################################################\")\n",
219 |     "print(\"Accuracy - testing: %0.3f\" % accuracy_score(Temp['Labels'], Temp['Pred_Labels']))\n",
220 |     "print(\"########################################################################################\")\n",
221 |     "print(\"ROC AUC score - testing: %0.3f\" % roc_auc_score(Temp['Labels'], Temp['Pred_Labels']))\n",
222 |     "print(\"########################################################################################\")\n",
223 |     "########################################################################################\n",
224 |     "    \n",
225 |     "t1 = time()\n",
226 |     "print('Time taken for this trial %f' %(t1-t0))"
227 |    ]
228 |   },
229 |   {
230 |    "cell_type": "code",
231 |    "execution_count": null,
232 |    "metadata": {
233 |     "collapsed": true
234 |    },
235 |    "outputs": [],
236 |    "source": []
237 |   }
238 |  ],
239 |  "metadata": {
240 |   "anaconda-cloud": {},
241 |   "kernelspec": {
242 |    "display_name": "Python [Root]",
243 |    "language": "python",
244 |    "name": "Python [Root]"
245 |   },
246 |   "language_info": {
247 |    "codemirror_mode": {
248 |     "name": "ipython",
249 |     "version": 3
250 |    },
251 |    "file_extension": ".py",
252 |    "mimetype": "text/x-python",
253 |    "name": "python",
254 |    "nbconvert_exporter": "python",
255 |    "pygments_lexer": "ipython3",
256 |    "version": "3.5.4"
257 |   }
258 |  },
259 |  "nbformat": 4,
260 |  "nbformat_minor": 1
261 | }
262 | 


--------------------------------------------------------------------------------
/Chapter5-EnergyModel-Comparison/Energy_Modeling_ERF_D1.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import numpy as np\n",
 12 |     "import pandas as pd\n",
 13 |     "from sklearn.model_selection import GridSearchCV, TimeSeriesSplit, KFold, RepeatedKFold, train_test_split\n",
 14 |     "import matplotlib.pyplot as plt\n",
 15 |     "import seaborn as sns\n",
 16 |     "sns.set(color_codes=True)\n",
 17 |     "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer\n",
 18 |     "from time import time\n",
 19 |     "from sklearn.preprocessing import MinMaxScaler\n",
 20 |     "from sklearn.preprocessing import quantile_transform\n",
 21 |     "import scipy.stats as st\n",
 22 |     "from sklearn.feature_selection import RFE, RFECV, SelectFromModel\n",
 23 |     "from sklearn.ensemble import ExtraTreesRegressor\n",
 24 |     "from sklearn.decomposition import PCA\n",
 25 |     "from sklearn.pipeline import Pipeline\n",
 26 |     "import pprint as pp\n",
 27 |     "import datetime\n",
 28 |     "%matplotlib inline"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": null,
 34 |    "metadata": {
 35 |     "collapsed": false
 36 |    },
 37 |    "outputs": [],
 38 |    "source": [
 39 |     "Climate_Data = pd.read_excel('Climate_Data.xls')\n",
 40 |     "#######################################################################################################################\n",
 41 |     "Energy_Data_mean = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).mean()\n",
 42 |     "Energy_Data_mean = Energy_Data_mean[['Day of Week', 'Is Holiday', 'Daylight Savings', 'DHI', 'DNI', 'Dew Point', \n",
 43 |     "                                     'Temperature', 'Relative Humidity']]\n",
 44 |     "Energy_Data_mean.columns = ['Day_of_Week', 'Is_Holiday', 'Daylight_Savings', 'DHI_AVG', 'DNI_AVG', 'Dew Point_AVG', \n",
 45 |     "                            'Temperature_AVG', 'Relative Humidity_AVG']\n",
 46 |     "#######################################################################################################################\n",
 47 |     "Energy_Data_sum = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).sum()\n",
 48 |     "Energy_Data_sum = Energy_Data_sum[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n",
 49 |     "Energy_Data_sum.columns = ['DHI_SUM', 'DNI_SUM', 'Dew Point_SUM', 'Temperature_SUM', 'Relative Humidity_SUM']\n",
 50 |     "#######################################################################################################################\n",
 51 |     "Energy_Data_max = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).max()\n",
 52 |     "Energy_Data_max = Energy_Data_max[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n",
 53 |     "Energy_Data_max.columns = ['DHI_MAX', 'DNI_MAX', 'Dew Point_MAX', 'Temperature_MAX', 'Relative Humidity_MAX']\n",
 54 |     "#######################################################################################################################\n",
 55 |     "Energy_Data_std = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).std()\n",
 56 |     "Energy_Data_std = Energy_Data_std[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n",
 57 |     "Energy_Data_std.columns = ['DHI_STD', 'DNI_STD', 'Dew Point_STD', 'Temperature_STD', 'Relative Humidity_STD']\n",
 58 |     "#######################################################################################################################\n",
 59 |     "Energy_Data_min = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).min()\n",
 60 |     "Energy_Data_min = Energy_Data_min[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n",
 61 |     "Energy_Data_min.columns = ['DHI_MIN', 'DNI_MIN', 'Dew Point_MIN', 'Temperature_MIN', 'Relative Humidity_MIN']\n",
 62 |     "#######################################################################################################################\n",
 63 |     "Energy_Data = pd.concat([Energy_Data_mean, Energy_Data_sum, Energy_Data_max, Energy_Data_std, Energy_Data_min], axis=1)\n",
 64 |     "Energy_Data.reset_index(inplace=True)\n",
 65 |     "Energy_Data['Energy_Consumption'] = pd.read_excel('EnergyData_D1.xlsx')\n",
 66 |     "#######################################################################################################################\n",
 67 |     "Energy_Data['Lag1'] = (Energy_Data['Energy_Consumption'].shift(1))\n",
 68 |     "Energy_Data.dropna(axis=0,inplace=True)\n",
 69 |     "#######################################################################################################################\n",
 70 |     "Energy_Data['Date_Time'] = pd.to_datetime(pd.DataFrame({'year': Energy_Data['Year'],'month': Energy_Data['Month'] + 1,\n",
 71 |     "                                                        'day': Energy_Data['Day of Month']}))"
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "code",
 76 |    "execution_count": null,
 77 |    "metadata": {
 78 |     "collapsed": true
 79 |    },
 80 |    "outputs": [],
 81 |    "source": [
 82 |     "Feature_Names = ['Month','Day_of_Week', 'Is_Holiday', 'Daylight_Savings', 'DHI_AVG', 'DNI_AVG', 'Dew Point_AVG', \n",
 83 |     "                 'Temperature_AVG', 'Relative Humidity_AVG', 'DHI_SUM', 'DNI_SUM', 'Dew Point_SUM', 'Temperature_SUM', \n",
 84 |     "                 'Relative Humidity_SUM', 'DHI_MAX', 'DNI_MAX', 'Dew Point_MAX', 'Temperature_MAX', \n",
 85 |     "                 'Relative Humidity_MAX', 'DHI_STD', 'DNI_STD', 'Dew Point_STD', 'Temperature_STD', \n",
 86 |     "                 'Relative Humidity_STD', 'DHI_MIN', 'DNI_MIN', 'Dew Point_MIN', 'Temperature_MIN', \n",
 87 |     "                 'Relative Humidity_MIN', 'Lag1']\n",
 88 |     "\n",
 89 |     "X = Energy_Data[Feature_Names].values\n",
 90 |     "y = Energy_Data['Energy_Consumption'].values\n",
 91 |     "date_time = Energy_Data['Date_Time']"
 92 |    ]
 93 |   },
 94 |   {
 95 |    "cell_type": "code",
 96 |    "execution_count": null,
 97 |    "metadata": {
 98 |     "collapsed": true
 99 |    },
100 |    "outputs": [],
101 |    "source": [
102 |     "#################################################################################################\n",
103 |     "# To test anomaly detector\n",
104 |     "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, shuffle=False)\n",
105 |     "DT_train, DT_Test = train_test_split(date_time, test_size=0.5, shuffle=False)\n",
106 |     "#################################################################################################"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "code",
111 |    "execution_count": null,
112 |    "metadata": {
113 |     "collapsed": true
114 |    },
115 |    "outputs": [],
116 |    "source": [
117 |     "def energymodel_RF():\n",
118 |     "    t0 = time()\n",
119 |     "    np.random.seed(7)\n",
120 |     "    ########################################################################################\n",
121 |     "    # Regression\n",
122 |     "    kf = KFold(n_splits=5, shuffle=True)\n",
123 |     "    scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n",
124 |     "    \n",
125 |     "    rfecv = RFECV(estimator=ExtraTreesRegressor(n_jobs=-1), step=1, cv=kf, scoring=scoring_param)\n",
126 |     "    FS_model = rfecv.fit(X_train, y_train)\n",
127 |     "    \n",
128 |     "    ranks = FS_model.ranking_\n",
129 |     "    FN =[]\n",
130 |     "    for i in range(len(ranks)):\n",
131 |     "        if ranks[i] == 1:\n",
132 |     "            FN.append(Feature_Names[i])    \n",
133 |     "    print(FN)\n",
134 |     "    \n",
135 |     "    X = Energy_Data[FN].values\n",
136 |     "    X_train_transformed, X_test_transformed = train_test_split(X, test_size=0.5, shuffle=False)\n",
137 |     "    \n",
138 |     "    p_grid = dict()\n",
139 |     "    p_grid = dict(n_estimators = [int(i) for i in np.linspace(100,1000,num=10)])\n",
140 |     "    \n",
141 |     "    model = GridSearchCV(estimator = ExtraTreesRegressor(n_jobs=-1), \n",
142 |     "                         param_grid = p_grid, scoring = scoring_param, cv = kf, n_jobs=-1)\n",
143 |     "    model.fit(X_train_transformed, y_train)\n",
144 |     "    \n",
145 |     "    params = model.best_params_\n",
146 |     "    print(\"Best Est: %s\" % (params['n_estimators']))\n",
147 |     "    \n",
148 |     "    Y_Test_Pred = model.predict(X_test_transformed)\n",
149 |     "    \n",
150 |     "    rmse = np.sqrt(mean_squared_error(y_test,Y_Test_Pred))\n",
151 |     "    data_range = y_test.max() - y_test.min()\n",
152 |     "    NRMSE = (rmse/data_range) * 100.0\n",
153 |     "    RSQ = r2_score(y_test,Y_Test_Pred)\n",
154 |     "    print(\"Normalized RMSE: %0.3f\" % NRMSE)\n",
155 |     "    print(\"R-squared: %0.3f\" % RSQ)\n",
156 |     "    \n",
157 |     "    fig = plt.figure(figsize=(30,20))\n",
158 |     "    ax = fig.add_subplot(1, 1, 1)\n",
159 |     "    plt.scatter(y_test, Y_Test_Pred, c=\"g\", s=200, alpha=0.5)\n",
160 |     "    plt.xlabel(\"Target [J]\", fontsize=40)\n",
161 |     "    plt.ylabel(\"Predictions [J]\", fontsize=40)\n",
162 |     "    plt.xticks(fontsize=25)\n",
163 |     "    plt.yticks(fontsize=25)\n",
164 |     "    plt.savefig('Scatter-Target-vs-Pred-ET-D1')\n",
165 |     "    \n",
166 |     "    fig = plt.figure(figsize=(30,20))\n",
167 |     "    ax = fig.add_subplot(1, 1, 1)\n",
168 |     "    plt.plot(DT_Test.dt.to_pydatetime(), y_test, 'k.', lw=5, markersize=20, label=u'Observations')\n",
169 |     "    plt.plot(DT_Test.dt.to_pydatetime(), Y_Test_Pred, 'r-', lw=5, label=u'Prediction')\n",
170 |     "    plt.xlabel('Date Time',fontsize=40)\n",
171 |     "    plt.ylabel('Energy Consumption - Facility [J]',fontsize=40)\n",
172 |     "    plt.xticks(fontsize=25)\n",
173 |     "    plt.yticks(fontsize=25)\n",
174 |     "    plt.legend(loc='best',fontsize=30)\n",
175 |     "    plt.savefig('Plot-Target-vs-Pred-ET-D1')\n",
176 |     "    \n",
177 |     "    t1 = time()\n",
178 |     "    print('Time taken for this trial %f' %(t1-t0))\n",
179 |     "    \n",
180 |     "    return model, y_test, Y_Test_Pred"
181 |    ]
182 |   },
183 |   {
184 |    "cell_type": "code",
185 |    "execution_count": null,
186 |    "metadata": {
187 |     "collapsed": false
188 |    },
189 |    "outputs": [],
190 |    "source": [
191 |     "energymodel_RF()"
192 |    ]
193 |   },
194 |   {
195 |    "cell_type": "code",
196 |    "execution_count": null,
197 |    "metadata": {
198 |     "collapsed": true
199 |    },
200 |    "outputs": [],
201 |    "source": []
202 |   }
203 |  ],
204 |  "metadata": {
205 |   "anaconda-cloud": {},
206 |   "kernelspec": {
207 |    "display_name": "Python [Root]",
208 |    "language": "python",
209 |    "name": "Python [Root]"
210 |   },
211 |   "language_info": {
212 |    "codemirror_mode": {
213 |     "name": "ipython",
214 |     "version": 3
215 |    },
216 |    "file_extension": ".py",
217 |    "mimetype": "text/x-python",
218 |    "name": "python",
219 |    "nbconvert_exporter": "python",
220 |    "pygments_lexer": "ipython3",
221 |    "version": "3.5.4"
222 |   }
223 |  },
224 |  "nbformat": 4,
225 |  "nbformat_minor": 1
226 | }
227 | 


--------------------------------------------------------------------------------
/Chapter5-EnergyModel-Comparison/Energy_Modeling_ERF_D2.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import numpy as np\n",
 12 |     "import pandas as pd\n",
 13 |     "from sklearn.model_selection import GridSearchCV, TimeSeriesSplit, KFold, RepeatedKFold, train_test_split\n",
 14 |     "import matplotlib.pyplot as plt\n",
 15 |     "import seaborn as sns\n",
 16 |     "sns.set(color_codes=True)\n",
 17 |     "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer\n",
 18 |     "from time import time\n",
 19 |     "from sklearn.preprocessing import MinMaxScaler\n",
 20 |     "from sklearn.preprocessing import quantile_transform\n",
 21 |     "import scipy.stats as st\n",
 22 |     "from sklearn.feature_selection import RFE, RFECV, SelectFromModel\n",
 23 |     "from sklearn.ensemble import ExtraTreesRegressor\n",
 24 |     "from sklearn.decomposition import PCA\n",
 25 |     "from sklearn.pipeline import Pipeline\n",
 26 |     "import pprint as pp\n",
 27 |     "import datetime\n",
 28 |     "%matplotlib inline"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": null,
 34 |    "metadata": {
 35 |     "collapsed": false
 36 |    },
 37 |    "outputs": [],
 38 |    "source": [
 39 |     "Climate_Data = pd.read_excel('Climate_Data.xls')\n",
 40 |     "#######################################################################################################################\n",
 41 |     "Energy_Data_mean = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).mean()\n",
 42 |     "Energy_Data_mean = Energy_Data_mean[['Day of Week', 'Is Holiday', 'Daylight Savings', 'DHI', 'DNI', 'Dew Point', \n",
 43 |     "                                     'Temperature', 'Relative Humidity']]\n",
 44 |     "Energy_Data_mean.columns = ['Day_of_Week', 'Is_Holiday', 'Daylight_Savings', 'DHI_AVG', 'DNI_AVG', 'Dew Point_AVG', \n",
 45 |     "                            'Temperature_AVG', 'Relative Humidity_AVG']\n",
 46 |     "#######################################################################################################################\n",
 47 |     "Energy_Data_sum = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).sum()\n",
 48 |     "Energy_Data_sum = Energy_Data_sum[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n",
 49 |     "Energy_Data_sum.columns = ['DHI_SUM', 'DNI_SUM', 'Dew Point_SUM', 'Temperature_SUM', 'Relative Humidity_SUM']\n",
 50 |     "#######################################################################################################################\n",
 51 |     "Energy_Data_max = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).max()\n",
 52 |     "Energy_Data_max = Energy_Data_max[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n",
 53 |     "Energy_Data_max.columns = ['DHI_MAX', 'DNI_MAX', 'Dew Point_MAX', 'Temperature_MAX', 'Relative Humidity_MAX']\n",
 54 |     "#######################################################################################################################\n",
 55 |     "Energy_Data_std = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).std()\n",
 56 |     "Energy_Data_std = Energy_Data_std[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n",
 57 |     "Energy_Data_std.columns = ['DHI_STD', 'DNI_STD', 'Dew Point_STD', 'Temperature_STD', 'Relative Humidity_STD']\n",
 58 |     "#######################################################################################################################\n",
 59 |     "Energy_Data_min = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).min()\n",
 60 |     "Energy_Data_min = Energy_Data_min[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n",
 61 |     "Energy_Data_min.columns = ['DHI_MIN', 'DNI_MIN', 'Dew Point_MIN', 'Temperature_MIN', 'Relative Humidity_MIN']\n",
 62 |     "#######################################################################################################################\n",
 63 |     "Energy_Data = pd.concat([Energy_Data_mean, Energy_Data_sum, Energy_Data_max, Energy_Data_std, Energy_Data_min], axis=1)\n",
 64 |     "Energy_Data.reset_index(inplace=True)\n",
 65 |     "Energy_Data['Energy_Consumption'] = pd.read_excel('EnergyData_D2.xlsx')\n",
 66 |     "#######################################################################################################################\n",
 67 |     "Energy_Data['Lag1'] = (Energy_Data['Energy_Consumption'].shift(1))\n",
 68 |     "Energy_Data.dropna(axis=0,inplace=True)\n",
 69 |     "#######################################################################################################################\n",
 70 |     "Energy_Data['Date_Time'] = pd.to_datetime(pd.DataFrame({'year': Energy_Data['Year'],'month': Energy_Data['Month'] + 1,\n",
 71 |     "                                                        'day': Energy_Data['Day of Month']}))"
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "code",
 76 |    "execution_count": null,
 77 |    "metadata": {
 78 |     "collapsed": true
 79 |    },
 80 |    "outputs": [],
 81 |    "source": [
 82 |     "Feature_Names = ['Month','Day_of_Week', 'Is_Holiday', 'Daylight_Savings', 'DHI_AVG', 'DNI_AVG', 'Dew Point_AVG', \n",
 83 |     "                 'Temperature_AVG', 'Relative Humidity_AVG', 'DHI_SUM', 'DNI_SUM', 'Dew Point_SUM', 'Temperature_SUM', \n",
 84 |     "                 'Relative Humidity_SUM', 'DHI_MAX', 'DNI_MAX', 'Dew Point_MAX', 'Temperature_MAX', \n",
 85 |     "                 'Relative Humidity_MAX', 'DHI_STD', 'DNI_STD', 'Dew Point_STD', 'Temperature_STD', \n",
 86 |     "                 'Relative Humidity_STD', 'DHI_MIN', 'DNI_MIN', 'Dew Point_MIN', 'Temperature_MIN', \n",
 87 |     "                 'Relative Humidity_MIN', 'Lag1']\n",
 88 |     "\n",
 89 |     "X = Energy_Data[Feature_Names].values\n",
 90 |     "y = Energy_Data['Energy_Consumption'].values\n",
 91 |     "date_time = Energy_Data['Date_Time']"
 92 |    ]
 93 |   },
 94 |   {
 95 |    "cell_type": "code",
 96 |    "execution_count": null,
 97 |    "metadata": {
 98 |     "collapsed": true
 99 |    },
100 |    "outputs": [],
101 |    "source": [
102 |     "#################################################################################################\n",
103 |     "# To test anomaly detector\n",
104 |     "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, shuffle=False)\n",
105 |     "DT_train, DT_Test = train_test_split(date_time, test_size=0.5, shuffle=False)\n",
106 |     "#################################################################################################"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "code",
111 |    "execution_count": null,
112 |    "metadata": {
113 |     "collapsed": true
114 |    },
115 |    "outputs": [],
116 |    "source": [
117 |     "def energymodel_RF():\n",
118 |     "    t0 = time()\n",
119 |     "    np.random.seed(7)\n",
120 |     "    ########################################################################################\n",
121 |     "    # Regression\n",
122 |     "    kf = KFold(n_splits=5, shuffle=True)\n",
123 |     "    scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n",
124 |     "    \n",
125 |     "    rfecv = RFECV(estimator=ExtraTreesRegressor(n_jobs=-1), step=1, cv=kf, scoring=scoring_param)\n",
126 |     "    FS_model = rfecv.fit(X_train, y_train)\n",
127 |     "    \n",
128 |     "    ranks = FS_model.ranking_\n",
129 |     "    FN =[]\n",
130 |     "    for i in range(len(ranks)):\n",
131 |     "        if ranks[i] == 1:\n",
132 |     "            FN.append(Feature_Names[i])    \n",
133 |     "    print(FN)\n",
134 |     "    \n",
135 |     "    X = Energy_Data[FN].values\n",
136 |     "    X_train_transformed, X_test_transformed = train_test_split(X, test_size=0.5, shuffle=False)\n",
137 |     "    \n",
138 |     "    p_grid = dict()\n",
139 |     "    p_grid = dict(n_estimators = [int(i) for i in np.linspace(100,1000,num=10)])\n",
140 |     "    \n",
141 |     "    model = GridSearchCV(estimator = ExtraTreesRegressor(n_jobs=-1), \n",
142 |     "                         param_grid = p_grid, scoring = scoring_param, cv = kf, n_jobs=-1)\n",
143 |     "    model.fit(X_train_transformed, y_train)\n",
144 |     "    \n",
145 |     "    params = model.best_params_\n",
146 |     "    print(\"Best Est: %s\" % (params['n_estimators']))\n",
147 |     "    \n",
148 |     "    Y_Test_Pred = model.predict(X_test_transformed)\n",
149 |     "    \n",
150 |     "    rmse = np.sqrt(mean_squared_error(y_test,Y_Test_Pred))\n",
151 |     "    data_range = y_test.max() - y_test.min()\n",
152 |     "    NRMSE = (rmse/data_range) * 100.0\n",
153 |     "    RSQ = r2_score(y_test,Y_Test_Pred)\n",
154 |     "    print(\"Normalized RMSE: %0.3f\" % NRMSE)\n",
155 |     "    print(\"R-squared: %0.3f\" % RSQ)\n",
156 |     "    \n",
157 |     "    fig = plt.figure(figsize=(30,20))\n",
158 |     "    ax = fig.add_subplot(1, 1, 1)\n",
159 |     "    plt.scatter(y_test, Y_Test_Pred, c=\"g\", s=200, alpha=0.5)\n",
160 |     "    plt.xlabel(\"Target [J]\", fontsize=40)\n",
161 |     "    plt.ylabel(\"Predictions [J]\", fontsize=40)\n",
162 |     "    plt.xticks(fontsize=25)\n",
163 |     "    plt.yticks(fontsize=25)\n",
164 |     "    plt.savefig('Scatter-Target-vs-Pred-ET-D2')\n",
165 |     "    \n",
166 |     "    fig = plt.figure(figsize=(30,20))\n",
167 |     "    ax = fig.add_subplot(1, 1, 1)\n",
168 |     "    plt.plot(DT_Test.dt.to_pydatetime(), y_test, 'k.', lw=5, markersize=20, label=u'Observations')\n",
169 |     "    plt.plot(DT_Test.dt.to_pydatetime(), Y_Test_Pred, 'r-', lw=5, label=u'Prediction')\n",
170 |     "    plt.xlabel('Date Time',fontsize=40)\n",
171 |     "    plt.ylabel('Energy Consumption - Facility [J]',fontsize=40)\n",
172 |     "    plt.xticks(fontsize=25)\n",
173 |     "    plt.yticks(fontsize=25)\n",
174 |     "    plt.legend(loc='best',fontsize=30)\n",
175 |     "    plt.savefig('Plot-Target-vs-Pred-ET-D2')\n",
176 |     "    \n",
177 |     "    t1 = time()\n",
178 |     "    print('Time taken for this trial %f' %(t1-t0))\n",
179 |     "    \n",
180 |     "    return model, y_test, Y_Test_Pred"
181 |    ]
182 |   },
183 |   {
184 |    "cell_type": "code",
185 |    "execution_count": null,
186 |    "metadata": {
187 |     "collapsed": false
188 |    },
189 |    "outputs": [],
190 |    "source": [
191 |     "energymodel_RF()"
192 |    ]
193 |   },
194 |   {
195 |    "cell_type": "code",
196 |    "execution_count": null,
197 |    "metadata": {
198 |     "collapsed": true
199 |    },
200 |    "outputs": [],
201 |    "source": []
202 |   }
203 |  ],
204 |  "metadata": {
205 |   "anaconda-cloud": {},
206 |   "kernelspec": {
207 |    "display_name": "Python [Root]",
208 |    "language": "python",
209 |    "name": "Python [Root]"
210 |   },
211 |   "language_info": {
212 |    "codemirror_mode": {
213 |     "name": "ipython",
214 |     "version": 3
215 |    },
216 |    "file_extension": ".py",
217 |    "mimetype": "text/x-python",
218 |    "name": "python",
219 |    "nbconvert_exporter": "python",
220 |    "pygments_lexer": "ipython3",
221 |    "version": "3.5.4"
222 |   }
223 |  },
224 |  "nbformat": 4,
225 |  "nbformat_minor": 1
226 | }
227 | 


--------------------------------------------------------------------------------
/Chapter5-EnergyModel-Comparison/Energy_Modeling_ERF_D3.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import numpy as np\n",
 12 |     "import pandas as pd\n",
 13 |     "from sklearn.model_selection import GridSearchCV, TimeSeriesSplit, KFold, RepeatedKFold, train_test_split\n",
 14 |     "import matplotlib.pyplot as plt\n",
 15 |     "import seaborn as sns\n",
 16 |     "sns.set(color_codes=True)\n",
 17 |     "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer\n",
 18 |     "from time import time\n",
 19 |     "from sklearn.preprocessing import MinMaxScaler\n",
 20 |     "from sklearn.preprocessing import quantile_transform\n",
 21 |     "import scipy.stats as st\n",
 22 |     "from sklearn.feature_selection import RFE, RFECV, SelectFromModel\n",
 23 |     "from sklearn.ensemble import ExtraTreesRegressor\n",
 24 |     "from sklearn.decomposition import PCA\n",
 25 |     "from sklearn.pipeline import Pipeline\n",
 26 |     "import pprint as pp\n",
 27 |     "import datetime\n",
 28 |     "%matplotlib inline"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": null,
 34 |    "metadata": {
 35 |     "collapsed": false
 36 |    },
 37 |    "outputs": [],
 38 |    "source": [
 39 |     "Climate_Data = pd.read_excel('Climate_Data.xls')\n",
 40 |     "#######################################################################################################################\n",
 41 |     "Energy_Data_mean = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).mean()\n",
 42 |     "Energy_Data_mean = Energy_Data_mean[['Day of Week', 'Is Holiday', 'Daylight Savings', 'DHI', 'DNI', 'Dew Point', \n",
 43 |     "                                     'Temperature', 'Relative Humidity']]\n",
 44 |     "Energy_Data_mean.columns = ['Day_of_Week', 'Is_Holiday', 'Daylight_Savings', 'DHI_AVG', 'DNI_AVG', 'Dew Point_AVG', \n",
 45 |     "                            'Temperature_AVG', 'Relative Humidity_AVG']\n",
 46 |     "#######################################################################################################################\n",
 47 |     "Energy_Data_sum = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).sum()\n",
 48 |     "Energy_Data_sum = Energy_Data_sum[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n",
 49 |     "Energy_Data_sum.columns = ['DHI_SUM', 'DNI_SUM', 'Dew Point_SUM', 'Temperature_SUM', 'Relative Humidity_SUM']\n",
 50 |     "#######################################################################################################################\n",
 51 |     "Energy_Data_max = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).max()\n",
 52 |     "Energy_Data_max = Energy_Data_max[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n",
 53 |     "Energy_Data_max.columns = ['DHI_MAX', 'DNI_MAX', 'Dew Point_MAX', 'Temperature_MAX', 'Relative Humidity_MAX']\n",
 54 |     "#######################################################################################################################\n",
 55 |     "Energy_Data_std = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).std()\n",
 56 |     "Energy_Data_std = Energy_Data_std[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n",
 57 |     "Energy_Data_std.columns = ['DHI_STD', 'DNI_STD', 'Dew Point_STD', 'Temperature_STD', 'Relative Humidity_STD']\n",
 58 |     "#######################################################################################################################\n",
 59 |     "Energy_Data_min = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).min()\n",
 60 |     "Energy_Data_min = Energy_Data_min[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n",
 61 |     "Energy_Data_min.columns = ['DHI_MIN', 'DNI_MIN', 'Dew Point_MIN', 'Temperature_MIN', 'Relative Humidity_MIN']\n",
 62 |     "#######################################################################################################################\n",
 63 |     "Energy_Data = pd.concat([Energy_Data_mean, Energy_Data_sum, Energy_Data_max, Energy_Data_std, Energy_Data_min], axis=1)\n",
 64 |     "Energy_Data.reset_index(inplace=True)\n",
 65 |     "Energy_Data['Energy_Consumption'] = pd.read_excel('EnergyData_D3.xlsx')\n",
 66 |     "#######################################################################################################################\n",
 67 |     "Energy_Data['Lag1'] = (Energy_Data['Energy_Consumption'].shift(1))\n",
 68 |     "Energy_Data.dropna(axis=0,inplace=True)\n",
 69 |     "#######################################################################################################################\n",
 70 |     "Energy_Data['Date_Time'] = pd.to_datetime(pd.DataFrame({'year': Energy_Data['Year'],'month': Energy_Data['Month'] + 1,\n",
 71 |     "                                                        'day': Energy_Data['Day of Month']}))"
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "code",
 76 |    "execution_count": null,
 77 |    "metadata": {
 78 |     "collapsed": true
 79 |    },
 80 |    "outputs": [],
 81 |    "source": [
 82 |     "Feature_Names = ['Month','Day_of_Week', 'Is_Holiday', 'Daylight_Savings', 'DHI_AVG', 'DNI_AVG', 'Dew Point_AVG', \n",
 83 |     "                 'Temperature_AVG', 'Relative Humidity_AVG', 'DHI_SUM', 'DNI_SUM', 'Dew Point_SUM', 'Temperature_SUM', \n",
 84 |     "                 'Relative Humidity_SUM', 'DHI_MAX', 'DNI_MAX', 'Dew Point_MAX', 'Temperature_MAX', \n",
 85 |     "                 'Relative Humidity_MAX', 'DHI_STD', 'DNI_STD', 'Dew Point_STD', 'Temperature_STD', \n",
 86 |     "                 'Relative Humidity_STD', 'DHI_MIN', 'DNI_MIN', 'Dew Point_MIN', 'Temperature_MIN', \n",
 87 |     "                 'Relative Humidity_MIN', 'Lag1']\n",
 88 |     "\n",
 89 |     "X = Energy_Data[Feature_Names].values\n",
 90 |     "y = Energy_Data['Energy_Consumption'].values\n",
 91 |     "date_time = Energy_Data['Date_Time']"
 92 |    ]
 93 |   },
 94 |   {
 95 |    "cell_type": "code",
 96 |    "execution_count": null,
 97 |    "metadata": {
 98 |     "collapsed": true
 99 |    },
100 |    "outputs": [],
101 |    "source": [
102 |     "#################################################################################################\n",
103 |     "# To test anomaly detector: unshuffled 50/50 split, leading rows train and trailing rows test.\n",
104 |     "(X_train, X_test, y_train, y_test,\n",
105 |     " DT_train, DT_Test) = train_test_split(X, y, date_time, test_size=0.5, shuffle=False)\n",
106 |     "#################################################################################################"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "code",
111 |    "execution_count": null,
112 |    "metadata": {
113 |     "collapsed": true
114 |    },
115 |    "outputs": [],
116 |    "source": [
117 |     "def energymodel_RF():\n",
118 |     "    \"\"\"Select features with RFECV, grid-search an ExtraTreesRegressor on the training split, score the test split.\n",
119 |     "\n",
120 |     "    Uses the notebook globals X_train, X_test, y_train, y_test, DT_Test and Feature_Names. Prints the selected\n",
121 |     "    features, the best n_estimators, the RMSE as a percentage of the test-target range and R-squared, and\n",
122 |     "    saves a scatter plot and a time-series plot through plt.savefig.\n",
123 |     "\n",
124 |     "    Returns the tuple (fitted GridSearchCV, y_test, predictions for X_test).\n",
125 |     "    \"\"\"\n",
126 |     "    t0 = time()\n",
127 |     "    np.random.seed(7)\n",
128 |     "    kf = KFold(n_splits=5, shuffle=True)\n",
129 |     "    # greater_is_better=False makes the scorer return the negated MSE.\n",
130 |     "    scoring_param = make_scorer(mean_squared_error, greater_is_better=False)\n",
131 |     "\n",
132 |     "    # Recursive feature elimination, one feature per step, scored by cross-validation.\n",
133 |     "    rfecv = RFECV(estimator=ExtraTreesRegressor(n_jobs=-1), step=1, cv=kf, scoring=scoring_param)\n",
134 |     "    FS_model = rfecv.fit(X_train, y_train)\n",
135 |     "\n",
136 |     "    # support_ is True exactly where ranking_ == 1, i.e. for the retained features.\n",
137 |     "    FN = [name for name, keep in zip(Feature_Names, FS_model.support_) if keep]\n",
138 |     "    print(FN)\n",
139 |     "\n",
140 |     "    # Reduce the existing split with the fitted selector rather than rebuilding it from Energy_Data through\n",
141 |     "    # DataFrame.as_matrix() (removed in pandas 1.0) and a second hard-coded 50/50 split.\n",
142 |     "    X_train_transformed = FS_model.transform(X_train)\n",
143 |     "    X_test_transformed = FS_model.transform(X_test)\n",
144 |     "\n",
145 |     "    # Candidate ensemble sizes: 100, 200, ..., 1000 trees.\n",
146 |     "    p_grid = dict(n_estimators=[int(i) for i in np.linspace(100, 1000, num=10)])\n",
147 |     "    model = GridSearchCV(estimator=ExtraTreesRegressor(n_jobs=-1),\n",
148 |     "                         param_grid=p_grid, scoring=scoring_param, cv=kf, n_jobs=-1)\n",
149 |     "    model.fit(X_train_transformed, y_train)\n",
150 |     "    print(\"Best Est: %s\" % (model.best_params_['n_estimators']))\n",
151 |     "\n",
152 |     "    Y_Test_Pred = model.predict(X_test_transformed)\n",
153 |     "    rmse = np.sqrt(mean_squared_error(y_test, Y_Test_Pred))\n",
154 |     "    NRMSE = (rmse / (y_test.max() - y_test.min())) * 100.0\n",
155 |     "    RSQ = r2_score(y_test, Y_Test_Pred)\n",
156 |     "    print(\"Normalized RMSE: %0.3f\" % NRMSE)\n",
157 |     "    print(\"R-squared: %0.3f\" % RSQ)\n",
158 |     "\n",
159 |     "    # Figure 1: predictions against observed targets.\n",
160 |     "    plt.figure(figsize=(30, 20))\n",
161 |     "    plt.scatter(y_test, Y_Test_Pred, c=\"g\", s=200, alpha=0.5)\n",
162 |     "    plt.xlabel(\"Target [J]\", fontsize=40)\n",
163 |     "    plt.ylabel(\"Predictions [J]\", fontsize=40)\n",
164 |     "    plt.xticks(fontsize=25)\n",
165 |     "    plt.yticks(fontsize=25)\n",
166 |     "    plt.savefig('Scatter-Target-vs-Pred-ET-D3')\n",
167 |     "\n",
168 |     "    # Figure 2: observed and predicted values over the test dates.\n",
169 |     "    plt.figure(figsize=(30, 20))\n",
170 |     "    plt.plot(DT_Test.dt.to_pydatetime(), y_test, 'k.', lw=5, markersize=20, label=u'Observations')\n",
171 |     "    plt.plot(DT_Test.dt.to_pydatetime(), Y_Test_Pred, 'r-', lw=5, label=u'Prediction')\n",
172 |     "    plt.xlabel('Date Time', fontsize=40)\n",
173 |     "    plt.ylabel('Energy Consumption - Facility [J]', fontsize=40)\n",
174 |     "    plt.xticks(fontsize=25)\n",
175 |     "    plt.yticks(fontsize=25)\n",
176 |     "    plt.legend(loc='best', fontsize=30)\n",
177 |     "    plt.savefig('Plot-Target-vs-Pred-ET-D3')\n",
178 |     "\n",
179 |     "    print('Time taken for this trial %f' % (time() - t0))\n",
180 |     "    return model, y_test, Y_Test_Pred"
181 |    ]
182 |   },
183 |   {
184 |    "cell_type": "code",
185 |    "execution_count": null,
186 |    "metadata": {
187 |     "collapsed": false
188 |    },
189 |    "outputs": [],
190 |    "source": [
191 |     "energymodel_RF()"
192 |    ]
193 |   },
194 |   {
195 |    "cell_type": "code",
196 |    "execution_count": null,
197 |    "metadata": {
198 |     "collapsed": true
199 |    },
200 |    "outputs": [],
201 |    "source": []
202 |   }
203 |  ],
204 |  "metadata": {
205 |   "anaconda-cloud": {},
206 |   "kernelspec": {
207 |    "display_name": "Python [Root]",
208 |    "language": "python",
209 |    "name": "Python [Root]"
210 |   },
211 |   "language_info": {
212 |    "codemirror_mode": {
213 |     "name": "ipython",
214 |     "version": 3
215 |    },
216 |    "file_extension": ".py",
217 |    "mimetype": "text/x-python",
218 |    "name": "python",
219 |    "nbconvert_exporter": "python",
220 |    "pygments_lexer": "ipython3",
221 |    "version": "3.5.4"
222 |   }
223 |  },
224 |  "nbformat": 4,
225 |  "nbformat_minor": 1
226 | }
227 | 


--------------------------------------------------------------------------------
/Chapter5-EnergyModel-Comparison/Energy_Modeling_RF_D1.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import numpy as np\n",
 12 |     "import pandas as pd\n",
 13 |     "from sklearn.model_selection import GridSearchCV, TimeSeriesSplit, KFold, RepeatedKFold, train_test_split\n",
 14 |     "import matplotlib.pyplot as plt\n",
 15 |     "import seaborn as sns\n",
 16 |     "sns.set(color_codes=True)\n",
 17 |     "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer\n",
 18 |     "from time import time\n",
 19 |     "from sklearn.preprocessing import MinMaxScaler\n",
 20 |     "from sklearn.preprocessing import quantile_transform\n",
 21 |     "import scipy.stats as st\n",
 22 |     "from sklearn.feature_selection import RFE, RFECV, SelectFromModel\n",
 23 |     "from sklearn.ensemble import RandomForestRegressor\n",
 24 |     "from sklearn.decomposition import PCA\n",
 25 |     "from sklearn.pipeline import Pipeline\n",
 26 |     "import pprint as pp\n",
 27 |     "import datetime\n",
 28 |     "%matplotlib inline"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": null,
 34 |    "metadata": {
 35 |     "collapsed": false
 36 |    },
 37 |    "outputs": [],
 38 |    "source": [
 39 |     "Climate_Data = pd.read_excel('Climate_Data.xls')\n",
 40 |     "# One group per calendar day; every daily statistic below is computed from this grouping.\n",
 41 |     "Daily_Groups = Climate_Data.groupby(['Year', 'Month', 'Day of Month'])\n",
 42 |     "Weather_Columns = ['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']\n",
 43 |     "# Daily means of the calendar columns and the weather variables.\n",
 44 |     "Energy_Data_mean = Daily_Groups.mean()[['Day of Week', 'Is Holiday', 'Daylight Savings'] + Weather_Columns]\n",
 45 |     "Energy_Data_mean.columns = ['Day_of_Week', 'Is_Holiday', 'Daylight_Savings', 'DHI_AVG', 'DNI_AVG', 'Dew Point_AVG', \n",
 46 |     "                            'Temperature_AVG', 'Relative Humidity_AVG']\n",
 47 |     "# Daily totals.\n",
 48 |     "Energy_Data_sum = Daily_Groups.sum()[Weather_Columns].add_suffix('_SUM')\n",
 49 |     "# Daily maxima.\n",
 50 |     "Energy_Data_max = Daily_Groups.max()[Weather_Columns].add_suffix('_MAX')\n",
 51 |     "# Daily standard deviations.\n",
 52 |     "Energy_Data_std = Daily_Groups.std()[Weather_Columns].add_suffix('_STD')\n",
 53 |     "# Daily minima.\n",
 54 |     "Energy_Data_min = Daily_Groups.min()[Weather_Columns].add_suffix('_MIN')\n",
 55 |     "# Drop the two scaffolding names so the notebook namespace ends up as before.\n",
 56 |     "del Daily_Groups, Weather_Columns\n",
 57 |     "#######################################################################################################################\n",
 58 |     "# Join the statistics column-wise, then turn the (Year, Month, Day of Month) index back into columns.\n",
 59 |     "Energy_Data = pd.concat([Energy_Data_mean, Energy_Data_sum, Energy_Data_max, Energy_Data_std, Energy_Data_min], axis=1)\n",
 60 |     "Energy_Data.reset_index(inplace=True)\n",
 61 |     "# Attach the consumption values loaded from the EnergyData_D1 workbook.\n",
 62 |     "Energy_Data['Energy_Consumption'] = pd.read_excel('EnergyData_D1.xlsx')\n",
 63 |     "#######################################################################################################################\n",
 64 |     "# Lag1 is the previous row's consumption; rows with any NaN (including the first, which has no lag) are dropped.\n",
 65 |     "Energy_Data['Lag1'] = Energy_Data['Energy_Consumption'].shift(1)\n",
 66 |     "Energy_Data.dropna(axis=0, inplace=True)\n",
 67 |     "#######################################################################################################################\n",
 68 |     "# One timestamp per row. NOTE(review): Month is shifted by +1, presumably because it is stored 0-based - confirm.\n",
 69 |     "Energy_Data['Date_Time'] = pd.to_datetime(pd.DataFrame({'year': Energy_Data['Year'],\n",
 70 |     "                                                        'month': Energy_Data['Month'] + 1,\n",
 71 |     "                                                        'day': Energy_Data['Day of Month']}))"
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "code",
 76 |    "execution_count": null,
 77 |    "metadata": {
 78 |     "collapsed": true
 79 |    },
 80 |    "outputs": [],
 81 |    "source": [
 82 |     "Feature_Names = ['Month','Day_of_Week', 'Is_Holiday', 'Daylight_Savings', 'DHI_AVG', 'DNI_AVG', 'Dew Point_AVG', \n",
 83 |     "                 'Temperature_AVG', 'Relative Humidity_AVG', 'DHI_SUM', 'DNI_SUM', 'Dew Point_SUM', 'Temperature_SUM', \n",
 84 |     "                 'Relative Humidity_SUM', 'DHI_MAX', 'DNI_MAX', 'Dew Point_MAX', 'Temperature_MAX', \n",
 85 |     "                 'Relative Humidity_MAX', 'DHI_STD', 'DNI_STD', 'Dew Point_STD', 'Temperature_STD', \n",
 86 |     "                 'Relative Humidity_STD', 'DHI_MIN', 'DNI_MIN', 'Dew Point_MIN', 'Temperature_MIN', \n",
 87 |     "                 'Relative Humidity_MIN', 'Lag1']\n",
 88 |     "# NumPy arrays in Feature_Names column order; .values is used because as_matrix() was removed in pandas 1.0.\n",
 89 |     "X = Energy_Data[Feature_Names].values\n",
 90 |     "y = Energy_Data['Energy_Consumption'].values\n",
 91 |     "date_time = Energy_Data['Date_Time']"
 92 |    ]
 93 |   },
 94 |   {
 95 |    "cell_type": "code",
 96 |    "execution_count": null,
 97 |    "metadata": {
 98 |     "collapsed": true
 99 |    },
100 |    "outputs": [],
101 |    "source": [
102 |     "#################################################################################################\n",
103 |     "# To test anomaly detector: unshuffled 50/50 split, leading rows train and trailing rows test.\n",
104 |     "(X_train, X_test, y_train, y_test,\n",
105 |     " DT_train, DT_Test) = train_test_split(X, y, date_time, test_size=0.5, shuffle=False)\n",
106 |     "#################################################################################################"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "code",
111 |    "execution_count": null,
112 |    "metadata": {
113 |     "collapsed": true
114 |    },
115 |    "outputs": [],
116 |    "source": [
117 |     "def energymodel_RF():\n",
118 |     "    \"\"\"Select features with RFECV, grid-search a RandomForestRegressor on the training split, score the test split.\n",
119 |     "\n",
120 |     "    Uses the notebook globals X_train, X_test, y_train, y_test, DT_Test and Feature_Names. Prints the selected\n",
121 |     "    features, the best n_estimators, the RMSE as a percentage of the test-target range and R-squared, and\n",
122 |     "    saves a scatter plot and a time-series plot through plt.savefig.\n",
123 |     "\n",
124 |     "    Returns the tuple (fitted GridSearchCV, y_test, predictions for X_test).\n",
125 |     "    \"\"\"\n",
126 |     "    t0 = time()\n",
127 |     "    np.random.seed(7)\n",
128 |     "    kf = KFold(n_splits=5, shuffle=True)\n",
129 |     "    # greater_is_better=False makes the scorer return the negated MSE.\n",
130 |     "    scoring_param = make_scorer(mean_squared_error, greater_is_better=False)\n",
131 |     "\n",
132 |     "    # Recursive feature elimination, one feature per step, scored by cross-validation.\n",
133 |     "    rfecv = RFECV(estimator=RandomForestRegressor(n_jobs=-1), step=1, cv=kf, scoring=scoring_param)\n",
134 |     "    FS_model = rfecv.fit(X_train, y_train)\n",
135 |     "\n",
136 |     "    # support_ is True exactly where ranking_ == 1, i.e. for the retained features.\n",
137 |     "    FN = [name for name, keep in zip(Feature_Names, FS_model.support_) if keep]\n",
138 |     "    print(FN)\n",
139 |     "\n",
140 |     "    # Reduce the existing split with the fitted selector rather than rebuilding it from Energy_Data through\n",
141 |     "    # DataFrame.as_matrix() (removed in pandas 1.0) and a second hard-coded 50/50 split.\n",
142 |     "    X_train_transformed = FS_model.transform(X_train)\n",
143 |     "    X_test_transformed = FS_model.transform(X_test)\n",
144 |     "\n",
145 |     "    # Candidate ensemble sizes: 100, 200, ..., 1000 trees.\n",
146 |     "    p_grid = dict(n_estimators=[int(i) for i in np.linspace(100, 1000, num=10)])\n",
147 |     "    model = GridSearchCV(estimator=RandomForestRegressor(n_jobs=-1),\n",
148 |     "                         param_grid=p_grid, scoring=scoring_param, cv=kf, n_jobs=-1)\n",
149 |     "    model.fit(X_train_transformed, y_train)\n",
150 |     "    print(\"Best Est: %s\" % (model.best_params_['n_estimators']))\n",
151 |     "\n",
152 |     "    Y_Test_Pred = model.predict(X_test_transformed)\n",
153 |     "    rmse = np.sqrt(mean_squared_error(y_test, Y_Test_Pred))\n",
154 |     "    NRMSE = (rmse / (y_test.max() - y_test.min())) * 100.0\n",
155 |     "    RSQ = r2_score(y_test, Y_Test_Pred)\n",
156 |     "    print(\"Normalized RMSE: %0.3f\" % NRMSE)\n",
157 |     "    print(\"R-squared: %0.3f\" % RSQ)\n",
158 |     "\n",
159 |     "    # Figure 1: predictions against observed targets.\n",
160 |     "    plt.figure(figsize=(30, 20))\n",
161 |     "    plt.scatter(y_test, Y_Test_Pred, c=\"g\", s=200, alpha=0.5)\n",
162 |     "    plt.xlabel(\"Target [J]\", fontsize=40)\n",
163 |     "    plt.ylabel(\"Predictions [J]\", fontsize=40)\n",
164 |     "    plt.xticks(fontsize=25)\n",
165 |     "    plt.yticks(fontsize=25)\n",
166 |     "    plt.savefig('Scatter-Target-vs-Pred-RF-D1')\n",
167 |     "\n",
168 |     "    # Figure 2: observed and predicted values over the test dates.\n",
169 |     "    plt.figure(figsize=(30, 20))\n",
170 |     "    plt.plot(DT_Test.dt.to_pydatetime(), y_test, 'k.', lw=5, markersize=20, label=u'Observations')\n",
171 |     "    plt.plot(DT_Test.dt.to_pydatetime(), Y_Test_Pred, 'r-', lw=5, label=u'Prediction')\n",
172 |     "    plt.xlabel('Date Time', fontsize=40)\n",
173 |     "    plt.ylabel('Energy Consumption - Facility [J]', fontsize=40)\n",
174 |     "    plt.xticks(fontsize=25)\n",
175 |     "    plt.yticks(fontsize=25)\n",
176 |     "    plt.legend(loc='best', fontsize=30)\n",
177 |     "    plt.savefig('Plot-Target-vs-Pred-RF-D1')\n",
178 |     "\n",
179 |     "    print('Time taken for this trial %f' % (time() - t0))\n",
180 |     "    return model, y_test, Y_Test_Pred"
181 |    ]
182 |   },
183 |   {
184 |    "cell_type": "code",
185 |    "execution_count": null,
186 |    "metadata": {
187 |     "collapsed": false
188 |    },
189 |    "outputs": [],
190 |    "source": [
191 |     "energymodel_RF()"
192 |    ]
193 |   },
194 |   {
195 |    "cell_type": "code",
196 |    "execution_count": null,
197 |    "metadata": {
198 |     "collapsed": true
199 |    },
200 |    "outputs": [],
201 |    "source": []
202 |   }
203 |  ],
204 |  "metadata": {
205 |   "anaconda-cloud": {},
206 |   "kernelspec": {
207 |    "display_name": "Python [Root]",
208 |    "language": "python",
209 |    "name": "Python [Root]"
210 |   },
211 |   "language_info": {
212 |    "codemirror_mode": {
213 |     "name": "ipython",
214 |     "version": 3
215 |    },
216 |    "file_extension": ".py",
217 |    "mimetype": "text/x-python",
218 |    "name": "python",
219 |    "nbconvert_exporter": "python",
220 |    "pygments_lexer": "ipython3",
221 |    "version": "3.5.4"
222 |   }
223 |  },
224 |  "nbformat": 4,
225 |  "nbformat_minor": 1
226 | }
227 | 


--------------------------------------------------------------------------------
/Chapter5-EnergyModel-Comparison/Energy_Modeling_RF_D2.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import numpy as np\n",
 12 |     "import pandas as pd\n",
 13 |     "from sklearn.model_selection import GridSearchCV, TimeSeriesSplit, KFold, RepeatedKFold, train_test_split\n",
 14 |     "import matplotlib.pyplot as plt\n",
 15 |     "import seaborn as sns\n",
 16 |     "sns.set(color_codes=True)\n",
 17 |     "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer\n",
 18 |     "from time import time\n",
 19 |     "from sklearn.preprocessing import MinMaxScaler\n",
 20 |     "from sklearn.preprocessing import quantile_transform\n",
 21 |     "import scipy.stats as st\n",
 22 |     "from sklearn.feature_selection import RFE, RFECV, SelectFromModel\n",
 23 |     "from sklearn.ensemble import RandomForestRegressor\n",
 24 |     "from sklearn.decomposition import PCA\n",
 25 |     "from sklearn.pipeline import Pipeline\n",
 26 |     "import pprint as pp\n",
 27 |     "import datetime\n",
 28 |     "%matplotlib inline"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": null,
 34 |    "metadata": {
 35 |     "collapsed": false
 36 |    },
 37 |    "outputs": [],
 38 |    "source": [
 39 |     "Climate_Data = pd.read_excel('Climate_Data.xls')\n",
 40 |     "# One group per calendar day; every daily statistic below is computed from this grouping.\n",
 41 |     "Daily_Groups = Climate_Data.groupby(['Year', 'Month', 'Day of Month'])\n",
 42 |     "Weather_Columns = ['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']\n",
 43 |     "# Daily means of the calendar columns and the weather variables.\n",
 44 |     "Energy_Data_mean = Daily_Groups.mean()[['Day of Week', 'Is Holiday', 'Daylight Savings'] + Weather_Columns]\n",
 45 |     "Energy_Data_mean.columns = ['Day_of_Week', 'Is_Holiday', 'Daylight_Savings', 'DHI_AVG', 'DNI_AVG', 'Dew Point_AVG', \n",
 46 |     "                            'Temperature_AVG', 'Relative Humidity_AVG']\n",
 47 |     "# Daily totals.\n",
 48 |     "Energy_Data_sum = Daily_Groups.sum()[Weather_Columns].add_suffix('_SUM')\n",
 49 |     "# Daily maxima.\n",
 50 |     "Energy_Data_max = Daily_Groups.max()[Weather_Columns].add_suffix('_MAX')\n",
 51 |     "# Daily standard deviations.\n",
 52 |     "Energy_Data_std = Daily_Groups.std()[Weather_Columns].add_suffix('_STD')\n",
 53 |     "# Daily minima.\n",
 54 |     "Energy_Data_min = Daily_Groups.min()[Weather_Columns].add_suffix('_MIN')\n",
 55 |     "# Drop the two scaffolding names so the notebook namespace ends up as before.\n",
 56 |     "del Daily_Groups, Weather_Columns\n",
 57 |     "#######################################################################################################################\n",
 58 |     "# Join the statistics column-wise, then turn the (Year, Month, Day of Month) index back into columns.\n",
 59 |     "Energy_Data = pd.concat([Energy_Data_mean, Energy_Data_sum, Energy_Data_max, Energy_Data_std, Energy_Data_min], axis=1)\n",
 60 |     "Energy_Data.reset_index(inplace=True)\n",
 61 |     "# Attach the consumption values loaded from the EnergyData_D2 workbook.\n",
 62 |     "Energy_Data['Energy_Consumption'] = pd.read_excel('EnergyData_D2.xlsx')\n",
 63 |     "#######################################################################################################################\n",
 64 |     "# Lag1 is the previous row's consumption; rows with any NaN (including the first, which has no lag) are dropped.\n",
 65 |     "Energy_Data['Lag1'] = Energy_Data['Energy_Consumption'].shift(1)\n",
 66 |     "Energy_Data.dropna(axis=0, inplace=True)\n",
 67 |     "#######################################################################################################################\n",
 68 |     "# One timestamp per row. NOTE(review): Month is shifted by +1, presumably because it is stored 0-based - confirm.\n",
 69 |     "Energy_Data['Date_Time'] = pd.to_datetime(pd.DataFrame({'year': Energy_Data['Year'],\n",
 70 |     "                                                        'month': Energy_Data['Month'] + 1,\n",
 71 |     "                                                        'day': Energy_Data['Day of Month']}))"
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "code",
 76 |    "execution_count": null,
 77 |    "metadata": {
 78 |     "collapsed": true
 79 |    },
 80 |    "outputs": [],
 81 |    "source": [
 82 |     "Feature_Names = ['Month','Day_of_Week', 'Is_Holiday', 'Daylight_Savings', 'DHI_AVG', 'DNI_AVG', 'Dew Point_AVG', \n",
 83 |     "                 'Temperature_AVG', 'Relative Humidity_AVG', 'DHI_SUM', 'DNI_SUM', 'Dew Point_SUM', 'Temperature_SUM', \n",
 84 |     "                 'Relative Humidity_SUM', 'DHI_MAX', 'DNI_MAX', 'Dew Point_MAX', 'Temperature_MAX', \n",
 85 |     "                 'Relative Humidity_MAX', 'DHI_STD', 'DNI_STD', 'Dew Point_STD', 'Temperature_STD', \n",
 86 |     "                 'Relative Humidity_STD', 'DHI_MIN', 'DNI_MIN', 'Dew Point_MIN', 'Temperature_MIN', \n",
 87 |     "                 'Relative Humidity_MIN', 'Lag1']\n",
 88 |     "# NumPy arrays in Feature_Names column order; .values is used because as_matrix() was removed in pandas 1.0.\n",
 89 |     "X = Energy_Data[Feature_Names].values\n",
 90 |     "y = Energy_Data['Energy_Consumption'].values\n",
 91 |     "date_time = Energy_Data['Date_Time']"
 92 |    ]
 93 |   },
 94 |   {
 95 |    "cell_type": "code",
 96 |    "execution_count": null,
 97 |    "metadata": {
 98 |     "collapsed": true
 99 |    },
100 |    "outputs": [],
101 |    "source": [
102 |     "#################################################################################################\n",
103 |     "# To test anomaly detector: unshuffled 50/50 split, leading rows train and trailing rows test.\n",
104 |     "(X_train, X_test, y_train, y_test,\n",
105 |     " DT_train, DT_Test) = train_test_split(X, y, date_time, test_size=0.5, shuffle=False)\n",
106 |     "#################################################################################################"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "code",
111 |    "execution_count": null,
112 |    "metadata": {
113 |     "collapsed": true
114 |    },
115 |    "outputs": [],
116 |    "source": [
117 |     "def energymodel_RF():\n",
118 |     "    \"\"\"Select features with RFECV, grid-search a RandomForestRegressor on the training split, score the test split.\n",
119 |     "\n",
120 |     "    Uses the notebook globals X_train, X_test, y_train, y_test, DT_Test and Feature_Names. Prints the selected\n",
121 |     "    features, the best n_estimators, the RMSE as a percentage of the test-target range and R-squared, and\n",
122 |     "    saves a scatter plot and a time-series plot through plt.savefig.\n",
123 |     "\n",
124 |     "    Returns the tuple (fitted GridSearchCV, y_test, predictions for X_test).\n",
125 |     "    \"\"\"\n",
126 |     "    t0 = time()\n",
127 |     "    np.random.seed(7)\n",
128 |     "    kf = KFold(n_splits=5, shuffle=True)\n",
129 |     "    # greater_is_better=False makes the scorer return the negated MSE.\n",
130 |     "    scoring_param = make_scorer(mean_squared_error, greater_is_better=False)\n",
131 |     "\n",
132 |     "    # Recursive feature elimination, one feature per step, scored by cross-validation.\n",
133 |     "    rfecv = RFECV(estimator=RandomForestRegressor(n_jobs=-1), step=1, cv=kf, scoring=scoring_param)\n",
134 |     "    FS_model = rfecv.fit(X_train, y_train)\n",
135 |     "\n",
136 |     "    # support_ is True exactly where ranking_ == 1, i.e. for the retained features.\n",
137 |     "    FN = [name for name, keep in zip(Feature_Names, FS_model.support_) if keep]\n",
138 |     "    print(FN)\n",
139 |     "\n",
140 |     "    # Reduce the existing split with the fitted selector rather than rebuilding it from Energy_Data through\n",
141 |     "    # DataFrame.as_matrix() (removed in pandas 1.0) and a second hard-coded 50/50 split.\n",
142 |     "    X_train_transformed = FS_model.transform(X_train)\n",
143 |     "    X_test_transformed = FS_model.transform(X_test)\n",
144 |     "\n",
145 |     "    # Candidate ensemble sizes: 100, 200, ..., 1000 trees.\n",
146 |     "    p_grid = dict(n_estimators=[int(i) for i in np.linspace(100, 1000, num=10)])\n",
147 |     "    model = GridSearchCV(estimator=RandomForestRegressor(n_jobs=-1),\n",
148 |     "                         param_grid=p_grid, scoring=scoring_param, cv=kf, n_jobs=-1)\n",
149 |     "    model.fit(X_train_transformed, y_train)\n",
150 |     "    print(\"Best Est: %s\" % (model.best_params_['n_estimators']))\n",
151 |     "\n",
152 |     "    Y_Test_Pred = model.predict(X_test_transformed)\n",
153 |     "    rmse = np.sqrt(mean_squared_error(y_test, Y_Test_Pred))\n",
154 |     "    NRMSE = (rmse / (y_test.max() - y_test.min())) * 100.0\n",
155 |     "    RSQ = r2_score(y_test, Y_Test_Pred)\n",
156 |     "    print(\"Normalized RMSE: %0.3f\" % NRMSE)\n",
157 |     "    print(\"R-squared: %0.3f\" % RSQ)\n",
158 |     "\n",
159 |     "    # Figure 1: predictions against observed targets.\n",
160 |     "    plt.figure(figsize=(30, 20))\n",
161 |     "    plt.scatter(y_test, Y_Test_Pred, c=\"g\", s=200, alpha=0.5)\n",
162 |     "    plt.xlabel(\"Target [J]\", fontsize=40)\n",
163 |     "    plt.ylabel(\"Predictions [J]\", fontsize=40)\n",
164 |     "    plt.xticks(fontsize=25)\n",
165 |     "    plt.yticks(fontsize=25)\n",
166 |     "    plt.savefig('Scatter-Target-vs-Pred-RF-D2')\n",
167 |     "\n",
168 |     "    # Figure 2: observed and predicted values over the test dates.\n",
169 |     "    plt.figure(figsize=(30, 20))\n",
170 |     "    plt.plot(DT_Test.dt.to_pydatetime(), y_test, 'k.', lw=5, markersize=20, label=u'Observations')\n",
171 |     "    plt.plot(DT_Test.dt.to_pydatetime(), Y_Test_Pred, 'r-', lw=5, label=u'Prediction')\n",
172 |     "    plt.xlabel('Date Time', fontsize=40)\n",
173 |     "    plt.ylabel('Energy Consumption - Facility [J]', fontsize=40)\n",
174 |     "    plt.xticks(fontsize=25)\n",
175 |     "    plt.yticks(fontsize=25)\n",
176 |     "    plt.legend(loc='best', fontsize=30)\n",
177 |     "    plt.savefig('Plot-Target-vs-Pred-RF-D2')\n",
178 |     "\n",
179 |     "    print('Time taken for this trial %f' % (time() - t0))\n",
180 |     "    return model, y_test, Y_Test_Pred"
181 |    ]
182 |   },
183 |   {
184 |    "cell_type": "code",
185 |    "execution_count": null,
186 |    "metadata": {
187 |     "collapsed": false
188 |    },
189 |    "outputs": [],
190 |    "source": [
191 |     "energymodel_RF()"
192 |    ]
193 |   },
194 |   {
195 |    "cell_type": "code",
196 |    "execution_count": null,
197 |    "metadata": {
198 |     "collapsed": true
199 |    },
200 |    "outputs": [],
201 |    "source": []
202 |   }
203 |  ],
204 |  "metadata": {
205 |   "anaconda-cloud": {},
206 |   "kernelspec": {
207 |    "display_name": "Python [Root]",
208 |    "language": "python",
209 |    "name": "Python [Root]"
210 |   },
211 |   "language_info": {
212 |    "codemirror_mode": {
213 |     "name": "ipython",
214 |     "version": 3
215 |    },
216 |    "file_extension": ".py",
217 |    "mimetype": "text/x-python",
218 |    "name": "python",
219 |    "nbconvert_exporter": "python",
220 |    "pygments_lexer": "ipython3",
221 |    "version": "3.5.4"
222 |   }
223 |  },
224 |  "nbformat": 4,
225 |  "nbformat_minor": 1
226 | }
227 | 


--------------------------------------------------------------------------------
/Chapter5-EnergyModel-Comparison/Energy_Modeling_RF_D3.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import numpy as np\n",
 12 |     "import pandas as pd\n",
 13 |     "from sklearn.model_selection import GridSearchCV, TimeSeriesSplit, KFold, RepeatedKFold, train_test_split\n",
 14 |     "import matplotlib.pyplot as plt\n",
 15 |     "import seaborn as sns\n",
 16 |     "sns.set(color_codes=True)\n",
 17 |     "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer\n",
 18 |     "from time import time\n",
 19 |     "from sklearn.preprocessing import MinMaxScaler\n",
 20 |     "from sklearn.preprocessing import quantile_transform\n",
 21 |     "import scipy.stats as st\n",
 22 |     "from sklearn.feature_selection import RFE, RFECV, SelectFromModel\n",
 23 |     "from sklearn.ensemble import RandomForestRegressor\n",
 24 |     "from sklearn.decomposition import PCA\n",
 25 |     "from sklearn.pipeline import Pipeline\n",
 26 |     "import pprint as pp\n",
 27 |     "import datetime\n",
 28 |     "%matplotlib inline"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": null,
 34 |    "metadata": {
 35 |     "collapsed": false
 36 |    },
 37 |    "outputs": [],
 38 |    "source": [
 39 |     "Climate_Data = pd.read_excel('Climate_Data.xls')\n",
 40 |     "# One group per calendar day; every daily statistic below is computed from this grouping.\n",
 41 |     "Daily_Groups = Climate_Data.groupby(['Year', 'Month', 'Day of Month'])\n",
 42 |     "Weather_Columns = ['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']\n",
 43 |     "# Daily means of the calendar columns and the weather variables.\n",
 44 |     "Energy_Data_mean = Daily_Groups.mean()[['Day of Week', 'Is Holiday', 'Daylight Savings'] + Weather_Columns]\n",
 45 |     "Energy_Data_mean.columns = ['Day_of_Week', 'Is_Holiday', 'Daylight_Savings', 'DHI_AVG', 'DNI_AVG', 'Dew Point_AVG', \n",
 46 |     "                            'Temperature_AVG', 'Relative Humidity_AVG']\n",
 47 |     "# Daily totals.\n",
 48 |     "Energy_Data_sum = Daily_Groups.sum()[Weather_Columns].add_suffix('_SUM')\n",
 49 |     "# Daily maxima.\n",
 50 |     "Energy_Data_max = Daily_Groups.max()[Weather_Columns].add_suffix('_MAX')\n",
 51 |     "# Daily standard deviations.\n",
 52 |     "Energy_Data_std = Daily_Groups.std()[Weather_Columns].add_suffix('_STD')\n",
 53 |     "# Daily minima.\n",
 54 |     "Energy_Data_min = Daily_Groups.min()[Weather_Columns].add_suffix('_MIN')\n",
 55 |     "# Drop the two scaffolding names so the notebook namespace ends up as before.\n",
 56 |     "del Daily_Groups, Weather_Columns\n",
 57 |     "#######################################################################################################################\n",
 58 |     "# Join the statistics column-wise, then turn the (Year, Month, Day of Month) index back into columns.\n",
 59 |     "Energy_Data = pd.concat([Energy_Data_mean, Energy_Data_sum, Energy_Data_max, Energy_Data_std, Energy_Data_min], axis=1)\n",
 60 |     "Energy_Data.reset_index(inplace=True)\n",
 61 |     "# Attach the consumption values loaded from the EnergyData_D3 workbook.\n",
 62 |     "Energy_Data['Energy_Consumption'] = pd.read_excel('EnergyData_D3.xlsx')\n",
 63 |     "#######################################################################################################################\n",
 64 |     "# Lag1 is the previous row's consumption; rows with any NaN (including the first, which has no lag) are dropped.\n",
 65 |     "Energy_Data['Lag1'] = Energy_Data['Energy_Consumption'].shift(1)\n",
 66 |     "Energy_Data.dropna(axis=0, inplace=True)\n",
 67 |     "#######################################################################################################################\n",
 68 |     "# One timestamp per row. NOTE(review): Month is shifted by +1, presumably because it is stored 0-based - confirm.\n",
 69 |     "Energy_Data['Date_Time'] = pd.to_datetime(pd.DataFrame({'year': Energy_Data['Year'],\n",
 70 |     "                                                        'month': Energy_Data['Month'] + 1,\n",
 71 |     "                                                        'day': Energy_Data['Day of Month']}))"
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "code",
 76 |    "execution_count": null,
 77 |    "metadata": {
 78 |     "collapsed": true
 79 |    },
 80 |    "outputs": [],
 81 |    "source": [
 82 |     "Feature_Names = ['Month','Day_of_Week', 'Is_Holiday', 'Daylight_Savings', 'DHI_AVG', 'DNI_AVG', 'Dew Point_AVG', \n",
 83 |     "                 'Temperature_AVG', 'Relative Humidity_AVG', 'DHI_SUM', 'DNI_SUM', 'Dew Point_SUM', 'Temperature_SUM', \n",
 84 |     "                 'Relative Humidity_SUM', 'DHI_MAX', 'DNI_MAX', 'Dew Point_MAX', 'Temperature_MAX', \n",
 85 |     "                 'Relative Humidity_MAX', 'DHI_STD', 'DNI_STD', 'Dew Point_STD', 'Temperature_STD', \n",
 86 |     "                 'Relative Humidity_STD', 'DHI_MIN', 'DNI_MIN', 'Dew Point_MIN', 'Temperature_MIN', \n",
 87 |     "                 'Relative Humidity_MIN', 'Lag1']\n",
 88 |     "\n",
 89 |     "X = Energy_Data[Feature_Names].as_matrix()\n",
 90 |     "y = Energy_Data['Energy_Consumption'].as_matrix()\n",
 91 |     "date_time = Energy_Data['Date_Time']"
 92 |    ]
 93 |   },
 94 |   {
 95 |    "cell_type": "code",
 96 |    "execution_count": null,
 97 |    "metadata": {
 98 |     "collapsed": true
 99 |    },
100 |    "outputs": [],
101 |    "source": [
102 |     "#################################################################################################\n",
103 |     "# To test anomaly detector\n",
104 |     "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, shuffle=False)\n",
105 |     "DT_train, DT_Test = train_test_split(date_time, test_size=0.5, shuffle=False)\n",
106 |     "#################################################################################################"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "code",
111 |    "execution_count": null,
112 |    "metadata": {
113 |     "collapsed": true
114 |    },
115 |    "outputs": [],
116 |    "source": [
117 |     "def energymodel_RF():\n",
118 |     "    t0 = time()\n",
119 |     "    np.random.seed(7)\n",
120 |     "    ########################################################################################\n",
121 |     "    # Regression\n",
122 |     "    kf = KFold(n_splits=5, shuffle=True)\n",
123 |     "    scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n",
124 |     "    \n",
125 |     "    rfecv = RFECV(estimator=RandomForestRegressor(n_jobs=-1), step=1, cv=kf, scoring=scoring_param)\n",
126 |     "    FS_model = rfecv.fit(X_train, y_train)\n",
127 |     "    \n",
128 |     "    ranks = FS_model.ranking_\n",
129 |     "    FN =[]\n",
130 |     "    for i in range(len(ranks)):\n",
131 |     "        if ranks[i] == 1:\n",
132 |     "            FN.append(Feature_Names[i])    \n",
133 |     "    print(FN)\n",
134 |     "    \n",
135 |     "    X = Energy_Data[FN].as_matrix()\n",
136 |     "    X_train_transformed, X_test_transformed = train_test_split(X, test_size=0.5, shuffle=False)\n",
137 |     "    \n",
138 |     "    p_grid = dict()\n",
139 |     "    p_grid = dict(n_estimators = [int(i) for i in np.linspace(100,1000,num=10)])\n",
140 |     "    \n",
141 |     "    model = GridSearchCV(estimator = RandomForestRegressor(n_jobs=-1), \n",
142 |     "                         param_grid = p_grid, scoring = scoring_param, cv = kf, n_jobs=-1)\n",
143 |     "    model.fit(X_train_transformed, y_train)\n",
144 |     "    \n",
145 |     "    params = model.best_params_\n",
146 |     "    print(\"Best Est: %s\" % (params['n_estimators']))\n",
147 |     "    \n",
148 |     "    Y_Test_Pred = model.predict(X_test_transformed)\n",
149 |     "    \n",
150 |     "    rmse = np.sqrt(mean_squared_error(y_test,Y_Test_Pred))\n",
151 |     "    data_range = y_test.max() - y_test.min()\n",
152 |     "    NRMSE = (rmse/data_range) * 100.0\n",
153 |     "    RSQ = r2_score(y_test,Y_Test_Pred)\n",
154 |     "    print(\"Normalized RMSE: %0.3f\" % NRMSE)\n",
155 |     "    print(\"R-squared: %0.3f\" % RSQ)\n",
156 |     "    \n",
157 |     "    fig = plt.figure(figsize=(30,20))\n",
158 |     "    ax = fig.add_subplot(1, 1, 1)\n",
159 |     "    plt.scatter(y_test, Y_Test_Pred, c=\"g\", s=200, alpha=0.5)\n",
160 |     "    plt.xlabel(\"Target [J]\", fontsize=40)\n",
161 |     "    plt.ylabel(\"Predictions [J]\", fontsize=40)\n",
162 |     "    plt.xticks(fontsize=25)\n",
163 |     "    plt.yticks(fontsize=25)\n",
164 |     "    plt.savefig('Scatter-Target-vs-Pred-RF-D3')\n",
165 |     "    \n",
166 |     "    fig = plt.figure(figsize=(30,20))\n",
167 |     "    ax = fig.add_subplot(1, 1, 1)\n",
168 |     "    plt.plot(DT_Test.dt.to_pydatetime(), y_test, 'k.', lw=5, markersize=20, label=u'Observations')\n",
169 |     "    plt.plot(DT_Test.dt.to_pydatetime(), Y_Test_Pred, 'r-', lw=5, label=u'Prediction')\n",
170 |     "    plt.xlabel('Date Time',fontsize=40)\n",
171 |     "    plt.ylabel('Energy Consumption - Facility [J]',fontsize=40)\n",
172 |     "    plt.xticks(fontsize=25)\n",
173 |     "    plt.yticks(fontsize=25)\n",
174 |     "    plt.legend(loc='best',fontsize=30)\n",
175 |     "    plt.savefig('Plot-Target-vs-Pred-RF-D3')\n",
176 |     "    \n",
177 |     "    t1 = time()\n",
178 |     "    print('Time taken for this trial %f' %(t1-t0))\n",
179 |     "    \n",
180 |     "    return model, y_test, Y_Test_Pred"
181 |    ]
182 |   },
183 |   {
184 |    "cell_type": "code",
185 |    "execution_count": null,
186 |    "metadata": {
187 |     "collapsed": false
188 |    },
189 |    "outputs": [],
190 |    "source": [
191 |     "energymodel_RF()"
192 |    ]
193 |   },
194 |   {
195 |    "cell_type": "code",
196 |    "execution_count": null,
197 |    "metadata": {
198 |     "collapsed": true
199 |    },
200 |    "outputs": [],
201 |    "source": []
202 |   }
203 |  ],
204 |  "metadata": {
205 |   "anaconda-cloud": {},
206 |   "kernelspec": {
207 |    "display_name": "Python [Root]",
208 |    "language": "python",
209 |    "name": "Python [Root]"
210 |   },
211 |   "language_info": {
212 |    "codemirror_mode": {
213 |     "name": "ipython",
214 |     "version": 3
215 |    },
216 |    "file_extension": ".py",
217 |    "mimetype": "text/x-python",
218 |    "name": "python",
219 |    "nbconvert_exporter": "python",
220 |    "pygments_lexer": "ipython3",
221 |    "version": "3.5.4"
222 |   }
223 |  },
224 |  "nbformat": 4,
225 |  "nbformat_minor": 1
226 | }
227 | 


--------------------------------------------------------------------------------
/Chapter5-EnergyModel-Comparison/Energy_Modeling_SVM_D1.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import numpy as np\n",
 12 |     "import pandas as pd\n",
 13 |     "from sklearn.model_selection import GridSearchCV, TimeSeriesSplit, KFold, RepeatedKFold, train_test_split\n",
 14 |     "import matplotlib.pyplot as plt\n",
 15 |     "import seaborn as sns\n",
 16 |     "sns.set(color_codes=True)\n",
 17 |     "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer\n",
 18 |     "from time import time\n",
 19 |     "from sklearn.preprocessing import MinMaxScaler\n",
 20 |     "from sklearn.preprocessing import quantile_transform\n",
 21 |     "import scipy.stats as st\n",
 22 |     "from sklearn.feature_selection import RFE, RFECV, SelectFromModel, mutual_info_regression, SelectKBest\n",
 23 |     "from xgboost import XGBRegressor\n",
 24 |     "from sklearn.svm import SVR\n",
 25 |     "from sklearn.decomposition import PCA\n",
 26 |     "from sklearn.pipeline import Pipeline\n",
 27 |     "import pprint as pp\n",
 28 |     "import datetime\n",
 29 |     "%matplotlib inline"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": null,
 35 |    "metadata": {
 36 |     "collapsed": false
 37 |    },
 38 |    "outputs": [],
 39 |    "source": [
 40 |     "Climate_Data = pd.read_excel('Climate_Data.xls')\n",
 41 |     "#######################################################################################################################\n",
 42 |     "Energy_Data_mean = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).mean()\n",
 43 |     "Energy_Data_mean = Energy_Data_mean[['Day of Week', 'Is Holiday', 'Daylight Savings', 'DHI', 'DNI', 'Dew Point', \n",
 44 |     "                                     'Temperature', 'Relative Humidity']]\n",
 45 |     "Energy_Data_mean.columns = ['Day_of_Week', 'Is_Holiday', 'Daylight_Savings', 'DHI_AVG', 'DNI_AVG', 'Dew Point_AVG', \n",
 46 |     "                            'Temperature_AVG', 'Relative Humidity_AVG']\n",
 47 |     "#######################################################################################################################\n",
 48 |     "Energy_Data_sum = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).sum()\n",
 49 |     "Energy_Data_sum = Energy_Data_sum[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n",
 50 |     "Energy_Data_sum.columns = ['DHI_SUM', 'DNI_SUM', 'Dew Point_SUM', 'Temperature_SUM', 'Relative Humidity_SUM']\n",
 51 |     "#######################################################################################################################\n",
 52 |     "Energy_Data_max = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).max()\n",
 53 |     "Energy_Data_max = Energy_Data_max[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n",
 54 |     "Energy_Data_max.columns = ['DHI_MAX', 'DNI_MAX', 'Dew Point_MAX', 'Temperature_MAX', 'Relative Humidity_MAX']\n",
 55 |     "#######################################################################################################################\n",
 56 |     "Energy_Data_std = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).std()\n",
 57 |     "Energy_Data_std = Energy_Data_std[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n",
 58 |     "Energy_Data_std.columns = ['DHI_STD', 'DNI_STD', 'Dew Point_STD', 'Temperature_STD', 'Relative Humidity_STD']\n",
 59 |     "#######################################################################################################################\n",
 60 |     "Energy_Data_min = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).min()\n",
 61 |     "Energy_Data_min = Energy_Data_min[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n",
 62 |     "Energy_Data_min.columns = ['DHI_MIN', 'DNI_MIN', 'Dew Point_MIN', 'Temperature_MIN', 'Relative Humidity_MIN']\n",
 63 |     "#######################################################################################################################\n",
 64 |     "Energy_Data = pd.concat([Energy_Data_mean, Energy_Data_sum, Energy_Data_max, Energy_Data_std, Energy_Data_min], axis=1)\n",
 65 |     "Energy_Data.reset_index(inplace=True)\n",
 66 |     "Energy_Data['Energy_Consumption'] = pd.read_excel('EnergyData_D1.xlsx')\n",
 67 |     "#######################################################################################################################\n",
 68 |     "Energy_Data['Lag1'] = (Energy_Data['Energy_Consumption'].shift(1))\n",
 69 |     "Energy_Data.dropna(axis=0,inplace=True)\n",
 70 |     "#######################################################################################################################\n",
 71 |     "Energy_Data['Date_Time'] = pd.to_datetime(pd.DataFrame({'year': Energy_Data['Year'],'month': Energy_Data['Month'] + 1,\n",
 72 |     "                                                        'day': Energy_Data['Day of Month']}))"
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "code",
 77 |    "execution_count": null,
 78 |    "metadata": {
 79 |     "collapsed": true
 80 |    },
 81 |    "outputs": [],
 82 |    "source": [
 83 |     "Feature_Names = ['Month','Day_of_Week', 'Is_Holiday', 'Daylight_Savings', 'DHI_AVG', 'DNI_AVG', 'Dew Point_AVG', \n",
 84 |     "                 'Temperature_AVG', 'Relative Humidity_AVG', 'DHI_SUM', 'DNI_SUM', 'Dew Point_SUM', 'Temperature_SUM', \n",
 85 |     "                 'Relative Humidity_SUM', 'DHI_MAX', 'DNI_MAX', 'Dew Point_MAX', 'Temperature_MAX', \n",
 86 |     "                 'Relative Humidity_MAX', 'DHI_STD', 'DNI_STD', 'Dew Point_STD', 'Temperature_STD', \n",
 87 |     "                 'Relative Humidity_STD', 'DHI_MIN', 'DNI_MIN', 'Dew Point_MIN', 'Temperature_MIN', \n",
 88 |     "                 'Relative Humidity_MIN', 'Lag1']\n",
 89 |     "\n",
 90 |     "X = Energy_Data[Feature_Names].as_matrix()\n",
 91 |     "y = Energy_Data['Energy_Consumption'].as_matrix()\n",
 92 |     "date_time = Energy_Data['Date_Time']"
 93 |    ]
 94 |   },
 95 |   {
 96 |    "cell_type": "code",
 97 |    "execution_count": null,
 98 |    "metadata": {
 99 |     "collapsed": true
100 |    },
101 |    "outputs": [],
102 |    "source": [
103 |     "#################################################################################################\n",
104 |     "# To test anomaly detector\n",
105 |     "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, shuffle=False)\n",
106 |     "DT_train, DT_Test = train_test_split(date_time, test_size=0.5, shuffle=False)\n",
107 |     "#################################################################################################"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "code",
112 |    "execution_count": null,
113 |    "metadata": {
114 |     "collapsed": false
115 |    },
116 |    "outputs": [],
117 |    "source": [
118 |     "def energymodel_SVM():\n",
119 |     "    t0 = time()\n",
120 |     "    np.random.seed(7)\n",
121 |     "    ########################################################################################\n",
122 |     "    # Regression\n",
123 |     "    kf = KFold(n_splits=5, shuffle=True)\n",
124 |     "    scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n",
125 |     "    \n",
126 |     "    estimators = []\n",
127 |     "    estimators.append(('standardize', MinMaxScaler()))\n",
128 |     "    estimators.append(('FS', SelectKBest(mutual_info_regression)))\n",
129 |     "    estimators.append(('SVM', SVR()))\n",
130 |     "    pipe = Pipeline(estimators)\n",
131 |     "    \n",
132 |     "    y_train_scaled = (y_train - y_train.min())/(y_train.max() - y_train.min())\n",
133 |     "    \n",
134 |     "    p_grid = dict(FS__k = [int(i) for i in np.arange(1,len(Feature_Names)+1,1)],\n",
135 |     "                  SVM__gamma = np.logspace(-3, 0, 4),\n",
136 |     "                  SVM__C = np.logspace(0, 3, 4))\n",
137 |     "    \n",
138 |     "    model = GridSearchCV(estimator = pipe, param_grid = p_grid, scoring = scoring_param, cv = kf, n_jobs=-1)\n",
139 |     "    model.fit(X_train, y_train_scaled)\n",
140 |     "    \n",
141 |     "    params = model.best_params_\n",
142 |     "    print(\"Best best k: %s Best gamma: %f Best C: %s\" % (params['FS__k'], params['SVM__gamma'], params['SVM__C']))\n",
143 |     "    \n",
144 |     "    \n",
145 |     "    Y_Test_Pred_scaled = model.predict(X_test)\n",
146 |     "    Y_Test_Pred = (Y_Test_Pred_scaled*(y_train.max()-y_train.min()))+y_train.min()\n",
147 |     "    \n",
148 |     "    rmse = np.sqrt(mean_squared_error(y_test,Y_Test_Pred))\n",
149 |     "    data_range = y_test.max() - y_test.min()\n",
150 |     "    NRMSE = (rmse/data_range) * 100.0\n",
151 |     "    RSQ = r2_score(y_test,Y_Test_Pred)\n",
152 |     "    print(\"Normalized RMSE: %0.3f\" % NRMSE)\n",
153 |     "    print(\"R-squared: %0.3f\" % RSQ)\n",
154 |     "    \n",
155 |     "    fig = plt.figure(figsize=(30,20))\n",
156 |     "    ax = fig.add_subplot(1, 1, 1)\n",
157 |     "    plt.scatter(y_test, Y_Test_Pred, c=\"g\", s=200, alpha=0.5)\n",
158 |     "    plt.xlabel(\"Target [J]\", fontsize=40)\n",
159 |     "    plt.ylabel(\"Predictions [J]\", fontsize=40)\n",
160 |     "    plt.xticks(fontsize=25)\n",
161 |     "    plt.yticks(fontsize=25)\n",
162 |     "    plt.savefig('Scatter-Target-vs-Pred-SVM-D1')\n",
163 |     "    \n",
164 |     "    fig = plt.figure(figsize=(30,20))\n",
165 |     "    ax = fig.add_subplot(1, 1, 1)\n",
166 |     "    plt.plot(DT_Test.dt.to_pydatetime(), y_test, 'k.', lw=5, markersize=20, label=u'Observations')\n",
167 |     "    plt.plot(DT_Test.dt.to_pydatetime(), Y_Test_Pred, 'r-', lw=5, label=u'Prediction')\n",
168 |     "    plt.xlabel('Date Time',fontsize=40)\n",
169 |     "    plt.ylabel('Energy Consumption - Facility [J]',fontsize=40)\n",
170 |     "    plt.xticks(fontsize=25)\n",
171 |     "    plt.yticks(fontsize=25)\n",
172 |     "    plt.legend(loc='best',fontsize=30)\n",
173 |     "    plt.savefig('Plot-Target-vs-Pred-SVM-D1')\n",
174 |     "    \n",
175 |     "    t1 = time()\n",
176 |     "    print('Time taken for this trial %f' %(t1-t0))\n",
177 |     "    \n",
178 |     "    return model, y_test, Y_Test_Pred"
179 |    ]
180 |   },
181 |   {
182 |    "cell_type": "code",
183 |    "execution_count": null,
184 |    "metadata": {
185 |     "collapsed": false
186 |    },
187 |    "outputs": [],
188 |    "source": [
189 |     "energymodel_SVM()"
190 |    ]
191 |   },
192 |   {
193 |    "cell_type": "code",
194 |    "execution_count": null,
195 |    "metadata": {
196 |     "collapsed": true
197 |    },
198 |    "outputs": [],
199 |    "source": []
200 |   }
201 |  ],
202 |  "metadata": {
203 |   "anaconda-cloud": {},
204 |   "kernelspec": {
205 |    "display_name": "Python [Root]",
206 |    "language": "python",
207 |    "name": "Python [Root]"
208 |   },
209 |   "language_info": {
210 |    "codemirror_mode": {
211 |     "name": "ipython",
212 |     "version": 3
213 |    },
214 |    "file_extension": ".py",
215 |    "mimetype": "text/x-python",
216 |    "name": "python",
217 |    "nbconvert_exporter": "python",
218 |    "pygments_lexer": "ipython3",
219 |    "version": "3.5.4"
220 |   }
221 |  },
222 |  "nbformat": 4,
223 |  "nbformat_minor": 1
224 | }
225 | 


--------------------------------------------------------------------------------
/Chapter5-EnergyModel-Comparison/Energy_Modeling_SVM_D2.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import numpy as np\n",
 12 |     "import pandas as pd\n",
 13 |     "from sklearn.model_selection import GridSearchCV, TimeSeriesSplit, KFold, RepeatedKFold, train_test_split\n",
 14 |     "import matplotlib.pyplot as plt\n",
 15 |     "import seaborn as sns\n",
 16 |     "sns.set(color_codes=True)\n",
 17 |     "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer\n",
 18 |     "from time import time\n",
 19 |     "from sklearn.preprocessing import MinMaxScaler\n",
 20 |     "from sklearn.preprocessing import quantile_transform\n",
 21 |     "import scipy.stats as st\n",
 22 |     "from sklearn.feature_selection import RFE, RFECV, SelectFromModel, mutual_info_regression, SelectKBest\n",
 23 |     "from xgboost import XGBRegressor\n",
 24 |     "from sklearn.svm import SVR\n",
 25 |     "from sklearn.decomposition import PCA\n",
 26 |     "from sklearn.pipeline import Pipeline\n",
 27 |     "import pprint as pp\n",
 28 |     "import datetime\n",
 29 |     "%matplotlib inline"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": null,
 35 |    "metadata": {
 36 |     "collapsed": false
 37 |    },
 38 |    "outputs": [],
 39 |    "source": [
 40 |     "Climate_Data = pd.read_excel('Climate_Data.xls')\n",
 41 |     "#######################################################################################################################\n",
 42 |     "Energy_Data_mean = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).mean()\n",
 43 |     "Energy_Data_mean = Energy_Data_mean[['Day of Week', 'Is Holiday', 'Daylight Savings', 'DHI', 'DNI', 'Dew Point', \n",
 44 |     "                                     'Temperature', 'Relative Humidity']]\n",
 45 |     "Energy_Data_mean.columns = ['Day_of_Week', 'Is_Holiday', 'Daylight_Savings', 'DHI_AVG', 'DNI_AVG', 'Dew Point_AVG', \n",
 46 |     "                            'Temperature_AVG', 'Relative Humidity_AVG']\n",
 47 |     "#######################################################################################################################\n",
 48 |     "Energy_Data_sum = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).sum()\n",
 49 |     "Energy_Data_sum = Energy_Data_sum[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n",
 50 |     "Energy_Data_sum.columns = ['DHI_SUM', 'DNI_SUM', 'Dew Point_SUM', 'Temperature_SUM', 'Relative Humidity_SUM']\n",
 51 |     "#######################################################################################################################\n",
 52 |     "Energy_Data_max = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).max()\n",
 53 |     "Energy_Data_max = Energy_Data_max[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n",
 54 |     "Energy_Data_max.columns = ['DHI_MAX', 'DNI_MAX', 'Dew Point_MAX', 'Temperature_MAX', 'Relative Humidity_MAX']\n",
 55 |     "#######################################################################################################################\n",
 56 |     "Energy_Data_std = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).std()\n",
 57 |     "Energy_Data_std = Energy_Data_std[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n",
 58 |     "Energy_Data_std.columns = ['DHI_STD', 'DNI_STD', 'Dew Point_STD', 'Temperature_STD', 'Relative Humidity_STD']\n",
 59 |     "#######################################################################################################################\n",
 60 |     "Energy_Data_min = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).min()\n",
 61 |     "Energy_Data_min = Energy_Data_min[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n",
 62 |     "Energy_Data_min.columns = ['DHI_MIN', 'DNI_MIN', 'Dew Point_MIN', 'Temperature_MIN', 'Relative Humidity_MIN']\n",
 63 |     "#######################################################################################################################\n",
 64 |     "Energy_Data = pd.concat([Energy_Data_mean, Energy_Data_sum, Energy_Data_max, Energy_Data_std, Energy_Data_min], axis=1)\n",
 65 |     "Energy_Data.reset_index(inplace=True)\n",
 66 |     "Energy_Data['Energy_Consumption'] = pd.read_excel('EnergyData_D2.xlsx')\n",
 67 |     "#######################################################################################################################\n",
 68 |     "Energy_Data['Lag1'] = (Energy_Data['Energy_Consumption'].shift(1))\n",
 69 |     "Energy_Data.dropna(axis=0,inplace=True)\n",
 70 |     "#######################################################################################################################\n",
 71 |     "Energy_Data['Date_Time'] = pd.to_datetime(pd.DataFrame({'year': Energy_Data['Year'],'month': Energy_Data['Month'] + 1,\n",
 72 |     "                                                        'day': Energy_Data['Day of Month']}))"
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "code",
 77 |    "execution_count": null,
 78 |    "metadata": {
 79 |     "collapsed": true
 80 |    },
 81 |    "outputs": [],
 82 |    "source": [
 83 |     "Feature_Names = ['Month','Day_of_Week', 'Is_Holiday', 'Daylight_Savings', 'DHI_AVG', 'DNI_AVG', 'Dew Point_AVG', \n",
 84 |     "                 'Temperature_AVG', 'Relative Humidity_AVG', 'DHI_SUM', 'DNI_SUM', 'Dew Point_SUM', 'Temperature_SUM', \n",
 85 |     "                 'Relative Humidity_SUM', 'DHI_MAX', 'DNI_MAX', 'Dew Point_MAX', 'Temperature_MAX', \n",
 86 |     "                 'Relative Humidity_MAX', 'DHI_STD', 'DNI_STD', 'Dew Point_STD', 'Temperature_STD', \n",
 87 |     "                 'Relative Humidity_STD', 'DHI_MIN', 'DNI_MIN', 'Dew Point_MIN', 'Temperature_MIN', \n",
 88 |     "                 'Relative Humidity_MIN', 'Lag1']\n",
 89 |     "\n",
 90 |     "X = Energy_Data[Feature_Names].as_matrix()\n",
 91 |     "y = Energy_Data['Energy_Consumption'].as_matrix()\n",
 92 |     "date_time = Energy_Data['Date_Time']"
 93 |    ]
 94 |   },
 95 |   {
 96 |    "cell_type": "code",
 97 |    "execution_count": null,
 98 |    "metadata": {
 99 |     "collapsed": true
100 |    },
101 |    "outputs": [],
102 |    "source": [
103 |     "#################################################################################################\n",
104 |     "# To test anomaly detector\n",
105 |     "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, shuffle=False)\n",
106 |     "DT_train, DT_Test = train_test_split(date_time, test_size=0.5, shuffle=False)\n",
107 |     "#################################################################################################"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "code",
112 |    "execution_count": null,
113 |    "metadata": {
114 |     "collapsed": false
115 |    },
116 |    "outputs": [],
117 |    "source": [
118 |     "def energymodel_SVM():\n",
119 |     "    t0 = time()\n",
120 |     "    np.random.seed(7)\n",
121 |     "    ########################################################################################\n",
122 |     "    # Regression\n",
123 |     "    kf = KFold(n_splits=5, shuffle=True)\n",
124 |     "    scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n",
125 |     "    \n",
126 |     "    estimators = []\n",
127 |     "    estimators.append(('standardize', MinMaxScaler()))\n",
128 |     "    estimators.append(('FS', SelectKBest(mutual_info_regression)))\n",
129 |     "    estimators.append(('SVM', SVR()))\n",
130 |     "    pipe = Pipeline(estimators)\n",
131 |     "    \n",
132 |     "    y_train_scaled = (y_train - y_train.min())/(y_train.max() - y_train.min())\n",
133 |     "    \n",
134 |     "    p_grid = dict(FS__k = [int(i) for i in np.arange(1,len(Feature_Names)+1,1)],\n",
135 |     "                  SVM__gamma = np.logspace(-3, 0, 4),\n",
136 |     "                  SVM__C = np.logspace(0, 3, 4))\n",
137 |     "    \n",
138 |     "    model = GridSearchCV(estimator = pipe, param_grid = p_grid, scoring = scoring_param, cv = kf, n_jobs=-1)\n",
139 |     "    model.fit(X_train, y_train_scaled)\n",
140 |     "    \n",
141 |     "    params = model.best_params_\n",
142 |     "    print(\"Best best k: %s Best gamma: %f Best C: %s\" % (params['FS__k'], params['SVM__gamma'], params['SVM__C']))\n",
143 |     "    \n",
144 |     "    \n",
145 |     "    Y_Test_Pred_scaled = model.predict(X_test)\n",
146 |     "    Y_Test_Pred = (Y_Test_Pred_scaled*(y_train.max()-y_train.min()))+y_train.min()\n",
147 |     "    \n",
148 |     "    rmse = np.sqrt(mean_squared_error(y_test,Y_Test_Pred))\n",
149 |     "    data_range = y_test.max() - y_test.min()\n",
150 |     "    NRMSE = (rmse/data_range) * 100.0\n",
151 |     "    RSQ = r2_score(y_test,Y_Test_Pred)\n",
152 |     "    print(\"Normalized RMSE: %0.3f\" % NRMSE)\n",
153 |     "    print(\"R-squared: %0.3f\" % RSQ)\n",
154 |     "    \n",
155 |     "    fig = plt.figure(figsize=(30,20))\n",
156 |     "    ax = fig.add_subplot(1, 1, 1)\n",
157 |     "    plt.scatter(y_test, Y_Test_Pred, c=\"g\", s=200, alpha=0.5)\n",
158 |     "    plt.xlabel(\"Target [J]\", fontsize=40)\n",
159 |     "    plt.ylabel(\"Predictions [J]\", fontsize=40)\n",
160 |     "    plt.xticks(fontsize=25)\n",
161 |     "    plt.yticks(fontsize=25)\n",
162 |     "    plt.savefig('Scatter-Target-vs-Pred-SVM-D2')\n",
163 |     "    \n",
164 |     "    fig = plt.figure(figsize=(30,20))\n",
165 |     "    ax = fig.add_subplot(1, 1, 1)\n",
166 |     "    plt.plot(DT_Test.dt.to_pydatetime(), y_test, 'k.', lw=5, markersize=20, label=u'Observations')\n",
167 |     "    plt.plot(DT_Test.dt.to_pydatetime(), Y_Test_Pred, 'r-', lw=5, label=u'Prediction')\n",
168 |     "    plt.xlabel('Date Time',fontsize=40)\n",
169 |     "    plt.ylabel('Energy Consumption - Facility [J]',fontsize=40)\n",
170 |     "    plt.xticks(fontsize=25)\n",
171 |     "    plt.yticks(fontsize=25)\n",
172 |     "    plt.legend(loc='best',fontsize=30)\n",
173 |     "    plt.savefig('Plot-Target-vs-Pred-SVM-D2')\n",
174 |     "    \n",
175 |     "    t1 = time()\n",
176 |     "    print('Time taken for this trial %f' %(t1-t0))\n",
177 |     "    \n",
178 |     "    return model, y_test, Y_Test_Pred"
179 |    ]
180 |   },
181 |   {
182 |    "cell_type": "code",
183 |    "execution_count": null,
184 |    "metadata": {
185 |     "collapsed": false
186 |    },
187 |    "outputs": [],
188 |    "source": [
189 |     "energymodel_SVM()"
190 |    ]
191 |   },
192 |   {
193 |    "cell_type": "code",
194 |    "execution_count": null,
195 |    "metadata": {
196 |     "collapsed": true
197 |    },
198 |    "outputs": [],
199 |    "source": []
200 |   }
201 |  ],
202 |  "metadata": {
203 |   "anaconda-cloud": {},
204 |   "kernelspec": {
205 |    "display_name": "Python [Root]",
206 |    "language": "python",
207 |    "name": "Python [Root]"
208 |   },
209 |   "language_info": {
210 |    "codemirror_mode": {
211 |     "name": "ipython",
212 |     "version": 3
213 |    },
214 |    "file_extension": ".py",
215 |    "mimetype": "text/x-python",
216 |    "name": "python",
217 |    "nbconvert_exporter": "python",
218 |    "pygments_lexer": "ipython3",
219 |    "version": "3.5.4"
220 |   }
221 |  },
222 |  "nbformat": 4,
223 |  "nbformat_minor": 1
224 | }
225 | 


--------------------------------------------------------------------------------
/Chapter5-EnergyModel-Comparison/Energy_Modeling_SVM_D3.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import numpy as np\n",
 12 |     "import pandas as pd\n",
 13 |     "from sklearn.model_selection import GridSearchCV, TimeSeriesSplit, KFold, RepeatedKFold, train_test_split\n",
 14 |     "import matplotlib.pyplot as plt\n",
 15 |     "import seaborn as sns\n",
 16 |     "sns.set(color_codes=True)\n",
 17 |     "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer\n",
 18 |     "from time import time\n",
 19 |     "from sklearn.preprocessing import MinMaxScaler\n",
 20 |     "from sklearn.preprocessing import quantile_transform\n",
 21 |     "import scipy.stats as st\n",
 22 |     "from sklearn.feature_selection import RFE, RFECV, SelectFromModel, mutual_info_regression, SelectKBest\n",
 23 |     "from xgboost import XGBRegressor\n",
 24 |     "from sklearn.svm import SVR\n",
 25 |     "from sklearn.decomposition import PCA\n",
 26 |     "from sklearn.pipeline import Pipeline\n",
 27 |     "import pprint as pp\n",
 28 |     "import datetime\n",
 29 |     "%matplotlib inline"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": null,
 35 |    "metadata": {
 36 |     "collapsed": false
 37 |    },
 38 |    "outputs": [],
 39 |    "source": [
 40 |     "Climate_Data = pd.read_excel('Climate_Data.xls')\n",
 41 |     "#######################################################################################################################\n",
 42 |     "Energy_Data_mean = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).mean()\n",
 43 |     "Energy_Data_mean = Energy_Data_mean[['Day of Week', 'Is Holiday', 'Daylight Savings', 'DHI', 'DNI', 'Dew Point', \n",
 44 |     "                                     'Temperature', 'Relative Humidity']]\n",
 45 |     "Energy_Data_mean.columns = ['Day_of_Week', 'Is_Holiday', 'Daylight_Savings', 'DHI_AVG', 'DNI_AVG', 'Dew Point_AVG', \n",
 46 |     "                            'Temperature_AVG', 'Relative Humidity_AVG']\n",
 47 |     "#######################################################################################################################\n",
 48 |     "Energy_Data_sum = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).sum()\n",
 49 |     "Energy_Data_sum = Energy_Data_sum[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n",
 50 |     "Energy_Data_sum.columns = ['DHI_SUM', 'DNI_SUM', 'Dew Point_SUM', 'Temperature_SUM', 'Relative Humidity_SUM']\n",
 51 |     "#######################################################################################################################\n",
 52 |     "Energy_Data_max = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).max()\n",
 53 |     "Energy_Data_max = Energy_Data_max[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n",
 54 |     "Energy_Data_max.columns = ['DHI_MAX', 'DNI_MAX', 'Dew Point_MAX', 'Temperature_MAX', 'Relative Humidity_MAX']\n",
 55 |     "#######################################################################################################################\n",
 56 |     "Energy_Data_std = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).std()\n",
 57 |     "Energy_Data_std = Energy_Data_std[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n",
 58 |     "Energy_Data_std.columns = ['DHI_STD', 'DNI_STD', 'Dew Point_STD', 'Temperature_STD', 'Relative Humidity_STD']\n",
 59 |     "#######################################################################################################################\n",
 60 |     "Energy_Data_min = Climate_Data.groupby(['Year', 'Month', 'Day of Month']).min()\n",
 61 |     "Energy_Data_min = Energy_Data_min[['DHI', 'DNI', 'Dew Point', 'Temperature', 'Relative Humidity']]\n",
 62 |     "Energy_Data_min.columns = ['DHI_MIN', 'DNI_MIN', 'Dew Point_MIN', 'Temperature_MIN', 'Relative Humidity_MIN']\n",
 63 |     "#######################################################################################################################\n",
 64 |     "Energy_Data = pd.concat([Energy_Data_mean, Energy_Data_sum, Energy_Data_max, Energy_Data_std, Energy_Data_min], axis=1)\n",
 65 |     "Energy_Data.reset_index(inplace=True)\n",
 66 |     "Energy_Data['Energy_Consumption'] = pd.read_excel('EnergyData_D3.xlsx')\n",
 67 |     "#######################################################################################################################\n",
 68 |     "Energy_Data['Lag1'] = (Energy_Data['Energy_Consumption'].shift(1))\n",
 69 |     "Energy_Data.dropna(axis=0,inplace=True)\n",
 70 |     "#######################################################################################################################\n",
 71 |     "Energy_Data['Date_Time'] = pd.to_datetime(pd.DataFrame({'year': Energy_Data['Year'],'month': Energy_Data['Month'] + 1,\n",
 72 |     "                                                        'day': Energy_Data['Day of Month']}))"
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "code",
 77 |    "execution_count": null,
 78 |    "metadata": {
 79 |     "collapsed": true
 80 |    },
 81 |    "outputs": [],
 82 |    "source": [
 83 |     "Feature_Names = ['Month','Day_of_Week', 'Is_Holiday', 'Daylight_Savings', 'DHI_AVG', 'DNI_AVG', 'Dew Point_AVG', \n",
 84 |     "                 'Temperature_AVG', 'Relative Humidity_AVG', 'DHI_SUM', 'DNI_SUM', 'Dew Point_SUM', 'Temperature_SUM', \n",
 85 |     "                 'Relative Humidity_SUM', 'DHI_MAX', 'DNI_MAX', 'Dew Point_MAX', 'Temperature_MAX', \n",
 86 |     "                 'Relative Humidity_MAX', 'DHI_STD', 'DNI_STD', 'Dew Point_STD', 'Temperature_STD', \n",
 87 |     "                 'Relative Humidity_STD', 'DHI_MIN', 'DNI_MIN', 'Dew Point_MIN', 'Temperature_MIN', \n",
 88 |     "                 'Relative Humidity_MIN', 'Lag1']\n",
 89 |     "\n",
 90 |     "X = Energy_Data[Feature_Names].as_matrix()\n",
 91 |     "y = Energy_Data['Energy_Consumption'].as_matrix()\n",
 92 |     "date_time = Energy_Data['Date_Time']"
 93 |    ]
 94 |   },
 95 |   {
 96 |    "cell_type": "code",
 97 |    "execution_count": null,
 98 |    "metadata": {
 99 |     "collapsed": true
100 |    },
101 |    "outputs": [],
102 |    "source": [
103 |     "#################################################################################################\n",
104 |     "# Chronological 50/50 split (shuffle=False): the later half is held out to test the anomaly detector\n",
105 |     "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, shuffle=False)\n",
106 |     "DT_train, DT_Test = train_test_split(date_time, test_size=0.5, shuffle=False)\n",
107 |     "#################################################################################################"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "code",
112 |    "execution_count": null,
113 |    "metadata": {
114 |     "collapsed": false
115 |    },
116 |    "outputs": [],
117 |    "source": [
118 |     "def energymodel_SVM():\n",
119 |     "    t0 = time()\n",
120 |     "    np.random.seed(7)\n",
121 |     "    ########################################################################################\n",
122 |     "    # Regression\n",
123 |     "    kf = KFold(n_splits=5, shuffle=True)\n",
124 |     "    scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n",
125 |     "    \n",
126 |     "    estimators = []\n",
127 |     "    estimators.append(('standardize', MinMaxScaler()))\n",
128 |     "    estimators.append(('FS', SelectKBest(mutual_info_regression)))\n",
129 |     "    estimators.append(('SVM', SVR()))\n",
130 |     "    pipe = Pipeline(estimators)\n",
131 |     "    \n",
132 |     "    y_train_scaled = (y_train - y_train.min())/(y_train.max() - y_train.min())\n",
133 |     "    \n",
134 |     "    p_grid = dict(FS__k = [int(i) for i in np.arange(1,len(Feature_Names)+1,1)],\n",
135 |     "                  SVM__gamma = np.logspace(-3, 0, 4),\n",
136 |     "                  SVM__C = np.logspace(0, 3, 4))\n",
137 |     "    \n",
138 |     "    model = GridSearchCV(estimator = pipe, param_grid = p_grid, scoring = scoring_param, cv = kf, n_jobs=-1)\n",
139 |     "    model.fit(X_train, y_train_scaled)\n",
140 |     "    \n",
141 |     "    params = model.best_params_\n",
142 |     "    print(\"Best k: %s Best gamma: %f Best C: %s\" % (params['FS__k'], params['SVM__gamma'], params['SVM__C']))\n",
143 |     "    \n",
144 |     "    \n",
145 |     "    Y_Test_Pred_scaled = model.predict(X_test)\n",
146 |     "    Y_Test_Pred = (Y_Test_Pred_scaled*(y_train.max()-y_train.min()))+y_train.min()\n",
147 |     "    \n",
148 |     "    rmse = np.sqrt(mean_squared_error(y_test,Y_Test_Pred))\n",
149 |     "    data_range = y_test.max() - y_test.min()\n",
150 |     "    NRMSE = (rmse/data_range) * 100.0\n",
151 |     "    RSQ = r2_score(y_test,Y_Test_Pred)\n",
152 |     "    print(\"Normalized RMSE: %0.3f\" % NRMSE)\n",
153 |     "    print(\"R-squared: %0.3f\" % RSQ)\n",
154 |     "    \n",
155 |     "    fig = plt.figure(figsize=(30,20))\n",
156 |     "    ax = fig.add_subplot(1, 1, 1)\n",
157 |     "    plt.scatter(y_test, Y_Test_Pred, c=\"g\", s=200, alpha=0.5)\n",
158 |     "    plt.xlabel(\"Target [J]\", fontsize=40)\n",
159 |     "    plt.ylabel(\"Predictions [J]\", fontsize=40)\n",
160 |     "    plt.xticks(fontsize=25)\n",
161 |     "    plt.yticks(fontsize=25)\n",
162 |     "    plt.savefig('Scatter-Target-vs-Pred-SVM-D3')\n",
163 |     "    \n",
164 |     "    fig = plt.figure(figsize=(30,20))\n",
165 |     "    ax = fig.add_subplot(1, 1, 1)\n",
166 |     "    plt.plot(DT_Test.dt.to_pydatetime(), y_test, 'k.', lw=5, markersize=20, label=u'Observations')\n",
167 |     "    plt.plot(DT_Test.dt.to_pydatetime(), Y_Test_Pred, 'r-', lw=5, label=u'Prediction')\n",
168 |     "    plt.xlabel('Date Time',fontsize=40)\n",
169 |     "    plt.ylabel('Energy Consumption - Facility [J]',fontsize=40)\n",
170 |     "    plt.xticks(fontsize=25)\n",
171 |     "    plt.yticks(fontsize=25)\n",
172 |     "    plt.legend(loc='best',fontsize=30)\n",
173 |     "    plt.savefig('Plot-Target-vs-Pred-SVM-D3')\n",
174 |     "    \n",
175 |     "    t1 = time()\n",
176 |     "    print('Time taken for this trial %f' %(t1-t0))\n",
177 |     "    \n",
178 |     "    return model, y_test, Y_Test_Pred"
179 |    ]
180 |   },
181 |   {
182 |    "cell_type": "code",
183 |    "execution_count": null,
184 |    "metadata": {
185 |     "collapsed": false
186 |    },
187 |    "outputs": [],
188 |    "source": [
189 |     "energymodel_SVM()"
190 |    ]
191 |   },
192 |   {
193 |    "cell_type": "code",
194 |    "execution_count": null,
195 |    "metadata": {
196 |     "collapsed": true
197 |    },
198 |    "outputs": [],
199 |    "source": []
200 |   }
201 |  ],
202 |  "metadata": {
203 |   "anaconda-cloud": {},
204 |   "kernelspec": {
205 |    "display_name": "Python [Root]",
206 |    "language": "python",
207 |    "name": "Python [Root]"
208 |   },
209 |   "language_info": {
210 |    "codemirror_mode": {
211 |     "name": "ipython",
212 |     "version": 3
213 |    },
214 |    "file_extension": ".py",
215 |    "mimetype": "text/x-python",
216 |    "name": "python",
217 |    "nbconvert_exporter": "python",
218 |    "pygments_lexer": "ipython3",
219 |    "version": "3.5.4"
220 |   }
221 |  },
222 |  "nbformat": 4,
223 |  "nbformat_minor": 1
224 | }
225 | 


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-10/Condenser_Foul_SVM_Dynamic_Threshold.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": false
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import numpy as np\n",
 12 |     "import pandas as pd\n",
 13 |     "from sklearn.model_selection import GridSearchCV, KFold, train_test_split\n",
 14 |     "import matplotlib.pyplot as plt\n",
 15 |     "import seaborn as sns\n",
 16 |     "sns.set(color_codes=True)\n",
 17 |     "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer, adjusted_rand_score, \\\n",
 18 |     "                    accuracy_score, f1_score, confusion_matrix, classification_report, roc_auc_score, recall_score\n",
 19 |     "from time import time\n",
 20 |     "from sklearn.preprocessing import MinMaxScaler\n",
 21 |     "import scipy.stats as st\n",
 22 |     "from sklearn.feature_selection import RFE, RFECV, SelectKBest, mutual_info_regression\n",
 23 |     "from sklearn.svm import SVR\n",
 24 |     "from sklearn.pipeline import Pipeline\n",
 25 |     "import pprint as pp\n",
 26 |     "%matplotlib inline"
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "code",
 31 |    "execution_count": null,
 32 |    "metadata": {
 33 |     "collapsed": false
 34 |    },
 35 |    "outputs": [],
 36 |    "source": [
 37 |     "Chiller_Data = pd.read_excel('Condenser_Fouling_Fault_Data.xlsx')"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "code",
 42 |    "execution_count": null,
 43 |    "metadata": {
 44 |     "collapsed": true
 45 |    },
 46 |    "outputs": [],
 47 |    "source": [
 48 |     "Chiller_Data = Chiller_Data.loc[Chiller_Data['kW'] != 1.682000e-45]\n",
 49 |     "Chiller_Data.reset_index(drop=True, inplace=True)"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "code",
 54 |    "execution_count": null,
 55 |    "metadata": {
 56 |     "collapsed": false
 57 |    },
 58 |    "outputs": [],
 59 |    "source": [
 60 |     "Chiller_Data['Target'] = (Chiller_Data['TRC_sub'])/(Chiller_Data['TRC']-Chiller_Data['TCI'])\n",
 61 |     "Chiller_Data['Lag1'] = (Chiller_Data['Target'].shift(1))\n",
 62 |     "Chiller_Data.dropna(axis=0,inplace=True)"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": null,
 68 |    "metadata": {
 69 |     "collapsed": true
 70 |    },
 71 |    "outputs": [],
 72 |    "source": [
 73 |     "y = Chiller_Data['Target'].as_matrix()\n",
 74 |     "True_Labels = Chiller_Data['Label'].as_matrix()\n",
 75 |     "Chiller_Data.drop(['Target','Label','Time (minutes)'], axis=1, inplace=True)\n",
 76 |     "#Feature_Names = ['Lag1','TEI','TEO','TCI','TCO','kW','FWC','FWE','TEA','TCA','TRE','PRE','TRC','PRC','TRC_sub','T_suc',\n",
 77 |     "#                'Tsh_suc','TR_dis','Tsh_dis','P_lift','TO_sump','TO_feed','PO_feed','TWCD','TWED']\n",
 78 |     "Feature_Names = list(Chiller_Data)\n",
 79 |     "X = Chiller_Data[Feature_Names].as_matrix()"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "code",
 84 |    "execution_count": null,
 85 |    "metadata": {
 86 |     "collapsed": true
 87 |    },
 88 |    "outputs": [],
 89 |    "source": [
 90 |     "#################################################################################################\n",
 91 |     "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.55, shuffle=False)\n",
 92 |     "TL_train, TL_Test = train_test_split(True_Labels, test_size=0.55, shuffle=False)\n",
 93 |     "#################################################################################################"
 94 |    ]
 95 |   },
 96 |   {
 97 |    "cell_type": "code",
 98 |    "execution_count": null,
 99 |    "metadata": {
100 |     "collapsed": false
101 |    },
102 |    "outputs": [],
103 |    "source": [
104 |     "def calc_dyn_threshold(A, P, I, N):\n",
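105 |     "    # Threshold = mean - N*std over the I most recent values of P (current one included); A below it is labelled 1. Control false alarm rates by tuning I and N, e.g. increase I or N to reduce false alarms\n",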
106 |     "    threshold = np.zeros(I-1)\n",
107 |     "    threshold[0:(I-1)] = P[0:(I-1)]\n",
108 |     "    labels = np.zeros(I-1)\n",
109 |     "    for k in np.arange(I,len(P)+1):\n",
110 |     "        mu = np.mean(P[(k-I):k])\n",
111 |     "        sigma = np.std(P[(k-I):k])\n",
112 |     "        T = mu - N*sigma\n",
113 |     "        threshold = np.append(threshold,T)\n",
114 |     "        if (A[k-1] < threshold[k-1]) :\n",
115 |     "            labels = np.append(labels,1)\n",
116 |     "        else:\n",
117 |     "            labels = np.append(labels,0)\n",
118 |     "    return labels, threshold"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "code",
123 |    "execution_count": null,
124 |    "metadata": {
125 |     "collapsed": false
126 |    },
127 |    "outputs": [],
128 |    "source": [
129 |     "t0 = time()\n",
130 |     "np.random.seed(7)\n",
131 |     "########################################################################################\n",
132 |     "# Regression\n",
133 |     "kf = KFold(n_splits=10, shuffle=True, random_state=7)\n",
134 |     "scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n",
135 |     "\n",
136 |     "estimators = []\n",
137 |     "estimators.append(('standardize', MinMaxScaler()))\n",
138 |     "estimators.append(('FS', SelectKBest(mutual_info_regression)))\n",
139 |     "estimators.append(('SVM', SVR()))\n",
140 |     "pipe = Pipeline(estimators)\n",
141 |     "    \n",
142 |     "y_train_scaled = (y_train - y_train.min())/(y_train.max() - y_train.min())\n",
143 |     "    \n",
144 |     "p_grid = dict(FS__k = [8, 16],\n",
145 |     "              SVM__gamma = np.logspace(-3, 0, 4),\n",
146 |     "              SVM__C = np.logspace(0, 3, 4))\n",
147 |     "    \n",
148 |     "model = GridSearchCV(estimator = pipe, param_grid = p_grid, scoring = scoring_param, cv = kf, n_jobs=-1)\n",
149 |     "model.fit(X_train, y_train_scaled)\n",
150 |     "    \n",
151 |     "params = model.best_params_\n",
152 |     "print(\"Best k: %s Best gamma: %f Best C: %s\" % (params['FS__k'], params['SVM__gamma'], params['SVM__C']))\n",
153 |     "    \n",
154 |     "Y_Test_Pred_scaled = model.predict(X_test)\n",
155 |     "Y_Test_Predicted = (Y_Test_Pred_scaled*(y_train.max()-y_train.min()))+y_train.min()\n",
156 |     "    \n",
157 |     "rmse = np.sqrt(mean_squared_error(y_test,Y_Test_Predicted))\n",
158 |     "data_range = y_test.max() - y_test.min()\n",
159 |     "NRMSE = (rmse/data_range) * 100.0\n",
160 |     "RSQ = r2_score(y_test,Y_Test_Predicted)\n",
161 |     "print(\"Normalized RMSE: %0.3f\" % NRMSE)\n",
162 |     "print(\"R-squared: %0.3f\" % RSQ)\n",
163 |     "\n",
164 |     "Labels, Threshold = calc_dyn_threshold(y_test, Y_Test_Predicted, 2, 2)\n",
165 |     "Temp = pd.DataFrame(data={'Actual': y_test, 'Predicted':Y_Test_Predicted, 'Labels':TL_Test, \n",
166 |     "                               'Threshold':Threshold, 'Pred_Labels': Labels})\n",
167 |     "\n",
168 |     "print(\"########################################################################################\")\n",
169 |     "print(\"Confusion Matrix - testing:\")\n",
170 |     "print(confusion_matrix(Temp['Labels'], Temp['Pred_Labels']))\n",
171 |     "tn, fp, fn, tp = confusion_matrix(Temp['Labels'], Temp['Pred_Labels']).ravel()\n",
172 |     "print(\"True Negative, False Positive, False Negative, True Positive {}.\".format([tn, fp, fn, tp]))\n",
173 |     "print(\"False positive means false alarms\")\n",
174 |     "print(\"False Negative means missed faults\")\n",
175 |     "print(\"########################################################################################\")\n",
176 |     "print(\"Classification Report - testing:\")\n",
177 |     "print(classification_report(Temp['Labels'], Temp['Pred_Labels'], target_names=['Normal', 'Fault']))\n",
178 |     "print(\"########################################################################################\")\n",
179 |     "print(\"Accuracy - testing: %0.3f\" % accuracy_score(Temp['Labels'], Temp['Pred_Labels']))\n",
180 |     "print(\"########################################################################################\")\n",
181 |     "print(\"ROC AUC score - testing: %0.3f\" % roc_auc_score(Temp['Labels'], Temp['Pred_Labels']))\n",
182 |     "print(\"########################################################################################\")\n",
183 |     "########################################################################################\n",
184 |     "\n",
185 |     "fig = plt.figure(figsize=(25,20))\n",
186 |     "ax = fig.add_subplot(1, 1, 1)\n",
187 |     "Data_0 = Temp.loc[Temp['Labels'][Temp['Labels']==0].index]\n",
188 |     "Data_1 = Temp.loc[Temp['Labels'][Temp['Labels']==1].index]\n",
189 |     "ax.scatter(list(Data_0.index), Data_0['Actual'], c=plt.cm.coolwarm(0.), s=200,\n",
190 |     "           edgecolors='y', marker='o', label=u'Actual normal data')\n",
191 |     "ax.scatter(list(Data_1.index), Data_1['Actual'], c=plt.cm.coolwarm(1.), s=200, \n",
192 |     "           edgecolors='y', marker='^', label=u'Actual fault data')\n",
193 |     "plt.plot(list(Temp.index), Temp['Predicted'], 'c-*', lw = 4, ms = 5, label=u'SVM Prediction')\n",
194 |     "plt.xlabel('Data index',fontsize=30)\n",
195 |     "plt.ylabel('Heat exchanger efficiency of the sub-cooling section',fontsize=30)\n",
196 |     "plt.xticks(fontsize=30)\n",
197 |     "plt.yticks(fontsize=30)\n",
198 |     "plt.legend(loc='best',fontsize=30)\n",
199 |     "plt.savefig('M0-Cond-Foul-Actual-Labels-Predictions')\n",
200 |     "\n",
201 |     "fig = plt.figure(figsize=(25,20))\n",
202 |     "ax = fig.add_subplot(1, 1, 1)\n",
203 |     "Data_0 = Temp.loc[Temp['Pred_Labels'][Temp['Pred_Labels']==0].index]\n",
204 |     "Data_1 = Temp.loc[Temp['Pred_Labels'][Temp['Pred_Labels']==1].index]\n",
205 |     "ax.scatter(list(Data_0.index), Data_0['Actual'], c=plt.cm.coolwarm(0.), s=200, \n",
206 |     "           edgecolors='y', marker='o', label=u'Predicted normal data')\n",
207 |     "ax.scatter(list(Data_1.index), Data_1['Actual'], c=plt.cm.coolwarm(1.), s=200,\n",
208 |     "           edgecolors='y', marker='^', label=u'Predicted fault data')\n",
209 |     "plt.plot(list(Temp.index), Temp['Predicted'], 'c-*', lw = 4, ms = 5, label=u'SVM Prediction')\n",
210 |     "plt.plot(list(Temp.index), Temp['Threshold'], 'k--', lw = 4, label=u'Dynamic threshold')\n",
211 |     "plt.xlabel('Data index',fontsize=30)\n",
212 |     "plt.ylabel('Heat exchanger efficiency of the sub-cooling section',fontsize=30)\n",
213 |     "plt.xticks(fontsize=30)\n",
214 |     "plt.yticks(fontsize=30)\n",
215 |     "plt.legend(loc='best',fontsize=30)\n",
216 |     "plt.savefig('M0-Cond-Foul-SVM-Dynamic-Threshold-Predicted-Labels')\n",
217 |     "\n",
218 |     "t1 = time()\n",
219 |     "print('Time taken for this trial %f' %(t1-t0))"
220 |    ]
221 |   },
222 |   {
223 |    "cell_type": "code",
224 |    "execution_count": null,
225 |    "metadata": {
226 |     "collapsed": true
227 |    },
228 |    "outputs": [],
229 |    "source": []
230 |   }
231 |  ],
232 |  "metadata": {
233 |   "anaconda-cloud": {},
234 |   "kernelspec": {
235 |    "display_name": "Python [Root]",
236 |    "language": "python",
237 |    "name": "Python [Root]"
238 |   },
239 |   "language_info": {
240 |    "codemirror_mode": {
241 |     "name": "ipython",
242 |     "version": 3
243 |    },
244 |    "file_extension": ".py",
245 |    "mimetype": "text/x-python",
246 |    "name": "python",
247 |    "nbconvert_exporter": "python",
248 |    "pygments_lexer": "ipython3",
249 |    "version": "3.5.4"
250 |   }
251 |  },
252 |  "nbformat": 4,
253 |  "nbformat_minor": 1
254 | }
255 | 


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-20/Condenser_Foul_SVM_Dynamic_Threshold.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": false
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import numpy as np\n",
 12 |     "import pandas as pd\n",
 13 |     "from sklearn.model_selection import GridSearchCV, KFold, train_test_split\n",
 14 |     "import matplotlib.pyplot as plt\n",
 15 |     "import seaborn as sns\n",
 16 |     "sns.set(color_codes=True)\n",
 17 |     "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer, adjusted_rand_score, \\\n",
 18 |     "                    accuracy_score, f1_score, confusion_matrix, classification_report, roc_auc_score, recall_score\n",
 19 |     "from time import time\n",
 20 |     "from sklearn.preprocessing import MinMaxScaler\n",
 21 |     "import scipy.stats as st\n",
 22 |     "from sklearn.feature_selection import RFE, RFECV, SelectKBest, mutual_info_regression\n",
 23 |     "from sklearn.svm import SVR\n",
 24 |     "from sklearn.pipeline import Pipeline\n",
 25 |     "import pprint as pp\n",
 26 |     "%matplotlib inline"
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "code",
 31 |    "execution_count": null,
 32 |    "metadata": {
 33 |     "collapsed": false
 34 |    },
 35 |    "outputs": [],
 36 |    "source": [
 37 |     "Chiller_Data = pd.read_excel('Condenser_Fouling_Fault_Data.xlsx')"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "code",
 42 |    "execution_count": null,
 43 |    "metadata": {
 44 |     "collapsed": true
 45 |    },
 46 |    "outputs": [],
 47 |    "source": [
 48 |     "Chiller_Data = Chiller_Data.loc[Chiller_Data['kW'] != 1.682000e-45]\n",
 49 |     "Chiller_Data.reset_index(drop=True, inplace=True)"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "code",
 54 |    "execution_count": null,
 55 |    "metadata": {
 56 |     "collapsed": false
 57 |    },
 58 |    "outputs": [],
 59 |    "source": [
 60 |     "Chiller_Data['Target'] = (Chiller_Data['TRC_sub'])/(Chiller_Data['TRC']-Chiller_Data['TCI'])\n",
 61 |     "Chiller_Data['Lag1'] = (Chiller_Data['Target'].shift(1))\n",
 62 |     "Chiller_Data.dropna(axis=0,inplace=True)"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": null,
 68 |    "metadata": {
 69 |     "collapsed": true
 70 |    },
 71 |    "outputs": [],
 72 |    "source": [
 73 |     "y = Chiller_Data['Target'].as_matrix()\n",
 74 |     "True_Labels = Chiller_Data['Label'].as_matrix()\n",
 75 |     "Chiller_Data.drop(['Target','Label','Time (minutes)'], axis=1, inplace=True)\n",
 76 |     "#Feature_Names = ['Lag1','TEI','TEO','TCI','TCO','kW','FWC','FWE','TEA','TCA','TRE','PRE','TRC','PRC','TRC_sub','T_suc',\n",
 77 |     "#                'Tsh_suc','TR_dis','Tsh_dis','P_lift','TO_sump','TO_feed','PO_feed','TWCD','TWED']\n",
 78 |     "Feature_Names = list(Chiller_Data)\n",
 79 |     "X = Chiller_Data[Feature_Names].as_matrix()"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "code",
 84 |    "execution_count": null,
 85 |    "metadata": {
 86 |     "collapsed": true
 87 |    },
 88 |    "outputs": [],
 89 |    "source": [
 90 |     "#################################################################################################\n",
 91 |     "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.55, shuffle=False)\n",
 92 |     "TL_train, TL_Test = train_test_split(True_Labels, test_size=0.55, shuffle=False)\n",
 93 |     "#################################################################################################"
 94 |    ]
 95 |   },
 96 |   {
 97 |    "cell_type": "code",
 98 |    "execution_count": null,
 99 |    "metadata": {
100 |     "collapsed": false
101 |    },
102 |    "outputs": [],
103 |    "source": [
104 |     "def calc_dyn_threshold(A, P, I, N):\n",
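105 |     "    # Threshold = mean - N*std over the I most recent values of P (current one included); A below it is labelled 1. Control false alarm rates by tuning I and N, e.g. increase I or N to reduce false alarms\n",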
106 |     "    threshold = np.zeros(I-1)\n",
107 |     "    threshold[0:(I-1)] = P[0:(I-1)]\n",
108 |     "    labels = np.zeros(I-1)\n",
109 |     "    for k in np.arange(I,len(P)+1):\n",
110 |     "        mu = np.mean(P[(k-I):k])\n",
111 |     "        sigma = np.std(P[(k-I):k])\n",
112 |     "        T = mu - N*sigma\n",
113 |     "        threshold = np.append(threshold,T)\n",
114 |     "        if (A[k-1] < threshold[k-1]) :\n",
115 |     "            labels = np.append(labels,1)\n",
116 |     "        else:\n",
117 |     "            labels = np.append(labels,0)\n",
118 |     "    return labels, threshold"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "code",
123 |    "execution_count": null,
124 |    "metadata": {
125 |     "collapsed": false
126 |    },
127 |    "outputs": [],
128 |    "source": [
129 |     "t0 = time()\n",
130 |     "np.random.seed(7)\n",
131 |     "########################################################################################\n",
132 |     "# Regression\n",
133 |     "kf = KFold(n_splits=10, shuffle=True, random_state=7)\n",
134 |     "scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n",
135 |     "\n",
136 |     "estimators = []\n",
137 |     "estimators.append(('standardize', MinMaxScaler()))\n",
138 |     "estimators.append(('FS', SelectKBest(mutual_info_regression)))\n",
139 |     "estimators.append(('SVM', SVR()))\n",
140 |     "pipe = Pipeline(estimators)\n",
141 |     "    \n",
142 |     "y_train_scaled = (y_train - y_train.min())/(y_train.max() - y_train.min())\n",
143 |     "    \n",
144 |     "p_grid = dict(FS__k = [8, 16],\n",
145 |     "              SVM__gamma = np.logspace(-3, 0, 4),\n",
146 |     "              SVM__C = np.logspace(0, 3, 4))\n",
147 |     "    \n",
148 |     "model = GridSearchCV(estimator = pipe, param_grid = p_grid, scoring = scoring_param, cv = kf, n_jobs=-1)\n",
149 |     "model.fit(X_train, y_train_scaled)\n",
150 |     "    \n",
151 |     "params = model.best_params_\n",
152 |     "print(\"Best k: %s Best gamma: %f Best C: %s\" % (params['FS__k'], params['SVM__gamma'], params['SVM__C']))\n",
153 |     "    \n",
154 |     "Y_Test_Pred_scaled = model.predict(X_test)\n",
155 |     "Y_Test_Predicted = (Y_Test_Pred_scaled*(y_train.max()-y_train.min()))+y_train.min()\n",
156 |     "    \n",
157 |     "rmse = np.sqrt(mean_squared_error(y_test,Y_Test_Predicted))\n",
158 |     "data_range = y_test.max() - y_test.min()\n",
159 |     "NRMSE = (rmse/data_range) * 100.0\n",
160 |     "RSQ = r2_score(y_test,Y_Test_Predicted)\n",
161 |     "print(\"Normalized RMSE: %0.3f\" % NRMSE)\n",
162 |     "print(\"R-squared: %0.3f\" % RSQ)\n",
163 |     "\n",
164 |     "Labels, Threshold = calc_dyn_threshold(y_test, Y_Test_Predicted, 2, 2)\n",
165 |     "Temp = pd.DataFrame(data={'Actual': y_test, 'Predicted':Y_Test_Predicted, 'Labels':TL_Test, \n",
166 |     "                               'Threshold':Threshold, 'Pred_Labels': Labels})\n",
167 |     "\n",
168 |     "print(\"########################################################################################\")\n",
169 |     "print(\"Confusion Matrix - testing:\")\n",
170 |     "print(confusion_matrix(Temp['Labels'], Temp['Pred_Labels']))\n",
171 |     "tn, fp, fn, tp = confusion_matrix(Temp['Labels'], Temp['Pred_Labels']).ravel()\n",
172 |     "print(\"True Negative, False Positive, False Negative, True Positive {}.\".format([tn, fp, fn, tp]))\n",
173 |     "print(\"False positive means false alarms\")\n",
174 |     "print(\"False Negative means missed faults\")\n",
175 |     "print(\"########################################################################################\")\n",
176 |     "print(\"Classification Report - testing:\")\n",
177 |     "print(classification_report(Temp['Labels'], Temp['Pred_Labels'], target_names=['Normal', 'Fault']))\n",
178 |     "print(\"########################################################################################\")\n",
179 |     "print(\"Accuracy - testing: %0.3f\" % accuracy_score(Temp['Labels'], Temp['Pred_Labels']))\n",
180 |     "print(\"########################################################################################\")\n",
181 |     "print(\"ROC AUC score - testing: %0.3f\" % roc_auc_score(Temp['Labels'], Temp['Pred_Labels']))\n",
182 |     "print(\"########################################################################################\")\n",
183 |     "########################################################################################\n",
184 |     "\n",
185 |     "fig = plt.figure(figsize=(25,20))\n",
186 |     "ax = fig.add_subplot(1, 1, 1)\n",
187 |     "Data_0 = Temp.loc[Temp['Labels'][Temp['Labels']==0].index]\n",
188 |     "Data_1 = Temp.loc[Temp['Labels'][Temp['Labels']==1].index]\n",
189 |     "ax.scatter(list(Data_0.index), Data_0['Actual'], c=plt.cm.coolwarm(0.), s=200,\n",
190 |     "           edgecolors='y', marker='o', label=u'Actual normal data')\n",
191 |     "ax.scatter(list(Data_1.index), Data_1['Actual'], c=plt.cm.coolwarm(1.), s=200, \n",
192 |     "           edgecolors='y', marker='^', label=u'Actual fault data')\n",
193 |     "plt.plot(list(Temp.index), Temp['Predicted'], 'c-*', lw = 4, ms = 5, label=u'SVM Prediction')\n",
194 |     "plt.xlabel('Data index',fontsize=30)\n",
195 |     "plt.ylabel('Heat exchanger efficiency of the sub-cooling section',fontsize=30)\n",
196 |     "plt.xticks(fontsize=30)\n",
197 |     "plt.yticks(fontsize=30)\n",
198 |     "plt.legend(loc='best',fontsize=30)\n",
199 |     "plt.savefig('M0-Cond-Foul-Actual-Labels-Predictions')\n",
200 |     "\n",
201 |     "fig = plt.figure(figsize=(25,20))\n",
202 |     "ax = fig.add_subplot(1, 1, 1)\n",
203 |     "Data_0 = Temp.loc[Temp['Pred_Labels'][Temp['Pred_Labels']==0].index]\n",
204 |     "Data_1 = Temp.loc[Temp['Pred_Labels'][Temp['Pred_Labels']==1].index]\n",
205 |     "ax.scatter(list(Data_0.index), Data_0['Actual'], c=plt.cm.coolwarm(0.), s=200, \n",
206 |     "           edgecolors='y', marker='o', label=u'Predicted normal data')\n",
207 |     "ax.scatter(list(Data_1.index), Data_1['Actual'], c=plt.cm.coolwarm(1.), s=200,\n",
208 |     "           edgecolors='y', marker='^', label=u'Predicted fault data')\n",
209 |     "plt.plot(list(Temp.index), Temp['Predicted'], 'c-*', lw = 4, ms = 5, label=u'SVM Prediction')\n",
210 |     "plt.plot(list(Temp.index), Temp['Threshold'], 'k--', lw = 4, label=u'Dynamic threshold')\n",
211 |     "plt.xlabel('Data index',fontsize=30)\n",
212 |     "plt.ylabel('Heat exchanger efficiency of the sub-cooling section',fontsize=30)\n",
213 |     "plt.xticks(fontsize=30)\n",
214 |     "plt.yticks(fontsize=30)\n",
215 |     "plt.legend(loc='best',fontsize=30)\n",
216 |     "plt.savefig('M0-Cond-Foul-SVM-Dynamic-Threshold-Predicted-Labels')\n",
217 |     "\n",
218 |     "t1 = time()\n",
219 |     "print('Time taken for this trial %f' %(t1-t0))"
220 |    ]
221 |   },
222 |   {
223 |    "cell_type": "code",
224 |    "execution_count": null,
225 |    "metadata": {
226 |     "collapsed": true
227 |    },
228 |    "outputs": [],
229 |    "source": []
230 |   }
231 |  ],
232 |  "metadata": {
233 |   "anaconda-cloud": {},
234 |   "kernelspec": {
235 |    "display_name": "Python [Root]",
236 |    "language": "python",
237 |    "name": "Python [Root]"
238 |   },
239 |   "language_info": {
240 |    "codemirror_mode": {
241 |     "name": "ipython",
242 |     "version": 3
243 |    },
244 |    "file_extension": ".py",
245 |    "mimetype": "text/x-python",
246 |    "name": "python",
247 |    "nbconvert_exporter": "python",
248 |    "pygments_lexer": "ipython3",
249 |    "version": "3.5.4"
250 |   }
251 |  },
252 |  "nbformat": 4,
253 |  "nbformat_minor": 1
254 | }
255 | 


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-30/Condenser_Foul_SVM_Dynamic_Threshold.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": false
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import numpy as np\n",
 12 |     "import pandas as pd\n",
 13 |     "from sklearn.model_selection import GridSearchCV, KFold, train_test_split\n",
 14 |     "import matplotlib.pyplot as plt\n",
 15 |     "import seaborn as sns\n",
 16 |     "sns.set(color_codes=True)\n",
 17 |     "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer, adjusted_rand_score, \\\n",
 18 |     "                    accuracy_score, f1_score, confusion_matrix, classification_report, roc_auc_score, recall_score\n",
 19 |     "from time import time\n",
 20 |     "from sklearn.preprocessing import MinMaxScaler\n",
 21 |     "import scipy.stats as st\n",
 22 |     "from sklearn.feature_selection import RFE, RFECV, SelectKBest, mutual_info_regression\n",
 23 |     "from sklearn.svm import SVR\n",
 24 |     "from sklearn.pipeline import Pipeline\n",
 25 |     "import pprint as pp\n",
 26 |     "%matplotlib inline"
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "code",
 31 |    "execution_count": null,
 32 |    "metadata": {
 33 |     "collapsed": false
 34 |    },
 35 |    "outputs": [],
 36 |    "source": [
 37 |     "Chiller_Data = pd.read_excel('Condenser_Fouling_Fault_Data.xlsx')"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "code",
 42 |    "execution_count": null,
 43 |    "metadata": {
 44 |     "collapsed": true
 45 |    },
 46 |    "outputs": [],
 47 |    "source": [
 48 |     "Chiller_Data = Chiller_Data.loc[Chiller_Data['kW'] != 1.682000e-45]\n",
 49 |     "Chiller_Data.reset_index(drop=True, inplace=True)"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "code",
 54 |    "execution_count": null,
 55 |    "metadata": {
 56 |     "collapsed": false
 57 |    },
 58 |    "outputs": [],
 59 |    "source": [
 60 |     "Chiller_Data['Target'] = (Chiller_Data['TRC_sub'])/(Chiller_Data['TRC']-Chiller_Data['TCI'])\n",
 61 |     "Chiller_Data['Lag1'] = (Chiller_Data['Target'].shift(1))\n",
 62 |     "Chiller_Data.dropna(axis=0,inplace=True)"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": null,
 68 |    "metadata": {
 69 |     "collapsed": true
 70 |    },
 71 |    "outputs": [],
 72 |    "source": [
 73 |     "y = Chiller_Data['Target'].as_matrix()\n",
 74 |     "True_Labels = Chiller_Data['Label'].as_matrix()\n",
 75 |     "Chiller_Data.drop(['Target','Label','Time (minutes)'], axis=1, inplace=True)\n",
 76 |     "#Feature_Names = ['Lag1','TEI','TEO','TCI','TCO','kW','FWC','FWE','TEA','TCA','TRE','PRE','TRC','PRC','TRC_sub','T_suc',\n",
 77 |     "#                'Tsh_suc','TR_dis','Tsh_dis','P_lift','TO_sump','TO_feed','PO_feed','TWCD','TWED']\n",
 78 |     "Feature_Names = list(Chiller_Data)\n",
 79 |     "X = Chiller_Data[Feature_Names].as_matrix()"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "code",
 84 |    "execution_count": null,
 85 |    "metadata": {
 86 |     "collapsed": true
 87 |    },
 88 |    "outputs": [],
 89 |    "source": [
 90 |     "#################################################################################################\n",
 91 |     "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.55, shuffle=False)\n",
 92 |     "TL_train, TL_Test = train_test_split(True_Labels, test_size=0.55, shuffle=False)\n",
 93 |     "#################################################################################################"
 94 |    ]
 95 |   },
 96 |   {
 97 |    "cell_type": "code",
 98 |    "execution_count": null,
 99 |    "metadata": {
100 |     "collapsed": false
101 |    },
102 |    "outputs": [],
103 |    "source": [
104 |     "def calc_dyn_threshold(A, P, I, N):\n",
105 |     "    # Control the false-alarm rate by tuning I and N, e.g. increase I or N to reduce false alarms.\n",
106 |     "    threshold = np.zeros(I-1)\n",
107 |     "    threshold[0:(I-1)] = P[0:(I-1)]\n",
108 |     "    labels = np.zeros(I-1)\n",
109 |     "    for k in np.arange(I,len(P)+1):\n",
110 |     "        mu = np.mean(P[(k-I):k])\n",
111 |     "        sigma = np.std(P[(k-I):k])\n",
112 |     "        T = mu - N*sigma\n",
113 |     "        threshold = np.append(threshold,T)\n",
114 |     "        if (A[k-1] < threshold[k-1]) :\n",
115 |     "            labels = np.append(labels,1)\n",
116 |     "        else:\n",
117 |     "            labels = np.append(labels,0)\n",
118 |     "    return labels, threshold"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "code",
123 |    "execution_count": null,
124 |    "metadata": {
125 |     "collapsed": false
126 |    },
127 |    "outputs": [],
128 |    "source": [
129 |     "t0 = time()\n",
130 |     "np.random.seed(7)\n",
131 |     "########################################################################################\n",
132 |     "# Regression\n",
133 |     "kf = KFold(n_splits=10, shuffle=True, random_state=7)\n",
134 |     "scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n",
135 |     "\n",
136 |     "estimators = []\n",
137 |     "estimators.append(('standardize', MinMaxScaler()))\n",
138 |     "estimators.append(('FS', SelectKBest(mutual_info_regression)))\n",
139 |     "estimators.append(('SVM', SVR()))\n",
140 |     "pipe = Pipeline(estimators)\n",
141 |     "    \n",
142 |     "y_train_scaled = (y_train - y_train.min())/(y_train.max() - y_train.min())\n",
143 |     "    \n",
144 |     "p_grid = dict(FS__k = [8, 16],\n",
145 |     "              SVM__gamma = np.logspace(-3, 0, 4),\n",
146 |     "              SVM__C = np.logspace(0, 3, 4))\n",
147 |     "    \n",
148 |     "model = GridSearchCV(estimator = pipe, param_grid = p_grid, scoring = scoring_param, cv = kf, n_jobs=-1)\n",
149 |     "model.fit(X_train, y_train_scaled)\n",
150 |     "    \n",
151 |     "params = model.best_params_\n",
152 |     "print(\"Best k: %s Best gamma: %f Best C: %s\" % (params['FS__k'], params['SVM__gamma'], params['SVM__C']))\n",
153 |     "    \n",
154 |     "Y_Test_Pred_scaled = model.predict(X_test)\n",
155 |     "Y_Test_Predicted = (Y_Test_Pred_scaled*(y_train.max()-y_train.min()))+y_train.min()\n",
156 |     "    \n",
157 |     "rmse = np.sqrt(mean_squared_error(y_test,Y_Test_Predicted))\n",
158 |     "data_range = y_test.max() - y_test.min()\n",
159 |     "NRMSE = (rmse/data_range) * 100.0\n",
160 |     "RSQ = r2_score(y_test,Y_Test_Predicted)\n",
161 |     "print(\"Normalized RMSE: %0.3f\" % NRMSE)\n",
162 |     "print(\"R-squared: %0.3f\" % RSQ)\n",
163 |     "\n",
164 |     "Labels, Threshold = calc_dyn_threshold(y_test, Y_Test_Predicted, 2, 2)\n",
165 |     "Temp = pd.DataFrame(data={'Actual': y_test, 'Predicted':Y_Test_Predicted, 'Labels':TL_Test, \n",
166 |     "                               'Threshold':Threshold, 'Pred_Labels': Labels})\n",
167 |     "\n",
168 |     "print(\"########################################################################################\")\n",
169 |     "print(\"Confusion Matrix - testing:\")\n",
170 |     "print(confusion_matrix(Temp['Labels'], Temp['Pred_Labels']))\n",
171 |     "tn, fp, fn, tp = confusion_matrix(Temp['Labels'], Temp['Pred_Labels']).ravel()\n",
172 |     "print(\"True Negative, False Positive, False Negative, True Positive {}.\".format([tn, fp, fn, tp]))\n",
173 |     "print(\"False positive means false alarms\")\n",
174 |     "print(\"False Negative means missed faults\")\n",
175 |     "print(\"########################################################################################\")\n",
176 |     "print(\"Classification Report - testing:\")\n",
177 |     "print(classification_report(Temp['Labels'], Temp['Pred_Labels'], target_names=['Normal', 'Fault']))\n",
178 |     "print(\"########################################################################################\")\n",
179 |     "print(\"Accuracy - testing: %0.3f\" % accuracy_score(Temp['Labels'], Temp['Pred_Labels']))\n",
180 |     "print(\"########################################################################################\")\n",
181 |     "print(\"ROC AUC score - testing: %0.3f\" % roc_auc_score(Temp['Labels'], Temp['Pred_Labels']))\n",
182 |     "print(\"########################################################################################\")\n",
183 |     "########################################################################################\n",
184 |     "\n",
185 |     "fig = plt.figure(figsize=(25,20))\n",
186 |     "ax = fig.add_subplot(1, 1, 1)\n",
187 |     "Data_0 = Temp.loc[Temp['Labels'][Temp['Labels']==0].index]\n",
188 |     "Data_1 = Temp.loc[Temp['Labels'][Temp['Labels']==1].index]\n",
189 |     "ax.scatter(list(Data_0.index), Data_0['Actual'], c=plt.cm.coolwarm(0.), s=200,\n",
190 |     "           edgecolors='y', marker='o', label=u'Actual normal data')\n",
191 |     "ax.scatter(list(Data_1.index), Data_1['Actual'], c=plt.cm.coolwarm(1.), s=200, \n",
192 |     "           edgecolors='y', marker='^', label=u'Actual fault data')\n",
193 |     "plt.plot(list(Temp.index), Temp['Predicted'], 'c-*', lw = 4, ms = 5, label=u'SVM Prediction')\n",
194 |     "plt.xlabel('Data index',fontsize=30)\n",
195 |     "plt.ylabel('Heat exchanger efficiency of the sub-cooling section',fontsize=30)\n",
196 |     "plt.xticks(fontsize=30)\n",
197 |     "plt.yticks(fontsize=30)\n",
198 |     "plt.legend(loc='best',fontsize=30)\n",
199 |     "plt.savefig('M0-Cond-Foul-Actual-Labels-Predictions')\n",
200 |     "\n",
201 |     "fig = plt.figure(figsize=(25,20))\n",
202 |     "ax = fig.add_subplot(1, 1, 1)\n",
203 |     "Data_0 = Temp.loc[Temp['Pred_Labels'][Temp['Pred_Labels']==0].index]\n",
204 |     "Data_1 = Temp.loc[Temp['Pred_Labels'][Temp['Pred_Labels']==1].index]\n",
205 |     "ax.scatter(list(Data_0.index), Data_0['Actual'], c=plt.cm.coolwarm(0.), s=200, \n",
206 |     "           edgecolors='y', marker='o', label=u'Predicted normal data')\n",
207 |     "ax.scatter(list(Data_1.index), Data_1['Actual'], c=plt.cm.coolwarm(1.), s=200,\n",
208 |     "           edgecolors='y', marker='^', label=u'Predicted fault data')\n",
209 |     "plt.plot(list(Temp.index), Temp['Predicted'], 'c-*', lw = 4, ms = 5, label=u'SVM Prediction')\n",
210 |     "plt.plot(list(Temp.index), Temp['Threshold'], 'k--', lw = 4, label=u'Dynamic threshold')\n",
211 |     "plt.xlabel('Data index',fontsize=30)\n",
212 |     "plt.ylabel('Heat exchanger efficiency of the sub-cooling section',fontsize=30)\n",
213 |     "plt.xticks(fontsize=30)\n",
214 |     "plt.yticks(fontsize=30)\n",
215 |     "plt.legend(loc='best',fontsize=30)\n",
216 |     "plt.savefig('M0-Cond-Foul-SVM-Dynamic-Threshold-Predicted-Labels')\n",
217 |     "\n",
218 |     "t1 = time()\n",
219 |     "print('Time taken for this trial %f' %(t1-t0))"
220 |    ]
221 |   },
222 |   {
223 |    "cell_type": "code",
224 |    "execution_count": null,
225 |    "metadata": {
226 |     "collapsed": true
227 |    },
228 |    "outputs": [],
229 |    "source": []
230 |   }
231 |  ],
232 |  "metadata": {
233 |   "anaconda-cloud": {},
234 |   "kernelspec": {
235 |    "display_name": "Python [Root]",
236 |    "language": "python",
237 |    "name": "Python [Root]"
238 |   },
239 |   "language_info": {
240 |    "codemirror_mode": {
241 |     "name": "ipython",
242 |     "version": 3
243 |    },
244 |    "file_extension": ".py",
245 |    "mimetype": "text/x-python",
246 |    "name": "python",
247 |    "nbconvert_exporter": "python",
248 |    "pygments_lexer": "ipython3",
249 |    "version": "3.5.4"
250 |   }
251 |  },
252 |  "nbformat": 4,
253 |  "nbformat_minor": 1
254 | }
255 | 


--------------------------------------------------------------------------------
/Chapter6-Case-Study/SL-10/Condenser_Foul_RF_Dynamic_Threshold.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": false
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import numpy as np\n",
 12 |     "import pandas as pd\n",
 13 |     "from sklearn.model_selection import GridSearchCV, KFold, train_test_split\n",
 14 |     "import matplotlib.pyplot as plt\n",
 15 |     "import seaborn as sns\n",
 16 |     "sns.set(color_codes=True)\n",
 17 |     "from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer, adjusted_rand_score, \\\n",
 18 |     "                    accuracy_score, f1_score, confusion_matrix, classification_report, roc_auc_score, recall_score\n",
 19 |     "from time import time\n",
 20 |     "import scipy.stats as st\n",
 21 |     "from sklearn.feature_selection import RFE, RFECV\n",
 22 |     "from sklearn.ensemble import RandomForestRegressor\n",
 23 |     "from sklearn.pipeline import Pipeline\n",
 24 |     "import pprint as pp\n",
 25 |     "%matplotlib inline"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "code",
 30 |    "execution_count": null,
 31 |    "metadata": {
 32 |     "collapsed": false
 33 |    },
 34 |    "outputs": [],
 35 |    "source": [
 36 |     "Chiller_Data = pd.read_excel('Condenser_Fouling_Fault_Data.xlsx')"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "code",
 41 |    "execution_count": null,
 42 |    "metadata": {
 43 |     "collapsed": false
 44 |    },
 45 |    "outputs": [],
 46 |    "source": [
 47 |     "Chiller_Data = Chiller_Data.loc[Chiller_Data['kW'] != 1.682000e-45]\n",
 48 |     "Chiller_Data.reset_index(drop=True, inplace=True)"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "code",
 53 |    "execution_count": null,
 54 |    "metadata": {
 55 |     "collapsed": true
 56 |    },
 57 |    "outputs": [],
 58 |    "source": [
 59 |     "Chiller_Data['Target'] = (Chiller_Data['TRC_sub'])/(Chiller_Data['TRC']-Chiller_Data['TCI'])\n",
 60 |     "Chiller_Data['Lag1'] = (Chiller_Data['Target'].shift(1))\n",
 61 |     "Chiller_Data.dropna(axis=0,inplace=True)"
 62 |    ]
 63 |   },
 64 |   {
 65 |    "cell_type": "code",
 66 |    "execution_count": null,
 67 |    "metadata": {
 68 |     "collapsed": true
 69 |    },
 70 |    "outputs": [],
 71 |    "source": [
 72 |     "y = Chiller_Data['Target'].as_matrix()\n",
 73 |     "True_Labels = Chiller_Data['Label'].as_matrix()\n",
 74 |     "Chiller_Data.drop(['Target','Label','Time (minutes)'], axis=1, inplace=True)\n",
 75 |     "#Feature_Names = ['Lag1','TEI','TEO','TCI','TCO','kW','FWC','FWE','TEA','TCA','TRE','PRE','TRC','PRC','TRC_sub','T_suc',\n",
 76 |     "#                'Tsh_suc','TR_dis','Tsh_dis','P_lift','TO_sump','TO_feed','PO_feed','TWCD','TWED']\n",
 77 |     "Feature_Names = list(Chiller_Data)\n",
 78 |     "X = Chiller_Data[Feature_Names].as_matrix()"
 79 |    ]
 80 |   },
 81 |   {
 82 |    "cell_type": "code",
 83 |    "execution_count": null,
 84 |    "metadata": {
 85 |     "collapsed": false
 86 |    },
 87 |    "outputs": [],
 88 |    "source": [
 89 |     "#################################################################################################\n",
 90 |     "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.55, shuffle=False)\n",
 91 |     "TL_train, TL_Test = train_test_split(True_Labels, test_size=0.55, shuffle=False)\n",
 92 |     "#################################################################################################"
 93 |    ]
 94 |   },
 95 |   {
 96 |    "cell_type": "code",
 97 |    "execution_count": null,
 98 |    "metadata": {
 99 |     "collapsed": true
100 |    },
101 |    "outputs": [],
102 |    "source": [
103 |     "def calc_dyn_threshold(A, P, I, N):\n",
104 |     "    # Control the false-alarm rate by tuning I and N, e.g. increase I or N to reduce false alarms.\n",
105 |     "    threshold = np.zeros(I-1)\n",
106 |     "    threshold[0:(I-1)] = P[0:(I-1)]\n",
107 |     "    labels = np.zeros(I-1)\n",
108 |     "    for k in np.arange(I,len(P)+1):\n",
109 |     "        mu = np.mean(P[(k-I):k])\n",
110 |     "        sigma = np.std(P[(k-I):k])\n",
111 |     "        T = mu - N*sigma\n",
112 |     "        threshold = np.append(threshold,T)\n",
113 |     "        if (A[k-1] < threshold[k-1]) :\n",
114 |     "            labels = np.append(labels,1)\n",
115 |     "        else:\n",
116 |     "            labels = np.append(labels,0)\n",
117 |     "    return labels, threshold"
118 |    ]
119 |   },
120 |   {
121 |    "cell_type": "code",
122 |    "execution_count": null,
123 |    "metadata": {
124 |     "collapsed": false
125 |    },
126 |    "outputs": [],
127 |    "source": [
128 |     "t0 = time()\n",
129 |     "np.random.seed(7)\n",
130 |     "########################################################################################\n",
131 |     "# Regression\n",
132 |     "kf = KFold(n_splits=10, shuffle=True, random_state=7)\n",
133 |     "scoring_param = make_scorer(mean_squared_error,greater_is_better=False)\n",
134 |     "\n",
135 |     "rfecv = RFECV(estimator=RandomForestRegressor(n_jobs=-1), step=1, cv=kf, scoring=scoring_param, n_jobs=-1)\n",
136 |     "FS_model = rfecv.fit(X_train, y_train)\n",
137 |     "\n",
138 |     "ranks = FS_model.ranking_\n",
139 |     "FN =[]\n",
140 |     "for i in range(len(ranks)):\n",
141 |     "    if ranks[i] == 1:\n",
142 |     "        FN.append(Feature_Names[i])\n",
143 |     "print(FN)\n",
144 |     "\n",
145 |     "X = Chiller_Data[FN].as_matrix()\n",
146 |     "X_train, X_test = train_test_split(X, test_size=0.55, shuffle=False)\n",
147 |     "\n",
148 |     "NE = [int(i) for i in np.linspace(100,1000,num=10)]\n",
149 |     "p_grid = dict()\n",
150 |     "p_grid = dict(n_estimators = NE)\n",
151 |     "\n",
152 |     "model = GridSearchCV(estimator = RandomForestRegressor(n_jobs=-1), param_grid = p_grid, scoring = scoring_param, \n",
153 |     "                     cv = kf, n_jobs=-1)\n",
154 |     "model.fit(X_train, y_train)\n",
155 |     "    \n",
156 |     "params = model.best_params_\n",
157 |     "print(\"Best Est: %s\" % (params['n_estimators']))\n",
158 |     "    \n",
159 |     "Y_Test_Predicted = model.predict(X_test)\n",
160 |     "    \n",
161 |     "rmse = np.sqrt(mean_squared_error(y_test,Y_Test_Predicted))\n",
162 |     "data_range = y_test.max() - y_test.min()\n",
163 |     "NRMSE = (rmse/data_range) * 100.0\n",
164 |     "RSQ = r2_score(y_test,Y_Test_Predicted)\n",
165 |     "print(\"Normalized RMSE: %0.3f\" % NRMSE)\n",
166 |     "print(\"R-squared: %0.3f\" % RSQ)\n",
167 |     "\n",
168 |     "Labels, Threshold = calc_dyn_threshold(y_test, Y_Test_Predicted, 2, 2)\n",
169 |     "Temp = pd.DataFrame(data={'Actual': y_test, 'Predicted':Y_Test_Predicted, 'Labels':TL_Test, \n",
170 |     "                               'Threshold':Threshold, 'Pred_Labels': Labels})\n",
171 |     "\n",
172 |     "print(\"########################################################################################\")\n",
173 |     "print(\"Confusion Matrix - testing:\")\n",
174 |     "print(confusion_matrix(Temp['Labels'], Temp['Pred_Labels']))\n",
175 |     "tn, fp, fn, tp = confusion_matrix(Temp['Labels'], Temp['Pred_Labels']).ravel()\n",
176 |     "print(\"True Negative, False Positive, False Negative, True Positive {}.\".format([tn, fp, fn, tp]))\n",
177 |     "print(\"False positive means false alarms\")\n",
178 |     "print(\"False Negative means missed faults\")\n",
179 |     "print(\"########################################################################################\")\n",
180 |     "print(\"Classification Report - testing:\")\n",
181 |     "print(classification_report(Temp['Labels'], Temp['Pred_Labels'], target_names=['Normal', 'Fault']))\n",
182 |     "print(\"########################################################################################\")\n",
183 |     "print(\"Accuracy - testing: %0.3f\" % accuracy_score(Temp['Labels'], Temp['Pred_Labels']))\n",
184 |     "print(\"########################################################################################\")\n",
185 |     "print(\"ROC AUC score - testing: %0.3f\" % roc_auc_score(Temp['Labels'], Temp['Pred_Labels']))\n",
186 |     "print(\"########################################################################################\")\n",
187 |     "########################################################################################\n",
188 |     "\n",
189 |     "fig = plt.figure(figsize=(25,20))\n",
190 |     "ax = fig.add_subplot(1, 1, 1)\n",
191 |     "Data_0 = Temp.loc[Temp['Labels'][Temp['Labels']==0].index]\n",
192 |     "Data_1 = Temp.loc[Temp['Labels'][Temp['Labels']==1].index]\n",
193 |     "ax.scatter(list(Data_0.index), Data_0['Actual'], c=plt.cm.coolwarm(0.), s=200,\n",
194 |     "           edgecolors='y', marker='o', label=u'Actual normal data')\n",
195 |     "ax.scatter(list(Data_1.index), Data_1['Actual'], c=plt.cm.coolwarm(1.), s=200, \n",
196 |     "           edgecolors='y', marker='^', label=u'Actual fault data')\n",
197 |     "plt.plot(list(Temp.index), Temp['Predicted'], 'c-*', lw = 4, ms = 5, label=u'Random Forest Prediction')\n",
198 |     "plt.xlabel('Data index',fontsize=30)\n",
199 |     "plt.ylabel('Heat exchanger efficiency of the sub-cooling section',fontsize=30)\n",
200 |     "plt.xticks(fontsize=30)\n",
201 |     "plt.yticks(fontsize=30)\n",
202 |     "plt.legend(loc='best',fontsize=30)\n",
203 |     "plt.savefig('M2-Cond-Foul-Actual-Labels-Predictions')\n",
204 |     "\n",
205 |     "fig = plt.figure(figsize=(25,20))\n",
206 |     "ax = fig.add_subplot(1, 1, 1)\n",
207 |     "Data_0 = Temp.loc[Temp['Pred_Labels'][Temp['Pred_Labels']==0].index]\n",
208 |     "Data_1 = Temp.loc[Temp['Pred_Labels'][Temp['Pred_Labels']==1].index]\n",
209 |     "ax.scatter(list(Data_0.index), Data_0['Actual'], c=plt.cm.coolwarm(0.), s=200, \n",
210 |     "           edgecolors='y', marker='o', label=u'Predicted normal data')\n",
211 |     "ax.scatter(list(Data_1.index), Data_1['Actual'], c=plt.cm.coolwarm(1.), s=200,\n",
212 |     "           edgecolors='y', marker='^', label=u'Predicted fault data')\n",
213 |     "plt.plot(list(Temp.index), Temp['Predicted'], 'c-*', lw = 4, ms = 5, label=u'Random Forest Prediction')\n",
214 |     "plt.plot(list(Temp.index), Temp['Threshold'], 'k--', lw = 4, label=u'Dynamic threshold')\n",
215 |     "plt.xlabel('Data index',fontsize=30)\n",
216 |     "plt.ylabel('Heat exchanger efficiency of the sub-cooling section',fontsize=30)\n",
217 |     "plt.xticks(fontsize=30)\n",
218 |     "plt.yticks(fontsize=30)\n",
219 |     "plt.legend(loc='best',fontsize=30)\n",
220 |     "plt.savefig('M2-Cond-Foul-RF-Dynamic-Threshold-Predicted-Labels')\n",
221 |     "\n",
222 |     "print(FN,(model.best_estimator_.feature_importances_))\n",
223 |     "\n",
224 |     "t1 = time()\n",
225 |     "print('Time taken for this trial %f' %(t1-t0))"
226 |    ]
227 |   },
228 |   {
229 |    "cell_type": "code",
230 |    "execution_count": null,
231 |    "metadata": {
232 |     "collapsed": true
233 |    },
234 |    "outputs": [],
235 |    "source": []
236 |   }
237 |  ],
238 |  "metadata": {
239 |   "anaconda-cloud": {},
240 |   "kernelspec": {
241 |    "display_name": "Python [Root]",
242 |    "language": "python",
243 |    "name": "Python [Root]"
244 |   },
245 |   "language_info": {
246 |    "codemirror_mode": {
247 |     "name": "ipython",
248 |     "version": 3
249 |    },
250 |    "file_extension": ".py",
251 |    "mimetype": "text/x-python",
252 |    "name": "python",
253 |    "nbconvert_exporter": "python",
254 |    "pygments_lexer": "ipython3",
255 |    "version": "3.5.4"
256 |   }
257 |  },
258 |  "nbformat": 4,
259 |  "nbformat_minor": 1
260 | }
261 | 


--------------------------------------------------------------------------------