├── README.md ├── build_model.ipynb ├── data_generator.ipynb ├── data_processing.ipynb ├── main.ipynb └── utils.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # EMD_LSTM 2 | 3 | Apply EMD_LSTM (empirical mode decomposition combined with an LSTM) to forecast the energy consumption of residential buildings 4 | -------------------------------------------------------------------------------- /build_model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "Using TensorFlow backend.\n" 13 | ] 14 | }, 15 | { 16 | "name": "stdout", 17 | "output_type": "stream", 18 | "text": [ 19 | "importing Jupyter notebook from utils.ipynb\n" 20 | ] 21 | } 22 | ], 23 | "source": [ 24 | "import import_ipynb\n", 25 | "import os\n", 26 | "import math\n", 27 | "import numpy as np\n", 28 | "import datetime as dt\n", 29 | "from numpy import newaxis\n", 30 | "from keras.layers import Dense, Activation, Dropout, LSTM\n", 31 | "from keras.models import Sequential, load_model\n", 32 | "from keras.callbacks import EarlyStopping, ModelCheckpoint\n", 33 | "from keras.callbacks import TensorBoard\n", 34 | "from utils import Timer" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "class Model():\n", 44 | " \"\"\"A class for building and running inference with an LSTM model\"\"\"\n", 45 | " def __init__(self):\n", 46 | " self.model = Sequential()\n", 47 | " def load_model(self, filepath):\n", 48 | " print('[Model] Loading model from file %s' % filepath)\n", 49 | " self.model = load_model(filepath)\n", 50 | " def build_model(self, configs):\n", 51 | " timer = Timer()\n", 52 | " timer.start()\n", 53 | " for layer in configs['model']['layers']:\n", 54 | " neurons = layer['neurons'] if 'neurons' in layer else None\n", 55 | " dropout_rate = layer['rate'] if 'rate' in layer else None\n", 56 | " activation = layer['activation'] if 'activation' in layer else None\n", 57 | " return_seq = layer['return_seq'] if 'return_seq' in layer else None\n", 58 | " input_timesteps = layer['input_timesteps'] if 'input_timesteps' in layer else None\n", 59 | " input_dim = layer['input_dim'] if 'input_dim' in layer else None\n", 60 | " if layer['type'] == 'dense':\n", 61 | " self.model.add(Dense(neurons, activation=activation))\n", 62 | " if layer['type'] == 'lstm':\n", 63 | " self.model.add(LSTM(neurons, input_shape=(input_timesteps, input_dim),return_sequences=return_seq))\n", 64 | " if layer['type'] == 'dropout':\n", 65 | " self.model.add(Dropout(dropout_rate))\n", 66 | " self.model.compile(loss=configs['model']['loss'], optimizer=configs['model']['optimizer'])\n", 67 | " print('[Model] Model Compiled')\n", 68 | " timer.stop()\n", 69 | " \n", 70 | " def model_to_json(self,save_dir):\n", 71 | " model_json = self.model.to_json()\n", 72 | " fname = os.path.join(save_dir, 'model.json')\n", 73 | " with open(fname, \"w\") as json_file:\n", 74 | " json_file.write(model_json)\n", 75 | " print('[Model] Serialized model to JSON at %s' % fname)\n", 76 | " \n", 77 | " def train(self, x, y, epochs, batch_size, save_dir):\n", 78 | " timer = Timer()\n", 79 | " timer.start()\n", 80 | " print('[Model] Training Started')\n", 81 | " print('[Model] %s epochs, %s batch size' % (epochs, batch_size))\n", 82 | " \n", 83 | " save_fname = os.path.join(save_dir, 
'%s-e%s.h5' % (dt.datetime.now().strftime('%d%m%Y-%H%M%S'), str(epochs)))\n", 84 | " callbacks = [\n", 85 | " EarlyStopping(monitor='loss', patience=2), # fit() below receives no validation data, so monitor the training loss\n", 86 | " ModelCheckpoint(filepath=save_fname, monitor='loss', save_best_only=True)\n", 87 | " ]\n", 88 | " self.model.fit(\n", 89 | " x,\n", 90 | " y,\n", 91 | " epochs=epochs,\n", 92 | " batch_size=batch_size,\n", 93 | " callbacks=callbacks\n", 94 | " )\n", 95 | " self.model.save(save_fname)\n", 96 | " print('[Model] Training Completed. Model saved as %s' % save_fname)\n", 97 | " timer.stop()\n", 98 | " \n", 99 | " def train_generator(self,data_gen,val_gen,epochs, batch_size,steps_per_epoch,log_fname,save_fname):\n", 100 | " timer = Timer()\n", 101 | " timer.start()\n", 102 | " print('[Model] Training Started')\n", 103 | " print('[Model] %s epochs, %s batch size, %s batches per epoch' % (epochs, batch_size,steps_per_epoch))\n", 104 | "\n", 105 | " callbacks = [\n", 106 | " ModelCheckpoint(filepath=save_fname, monitor='val_loss', save_best_only=True),\n", 107 | " TensorBoard(log_dir=log_fname, histogram_freq=0,write_graph=True, write_images=True)\n", 108 | " ]\n", 109 | " self.model.fit_generator(\n", 110 | " data_gen,\n", 111 | " validation_data=val_gen,\n", 112 | " validation_steps=1,\n", 113 | " steps_per_epoch=steps_per_epoch,\n", 114 | " epochs=epochs,\n", 115 | " callbacks=callbacks,\n", 116 | "# use_multiprocessing = True,\n", 117 | " workers=1\n", 118 | " )\n", 119 | " \n", 120 | " print('[Model] Training Completed. Model saved as %s' % save_fname)\n", 121 | " timer.stop()\n", 122 | " \n", 123 | " def predict_point_by_point(self, data):\n", 124 | " #Predict each timestep given the last sequence of true data, in effect only predicting 1 step ahead each time\n", 125 | " print('[Model] Predicting Point-by-Point...')\n", 126 | " predicted = self.model.predict(data)\n", 127 | "# print('before the predicted size is', predicted.shape )\n", 128 | " predicted = np.reshape(predicted, (predicted.size,))\n", 129 | " return predicted\n", 130 | " \n", 131 | " def evaluate(self, x_test, y_test):\n", 132 | " score = self.model.evaluate(x=x_test, y=y_test)\n", 133 | " return score\n", 134 | " \n", 135 | " def predict_sequences_multiple(self, data, window_size, prediction_len):\n", 136 | " #Predict a sequence of prediction_len steps, then shift the prediction window forward by prediction_len steps\n", 137 | " print('[Model] Predicting Sequences Multiple...')\n", 138 | " prediction_seqs = []\n", 139 | "# print('length of test data',len(data), 'prediction length:', prediction_len)\n", 140 | " for i in range(int(len(data)/prediction_len)):\n", 141 | " curr_frame = data[i*prediction_len]\n", 142 | " \n", 143 | " predicted = []\n", 144 | " for j in range(prediction_len):\n", 145 | "# print(self.model.predict(curr_frame[newaxis,:,:]))\n", 146 | "# print(self.model.predict(curr_frame[newaxis,:,:])[0,0])\n", 147 | " predicted.append(self.model.predict(curr_frame[newaxis,:,:])[0,0])\n", 148 | " \n", 149 | " curr_frame = curr_frame[1:]\n", 150 | " curr_frame = np.insert(curr_frame, [window_size-2], predicted[-1], axis=0)\n", 151 | " prediction_seqs.append(predicted)\n", 152 | " return prediction_seqs\n", 153 | " \n", 154 | " def predict_sequence_full(self, data, window_size):\n", 155 | " #Shift the window by 1 new prediction each time, re-run predictions on new window\n", 156 | " print('[Model] Predicting Sequences Full...')\n", 157 | " curr_frame = data[0]\n", 158 | " predicted = []\n", 159 | " for i in range(len(data)):\n", 160 | " 
predicted.append(self.model.predict(curr_frame[newaxis,:,:])[0,0])\n", 161 | " curr_frame = curr_frame[1:]\n", 162 | " curr_frame = np.insert(curr_frame, [window_size-2], predicted[-1], axis=0)\n", 163 | " return predicted" 164 | ] 165 | } 166 | ], 167 | "metadata": { 168 | "kernelspec": { 169 | "display_name": "EMD_LSTM", 170 | "language": "python", 171 | "name": "emd_lstm" 172 | }, 173 | "language_info": { 174 | "codemirror_mode": { 175 | "name": "ipython", 176 | "version": 3 177 | }, 178 | "file_extension": ".py", 179 | "mimetype": "text/x-python", 180 | "name": "python", 181 | "nbconvert_exporter": "python", 182 | "pygments_lexer": "ipython3", 183 | "version": "3.7.3" 184 | } 185 | }, 186 | "nbformat": 4, 187 | "nbformat_minor": 2 188 | } 189 | -------------------------------------------------------------------------------- /data_generator.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "Using TensorFlow backend.\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "import numpy as np\n", 18 | "from keras.utils import Sequence" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "class DataLoaderSequence(Sequence):\n", 28 | "\n", 29 | " def __init__(self, x_set, y_set, batch_size):\n", 30 | " self.x, self.y = x_set, y_set\n", 31 | " self.batch_size = batch_size\n", 32 | "\n", 33 | " def __len__(self):\n", 34 | " return int(np.ceil(len(self.x) / float(self.batch_size)))\n", 35 | "\n", 36 | " def __getitem__(self, idx):\n", 37 | " \n", 38 | " batch_x = self.x[idx * self.batch_size:(idx + 1) * self.batch_size,:,:]\n", 39 | " batch_y = self.y[idx * self.batch_size:(idx + 1) * self.batch_size,:]\n", 40 | "\n", 41 | " return np.array(batch_x), np.array(batch_y)" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [] 50 | } 51 | ], 52 | "metadata": { 53 | "kernelspec": { 54 | "display_name": "EMD_LSTM", 55 | "language": "python", 56 | "name": "emd_lstm" 57 | }, 58 | "language_info": { 59 | "codemirror_mode": { 60 | "name": "ipython", 61 | "version": 3 62 | }, 63 | "file_extension": ".py", 64 | "mimetype": "text/x-python", 65 | "name": "python", 66 | "nbconvert_exporter": "python", 67 | "pygments_lexer": "ipython3", 68 | "version": "3.7.3" 69 | } 70 | }, 71 | "nbformat": 4, 72 | "nbformat_minor": 2 73 | } 74 | -------------------------------------------------------------------------------- /data_processing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 356, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import import_ipynb\n", 10 | "\n", 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "import os\n", 14 | "from sklearn import preprocessing\n", 15 | "import json\n", 16 | "from data_generator import DataLoaderSequence\n", 17 | "import math\n", 18 | "import random\n", 19 | "from PyEMD import EMD" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 357, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "class DataLoader():\n", 29 | " \"\"\"A class for loading and transforming data for the EMD_lstm model\"\"\"\n", 30 | "\n", 31 | " def 
__init__(self, filename, split1, split2, cols, pre_len, input_timesteps,seq_len,**EMD_para):\n", 32 | " \"\"\"\n", 33 | " :param filename: the name of the csv file containing the data\n", 34 | " :param split1: the fraction at which the data is split into training and (validation, test)\n", 35 | " :param split2: the fraction at which the data is split into validation and test\n", 36 | " |-------------------------------------------|-------------|--------------|\n", 37 | " split1(0.7) split2(0.85) \n", 38 | " :param cols: the feature columns\n", 39 | " :param pre_len: the prediction length in hours (24, 48, ...)\n", 40 | " :param input_timesteps: the look-back length (e.g. 1 month or 1 year), unit: hours\n", 41 | " :param seq_len: the sum of input_timesteps and pre_len\n", 42 | " :param **EMD_para: whether to apply EMD before the LSTM; pass IMF_num=<index> to train on a single IMF\n", 43 | " \"\"\"\n", 44 | " \n", 45 | " self.dataframe = pd.read_csv(filename,sep=',')\n", 46 | " if not isinstance(self.dataframe.index, pd.DatetimeIndex):\n", 47 | " self.dataframe['Date_Time'] = pd.to_datetime(self.dataframe['Date_Time'])\n", 48 | " self.dataframe = self.dataframe.set_index('Date_Time')\n", 49 | " \n", 50 | " self.cols = cols\n", 51 | " self.split1 = split1\n", 52 | " self.split2 = split2\n", 53 | " self.len_train_windows = None\n", 54 | " self.pre_len = pre_len\n", 55 | " self.input_timesteps = input_timesteps\n", 56 | " self.seq_len = seq_len\n", 57 | " print('the input cols are:', self.cols)\n", 58 | " self.Normalization(**EMD_para)\n", 59 | " \n", 60 | " def scale_EMD(self, activate_EMD=False):\n", 61 | " '''\n", 62 | " apply a log transform to the consumption data so that extreme peaks are compressed\n", 63 | " \n", 64 | " param activate_EMD: whether to use EMD for preprocessing\n", 65 | " '''\n", 66 | " for col in self.cols:\n", 67 | " if col == 'Consumption':\n", 68 | " self.dataframe['Consumption'] = self.dataframe['Consumption'].map(math.log)\n", 69 | " print('scaling Consumption is done!')\n", 70 | " \n", 71 | " if activate_EMD==True:\n", 72 | "\n", 73 | " self.IMFs = EMD().emd(self.dataframe['Consumption'].values)\n", 74 | " print('the signal is decomposed into '+ str(self.IMFs.shape[0]) +' parts')\n", 75 | " \n", 76 | " self.df_names_IMF = {} # dict of per-IMF feature dataframes, keyed 'IMF<i>'\n", 77 | " \n", 78 | " for ind, IMF in enumerate(self.IMFs):\n", 79 | " \n", 80 | " IMF_name = 'IMF'+str(ind)+'_consumption'\n", 81 | " data={IMF_name:self.IMFs[ind]}\n", 82 | " IMF_i = pd.DataFrame(data=data)\n", 83 | " self.df_names_IMF['IMF'+str(ind)] = pd.concat([IMF_i[IMF_name], self.dataframe.get(self.cols[1:])],axis=1)\n", 84 | "\n", 85 | "\n", 86 | " def Normalization(self, **EMD_para):\n", 87 | " '''\n", 88 | " call scale_EMD(), deciding whether EMD preprocessing is used;\n", 89 | " normalize the training data and apply the same scaling to the validation and test data\n", 90 | " '''\n", 91 | " \n", 92 | " i_split1 = int(len(self.dataframe) * self.split1)\n", 93 | " i_split2 = int(len(self.dataframe) * self.split2)\n", 94 | " \n", 95 | " if len(EMD_para) ==0:\n", 96 | " self.scale_EMD()\n", 97 | " \n", 98 | " self.data_train_original = self.dataframe.get(self.cols)[:i_split1]\n", 99 | " self.data_val_original = self.dataframe.get(self.cols)[i_split1:i_split2]\n", 100 | " self.data_test_original = self.dataframe.get(self.cols)[i_split2:]\n", 101 | " \n", 102 | " else:\n", 103 | " self.scale_EMD(activate_EMD=True)\n", 104 | " IMF_number = EMD_para['IMF_num']\n", 105 | " \n", 106 | " print('processing the data of IMF'+ str(IMF_number))\n", 107 | " \n", 108 | " if IMF_number in range(self.IMFs.shape[0]):\n", 109 | " 
self.data_train_original = self.df_names_IMF['IMF'+str(IMF_number)][:i_split1]\n", 110 | " self.data_val_original = self.df_names_IMF['IMF'+str(IMF_number)][i_split1:i_split2]\n", 111 | " self.data_test_original = self.df_names_IMF['IMF'+str(IMF_number)][i_split2:]\n", 112 | " else:\n", 113 | " print(\"Oops! IMF_number is not a valid number; it must be between 0 and \"+str(self.IMFs.shape[0]-1))\n", 114 | "\n", 115 | " self.min_max_scaler = preprocessing.MinMaxScaler().fit(self.data_train_original.values)\n", 116 | "\n", 117 | " self.data_train = self.min_max_scaler.transform(self.data_train_original.values)\n", 118 | " self.data_val = self.min_max_scaler.transform(self.data_val_original.values)\n", 119 | " self.data_test = self.min_max_scaler.transform(self.data_test_original.values)\n", 120 | "\n", 121 | " self.len_train = len(self.data_train_original)\n", 122 | " self.len_val = len(self.data_val_original)\n", 123 | " self.len_test = len(self.data_test_original)\n", 124 | " \n", 125 | " def get_pre_time(self):\n", 126 | " data_windows = []\n", 127 | "\n", 128 | " for i in range((self.len_test-self.input_timesteps)//self.pre_len):\n", 129 | " data_windows.append(self.data_test_original.index[i*self.pre_len:i*self.pre_len+self.seq_len])\n", 130 | " \n", 131 | " pre_time = np.array([p[self.input_timesteps:] for p in data_windows])\n", 132 | " \n", 133 | " return pre_time\n", 134 | " \n", 135 | " def get_test_data(self):\n", 136 | " '''\n", 137 | " Create x, y test data windows\n", 138 | " Warning: batch method, not generative; make sure you have enough memory to\n", 139 | " load the data, otherwise reduce the size of the training split.\n", 140 | " '''\n", 141 | " data_windows = []\n", 142 | "\n", 143 | " for i in range((self.len_test-self.input_timesteps)//self.pre_len):\n", 144 | " data_windows.append(self.data_test[i*self.pre_len:i*self.pre_len+self.seq_len])\n", 145 | "\n", 146 | " x = np.array([p[:self.input_timesteps,:] for p in data_windows])\n", 147 | " y = np.array([p[self.input_timesteps:,0] for p in data_windows])\n", 148 | " return x,y\n", 149 | " \n", 150 | " def get_train_data(self):\n", 151 | " '''\n", 152 | " Create x, y train data windows\n", 153 | " Warning: batch method, not generative; make sure you have enough memory to\n", 154 | " load the data, otherwise use the training_batch_generator() method.\n", 155 | " '''\n", 156 | " train_x = []\n", 157 | " train_y = []\n", 158 | " for i in range(self.len_train-self.seq_len):\n", 159 | " data_window = self.data_train[i:i+self.seq_len]\n", 160 | " train_x.append(data_window[:self.input_timesteps,:])\n", 161 | " train_y.append(data_window[self.input_timesteps:,0])\n", 162 | " \n", 163 | " train_x = np.array(train_x)\n", 164 | " train_y = np.array(train_y)\n", 165 | " \n", 166 | " sfl = list(range(len(train_x)))\n", 167 | " random.shuffle(sfl)\n", 168 | " train_x = train_x[sfl]\n", 169 | " train_y = train_y[sfl]\n", 170 | " \n", 171 | " return train_x, train_y\n", 172 | " \n", 173 | " def get_val_data(self):\n", 174 | " val_x = []\n", 175 | " val_y = []\n", 176 | " for i in range(self.len_val-self.seq_len):\n", 177 | " data_window = self.data_val[i:i+self.seq_len]\n", 178 | " val_x.append(data_window[:self.input_timesteps,:])\n", 179 | " val_y.append(data_window[self.input_timesteps:,0])\n", 180 | " val_x = np.array(val_x)\n", 181 | " val_y = np.array(val_y)\n", 182 | " \n", 183 | " sfl = list(range(len(val_x)))\n", 184 | " random.shuffle(sfl)\n", 185 | " val_x = val_x[sfl]\n", 186 | " val_y = val_y[sfl]\n", 187 | " \n", 188 | " return val_x, 
val_y\n", 189 | " \n", 190 | " def training_batch_generator(self, batch_size):\n", 191 | " train_x, train_y = self.get_train_data()\n", 192 | " return DataLoaderSequence(train_x,train_y, batch_size)\n", 193 | " \n", 194 | " def val_batch_generator(self, batch_size):\n", 195 | " val_x,val_y = self.get_val_data()\n", 196 | " return DataLoaderSequence(val_x, val_y, batch_size)" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": 360, 202 | "metadata": {}, 203 | "outputs": [ 204 | { 205 | "name": "stdout", 206 | "output_type": "stream", 207 | "text": [ 208 | "the input cols are: ['Consumption', 'hour', 'holiday_indicator']\n", 209 | "scaling Consumption is done!\n", 210 | "the signal is decomposed into 11 parts\n", 211 | "processing the data of IM10\n" 212 | ] 213 | }, 214 | { 215 | "name": "stderr", 216 | "output_type": "stream", 217 | "text": [ 218 | "/anaconda3/lib/python3.7/site-packages/sklearn/preprocessing/data.py:364: RuntimeWarning: All-NaN slice encountered\n", 219 | " data_min = np.nanmin(X, axis=0)\n", 220 | "/anaconda3/lib/python3.7/site-packages/sklearn/preprocessing/data.py:365: RuntimeWarning: All-NaN slice encountered\n", 221 | " data_max = np.nanmax(X, axis=0)\n" 222 | ] 223 | } 224 | ], 225 | "source": [ 226 | "# configs = json.load(open('config.json', 'r'))\n", 227 | "# data = DataLoader(\n", 228 | "# filename=os.path.join('data', configs['data']['filename']),\n", 229 | "# split1=configs['data']['train_test_split1'],\n", 230 | "# split2=configs['data']['train_test_split2'],\n", 231 | "# cols=configs['data']['columns'],\n", 232 | "# pre_len=configs['model']['layers'][4]['neurons'],\n", 233 | "# input_timesteps=configs['model']['layers'][0]['input_timesteps'],\n", 234 | "# seq_len = configs['data']['sequence_length'],\n", 235 | "# IMF_num = 10\n", 236 | "# )" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": 361, 242 | "metadata": {}, 243 | "outputs": [], 244 | "source": [ 245 | "# data.dataframe" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": null, 251 | "metadata": {}, 252 | "outputs": [], 253 | "source": [] 254 | } 255 | ], 256 | "metadata": { 257 | "kernelspec": { 258 | "display_name": "Python 3", 259 | "language": "python", 260 | "name": "python3" 261 | }, 262 | "language_info": { 263 | "codemirror_mode": { 264 | "name": "ipython", 265 | "version": 3 266 | }, 267 | "file_extension": ".py", 268 | "mimetype": "text/x-python", 269 | "name": "python", 270 | "nbconvert_exporter": "python", 271 | "pygments_lexer": "ipython3", 272 | "version": "3.7.3" 273 | }, 274 | "toc": { 275 | "base_numbering": 1, 276 | "nav_menu": {}, 277 | "number_sections": true, 278 | "sideBar": true, 279 | "skip_h1_title": false, 280 | "title_cell": "Table of Contents", 281 | "title_sidebar": "Contents", 282 | "toc_cell": false, 283 | "toc_position": {}, 284 | "toc_section_display": true, 285 | "toc_window_display": false 286 | } 287 | }, 288 | "nbformat": 4, 289 | "nbformat_minor": 2 290 | } 291 | -------------------------------------------------------------------------------- /main.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "data": { 10 | "text/html": [ 11 | " \n", 26 | " " 27 | ] 28 | }, 29 | "metadata": {}, 30 | "output_type": "display_data" 31 | } 32 | ], 33 | "source": [ 34 | "import os\n", 35 | "import json\n", 36 | "import time\n", 37 | "import 
datetime as dt\n", 38 | "import math\n", 39 | "import import_ipynb\n", 40 | "from data_processing import DataLoader\n", 41 | "from build_model import Model\n", 42 | "import pandas as pd\n", 43 | "import numpy as np\n", 44 | "import keras\n", 45 | "import tensorflow\n", 46 | "# from plotly.offline import iplot\n", 47 | "import plotly as py\n", 48 | "import plotly.graph_objs as go\n", 49 | "py.offline.init_notebook_mode(connected=True)" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "def plot_results(predicted_data, true_data,pre_time):\n", 59 | " pre_time = pd.to_datetime(pre_time)\n", 60 | "\n", 61 | " trace1 = go.Scatter(x=pre_time,\n", 62 | " y=true_data,\n", 63 | " mode='lines',\n", 64 | " name='True',\n", 65 | " hoverinfo='name',\n", 66 | " line=dict(\n", 67 | " shape='spline'\n", 68 | " )\n", 69 | " )\n", 70 | " \n", 71 | " trace2 = go.Scatter(x=pre_time,\n", 72 | " y=predicted_data,\n", 73 | " mode='lines',\n", 74 | " name='Prediction',\n", 75 | " hoverinfo='name',\n", 76 | " line=dict(\n", 77 | " shape='spline'\n", 78 | " )\n", 79 | " )\n", 80 | " data = [trace1, trace2]\n", 81 | " layout = go.Layout(title = 'Prediction vs. True',\n", 82 | "# yaxis = dict(title = '%'),\n", 83 | " xaxis = dict(title = 'timestamp')\n", 84 | " )\n", 85 | " fig = go.Figure(data=data, layout=layout)\n", 86 | " py.offline.plot(fig)\n", 87 | " " 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "configs = json.load(open('config.json', 'r'))\n", 97 | "if not os.path.exists(configs['model']['save_dir']): os.makedirs(configs['model']['save_dir'])\n", 98 | "print('importing dataset:', configs['data']['filename'])\n", 99 | "data = DataLoader(\n", 100 | " filename=os.path.join('data', configs['data']['filename']),\n", 101 | " split1=configs['data']['train_test_split1'],\n", 102 | " split2=configs['data']['train_test_split2'],\n", 103 | " cols=configs['data']['columns'],\n", 104 | " pre_len=24,\n", 105 | " input_timesteps=configs['model']['layers'][0]['input_timesteps'],\n", 106 | " seq_len = configs['data']['sequence_length']\n", 107 | ")\n", 108 | "configs['model']['layers'][0]['input_timesteps']\n", 109 | "train_x, train_y = data.get_train_data()\n", 110 | "train_x.shape\n", 111 | "val_x,val_y = data.get_val_data()\n", 112 | "# train_x, train_y,val_x,val_y = data.get_train_val_data()\n", 113 | "# train_x.shape" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "x_test, y_test = data.get_test_data()\n", 123 | "pre_time = data.get_pre_time()\n", 124 | "\n", 125 | "steps_per_epoch = math.ceil((len(train_x) - configs['data']['sequence_length']) / \n", 126 | " configs['training']['batch_size'])\n", 127 | "save_dir = configs['model']['save_dir']\n", 128 | "# pre_time\n", 129 | "y_test.shape" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "score = {}\n", 139 | "models_dir = []\n", 140 | "for i in range(1):\n", 141 | " print('Starting training model %s'%(i+1))\n", 142 | " model = Model()\n", 143 | " model.build_model(configs)\n", 144 | " \n", 145 | " time_now = dt.datetime.now().strftime('%Y%m%d-%H%M%S')\n", 146 | " save_dir = configs['model']['save_dir']\n", 147 | " save_dir = os.path.join(save_dir, '%s-e%s'%(time_now,str(i)))\n", 148 | " 
models_dir.append(save_dir)\n", 149 | " os.makedirs(save_dir)\n", 150 | " \n", 151 | " save_fname = os.path.join(save_dir, 'e%s.h5'%(str(i)))\n", 152 | " log_fname = save_dir\n", 153 | " \n", 154 | " model.model_to_json(save_dir)\n", 155 | " \n", 156 | " fname = os.path.join(save_dir, 'model_information.json')\n", 157 | " \n", 158 | " model.train_generator(\n", 159 | " data_gen=data.training_batch_generator(\n", 160 | " batch_size=configs['training']['batch_size']\n", 161 | " ),\n", 162 | " val_gen=data.val_batch_generator(\n", 163 | " batch_size=configs['training']['batch_size']\n", 164 | " ),\n", 165 | " epochs=configs['training']['epochs'],\n", 166 | " batch_size=configs['training']['batch_size'],\n", 167 | " steps_per_epoch=steps_per_epoch,\n", 168 | " log_fname=log_fname,\n", 169 | " save_fname=save_fname\n", 170 | " )\n", 171 | " \n", 172 | " score_ = model.evaluate(val_x, val_y)\n", 173 | " print(\"loss:\", score_)\n", 174 | " score[save_fname] = score_\n", 175 | " \n", 176 | " \n", 177 | " with open(fname, \"w\") as to:\n", 178 | " with open(\"./config.json\",'r') as original:\n", 179 | " m = json.loads(original.read())\n", 180 | " m['loss']=score_\n", 181 | " json_str=json.dumps(m)\n", 182 | " to.write(json_str)\n", 183 | " \n", 184 | " print('[Model] Store model_information at %s' % fname)\n", 185 | " \n", 186 | "filename_best = min(score,key=score.get)\n", 187 | "print(filename_best)\n", 188 | "model.load_model(filename_best)\n", 189 | "predictions = model.predict_point_by_point(x_test)" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": {}, 196 | "outputs": [], 197 | "source": [ 198 | "plot_results(predicted_data=predictions, \n", 199 | " true_data=np.reshape(y_test, (y_test.size,)),\n", 200 | " pre_time=np.reshape(pre_time,(pre_time.size,)))" 201 | ] 202 | } 203 | ], 204 | "metadata": { 205 | "kernelspec": { 206 | "display_name": "EMD_LSTM", 207 | "language": "python", 208 | "name": "emd_lstm" 209 | }, 210 | "language_info": { 211 | "codemirror_mode": { 212 | "name": "ipython", 213 | "version": 3 214 | }, 215 | "file_extension": ".py", 216 | "mimetype": "text/x-python", 217 | "name": "python", 218 | "nbconvert_exporter": "python", 219 | "pygments_lexer": "ipython3", 220 | "version": "3.7.3" 221 | } 222 | }, 223 | "nbformat": 4, 224 | "nbformat_minor": 2 225 | } 226 | -------------------------------------------------------------------------------- /utils.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import datetime as dt\n", 10 | "\n", 11 | "class Timer():\n", 12 | " def __init__(self):\n", 13 | " self.start_dt = None\n", 14 | " \n", 15 | " def start(self):\n", 16 | " self.start_dt = dt.datetime.now()\n", 17 | " \n", 18 | " def stop(self):\n", 19 | " end_dt = dt.datetime.now()\n", 20 | " print('Time taken: %s' % (end_dt - self.start_dt))" 21 | ] 22 | } 23 | ], 24 | "metadata": { 25 | "kernelspec": { 26 | "display_name": "EMD_LSTM", 27 | "language": "python", 28 | "name": "emd_lstm" 29 | }, 30 | "language_info": { 31 | "codemirror_mode": { 32 | "name": "ipython", 33 | "version": 3 34 | }, 35 | "file_extension": ".py", 36 | "mimetype": "text/x-python", 37 | "name": "python", 38 | "nbconvert_exporter": "python", 39 | "pygments_lexer": "ipython3", 40 | "version": "3.7.3" 41 | } 42 | }, 43 | "nbformat": 4, 44 | "nbformat_minor": 2 45 | } 46 | 
--------------------------------------------------------------------------------
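
The notebooks above read a config.json that is not included in this dump. A minimal configuration consistent with the keys they access (configs['data'][...], configs['model'][...], configs['training'][...]) might look like the sketch below; the file name, layer sizes, split fractions, and training settings are illustrative assumptions, not the authors' actual values.

# Hypothetical config.json, reconstructed from the keys accessed in
# data_processing.ipynb, build_model.ipynb and main.ipynb.
# All concrete values are illustrative assumptions.
import json

example_config = {
    "data": {
        "filename": "consumption.csv",  # assumed name of the csv under ./data/
        "columns": ["Consumption", "hour", "holiday_indicator"],
        "train_test_split1": 0.7,
        "train_test_split2": 0.85,
        "sequence_length": 744          # input_timesteps + pre_len, e.g. 720 + 24
    },
    "model": {
        "loss": "mse",
        "optimizer": "adam",
        "save_dir": "saved_models",
        "layers": [
            {"type": "lstm", "neurons": 100, "input_timesteps": 720,
             "input_dim": 3, "return_seq": True},
            {"type": "dropout", "rate": 0.2},
            {"type": "lstm", "neurons": 100, "return_seq": False},
            {"type": "dropout", "rate": 0.2},
            # main.ipynb reads layers[4]['neurons'] as pre_len, so the final
            # dense layer has one unit per predicted hour
            {"type": "dense", "neurons": 24, "activation": "linear"}
        ]
    },
    "training": {"epochs": 10, "batch_size": 32}
}

with open("config.json", "w") as f:
    json.dump(example_config, f, indent=4)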
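
data_processing.ipynb decomposes the log-scaled consumption series with PyEMD and then trains one model per intrinsic mode function (IMF), selected through the IMF_num argument. A minimal standalone sketch of that decomposition step, run on a synthetic signal rather than the real consumption data:

# Minimal sketch of the EMD step used in DataLoader.scale_EMD, on a
# synthetic signal instead of the real consumption series.
import numpy as np
from PyEMD import EMD

t = np.linspace(0, 1, 1000)
signal = np.sin(2 * np.pi * 5 * t) + 0.5 * np.sin(2 * np.pi * 40 * t) + t

imfs = EMD().emd(signal)            # shape: (n_components, len(signal))
print('decomposed into %s parts' % imfs.shape[0])

# The components (the last row is the residue) sum back to the original
# signal, which is what makes per-IMF forecasting meaningful: forecasts of
# the individual components can be added up again.
print(np.allclose(imfs.sum(axis=0), signal))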
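
The split1/split2 fractions and the windowing in DataLoader are easiest to check with a small worked example; the row count and window sizes below are illustrative.

# Worked example of the chronological split and windowing performed by
# DataLoader (illustrative numbers).
n_rows = 10000
split1, split2 = 0.7, 0.85

i_split1 = int(n_rows * split1)     # 7000: rows [0, 7000)    -> training
i_split2 = int(n_rows * split2)     # 8500: rows [7000, 8500) -> validation,
                                    #       rows [8500, 10000) -> test

# Each window is seq_len = input_timesteps + pre_len rows long: the first
# input_timesteps rows (all features) become x, the final pre_len rows of
# column 0 (Consumption) become y.
input_timesteps, pre_len = 720, 24
seq_len = input_timesteps + pre_len

n_train_windows = i_split1 - seq_len                               # as in get_train_data()
n_test_windows = (n_rows - i_split2 - input_timesteps) // pre_len  # as in get_test_data()
print(n_train_windows, n_test_windows)                             # 6256 32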