├── GA.ipynb └── README.md /GA.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | "from sklearn.metrics import mean_squared_error\n", 12 | "from sklearn.model_selection import train_test_split as split\n", 13 | "\n", 14 | "from keras.layers import LSTM, Input, Dense\n", 15 | "from keras.models import Model\n", 16 | "\n", 17 | "from deap import base, creator, tools, algorithms\n", 18 | "from scipy.stats import bernoulli\n", 19 | "from bitstring import BitArray\n", 20 | "\n", 21 | "np.random.seed(1120)" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 3, 27 | "metadata": {}, 28 | "outputs": [ 29 | { 30 | "name": "stdout", 31 | "output_type": "stream", 32 | "text": [ 33 | "[[0.045]\n", 34 | " [0.085]\n", 35 | " [0.02 ]\n", 36 | " [0.06 ]\n", 37 | " [0.045]\n", 38 | " [0.035]\n", 39 | " [0.005]\n", 40 | " [0. ]\n", 41 | " [0. ]\n", 42 | " [0.01 ]]\n" 43 | ] 44 | } 45 | ], 46 | "source": [ 47 | "data = pd.read_csv('./all/train.csv')\n", 48 | "data = np.reshape(np.array(data['wp1']),(len(data['wp1']),1))\n", 49 | "print(data[:10])\n", 50 | "train_data = data[0:17257]\n", 51 | "test_data = data[17257:]" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 4, 57 | "metadata": {}, 58 | "outputs": [ 59 | { 60 | "name": "stdout", 61 | "output_type": "stream", 62 | "text": [ 63 | "[[[0.045]\n", 64 | " [0.085]\n", 65 | " [0.02 ]]\n", 66 | "\n", 67 | " [[0.085]\n", 68 | " [0.02 ]\n", 69 | " [0.06 ]]\n", 70 | "\n", 71 | " [[0.02 ]\n", 72 | " [0.06 ]\n", 73 | " [0.045]]\n", 74 | "\n", 75 | " ...\n", 76 | "\n", 77 | " [[0.09 ]\n", 78 | " [0.11 ]\n", 79 | " [0.11 ]]\n", 80 | "\n", 81 | " [[0.11 ]\n", 82 | " [0.11 ]\n", 83 | " [0.095]]\n", 84 | "\n", 85 | " [[0.11 ]\n", 86 | " [0.095]\n", 87 | " [0.14 ]]]\n", 88 | "[[0.06 ]\n", 89 | " [0.045]\n", 90 | " [0.035]\n", 91 | " ...\n", 92 | " [0.095]\n", 93 | " [0.14 ]\n", 94 | " [0.15 ]]\n" 95 | ] 96 | } 97 | ], 98 | "source": [ 99 | "def prepare_dataset(data, window_size):\n", 100 | " X, Y = np.empty((0,window_size)), np.empty((0))\n", 101 | " for i in range(len(data)-window_size-1):\n", 102 | " X = np.vstack([X,data[i:(i + window_size),0]])\n", 103 | " Y = np.append(Y,data[i + window_size,0]) \n", 104 | " X = np.reshape(X,(len(X),window_size,1))\n", 105 | " Y = np.reshape(Y,(len(Y),1))\n", 106 | " return X, Y\n", 107 | "\n", 108 | "X_train,y_train = prepare_dataset(train_data,3)\n", 109 | "print(X_train)\n", 110 | "print(y_train)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 7, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "def train_evaluate(ga_individual_solution): \n", 120 | " \n", 121 | " # Decode the Genetic Algorithm solution to get the window size and number of bits\n", 122 | " window_size_bits = BitArray(ga_individual_solution[0:6])\n", 123 | " num_units_bits = BitArray(ga_individual_solution[6:]) \n", 124 | " window_size = window_size_bits.uint\n", 125 | " num_of_units = num_units_bits.uint\n", 126 | " print('\\nWindow Size: ', window_size, ', Num of Units: ', num_of_units)\n", 127 | " \n", 128 | " # Return fitness score of 100 if window_size or num_unit is zero\n", 129 | " if window_size == 0 or num_of_units == 0:\n", 130 | " return 100, \n", 131 | " \n", 132 | " # Segment the train_data based on new window_size; \n", 133 | " # 
Split the dataset into train set(80) and validation set(20)\n", 134 | " X_data,Y_data = prepare_dataset(train_data,window_size)\n", 135 | " X_train, X_val, y_train, y_val = split(X_data, Y_data, test_size = 0.20, random_state = 1120)\n", 136 | " \n", 137 | " # Design an LSTM model to train on training data and predict on validation data\n", 138 | " input_ph = Input(shape=(window_size,1))\n", 139 | " x = LSTM(num_of_units, input_shape=(window_size,1))(input_ph)\n", 140 | " predicted_values = Dense(1, activation='tanh')(x)\n", 141 | " model = Model(inputs=input_ph, outputs=predicted_values)\n", 142 | " model.compile(optimizer='adam',loss='mean_squared_error')\n", 143 | " model.fit(X_train, y_train, epochs=5, batch_size=20,shuffle=True)\n", 144 | " y_pred = model.predict(X_val)\n", 145 | " \n", 146 | " # Calculate the RMSE score as fitness score for GA\n", 147 | " rmse = np.sqrt(mean_squared_error(y_val, y_pred))\n", 148 | " print('Validation RMSE: ', rmse,'\\n')\n", 149 | " \n", 150 | " return rmse," 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 8, 156 | "metadata": {}, 157 | "outputs": [ 158 | { 159 | "name": "stderr", 160 | "output_type": "stream", 161 | "text": [ 162 | "/home/sunandini/.local/lib/python3.5/site-packages/deap/creator.py:141: RuntimeWarning: A class named 'FitnessMax' has already been created and it will be overwritten. Consider deleting previous creation of that class or rename it.\n", 163 | " RuntimeWarning)\n", 164 | "/home/sunandini/.local/lib/python3.5/site-packages/deap/creator.py:141: RuntimeWarning: A class named 'Individual' has already been created and it will be overwritten. Consider deleting previous creation of that class or rename it.\n", 165 | " RuntimeWarning)\n" 166 | ] 167 | }, 168 | { 169 | "name": "stdout", 170 | "output_type": "stream", 171 | "text": [ 172 | "\n", 173 | "Window Size: 34 , Num of Units: 9\n", 174 | "Epoch 1/5\n", 175 | "13777/13777 [==============================] - 13s 934us/step - loss: 0.0391\n", 176 | "Epoch 2/5\n", 177 | "13777/13777 [==============================] - 11s 763us/step - loss: 0.0114\n", 178 | "Epoch 3/5\n", 179 | "13777/13777 [==============================] - 11s 770us/step - loss: 0.0080\n", 180 | "Epoch 4/5\n", 181 | "13777/13777 [==============================] - 10s 746us/step - loss: 0.0065\n", 182 | "Epoch 5/5\n", 183 | "13777/13777 [==============================] - 10s 749us/step - loss: 0.0059\n", 184 | "Validation RMSE: 0.07639846236008416 \n", 185 | "\n", 186 | "\n", 187 | "Window Size: 28 , Num of Units: 2\n", 188 | "Epoch 1/5\n", 189 | "13782/13782 [==============================] - 9s 650us/step - loss: 0.0234\n", 190 | "Epoch 2/5\n", 191 | "13782/13782 [==============================] - 9s 628us/step - loss: 0.0108\n", 192 | "Epoch 3/5\n", 193 | "13782/13782 [==============================] - 9s 629us/step - loss: 0.0078\n", 194 | "Epoch 4/5\n", 195 | "13782/13782 [==============================] - 9s 678us/step - loss: 0.0065\n", 196 | "Epoch 5/5\n", 197 | "13782/13782 [==============================] - 9s 688us/step - loss: 0.0060\n", 198 | "Validation RMSE: 0.0789012736107829 \n", 199 | "\n", 200 | "\n", 201 | "Window Size: 14 , Num of Units: 8\n", 202 | "Epoch 1/5\n", 203 | "13793/13793 [==============================] - 5s 394us/step - loss: 0.0171\n", 204 | "Epoch 2/5\n", 205 | "13793/13793 [==============================] - 5s 370us/step - loss: 0.0085\n", 206 | "Epoch 3/5\n", 207 | "13793/13793 [==============================] - 5s 363us/step - loss: 0.0066\n", 
208 | "Epoch 4/5\n", 209 | "13793/13793 [==============================] - 5s 356us/step - loss: 0.0061\n", 210 | "Epoch 5/5\n", 211 | "13793/13793 [==============================] - 5s 361us/step - loss: 0.0060\n", 212 | "Validation RMSE: 0.07789992243052019 \n", 213 | "\n", 214 | "\n", 215 | "Window Size: 44 , Num of Units: 3\n", 216 | "Epoch 1/5\n", 217 | "13769/13769 [==============================] - 13s 980us/step - loss: 0.1397\n", 218 | "Epoch 2/5\n", 219 | "13769/13769 [==============================] - 13s 934us/step - loss: 0.0388\n", 220 | "Epoch 3/5\n", 221 | "13769/13769 [==============================] - 13s 932us/step - loss: 0.0168\n", 222 | "Epoch 4/5\n", 223 | "13769/13769 [==============================] - 14s 1ms/step - loss: 0.0123\n", 224 | "Epoch 5/5\n", 225 | "13769/13769 [==============================] - 14s 1ms/step - loss: 0.0098\n", 226 | "Validation RMSE: 0.09205666537789764 \n", 227 | "\n", 228 | "\n", 229 | "Window Size: 28 , Num of Units: 2\n", 230 | "Epoch 1/5\n", 231 | "13782/13782 [==============================] - 10s 700us/step - loss: 0.0487\n", 232 | "Epoch 2/5\n", 233 | "13782/13782 [==============================] - 9s 662us/step - loss: 0.0198\n", 234 | "Epoch 3/5\n", 235 | "13782/13782 [==============================] - 9s 665us/step - loss: 0.0134\n", 236 | "Epoch 4/5\n", 237 | "13782/13782 [==============================] - 9s 672us/step - loss: 0.0106\n", 238 | "Epoch 5/5\n", 239 | "13782/13782 [==============================] - 9s 663us/step - loss: 0.0089\n", 240 | "Validation RMSE: 0.0903745636888513 \n", 241 | "\n", 242 | "\n", 243 | "Window Size: 32 , Num of Units: 3\n", 244 | "Epoch 1/5\n", 245 | "13779/13779 [==============================] - 11s 780us/step - loss: 0.0481\n", 246 | "Epoch 2/5\n", 247 | "13779/13779 [==============================] - 10s 734us/step - loss: 0.0150\n", 248 | "Epoch 3/5\n", 249 | "13779/13779 [==============================] - 10s 736us/step - loss: 0.0100\n", 250 | "Epoch 4/5\n", 251 | "13779/13779 [==============================] - 10s 737us/step - loss: 0.0075\n", 252 | "Epoch 5/5\n", 253 | "13779/13779 [==============================] - 10s 745us/step - loss: 0.0064\n", 254 | "Validation RMSE: 0.07735020071476466 \n", 255 | "\n", 256 | "\n", 257 | "Window Size: 3 , Num of Units: 9\n", 258 | "Epoch 1/5\n", 259 | "13802/13802 [==============================] - 3s 199us/step - loss: 0.0260\n", 260 | "Epoch 2/5\n", 261 | "13802/13802 [==============================] - 2s 156us/step - loss: 0.0112\n", 262 | "Epoch 3/5\n", 263 | "13802/13802 [==============================] - 2s 151us/step - loss: 0.0092\n", 264 | "Epoch 4/5\n", 265 | "13802/13802 [==============================] - 2s 155us/step - loss: 0.0071\n", 266 | "Epoch 5/5\n", 267 | "13802/13802 [==============================] - 2s 166us/step - loss: 0.0062\n", 268 | "Validation RMSE: 0.07481095347319817 \n", 269 | "\n", 270 | "\n", 271 | "Window Size: 51 , Num of Units: 11\n", 272 | "Epoch 1/5\n", 273 | "13764/13764 [==============================] - 16s 1ms/step - loss: 0.0222\n", 274 | "Epoch 2/5\n", 275 | "13764/13764 [==============================] - 17s 1ms/step - loss: 0.0081\n", 276 | "Epoch 3/5\n", 277 | "13764/13764 [==============================] - 16s 1ms/step - loss: 0.0063\n", 278 | "Epoch 4/5\n", 279 | "13764/13764 [==============================] - 15s 1ms/step - loss: 0.0059\n", 280 | "Epoch 5/5\n", 281 | "13764/13764 [==============================] - 15s 1ms/step - loss: 0.0058\n", 282 | "Validation RMSE: 0.08021812863991828 
\n", 283 | "\n", 284 | "\n", 285 | "Window Size: 44 , Num of Units: 3\n", 286 | "Epoch 1/5\n", 287 | "13769/13769 [==============================] - 15s 1ms/step - loss: 0.0678\n", 288 | "Epoch 2/5\n", 289 | "13769/13769 [==============================] - 14s 1ms/step - loss: 0.0171\n", 290 | "Epoch 3/5\n", 291 | "13769/13769 [==============================] - 14s 1ms/step - loss: 0.0116\n", 292 | "Epoch 4/5\n", 293 | "13769/13769 [==============================] - 14s 1ms/step - loss: 0.0093\n", 294 | "Epoch 5/5\n", 295 | "13769/13769 [==============================] - 14s 1ms/step - loss: 0.0081\n", 296 | "Validation RMSE: 0.08573251545653714 \n", 297 | "\n", 298 | "\n", 299 | "Window Size: 44 , Num of Units: 3\n", 300 | "Epoch 1/5\n", 301 | "13769/13769 [==============================] - 14s 1ms/step - loss: 0.0522\n", 302 | "Epoch 2/5\n", 303 | "13769/13769 [==============================] - 14s 984us/step - loss: 0.0140\n", 304 | "Epoch 3/5\n", 305 | "13769/13769 [==============================] - 14s 997us/step - loss: 0.0088\n", 306 | "Epoch 4/5\n", 307 | "13769/13769 [==============================] - 14s 991us/step - loss: 0.0070\n", 308 | "Epoch 5/5\n", 309 | "13769/13769 [==============================] - 14s 1ms/step - loss: 0.0063\n", 310 | "Validation RMSE: 0.07729665050321902 \n", 311 | "\n", 312 | "\n", 313 | "Window Size: 44 , Num of Units: 3\n", 314 | "Epoch 1/5\n", 315 | "13769/13769 [==============================] - 15s 1ms/step - loss: 0.0616\n", 316 | "Epoch 2/5\n", 317 | "13769/13769 [==============================] - 14s 997us/step - loss: 0.0149\n", 318 | "Epoch 3/5\n", 319 | "13769/13769 [==============================] - 14s 989us/step - loss: 0.0095\n", 320 | "Epoch 4/5\n", 321 | "13769/13769 [==============================] - 14s 997us/step - loss: 0.0077\n", 322 | "Epoch 5/5\n", 323 | "13769/13769 [==============================] - 14s 996us/step - loss: 0.0068\n", 324 | "Validation RMSE: 0.07885058974021525 \n", 325 | "\n", 326 | "\n", 327 | "Window Size: 51 , Num of Units: 11\n", 328 | "Epoch 1/5\n", 329 | "13764/13764 [==============================] - 19s 1ms/step - loss: 0.0201\n", 330 | "Epoch 2/5\n", 331 | "13764/13764 [==============================] - 16s 1ms/step - loss: 0.0076\n", 332 | "Epoch 3/5\n", 333 | "13764/13764 [==============================] - 15s 1ms/step - loss: 0.0061\n", 334 | "Epoch 4/5\n", 335 | "13764/13764 [==============================] - 15s 1ms/step - loss: 0.0059\n", 336 | "Epoch 5/5\n", 337 | "13764/13764 [==============================] - 16s 1ms/step - loss: 0.0058\n", 338 | "Validation RMSE: 0.08014650106123256 \n", 339 | "\n", 340 | "\n", 341 | "Window Size: 47 , Num of Units: 9\n", 342 | "Epoch 1/5\n", 343 | "13767/13767 [==============================] - 15s 1ms/step - loss: 0.0236\n", 344 | "Epoch 2/5\n", 345 | "13767/13767 [==============================] - 14s 1ms/step - loss: 0.0081\n", 346 | "Epoch 3/5\n", 347 | "13767/13767 [==============================] - 14s 1ms/step - loss: 0.0064\n", 348 | "Epoch 4/5\n", 349 | "13767/13767 [==============================] - 14s 1ms/step - loss: 0.0061\n", 350 | "Epoch 5/5\n", 351 | "13767/13767 [==============================] - 14s 1ms/step - loss: 0.0059\n", 352 | "Validation RMSE: 0.07693006095112899 \n", 353 | "\n" 354 | ] 355 | } 356 | ], 357 | "source": [ 358 | "population_size = 4\n", 359 | "num_generations = 4\n", 360 | "gene_length = 10\n", 361 | "\n", 362 | "#Implementation of Genetic Algorithm using DEAP python library.\n", 363 | "\n", 364 | "#Since we try to 
minimise the validation RMSE, a fitness weight of -1.0 is used, i.e. DEAP maximises the negated root mean squared error.\n", 365 | "creator.create('FitnessMax', base.Fitness, weights = (-1.0,))\n", 366 | "creator.create('Individual', list , fitness = creator.FitnessMax)\n", 367 | "\n", 368 | "#Initialize each gene as a Bernoulli random variable\n", 369 | "toolbox = base.Toolbox()\n", 370 | "toolbox.register('binary', bernoulli.rvs, 0.5)\n", 371 | "toolbox.register('individual', tools.initRepeat, creator.Individual, toolbox.binary, n = gene_length)\n", 372 | "toolbox.register('population', tools.initRepeat, list , toolbox.individual)\n", 373 | "\n", 374 | "#Ordered cross-over used for mating\n", 375 | "toolbox.register('mate', tools.cxOrdered)\n", 376 | "#Shuffle mutation to reorder the chromosomes\n", 377 | "toolbox.register('mutate', tools.mutShuffleIndexes, indpb = 0.6)\n", 378 | "#Use the roulette wheel selection algorithm\n", 379 | "toolbox.register('select', tools.selRoulette)\n", 380 | "#Training function used for evaluating the fitness of an individual solution\n", 381 | "toolbox.register('evaluate', train_evaluate)\n", 382 | "\n", 383 | "population = toolbox.population(n = population_size)\n", 384 | "r = algorithms.eaSimple(population, toolbox, cxpb = 0.4, mutpb = 0.1, ngen = num_generations, verbose = False)" 385 | ] 386 | }, 387 | { 388 | "cell_type": "code", 389 | "execution_count": 11, 390 | "metadata": {}, 391 | "outputs": [ 392 | { 393 | "name": "stdout", 394 | "output_type": "stream", 395 | "text": [ 396 | "\n", 397 | " Best Window Size: 47 , Best Num of Units: 9\n" 398 | ] 399 | } 400 | ], 401 | "source": [ 402 | "optimal_individuals_data = tools.selBest(population,k = 1) #select the single best solution\n", 403 | "optimal_window_size = None\n", 404 | "optimal_num_units = None\n", 405 | "\n", 406 | "for bi in optimal_individuals_data:\n", 407 | " window_size_bits = BitArray(bi[0:6])\n", 408 | " num_units_bits = BitArray(bi[6:]) \n", 409 | " optimal_window_size = window_size_bits.uint\n", 410 | " optimal_num_units = num_units_bits.uint\n", 411 | " print('\\n Best Window Size: ', optimal_window_size, ', Best Num of Units: ', optimal_num_units)\n", 412 | "\n", 413 | "#print(optimal_window_size, optimal_num_units)" 414 | ] 415 | }, 416 | { 417 | "cell_type": "code", 418 | "execution_count": 12, 419 | "metadata": {}, 420 | "outputs": [ 421 | { 422 | "name": "stdout", 423 | "output_type": "stream", 424 | "text": [ 425 | "Epoch 1/5\n", 426 | "17209/17209 [==============================] - 21s 1ms/step - loss: 0.0190\n", 427 | "Epoch 2/5\n", 428 | "17209/17209 [==============================] - 18s 1ms/step - loss: 0.0078\n", 429 | "Epoch 3/5\n", 430 | "17209/17209 [==============================] - 18s 1ms/step - loss: 0.0062\n", 431 | "Epoch 4/5\n", 432 | "17209/17209 [==============================] - 19s 1ms/step - loss: 0.0060\n", 433 | "Epoch 5/5\n", 434 | "17209/17209 [==============================] - 19s 1ms/step - loss: 0.0060\n", 435 | "Test RMSE: 0.09710418381525192\n" 436 | ] 437 | } 438 | ], 439 | "source": [ 440 | "#Retrain the model with the optimal number of LSTM units and the optimal window size, then evaluate on the test set\n", 441 | "X_train,y_train = prepare_dataset(train_data,optimal_window_size)\n", 442 | "X_test, y_test = prepare_dataset(test_data,optimal_window_size)\n", 443 | "\n", 444 | "inputs = Input(shape=(optimal_window_size,1))\n", 445 | "x = LSTM(optimal_num_units, input_shape=(optimal_window_size,1))(inputs)\n", 446 | "predictions = Dense(1, activation='tanh')(x)\n", 447 | "model = Model(inputs = inputs, outputs = predictions)\n", 448 | "model.compile(optimizer='adam',loss='mean_squared_error')\n", 449 | "model.fit(X_train, y_train, epochs=5, batch_size=20,shuffle=True)\n", 450 | "y_pred = model.predict(X_test)\n", 451 | "\n", 452 | "rmse = np.sqrt(mean_squared_error(y_test, y_pred))\n", 453 | "print('Test RMSE: ', rmse)" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": null, 459 | "metadata": {}, 460 | "outputs": [], 461 | "source": [] 462 | } 463 | ], 464 | "metadata": { 465 | "kernelspec": { 466 | "display_name": "Python 3", 467 | "language": "python", 468 | "name": "python3" 469 | }, 470 | "language_info": { 471 | "codemirror_mode": { 472 | "name": "ipython", 473 | "version": 3 474 | }, 475 | "file_extension": ".py", 476 | "mimetype": "text/x-python", 477 | "name": "python", 478 | "nbconvert_exporter": "python", 479 | "pygments_lexer": "ipython3", 480 | "version": "3.5.2" 481 | } 482 | }, 483 | "nbformat": 4, 484 | "nbformat_minor": 2 485 | } 486 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Optimizing LSTM parameters using Genetic Algorithms 2 | 3 | ### Introduction:- 4 | The aim of this project is to find the best configuration of parameters for a recurrent neural network. A genetic algorithm is used to fine-tune the parameters of an RNN trained for wind power forecasting: the best number of LSTM units and the optimal window size to be used for prediction. The Keras library is used for deep learning and the DEAP library for the genetic algorithm. 5 | 6 | The following paragraphs explain how genetic algorithms are applied in this project. 7 | 8 | ### Theory behind Genetic Algorithms:- 9 | A genetic algorithm is an adaptive heuristic search method based on the ideas of natural evolution. It balances exploration and exploitation of the search space, using information from past generations to produce progressively better solutions. The technique is inspired by Charles Darwin's principle of "survival of the fittest": in nature, weaker individuals tend to be dominated by stronger ones. 10 | 11 | The solution representation used for the parameters in this project is as follows:- 12 | A solution is a 10-bit string in which the first six bits encode the window size and the last four bits encode the number of LSTM units. 13 |
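For illustration, here is a minimal decoding sketch (an example bit pattern, not output from an actual GA run) mirroring the `BitArray` decoding used in `GA.ipynb`:

```python
# Decode a 10-bit chromosome into (window size, number of LSTM units),
# exactly as train_evaluate does in GA.ipynb.
from bitstring import BitArray

chromosome = [1, 0, 1, 1, 1, 1, 1, 0, 0, 1]    # a DEAP individual is a list of 0/1 genes

window_size = BitArray(chromosome[0:6]).uint   # first 6 bits -> window size (0-63)
num_of_units = BitArray(chromosome[6:]).uint   # last 4 bits  -> LSTM units (0-15)

print(window_size, num_of_units)               # prints: 47 9
```

Individuals whose decoded window size or unit count is zero are assigned a large penalty fitness (100) in the notebook, so such solutions are effectively discarded.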
14 | The basic operators that constitute a Genetic Algorithm are as follows (a condensed DEAP sketch of how they are wired together follows the three subsections):- 15 | 16 | #### 1. Selection Operator:- 17 | The selection process gives preference to fitter individuals, where the fitness of each individual solution is calculated using a fitness function. In this implementation, roulette wheel selection has been used: a wheel is divided into n pies for n individuals, and each individual gets a portion of the circle proportional to its fitness value. 18 | 19 | #### 2. Crossover Operator:- 20 | Two individuals are chosen from the population and a crossover site is chosen. The genes on either side of the crossover site are exchanged between the two parents to produce new solutions, which are passed on to the next generation. In this implementation, ordered crossover has been used. Here, we select two random crossover points and copy the contents of the first parent between them into the offspring. Then, starting from the second crossover point, we copy the unused values of the second parent into the offspring. 21 | 22 | #### 3. Mutation Operator:- 23 | The purpose of mutation is to maintain diversity in the population and to inhibit premature convergence; it can be seen as a random walk through the search space. In this implementation, shuffle mutation has been used, where the attributes of the solution are shuffled randomly to produce a new solution. 24 |
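The sketch below condenses how these three operators are registered with DEAP in `GA.ipynb`. The real fitness function trains an LSTM and returns its validation RMSE; here it is replaced by a hypothetical placeholder so the snippet is self-contained:

```python
# Condensed DEAP setup mirroring GA.ipynb; `evaluate` is a stand-in for the
# notebook's train_evaluate (which trains an LSTM and returns validation RMSE).
import random
from deap import base, creator, tools, algorithms
from scipy.stats import bernoulli

gene_length = 10  # 6 bits for the window size + 4 bits for the number of LSTM units

# Despite the class name used in the notebook, the -1.0 weight means lower RMSE is treated as fitter.
creator.create('FitnessMax', base.Fitness, weights=(-1.0,))
creator.create('Individual', list, fitness=creator.FitnessMax)

def evaluate(individual):
    # Placeholder fitness: return a random "RMSE" so the example runs without Keras.
    return random.uniform(0.05, 0.15),

toolbox = base.Toolbox()
toolbox.register('binary', bernoulli.rvs, 0.5)                   # random 0/1 genes
toolbox.register('individual', tools.initRepeat,
                 creator.Individual, toolbox.binary, n=gene_length)
toolbox.register('population', tools.initRepeat, list, toolbox.individual)

toolbox.register('select', tools.selRoulette)                    # 1. selection
toolbox.register('mate', tools.cxOrdered)                        # 2. crossover
toolbox.register('mutate', tools.mutShuffleIndexes, indpb=0.6)   # 3. mutation
toolbox.register('evaluate', evaluate)

population = toolbox.population(n=4)
algorithms.eaSimple(population, toolbox, cxpb=0.4, mutpb=0.1, ngen=4, verbose=False)
print(tools.selBest(population, k=1)[0])
```

Note that the DEAP documentation warns that roulette selection cannot be used for minimisation; the combination of a negative fitness weight with `selRoulette` is kept here only because it matches the notebook.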
25 | ### RNN Implementation:- 26 | The wind power forecasting data contains the wind power measurements of seven wind farms, but only column 'wp1' has been used for experimentation. 27 | A basic LSTM cell in Keras is used to create a chain of LSTM cells. The root mean squared error on the validation set is calculated and returned as the fitness score of the corresponding genetic algorithm solution. 28 | 29 | ### Results:- 30 | The optimal window size has been found to be 47 and the optimal number of LSTM units is 9. 31 | 32 | The outputs of the 5 training epochs for the final model are shown below:- 33 | 34 | Epoch 1/5 35 | 36 | 17209/17209 [==============================] - 21s 1ms/step - loss: 0.0190 37 | 38 | Epoch 2/5 39 | 40 | 17209/17209 [==============================] - 18s 1ms/step - loss: 0.0078 41 | 42 | Epoch 3/5 43 | 44 | 17209/17209 [==============================] - 18s 1ms/step - loss: 0.0062 45 | 46 | Epoch 4/5 47 | 48 | 17209/17209 [==============================] - 19s 1ms/step - loss: 0.0060 49 | 50 | Epoch 5/5 51 | 52 | 17209/17209 [==============================] - 19s 1ms/step - loss: 0.0060 53 | 54 | Test RMSE: 0.09710418381525192 55 | 56 | ### References:- 57 | 1. This project is an implementation of this [blog](http://aqibsaeed.github.io/2017-08-11-genetic-algorithm-for-optimizing-rnn/). The code is adapted from that post and the dataset can be downloaded from [here](https://www.kaggle.com/c/GEF2012-wind-forecasting/data). 58 | 2. DEAP Package [deap](http://deap.readthedocs.io/en/master/api/tools.html#deap.tools.cxOrdered) 59 | 3. Tutorial on [Genetic Algorithms](https://www.tutorialspoint.com/genetic_algorithms/genetic_algorithms_mutation.htm) 60 | 4. Blog on [GA](https://www.doc.ic.ac.uk/~nd/surprise_96/journal/vol1/hmw/article1.html) 61 | --------------------------------------------------------------------------------