├── README.md
├── Numpy2.ipynb
├── Pandas_part_3.ipynb
├── Pandas_Part_2.ipynb
└── Day_3_Numpy_1.ipynb


/README.md:
--------------------------------------------------------------------------------
1 | # PYTHON-AND-DATA-ANALYTICS-7-DAYS


--------------------------------------------------------------------------------
/Numpy2.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "Day_7_Numpy_Part_3.ipynb",
  7 |       "provenance": [],
  8 |       "collapsed_sections": []
  9 |     },
 10 |     "kernelspec": {
 11 |       "name": "python3",
 12 |       "display_name": "Python 3"
 13 |     }
 14 |   },
 15 |   "cells": [
 16 |     {
 17 |       "cell_type": "markdown",
 18 |       "metadata": {
 19 |         "id": "Tjukq5BlGUNj",
 20 |         "colab_type": "text"
 21 |       },
 22 |       "source": [
 23 |         "## **Boolean Indexing:**\n",
 24 |         "There are many situations in which we don't know the indices of the elements we want to select. For example, suppose we have a 10,000 x 10,000 ndarray of random integers ranging from 1 to 15,000 and we only want to select those integers that are less than 20. Boolean indexing can help us in these cases, by allowing us to select elements using logical arguments instead of explicit indices. Let's see some examples:"
 25 |       ]
 26 |     },
 27 |     {
 28 |       "cell_type": "code",
 29 |       "metadata": {
 30 |         "id": "YzTXz6bzF06E",
 31 |         "colab_type": "code",
 32 |         "colab": {
 33 |           "base_uri": "https://localhost:8080/",
 34 |           "height": 352
 35 |         },
 36 |         "outputId": "eaa87520-4f12-467a-f56d-cee00dd5000f"
 37 |       },
 38 |       "source": [
 39 |         "import numpy as np\n",
 40 |         "\n",
 41 |         "# We create a 5 x 5 ndarray that contains integers from 0 to 24\n",
 42 |         "X = np.arange(25).reshape(5, 5)\n",
 43 |         "\n",
 44 |         "# We print X\n",
 45 |         "print()\n",
 46 |         "print('Original X = \\n', X)\n",
 47 |         "print()\n",
 48 |         "\n",
 49 |         "# We use Boolean indexing to select elements in X:\n",
 50 |         "print('The elements in X that are greater than 10:', X[X > 10])\n",
 51 |         "print('The elements in X that less than or equal to 7:', X[X <= 7])\n",
 52 |         "print('The elements in X that are between 10 and 17:', X[(X > 10) & (X < 17)])\n",
 53 |         "\n",
 54 |         "# We use Boolean indexing to assign the elements that are between 10 and 17 the value of -1\n",
 55 |         "X[(X > 10) & (X < 17)] = -1\n",
 56 |         "\n",
 57 |         "# We print X\n",
 58 |         "print()\n",
 59 |         "print('X = \\n', X)\n",
 60 |         "print()"
 61 |       ],
 62 |       "execution_count": 2,
 63 |       "outputs": [
 64 |         {
 65 |           "output_type": "stream",
 66 |           "text": [
 67 |             "\n",
 68 |             "Original X = \n",
 69 |             " [[ 0  1  2  3  4]\n",
 70 |             " [ 5  6  7  8  9]\n",
 71 |             " [10 11 12 13 14]\n",
 72 |             " [15 16 17 18 19]\n",
 73 |             " [20 21 22 23 24]]\n",
 74 |             "\n",
 75 |             "The elements in X that are greater than 10: [11 12 13 14 15 16 17 18 19 20 21 22 23 24]\n",
 76 |             "The elements in X that less than or equal to 7: [0 1 2 3 4 5 6 7]\n",
 77 |             "The elements in X that are between 10 and 17: [11 12 13 14 15 16]\n",
 78 |             "\n",
 79 |             "X = \n",
 80 |             " [[ 0  1  2  3  4]\n",
 81 |             " [ 5  6  7  8  9]\n",
 82 |             " [10 -1 -1 -1 -1]\n",
 83 |             " [-1 -1 17 18 19]\n",
 84 |             " [20 21 22 23 24]]\n",
 85 |             "\n"
 86 |           ],
 87 |           "name": "stdout"
 88 |         }
 89 |       ]
 90 |     },
 91 |     {
 92 |       "cell_type": "markdown",
 93 |       "metadata": {
 94 |         "id": "m8AEouL3G-s5",
 95 |         "colab_type": "text"
 96 |       },
 97 |       "source": [
 98 |         "## **Set Operations:**\n",
 99 |         "\n",
100 |         "In addition to Boolean indexing, NumPy also allows for set operations. This is useful when comparing ndarrays, for example, to find common elements between two ndarrays. Let's see some examples:"
101 |       ]
102 |     },
103 |     {
104 |       "cell_type": "code",
105 |       "metadata": {
106 |         "id": "KZBTpYh9GjU0",
107 |         "colab_type": "code",
108 |         "colab": {
109 |           "base_uri": "https://localhost:8080/",
110 |           "height": 170
111 |         },
112 |         "outputId": "f053f3cc-e5fe-4090-ba2e-2ccb3a02c91f"
113 |       },
114 |       "source": [
115 |         "# We create a rank 1 ndarray\n",
116 |         "x = np.array([1,2,3,4,5])\n",
117 |         "\n",
118 |         "# We create a rank 1 ndarray\n",
119 |         "y = np.array([6,7,2,8,4])\n",
120 |         "\n",
121 |         "# We print x\n",
122 |         "print()\n",
123 |         "print('x = ', x)\n",
124 |         "\n",
125 |         "# We print y\n",
126 |         "print()\n",
127 |         "print('y = ', y)\n",
128 |         "\n",
129 |         "# We use set operations to compare x and y:\n",
130 |         "print()\n",
131 |         "print('The elements that are both in x and y:', np.intersect1d(x,y))\n",
132 |         "print('The elements that are in x that are not in y:', np.setdiff1d(x,y))\n",
133 |         "print('All the elements of x and y:',np.union1d(x,y))"
134 |       ],
135 |       "execution_count": 3,
136 |       "outputs": [
137 |         {
138 |           "output_type": "stream",
139 |           "text": [
140 |             "\n",
141 |             "x =  [1 2 3 4 5]\n",
142 |             "\n",
143 |             "y =  [6 7 2 8 4]\n",
144 |             "\n",
145 |             "The elements that are both in x and y: [2 4]\n",
146 |             "The elements that are in x that are not in y: [1 3 5]\n",
147 |             "All the elements of x and y: [1 2 3 4 5 6 7 8]\n"
148 |           ],
149 |           "name": "stdout"
150 |         }
151 |       ]
152 |     },
153 |     {
154 |       "cell_type": "markdown",
155 |       "metadata": {
156 |         "id": "r39N_VAmHYnN",
157 |         "colab_type": "text"
158 |       },
159 |       "source": [
160 |         "## **Sort():**\n",
161 |         "\n",
162 |         "We can also sort ndarrays in NumPy. We will learn how to use the np.sort() function to sort rank 1 and rank 2 ndarrays in different ways. Like with other functions we saw before, the sort function can also be used as a method. However, there is a big difference in how the data is stored in memory in this case. \n",
163 |         "- When np.sort() is used as a function, it sorts the ndarray out of place, meaning that it doesn't change the original ndarray being sorted. \n",
164 |         "- However, when you use sort as a method, ndarray.sort() sorts the ndarray in place, meaning that the original array will be changed to the sorted one. \n",
165 |         "\n",
166 |         "Let's see some examples:"
167 |       ]
168 |     },
169 |     {
170 |       "cell_type": "code",
171 |       "metadata": {
172 |         "id": "XOSbzTsSHK7U",
173 |         "colab_type": "code",
174 |         "colab": {
175 |           "base_uri": "https://localhost:8080/",
176 |           "height": 137
177 |         },
178 |         "outputId": "40e08849-c383-4111-d028-9df536f3f2a1"
179 |       },
180 |       "source": [
181 |         "# We create an unsorted rank 1 ndarray\n",
182 |         "x = np.random.randint(1,11,size=(10,))\n",
183 |         "\n",
184 |         "# We print x\n",
185 |         "print()\n",
186 |         "print('Original x = ', x)\n",
187 |         "\n",
188 |         "# We sort x and print the sorted array using sort as a function.\n",
189 |         "print()\n",
190 |         "print('Sorted x (out of place):', np.sort(x))\n",
191 |         "\n",
192 |         "# When we sort out of place the original array remains intact. To see this we print x again\n",
193 |         "print()\n",
194 |         "print('x after sorting:', x)"
195 |       ],
196 |       "execution_count": 4,
197 |       "outputs": [
198 |         {
199 |           "output_type": "stream",
200 |           "text": [
201 |             "\n",
202 |             "Original x =  [ 2  2  7  9  8  4  2 10  5  7]\n",
203 |             "\n",
204 |             "Sorted x (out of place): [ 2  2  2  4  5  7  7  8  9 10]\n",
205 |             "\n",
206 |             "x after sorting: [ 2  2  7  9  8  4  2 10  5  7]\n"
207 |           ],
208 |           "name": "stdout"
209 |         }
210 |       ]
211 |     },
212 |     {
213 |       "cell_type": "markdown",
214 |       "metadata": {
215 |         "id": "QbBU9jOtH7pq",
216 |         "colab_type": "text"
217 |       },
218 |       "source": [
219 |         "Let's see how we can sort ndarrays in place by using sort as a method:"
220 |       ]
221 |     },
222 |     {
223 |       "cell_type": "code",
224 |       "metadata": {
225 |         "id": "1Mbvj8oWHwEt",
226 |         "colab_type": "code",
227 |         "colab": {
228 |           "base_uri": "https://localhost:8080/",
229 |           "height": 104
230 |         },
231 |         "outputId": "8fa1372b-1832-4f78-b02f-221aa47ab809"
232 |       },
233 |       "source": [
234 |         "# We create an unsorted rank 1 ndarray\n",
235 |         "x = np.random.randint(1,11,size=(10,))\n",
236 |         "\n",
237 |         "# We print x\n",
238 |         "print()\n",
239 |         "print('Original x = ', x)\n",
240 |         "\n",
241 |         "# We sort x and print the sorted array using sort as a method.\n",
242 |         "x.sort()\n",
243 |         "\n",
244 |         "# When we sort in place the original array is changed to the sorted array. To see this we print x again\n",
245 |         "print()\n",
246 |         "print('x after sorting:', x)"
247 |       ],
248 |       "execution_count": 5,
249 |       "outputs": [
250 |         {
251 |           "output_type": "stream",
252 |           "text": [
253 |             "\n",
254 |             "Original x =  [ 6 10  8 10  2  9  1  6  6  2]\n",
255 |             "\n",
256 |             "x after sorting: [ 1  2  2  6  6  6  8  9 10 10]\n"
257 |           ],
258 |           "name": "stdout"
259 |         }
260 |       ]
261 |     },
262 |     {
263 |       "cell_type": "markdown",
264 |       "metadata": {
265 |         "id": "dELVwDysIAhs",
266 |         "colab_type": "text"
267 |       },
268 |       "source": [
269 |         "Notice that np.sort() sorts the array but, if the ndarray being sorted has repeated values, np.sort() leaves those values in the sorted array. However, if desired, we can sort only the unique elements in x by combining the sort function with the unique function. Let's see how we can sort the unique elements of x above:"
270 |       ]
271 |     },
272 |     {
273 |       "cell_type": "code",
274 |       "metadata": {
275 |         "id": "Wny5spAFH-lL",
276 |         "colab_type": "code",
277 |         "colab": {
278 |           "base_uri": "https://localhost:8080/",
279 |           "height": 87
280 |         },
281 |         "outputId": "2147cde2-1890-4cd4-deb2-d791996e1cf0"
282 |       },
283 |       "source": [
284 |         "# We create an unsorted rank 1 ndarray\n",
285 |         "x = np.random.randint(1,11,size=(10,))\n",
286 |         "\n",
287 |         "# We print x\n",
288 |         "print()\n",
289 |         "print('Original x = ', x)\n",
290 |         "\n",
291 |         "# We sort x but only keep the unique elements in x\n",
292 |         "print(np.sort(np.unique(x)))"
293 |       ],
294 |       "execution_count": 8,
295 |       "outputs": [
296 |         {
297 |           "output_type": "stream",
298 |           "text": [
299 |             "\n",
300 |             "Original x =  [8 6 2 8 1 6 2 1 8 6]\n",
301 |             "[1 2 6 8]\n"
302 |           ],
303 |           "name": "stdout"
304 |         }
305 |       ]
306 |     },
307 |     {
308 |       "cell_type": "markdown",
309 |       "metadata": {
310 |         "id": "3SSiV4EoKRWZ",
311 |         "colab_type": "text"
312 |       },
313 |       "source": [
314 |         "When sorting rank 2 ndarrays, we need to specify to the np.sort() function whether we are sorting by rows or columns. This is done by using the axis keyword. Let's see some examples:\n"
315 |       ]
316 |     },
317 |     {
318 |       "cell_type": "code",
319 |       "metadata": {
320 |         "id": "XdZi81Q4IFv7",
321 |         "colab_type": "code",
322 |         "colab": {
323 |           "base_uri": "https://localhost:8080/"
324 |         },
325 |         "outputId": "a4201ae6-6e75-4632-9e4c-86476dcc73f9"
326 |       },
327 |       "source": [
328 |         "# We create an unsorted rank 2 ndarray\n",
329 |         "X = np.random.randint(1,11,size=(5,5))\n",
330 |         "\n",
331 |         "# We print X\n",
332 |         "print()\n",
333 |         "print('Original X = \\n', X)\n",
334 |         "print()\n",
335 |         "\n",
336 |         "# We sort the columns of X and print the sorted array\n",
337 |         "print()\n",
338 |         "print('X with sorted columns :\\n', np.sort(X, axis = 0))\n",
339 |         "\n",
340 |         "# We sort the rows of X and print the sorted array\n",
341 |         "print()\n",
342 |         "print('X with sorted rows :\\n', np.sort(X, axis = 1))"
343 |       ],
344 |       "execution_count": 9,
345 |       "outputs": [
346 |         {
347 |           "output_type": "stream",
348 |           "text": [
349 |             "\n",
350 |             "Original X = \n",
351 |             " [[ 2  5  9  1  7]\n",
352 |             " [10  7  6  9  6]\n",
353 |             " [ 4  8  3  8  2]\n",
354 |             " [ 9  1  5  6 10]\n",
355 |             " [ 5  5 10  6  9]]\n",
356 |             "\n",
357 |             "\n",
358 |             "X with sorted columns :\n",
359 |             " [[ 2  1  3  1  2]\n",
360 |             " [ 4  5  5  6  6]\n",
361 |             " [ 5  5  6  6  7]\n",
362 |             " [ 9  7  9  8  9]\n",
363 |             " [10  8 10  9 10]]\n",
364 |             "\n",
365 |             "X with sorted rows :\n",
366 |             " [[ 1  2  5  7  9]\n",
367 |             " [ 6  6  7  9 10]\n",
368 |             " [ 2  3  4  8  8]\n",
369 |             " [ 1  5  6  9 10]\n",
370 |             " [ 5  5  6  9 10]]\n"
371 |           ],
372 |           "name": "stdout"
373 |         }
374 |       ]
375 |     },
376 |     {
377 |       "cell_type": "markdown",
378 |       "metadata": {
379 |         "id": "8B06V3sgKeSG",
380 |         "colab_type": "text"
381 |       },
382 |       "source": [
383 |         "## **Question:**\n",
384 |         "Create a 5 x 5 ndarray with consecutive integers from 1 to 25 (inclusive). Afterwards use Boolean indexing to pick out only the odd numbers in the array"
385 |       ]
386 |     },
387 |     {
388 |       "cell_type": "code",
389 |       "metadata": {
390 |         "id": "9KW7DdSEKUqW",
391 |         "colab_type": "code",
392 |         "colab": {}
393 |       },
394 |       "source": [
395 |         "import numpy as np\n",
396 |         "\n",
397 |         "# Create a 5 x 5 ndarray with consecutive integers from 1 to 25 (inclusive).\n",
398 |         "X = \n",
399 |         "\n",
400 |         "# Use Boolean indexing to pick out only the odd numbers in the array\n",
401 |         "Y = "
402 |       ],
403 |       "execution_count": null,
404 |       "outputs": []
405 |     },
406 |     {
407 |       "cell_type": "markdown",
408 |       "metadata": {
409 |         "id": "l2f8f9ZUKwKH",
410 |         "colab_type": "text"
411 |       },
412 |       "source": [
413 |         "## **Arithmetic Operations:**\n",
414 |         "Let's start by doing element-wise addition, subtraction, multiplication, and division between ndarrays. To do this, NumPy provides two approaches: a functional approach, where we use functions such as np.add(), and an operator approach, where we use arithmetic symbols such as +, which more closely resembles how we write mathematical equations. Both forms will do the same operation; the only difference is that if you use the function approach, the functions usually have options that you can tweak using keywords. It is important to note that when performing element-wise operations, the ndarrays being operated on must have the same shape or be broadcastable. We'll explain more about this later in this lesson. Let's start by performing element-wise arithmetic operations on rank 1 ndarrays:"
415 |       ]
416 |     },
417 |     {
418 |       "cell_type": "code",
419 |       "metadata": {
420 |         "id": "0Uk3Dg4qNcYK",
421 |         "colab_type": "code",
422 |         "colab": {
423 |           "base_uri": "https://localhost:8080/",
424 |           "height": 302
425 |         },
426 |         "outputId": "53e7a86b-6ef4-41c7-929e-cab4602253d4"
427 |       },
428 |       "source": [
429 |         "# We create two rank 1 ndarrays\n",
430 |         "x = np.array([1,2,3,4])\n",
431 |         "y = np.array([5.5,6.5,7.5,8.5])\n",
432 |         "\n",
433 |         "# We print x\n",
434 |         "print()\n",
435 |         "print('x = ', x)\n",
436 |         "\n",
437 |         "# We print y\n",
438 |         "print()\n",
439 |         "print('y = ', y)\n",
440 |         "print()\n",
441 |         "\n",
442 |         "# We perform basic element-wise operations using arithmetic symbols and functions\n",
443 |         "print('x + y = ', x + y)\n",
444 |         "print('add(x,y) = ', np.add(x,y))\n",
445 |         "print()\n",
446 |         "print('x - y = ', x - y)\n",
447 |         "print('subtract(x,y) = ', np.subtract(x,y))\n",
448 |         "print()\n",
449 |         "print('x * y = ', x * y)\n",
450 |         "print('multiply(x,y) = ', np.multiply(x,y))\n",
451 |         "print()\n",
452 |         "print('x / y = ', x / y)\n",
453 |         "print('divide(x,y) = ', np.divide(x,y))"
454 |       ],
455 |       "execution_count": 10,
456 |       "outputs": [
457 |         {
458 |           "output_type": "stream",
459 |           "text": [
460 |             "\n",
461 |             "x =  [1 2 3 4]\n",
462 |             "\n",
463 |             "y =  [5.5 6.5 7.5 8.5]\n",
464 |             "\n",
465 |             "x + y =  [ 6.5  8.5 10.5 12.5]\n",
466 |             "add(x,y) =  [ 6.5  8.5 10.5 12.5]\n",
467 |             "\n",
468 |             "x - y =  [-4.5 -4.5 -4.5 -4.5]\n",
469 |             "subtract(x,y) =  [-4.5 -4.5 -4.5 -4.5]\n",
470 |             "\n",
471 |             "x * y =  [ 5.5 13.  22.5 34. ]\n",
472 |             "multiply(x,y) =  [ 5.5 13.  22.5 34. ]\n",
473 |             "\n",
474 |             "x / y =  [0.18181818 0.30769231 0.4        0.47058824]\n",
475 |             "divide(x,y) =  [0.18181818 0.30769231 0.4        0.47058824]\n"
476 |           ],
477 |           "name": "stdout"
478 |         }
479 |       ]
480 |     },
481 |     {
482 |       "cell_type": "markdown",
483 |       "metadata": {
484 |         "id": "Cv-lLIRANh6q",
485 |         "colab_type": "text"
486 |       },
487 |       "source": [
488 |         "We can also perform the same element-wise arithmetic operations on rank 2 ndarrays. Again, remember that in order to do these operations, the ndarrays being operated on must have the same shape or be broadcastable."
489 |       ]
490 |     },
491 |     {
492 |       "cell_type": "code",
493 |       "metadata": {
494 |         "id": "aM5ZOfYBNfRr",
495 |         "colab_type": "code",
496 |         "colab": {
497 |           "base_uri": "https://localhost:8080/",
498 |           "height": 699
499 |         },
500 |         "outputId": "fceff7d7-b52b-433c-aeaa-801c7e61a834"
501 |       },
502 |       "source": [
503 |         "# We create two rank 2 ndarrays\n",
504 |         "X = np.array([1,2,3,4]).reshape(2,2)\n",
505 |         "Y = np.array([5.5,6.5,7.5,8.5]).reshape(2,2)\n",
506 |         "\n",
507 |         "# We print X\n",
508 |         "print()\n",
509 |         "print('X = \\n', X)\n",
510 |         "\n",
511 |         "# We print Y\n",
512 |         "print()\n",
513 |         "print('Y = \\n', Y)\n",
514 |         "print()\n",
515 |         "\n",
516 |         "# We perform basic element-wise operations using arithmetic symbols and functions\n",
517 |         "print('X + Y = \\n', X + Y)\n",
518 |         "print()\n",
519 |         "print('add(X,Y) = \\n', np.add(X,Y))\n",
520 |         "print()\n",
521 |         "print('X - Y = \\n', X - Y)\n",
522 |         "print()\n",
523 |         "print('subtract(X,Y) = \\n', np.subtract(X,Y))\n",
524 |         "print()\n",
525 |         "print('X * Y = \\n', X * Y)\n",
526 |         "print()\n",
527 |         "print('multiply(X,Y) = \\n', np.multiply(X,Y))\n",
528 |         "print()\n",
529 |         "print('X / Y = \\n', X / Y)\n",
530 |         "print()\n",
531 |         "print('divide(X,Y) = \\n', np.divide(X,Y))"
532 |       ],
533 |       "execution_count": 11,
534 |       "outputs": [
535 |         {
536 |           "output_type": "stream",
537 |           "text": [
538 |             "\n",
539 |             "X = \n",
540 |             " [[1 2]\n",
541 |             " [3 4]]\n",
542 |             "\n",
543 |             "Y = \n",
544 |             " [[5.5 6.5]\n",
545 |             " [7.5 8.5]]\n",
546 |             "\n",
547 |             "X + Y = \n",
548 |             " [[ 6.5  8.5]\n",
549 |             " [10.5 12.5]]\n",
550 |             "\n",
551 |             "add(X,Y) = \n",
552 |             " [[ 6.5  8.5]\n",
553 |             " [10.5 12.5]]\n",
554 |             "\n",
555 |             "X - Y = \n",
556 |             " [[-4.5 -4.5]\n",
557 |             " [-4.5 -4.5]]\n",
558 |             "\n",
559 |             "subtract(X,Y) = \n",
560 |             " [[-4.5 -4.5]\n",
561 |             " [-4.5 -4.5]]\n",
562 |             "\n",
563 |             "X * Y = \n",
564 |             " [[ 5.5 13. ]\n",
565 |             " [22.5 34. ]]\n",
566 |             "\n",
567 |             "multiply(X,Y) = \n",
568 |             " [[ 5.5 13. ]\n",
569 |             " [22.5 34. ]]\n",
570 |             "\n",
571 |             "X / Y = \n",
572 |             " [[0.18181818 0.30769231]\n",
573 |             " [0.4        0.47058824]]\n",
574 |             "\n",
575 |             "divide(X,Y) = \n",
576 |             " [[0.18181818 0.30769231]\n",
577 |             " [0.4        0.47058824]]\n"
578 |           ],
579 |           "name": "stdout"
580 |         }
581 |       ]
582 |     },
583 |     {
584 |       "cell_type": "markdown",
585 |       "metadata": {
586 |         "id": "sB3FyKXnNokr",
587 |         "colab_type": "text"
588 |       },
589 |       "source": [
590 |         "We can also apply mathematical functions, such as sqrt(x), to all elements of an ndarray at once."
591 |       ]
592 |     },
593 |     {
594 |       "cell_type": "code",
595 |       "metadata": {
596 |         "id": "gUpizA7QNmKU",
597 |         "colab_type": "code",
598 |         "colab": {
599 |           "base_uri": "https://localhost:8080/",
600 |           "height": 170
601 |         },
602 |         "outputId": "ac45f76b-69e7-4682-d310-e86fbc2f1c43"
603 |       },
604 |       "source": [
605 |         "# We create a rank 1 ndarray\n",
606 |         "x = np.array([1,2,3,4])\n",
607 |         "\n",
608 |         "# We print x\n",
609 |         "print()\n",
610 |         "print('x = ', x)\n",
611 |         "\n",
612 |         "# We apply different mathematical functions to all elements of x\n",
613 |         "print()\n",
614 |         "print('EXP(x) =', np.exp(x))\n",
615 |         "print()\n",
616 |         "print('SQRT(x) =',np.sqrt(x))\n",
617 |         "print()\n",
618 |         "print('POW(x,2) =',np.power(x,2)) # We raise all elements to the power of 2"
619 |       ],
620 |       "execution_count": 12,
621 |       "outputs": [
622 |         {
623 |           "output_type": "stream",
624 |           "text": [
625 |             "\n",
626 |             "x =  [1 2 3 4]\n",
627 |             "\n",
628 |             "EXP(x) = [ 2.71828183  7.3890561  20.08553692 54.59815003]\n",
629 |             "\n",
630 |             "SQRT(x) = [1.         1.41421356 1.73205081 2.        ]\n",
631 |             "\n",
632 |             "POW(x,2) = [ 1  4  9 16]\n"
633 |           ],
634 |           "name": "stdout"
635 |         }
636 |       ]
637 |     },
638 |     {
639 |       "cell_type": "markdown",
640 |       "metadata": {
641 |         "id": "HEHDlHpONvWn",
642 |         "colab_type": "text"
643 |       },
644 |       "source": [
645 |         "Another great feature of NumPy is that it has a wide variety of statistical functions. Statistical functions provide us with statistical information about the elements in an ndarray. Let's see some examples:"
646 |       ]
647 |     },
648 |     {
649 |       "cell_type": "code",
650 |       "metadata": {
651 |         "id": "iaPpIpT1Nrqu",
652 |         "colab_type": "code",
653 |         "colab": {
654 |           "base_uri": "https://localhost:8080/",
655 |           "height": 500
656 |         },
657 |         "outputId": "ed914324-a640-407e-eab1-927dc5a5e414"
658 |       },
659 |       "source": [
660 |         "# We create a 2 x 2 ndarray\n",
661 |         "X = np.array([[1,2], [3,4]])\n",
662 |         "\n",
663 |         "# We print X\n",
664 |         "print()\n",
665 |         "print('X = \\n', X)\n",
666 |         "print()\n",
667 |         "\n",
668 |         "print('Average of all elements in X:', X.mean())\n",
669 |         "print('Average of all elements in the columns of X:', X.mean(axis=0))\n",
670 |         "print('Average of all elements in the rows of X:', X.mean(axis=1))\n",
671 |         "print()\n",
672 |         "print('Sum of all elements in X:', X.sum())\n",
673 |         "print('Sum of all elements in the columns of X:', X.sum(axis=0))\n",
674 |         "print('Sum of all elements in the rows of X:', X.sum(axis=1))\n",
675 |         "print()\n",
676 |         "print('Standard Deviation of all elements in X:', X.std())\n",
677 |         "print('Standard Deviation of all elements in the columns of X:', X.std(axis=0))\n",
678 |         "print('Standard Deviation of all elements in the rows of X:', X.std(axis=1))\n",
679 |         "print()\n",
680 |         "print('Median of all elements in X:', np.median(X))\n",
681 |         "print('Median of all elements in the columns of X:', np.median(X,axis=0))\n",
682 |         "print('Median of all elements in the rows of X:', np.median(X,axis=1))\n",
683 |         "print()\n",
684 |         "print('Maximum value of all elements in X:', X.max())\n",
685 |         "print('Maximum value of all elements in the columns of X:', X.max(axis=0))\n",
686 |         "print('Maximum value of all elements in the rows of X:', X.max(axis=1))\n",
687 |         "print()\n",
688 |         "print('Minimum value of all elements in X:', X.min())\n",
689 |         "print('Minimum value of all elements in the columns of X:', X.min(axis=0))\n",
690 |         "print('Minimum value of all elements in the rows of X:', X.min(axis=1))"
691 |       ],
692 |       "execution_count": 13,
693 |       "outputs": [
694 |         {
695 |           "output_type": "stream",
696 |           "text": [
697 |             "\n",
698 |             "X = \n",
699 |             " [[1 2]\n",
700 |             " [3 4]]\n",
701 |             "\n",
702 |             "Average of all elements in X: 2.5\n",
703 |             "Average of all elements in the columns of X: [2. 3.]\n",
704 |             "Average of all elements in the rows of X: [1.5 3.5]\n",
705 |             "\n",
706 |             "Sum of all elements in X: 10\n",
707 |             "Sum of all elements in the columns of X: [4 6]\n",
708 |             "Sum of all elements in the rows of X: [3 7]\n",
709 |             "\n",
710 |             "Standard Deviation of all elements in X: 1.118033988749895\n",
711 |             "Standard Deviation of all elements in the columns of X: [1. 1.]\n",
712 |             "Standard Deviation of all elements in the rows of X: [0.5 0.5]\n",
713 |             "\n",
714 |             "Median of all elements in X: 2.5\n",
715 |             "Median of all elements in the columns of X: [2. 3.]\n",
716 |             "Median of all elements in the rows of X: [1.5 3.5]\n",
717 |             "\n",
718 |             "Maximum value of all elements in X: 4\n",
719 |             "Maximum value of all elements in the columns of X: [3 4]\n",
720 |             "Maximum value of all elements in the rows of X: [2 4]\n",
721 |             "\n",
722 |             "Minimum value of all elements in X: 1\n",
723 |             "Minimum value of all elements in the columns of X: [1 2]\n",
724 |             "Minimum value of all elements in the rows of X: [1 3]\n"
725 |           ],
726 |           "name": "stdout"
727 |         }
728 |       ]
729 |     },
730 |     {
731 |       "cell_type": "markdown",
732 |       "metadata": {
733 |         "id": "rQnXwgohN5VQ",
734 |         "colab_type": "text"
735 |       },
736 |       "source": [
737 |         "Finally, let's see how NumPy can perform arithmetic between a single number and all the elements of an ndarray without the use of complicated loops."
738 |       ]
739 |     },
740 |     {
741 |       "cell_type": "code",
742 |       "metadata": {
743 |         "id": "KPLD2A_UN0Fe",
744 |         "colab_type": "code",
745 |         "colab": {
746 |           "base_uri": "https://localhost:8080/",
747 |           "height": 368
748 |         },
749 |         "outputId": "67f75eba-a2ea-4f34-b65a-d298d39a74a1"
750 |       },
751 |       "source": [
752 |         "# We create a 2 x 2 ndarray\n",
753 |         "X = np.array([[1,2], [3,4]])\n",
754 |         "\n",
755 |         "# We print X\n",
756 |         "print()\n",
757 |         "print('X = \\n', X)\n",
758 |         "print()\n",
759 |         "\n",
760 |         "print('3 * X = \\n', 3 * X)\n",
761 |         "print()\n",
762 |         "print('3 + X = \\n', 3 + X)\n",
763 |         "print()\n",
764 |         "print('X - 3 = \\n', X - 3)\n",
765 |         "print()\n",
766 |         "print('X / 3 = \\n', X / 3)"
767 |       ],
768 |       "execution_count": 14,
769 |       "outputs": [
770 |         {
771 |           "output_type": "stream",
772 |           "text": [
773 |             "\n",
774 |             "X = \n",
775 |             " [[1 2]\n",
776 |             " [3 4]]\n",
777 |             "\n",
778 |             "3 * X = \n",
779 |             " [[ 3  6]\n",
780 |             " [ 9 12]]\n",
781 |             "\n",
782 |             "3 + X = \n",
783 |             " [[4 5]\n",
784 |             " [6 7]]\n",
785 |             "\n",
786 |             "X - 3 = \n",
787 |             " [[-2 -1]\n",
788 |             " [ 0  1]]\n",
789 |             "\n",
790 |             "X / 3 = \n",
791 |             " [[0.33333333 0.66666667]\n",
792 |             " [1.         1.33333333]]\n"
793 |           ],
794 |           "name": "stdout"
795 |         }
796 |       ]
797 |     },
798 |     {
799 |       "cell_type": "markdown",
800 |       "metadata": {
801 |         "id": "rK7le2G5OQUm",
802 |         "colab_type": "text"
803 |       },
804 |       "source": [
805 |         "## **Broadcasting:**\n",
806 |         "In order to do element-wise operations, NumPy sometimes uses something called Broadcasting. Broadcasting is the term used to describe how NumPy handles element-wise arithmetic operations with ndarrays of different shapes. For example, broadcasting is used implicitly when doing arithmetic operations between scalars and ndarrays.\n",
807 |         "\n",
808 |         "In the examples above, NumPy is working behind the scenes to broadcast 3 along the ndarray so that they have the same shape. This allows us to add 3 to each element of X with just one line of code.\n",
809 |         "\n",
810 |         "Subject to certain constraints, NumPy can do the same for two ndarrays of different shapes, as we can see below."
811 |       ]
812 |     },
813 |     {
814 |       "cell_type": "code",
815 |       "metadata": {
816 |         "id": "sZdEElKmOIJE",
817 |         "colab_type": "code",
818 |         "colab": {
819 |           "base_uri": "https://localhost:8080/",
820 |           "height": 434
821 |         },
822 |         "outputId": "0ea5fd84-3656-4e88-d073-71d226fb872f"
823 |       },
824 |       "source": [
825 |         "# We create a rank 1 ndarray\n",
826 |         "x = np.array([1,2,3])\n",
827 |         "\n",
828 |         "# We create a 3 x 3 ndarray\n",
829 |         "Y = np.array([[1,2,3],[4,5,6],[7,8,9]])\n",
830 |         "\n",
831 |         "# We create a 3 x 1 ndarray\n",
832 |         "Z = np.array([1,2,3]).reshape(3,1)\n",
833 |         "\n",
834 |         "# We print x\n",
835 |         "print()\n",
836 |         "print('x = ', x)\n",
837 |         "print()\n",
838 |         "\n",
839 |         "# We print Y\n",
840 |         "print()\n",
841 |         "print('Y = \\n', Y)\n",
842 |         "print()\n",
843 |         "\n",
844 |         "# We print Z\n",
845 |         "print()\n",
846 |         "print('Z = \\n', Z)\n",
847 |         "print()\n",
848 |         "\n",
849 |         "print('x + Y = \\n', x + Y)\n",
850 |         "print()\n",
851 |         "print('Z + Y = \\n',Z + Y)"
852 |       ],
853 |       "execution_count": 15,
854 |       "outputs": [
855 |         {
856 |           "output_type": "stream",
857 |           "text": [
858 |             "\n",
859 |             "x =  [1 2 3]\n",
860 |             "\n",
861 |             "\n",
862 |             "Y = \n",
863 |             " [[1 2 3]\n",
864 |             " [4 5 6]\n",
865 |             " [7 8 9]]\n",
866 |             "\n",
867 |             "\n",
868 |             "Z = \n",
869 |             " [[1]\n",
870 |             " [2]\n",
871 |             " [3]]\n",
872 |             "\n",
873 |             "x + Y = \n",
874 |             " [[ 2  4  6]\n",
875 |             " [ 5  7  9]\n",
876 |             " [ 8 10 12]]\n",
877 |             "\n",
878 |             "Z + Y = \n",
879 |             " [[ 2  3  4]\n",
880 |             " [ 6  7  8]\n",
881 |             " [10 11 12]]\n"
882 |           ],
883 |           "name": "stdout"
884 |         }
885 |       ]
886 |     },
887 |     {
888 |       "cell_type": "markdown",
889 |       "metadata": {
890 |         "id": "LeeZLIXoOvzb",
891 |         "colab_type": "text"
892 |       },
893 |       "source": [
894 |         "As before, NumPy is able to add the rank 1 ndarray x (which it treats as a 1 x 3 ndarray) and the 3 x 1 ndarray Z to the 3 x 3 ndarray Y by broadcasting the smaller ndarrays along the larger ndarray so that they have compatible shapes. In general, NumPy can do this provided that the smaller ndarray, such as the 1 x 3 ndarray in our example, can be expanded to the shape of the larger ndarray in such a way that the resulting broadcast is unambiguous; that is, each pair of corresponding dimensions must either be equal or one of them must be 1."
895 |       ]
896 |     },
897 |     {
898 |       "cell_type": "markdown",
899 |       "metadata": {
900 |         "id": "9kNw90GNO6-i",
901 |         "colab_type": "text"
902 |       },
903 |       "source": [
904 |         "## **Assignment:**\n",
905 |         "Make sure you check out the NumPy documentation for more information on broadcasting and its rules: [NumPy broadcasting guide](https://numpy.org/doc/stable/user/basics.broadcasting.html)"
906 |       ]
907 |     },
908 |     {
909 |       "cell_type": "markdown",
910 |       "metadata": {
911 |         "id": "atR6p2-EPLjC",
912 |         "colab_type": "text"
913 |       },
914 |       "source": [
915 |         "## **Question:**\n",
916 |         "Use Broadcasting to create a 4 x 4 ndarray that has its first column full of 1s, its second column full of 2s, its third column full of 3s, and its fourth column full of 4s."
917 |       ]
918 |     },
919 |     {
920 |       "cell_type": "code",
921 |       "metadata": {
922 |         "id": "5VV4j0doOiSS",
923 |         "colab_type": "code",
924 |         "colab": {}
925 |       },
926 |       "source": [
927 |         "import numpy as np\n",
928 |         "\n",
929 |         "X = "
930 |       ],
931 |       "execution_count": null,
932 |       "outputs": []
933 |     }
934 |   ]
935 | }


--------------------------------------------------------------------------------
/Pandas_part_3.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |   "nbformat": 4,
   3 |   "nbformat_minor": 0,
   4 |   "metadata": {
   5 |     "colab": {
   6 |       "name": "Day_10_Pandas_part_3.ipynb",
   7 |       "provenance": []
   8 |     },
   9 |     "kernelspec": {
  10 |       "name": "python3",
  11 |       "display_name": "Python 3"
  12 |     }
  13 |   },
  14 |   "cells": [
  15 |     {
  16 |       "cell_type": "markdown",
  17 |       "metadata": {
  18 |         "id": "9U8B1dFPZzox"
  19 |       },
  20 |       "source": [
  21 |         "In machine learning, you will most likely use databases from many sources to train your learning algorithms. Pandas allows us to load databases of different formats into DataFrames. One of the most popular data formats used to store databases is CSV. CSV stands for Comma Separated Values and offers a simple format to store data. We can load CSV files into Pandas DataFrames using the ```pd.read_csv()``` function. Let's load Google stock data into a Pandas DataFrame. The stocks.csv file contains Google (GOOG) stock data from 8/19/2004 through 10/13/2017, taken from Yahoo Finance."
  22 |       ]
  23 |     },
  24 |     {
  25 |       "cell_type": "code",
  26 |       "metadata": {
  27 |         "id": "XStain6bVH4x",
  28 |         "outputId": "21ba7179-f0c0-4633-fd85-b6da8be9bd09",
  29 |         "colab": {
  30 |           "base_uri": "https://localhost:8080/",
  31 |           "height": 71
  32 |         }
  33 |       },
  34 |       "source": [
  35 |         "import pandas as pd\n",
  36 |         "\n",
  37 |         "# We load Google stock data in a DataFrame\n",
  38 |         "Google_stock = pd.read_csv('/content/sample_data/stocks.csv')\n",
  39 |         "\n",
  40 |         "# We print some information about Google_stock\n",
  41 |         "print('Google_stock is of type:', type(Google_stock))\n",
  42 |         "print('Google_stock has shape:', Google_stock.shape)"
  43 |       ],
  44 |       "execution_count": 4,
  45 |       "outputs": [
  46 |         {
  47 |           "output_type": "stream",
  48 |           "text": [
  49 |             "Google_stock is of type: <class 'pandas.core.frame.DataFrame'>\n",
  50 |             "Google_stock has shape: (3313, 7)\n"
  51 |           ],
  52 |           "name": "stdout"
  53 |         }
  54 |       ]
  55 |     },
  56 |     {
  57 |       "cell_type": "markdown",
  58 |       "metadata": {
  59 |         "id": "7wzCKjscdd1i"
  60 |       },
  61 |       "source": [
  62 |         "We see that we have loaded the stocks.csv file into a Pandas DataFrame and it consists of 3,313 rows and 7 columns. Now let's look at the stock data."
  63 |       ]
  64 |     },
  65 |     {
  66 |       "cell_type": "code",
  67 |       "metadata": {
  68 |         "id": "zh8u2vdMce7D",
  69 |         "outputId": "647be954-8468-43fb-e3dd-d0defb7e16b0",
  70 |         "colab": {
  71 |           "base_uri": "https://localhost:8080/",
  72 |           "height": 614
  73 |         }
  74 |       },
  75 |       "source": [
  76 |         "Google_stock"
  77 |       ],
  78 |       "execution_count": 5,
  79 |       "outputs": [
  80 |         {
  81 |           "output_type": "execute_result",
  82 |           "data": {
  83 |             "text/html": [
  84 |               "<div>\n",
  85 |               "<style scoped>\n",
  86 |               "    .dataframe tbody tr th:only-of-type {\n",
  87 |               "        vertical-align: middle;\n",
  88 |               "    }\n",
  89 |               "\n",
  90 |               "    .dataframe tbody tr th {\n",
  91 |               "        vertical-align: top;\n",
  92 |               "    }\n",
  93 |               "\n",
  94 |               "    .dataframe thead th {\n",
  95 |               "        text-align: right;\n",
  96 |               "    }\n",
  97 |               "</style>\n",
  98 |               "<table border=\"1\" class=\"dataframe\">\n",
  99 |               "  <thead>\n",
 100 |               "    <tr style=\"text-align: right;\">\n",
 101 |               "      <th></th>\n",
 102 |               "      <th>Date</th>\n",
 103 |               "      <th>Open</th>\n",
 104 |               "      <th>High</th>\n",
 105 |               "      <th>Low</th>\n",
 106 |               "      <th>Close</th>\n",
 107 |               "      <th>Adj Close</th>\n",
 108 |               "      <th>Volume</th>\n",
 109 |               "    </tr>\n",
 110 |               "  </thead>\n",
 111 |               "  <tbody>\n",
 112 |               "    <tr>\n",
 113 |               "      <th>0</th>\n",
 114 |               "      <td>2004-08-19</td>\n",
 115 |               "      <td>49.676899</td>\n",
 116 |               "      <td>51.693783</td>\n",
 117 |               "      <td>47.669952</td>\n",
 118 |               "      <td>49.845802</td>\n",
 119 |               "      <td>49.845802</td>\n",
 120 |               "      <td>44994500</td>\n",
 121 |               "    </tr>\n",
 122 |               "    <tr>\n",
 123 |               "      <th>1</th>\n",
 124 |               "      <td>2004-08-20</td>\n",
 125 |               "      <td>50.178635</td>\n",
 126 |               "      <td>54.187561</td>\n",
 127 |               "      <td>49.925285</td>\n",
 128 |               "      <td>53.805050</td>\n",
 129 |               "      <td>53.805050</td>\n",
 130 |               "      <td>23005800</td>\n",
 131 |               "    </tr>\n",
 132 |               "    <tr>\n",
 133 |               "      <th>2</th>\n",
 134 |               "      <td>2004-08-23</td>\n",
 135 |               "      <td>55.017166</td>\n",
 136 |               "      <td>56.373344</td>\n",
 137 |               "      <td>54.172661</td>\n",
 138 |               "      <td>54.346527</td>\n",
 139 |               "      <td>54.346527</td>\n",
 140 |               "      <td>18393200</td>\n",
 141 |               "    </tr>\n",
 142 |               "    <tr>\n",
 143 |               "      <th>3</th>\n",
 144 |               "      <td>2004-08-24</td>\n",
 145 |               "      <td>55.260582</td>\n",
 146 |               "      <td>55.439419</td>\n",
 147 |               "      <td>51.450363</td>\n",
 148 |               "      <td>52.096165</td>\n",
 149 |               "      <td>52.096165</td>\n",
 150 |               "      <td>15361800</td>\n",
 151 |               "    </tr>\n",
 152 |               "    <tr>\n",
 153 |               "      <th>4</th>\n",
 154 |               "      <td>2004-08-25</td>\n",
 155 |               "      <td>52.140873</td>\n",
 156 |               "      <td>53.651051</td>\n",
 157 |               "      <td>51.604362</td>\n",
 158 |               "      <td>52.657513</td>\n",
 159 |               "      <td>52.657513</td>\n",
 160 |               "      <td>9257400</td>\n",
 161 |               "    </tr>\n",
 162 |               "    <tr>\n",
 163 |               "      <th>...</th>\n",
 164 |               "      <td>...</td>\n",
 165 |               "      <td>...</td>\n",
 166 |               "      <td>...</td>\n",
 167 |               "      <td>...</td>\n",
 168 |               "      <td>...</td>\n",
 169 |               "      <td>...</td>\n",
 170 |               "      <td>...</td>\n",
 171 |               "    </tr>\n",
 172 |               "    <tr>\n",
 173 |               "      <th>3308</th>\n",
 174 |               "      <td>2017-10-09</td>\n",
 175 |               "      <td>980.000000</td>\n",
 176 |               "      <td>985.424988</td>\n",
 177 |               "      <td>976.109985</td>\n",
 178 |               "      <td>977.000000</td>\n",
 179 |               "      <td>977.000000</td>\n",
 180 |               "      <td>891400</td>\n",
 181 |               "    </tr>\n",
 182 |               "    <tr>\n",
 183 |               "      <th>3309</th>\n",
 184 |               "      <td>2017-10-10</td>\n",
 185 |               "      <td>980.000000</td>\n",
 186 |               "      <td>981.570007</td>\n",
 187 |               "      <td>966.080017</td>\n",
 188 |               "      <td>972.599976</td>\n",
 189 |               "      <td>972.599976</td>\n",
 190 |               "      <td>968400</td>\n",
 191 |               "    </tr>\n",
 192 |               "    <tr>\n",
 193 |               "      <th>3310</th>\n",
 194 |               "      <td>2017-10-11</td>\n",
 195 |               "      <td>973.719971</td>\n",
 196 |               "      <td>990.710022</td>\n",
 197 |               "      <td>972.250000</td>\n",
 198 |               "      <td>989.250000</td>\n",
 199 |               "      <td>989.250000</td>\n",
 200 |               "      <td>1693300</td>\n",
 201 |               "    </tr>\n",
 202 |               "    <tr>\n",
 203 |               "      <th>3311</th>\n",
 204 |               "      <td>2017-10-12</td>\n",
 205 |               "      <td>987.450012</td>\n",
 206 |               "      <td>994.119995</td>\n",
 207 |               "      <td>985.000000</td>\n",
 208 |               "      <td>987.830017</td>\n",
 209 |               "      <td>987.830017</td>\n",
 210 |               "      <td>1262400</td>\n",
 211 |               "    </tr>\n",
 212 |               "    <tr>\n",
 213 |               "      <th>3312</th>\n",
 214 |               "      <td>2017-10-13</td>\n",
 215 |               "      <td>992.000000</td>\n",
 216 |               "      <td>997.210022</td>\n",
 217 |               "      <td>989.000000</td>\n",
 218 |               "      <td>989.679993</td>\n",
 219 |               "      <td>989.679993</td>\n",
 220 |               "      <td>1157700</td>\n",
 221 |               "    </tr>\n",
 222 |               "  </tbody>\n",
 223 |               "</table>\n",
 224 |               "<p>3313 rows × 7 columns</p>\n",
 225 |               "</div>"
 226 |             ],
 227 |             "text/plain": [
 228 |               "            Date        Open        High  ...       Close   Adj Close    Volume\n",
 229 |               "0     2004-08-19   49.676899   51.693783  ...   49.845802   49.845802  44994500\n",
 230 |               "1     2004-08-20   50.178635   54.187561  ...   53.805050   53.805050  23005800\n",
 231 |               "2     2004-08-23   55.017166   56.373344  ...   54.346527   54.346527  18393200\n",
 232 |               "3     2004-08-24   55.260582   55.439419  ...   52.096165   52.096165  15361800\n",
 233 |               "4     2004-08-25   52.140873   53.651051  ...   52.657513   52.657513   9257400\n",
 234 |               "...          ...         ...         ...  ...         ...         ...       ...\n",
 235 |               "3308  2017-10-09  980.000000  985.424988  ...  977.000000  977.000000    891400\n",
 236 |               "3309  2017-10-10  980.000000  981.570007  ...  972.599976  972.599976    968400\n",
 237 |               "3310  2017-10-11  973.719971  990.710022  ...  989.250000  989.250000   1693300\n",
 238 |               "3311  2017-10-12  987.450012  994.119995  ...  987.830017  987.830017   1262400\n",
 239 |               "3312  2017-10-13  992.000000  997.210022  ...  989.679993  989.679993   1157700\n",
 240 |               "\n",
 241 |               "[3313 rows x 7 columns]"
 242 |             ]
 243 |           },
 244 |           "metadata": {
 245 |             "tags": []
 246 |           },
 247 |           "execution_count": 5
 248 |         }
 249 |       ]
 250 |     },
 251 |     {
 252 |       "cell_type": "markdown",
 253 |       "metadata": {
 254 |         "id": "EB2qpQEzdt3W"
 255 |       },
 256 |       "source": [
 257 |         "We see that it is quite a large dataset and that Pandas has automatically assigned numerical row indices to the DataFrame. Pandas also used the labels that appear in the data in the CSV file to assign the column labels.\n",
 258 |         "\n",
 259 |         "When dealing with large datasets like this one, it is often useful just to take a look at the first few rows of data instead of the whole dataset. We can take a look at the first 5 rows of data using the ```.head()``` method, as shown below."
 260 |       ]
 261 |     },
 262 |     {
 263 |       "cell_type": "code",
 264 |       "metadata": {
 265 |         "id": "8imtUbLudi_U",
 266 |         "outputId": "6dc8e6df-2ccf-40d3-99ca-9c65ce6e9647",
 267 |         "colab": {
 268 |           "base_uri": "https://localhost:8080/",
 269 |           "height": 221
 270 |         }
 271 |       },
 272 |       "source": [
 273 |         "Google_stock.head()"
 274 |       ],
 275 |       "execution_count": 6,
 276 |       "outputs": [
 277 |         {
 278 |           "output_type": "execute_result",
 279 |           "data": {
 280 |             "text/html": [
 281 |               "<div>\n",
 282 |               "<style scoped>\n",
 283 |               "    .dataframe tbody tr th:only-of-type {\n",
 284 |               "        vertical-align: middle;\n",
 285 |               "    }\n",
 286 |               "\n",
 287 |               "    .dataframe tbody tr th {\n",
 288 |               "        vertical-align: top;\n",
 289 |               "    }\n",
 290 |               "\n",
 291 |               "    .dataframe thead th {\n",
 292 |               "        text-align: right;\n",
 293 |               "    }\n",
 294 |               "</style>\n",
 295 |               "<table border=\"1\" class=\"dataframe\">\n",
 296 |               "  <thead>\n",
 297 |               "    <tr style=\"text-align: right;\">\n",
 298 |               "      <th></th>\n",
 299 |               "      <th>Date</th>\n",
 300 |               "      <th>Open</th>\n",
 301 |               "      <th>High</th>\n",
 302 |               "      <th>Low</th>\n",
 303 |               "      <th>Close</th>\n",
 304 |               "      <th>Adj Close</th>\n",
 305 |               "      <th>Volume</th>\n",
 306 |               "    </tr>\n",
 307 |               "  </thead>\n",
 308 |               "  <tbody>\n",
 309 |               "    <tr>\n",
 310 |               "      <th>0</th>\n",
 311 |               "      <td>2004-08-19</td>\n",
 312 |               "      <td>49.676899</td>\n",
 313 |               "      <td>51.693783</td>\n",
 314 |               "      <td>47.669952</td>\n",
 315 |               "      <td>49.845802</td>\n",
 316 |               "      <td>49.845802</td>\n",
 317 |               "      <td>44994500</td>\n",
 318 |               "    </tr>\n",
 319 |               "    <tr>\n",
 320 |               "      <th>1</th>\n",
 321 |               "      <td>2004-08-20</td>\n",
 322 |               "      <td>50.178635</td>\n",
 323 |               "      <td>54.187561</td>\n",
 324 |               "      <td>49.925285</td>\n",
 325 |               "      <td>53.805050</td>\n",
 326 |               "      <td>53.805050</td>\n",
 327 |               "      <td>23005800</td>\n",
 328 |               "    </tr>\n",
 329 |               "    <tr>\n",
 330 |               "      <th>2</th>\n",
 331 |               "      <td>2004-08-23</td>\n",
 332 |               "      <td>55.017166</td>\n",
 333 |               "      <td>56.373344</td>\n",
 334 |               "      <td>54.172661</td>\n",
 335 |               "      <td>54.346527</td>\n",
 336 |               "      <td>54.346527</td>\n",
 337 |               "      <td>18393200</td>\n",
 338 |               "    </tr>\n",
 339 |               "    <tr>\n",
 340 |               "      <th>3</th>\n",
 341 |               "      <td>2004-08-24</td>\n",
 342 |               "      <td>55.260582</td>\n",
 343 |               "      <td>55.439419</td>\n",
 344 |               "      <td>51.450363</td>\n",
 345 |               "      <td>52.096165</td>\n",
 346 |               "      <td>52.096165</td>\n",
 347 |               "      <td>15361800</td>\n",
 348 |               "    </tr>\n",
 349 |               "    <tr>\n",
 350 |               "      <th>4</th>\n",
 351 |               "      <td>2004-08-25</td>\n",
 352 |               "      <td>52.140873</td>\n",
 353 |               "      <td>53.651051</td>\n",
 354 |               "      <td>51.604362</td>\n",
 355 |               "      <td>52.657513</td>\n",
 356 |               "      <td>52.657513</td>\n",
 357 |               "      <td>9257400</td>\n",
 358 |               "    </tr>\n",
 359 |               "  </tbody>\n",
 360 |               "</table>\n",
 361 |               "</div>"
 362 |             ],
 363 |             "text/plain": [
 364 |               "         Date       Open       High        Low      Close  Adj Close    Volume\n",
 365 |               "0  2004-08-19  49.676899  51.693783  47.669952  49.845802  49.845802  44994500\n",
 366 |               "1  2004-08-20  50.178635  54.187561  49.925285  53.805050  53.805050  23005800\n",
 367 |               "2  2004-08-23  55.017166  56.373344  54.172661  54.346527  54.346527  18393200\n",
 368 |               "3  2004-08-24  55.260582  55.439419  51.450363  52.096165  52.096165  15361800\n",
 369 |               "4  2004-08-25  52.140873  53.651051  51.604362  52.657513  52.657513   9257400"
 370 |             ]
 371 |           },
 372 |           "metadata": {
 373 |             "tags": []
 374 |           },
 375 |           "execution_count": 6
 376 |         }
 377 |       ]
 378 |     },
 379 |     {
 380 |       "cell_type": "markdown",
 381 |       "metadata": {
 382 |         "id": "Sk6a7_Nrh_RV"
 383 |       },
 384 |       "source": [
 385 |         "We can also take a look at the last 5 rows of data by using the ```.tail()``` method:"
 386 |       ]
 387 |     },
 388 |     {
 389 |       "cell_type": "code",
 390 |       "metadata": {
 391 |         "id": "59nQSBchhHyR",
 392 |         "outputId": "07b884e6-6598-408a-92d4-8a2db61cc1ce",
 393 |         "colab": {
 394 |           "base_uri": "https://localhost:8080/",
 395 |           "height": 320
 396 |         }
 397 |       },
 398 |       "source": [
 399 |         "Google_stock.tail()"
 400 |       ],
 401 |       "execution_count": 7,
 402 |       "outputs": [
 403 |         {
 404 |           "output_type": "execute_result",
 405 |           "data": {
 406 |             "text/html": [
 407 |               "<div>\n",
 408 |               "<style scoped>\n",
 409 |               "    .dataframe tbody tr th:only-of-type {\n",
 410 |               "        vertical-align: middle;\n",
 411 |               "    }\n",
 412 |               "\n",
 413 |               "    .dataframe tbody tr th {\n",
 414 |               "        vertical-align: top;\n",
 415 |               "    }\n",
 416 |               "\n",
 417 |               "    .dataframe thead th {\n",
 418 |               "        text-align: right;\n",
 419 |               "    }\n",
 420 |               "</style>\n",
 421 |               "<table border=\"1\" class=\"dataframe\">\n",
 422 |               "  <thead>\n",
 423 |               "    <tr style=\"text-align: right;\">\n",
 424 |               "      <th></th>\n",
 425 |               "      <th>Date</th>\n",
 426 |               "      <th>Open</th>\n",
 427 |               "      <th>High</th>\n",
 428 |               "      <th>Low</th>\n",
 429 |               "      <th>Close</th>\n",
 430 |               "      <th>Adj Close</th>\n",
 431 |               "      <th>Volume</th>\n",
 432 |               "    </tr>\n",
 433 |               "  </thead>\n",
 434 |               "  <tbody>\n",
 435 |               "    <tr>\n",
 436 |               "      <th>3308</th>\n",
 437 |               "      <td>2017-10-09</td>\n",
 438 |               "      <td>980.000000</td>\n",
 439 |               "      <td>985.424988</td>\n",
 440 |               "      <td>976.109985</td>\n",
 441 |               "      <td>977.000000</td>\n",
 442 |               "      <td>977.000000</td>\n",
 443 |               "      <td>891400</td>\n",
 444 |               "    </tr>\n",
 445 |               "    <tr>\n",
 446 |               "      <th>3309</th>\n",
 447 |               "      <td>2017-10-10</td>\n",
 448 |               "      <td>980.000000</td>\n",
 449 |               "      <td>981.570007</td>\n",
 450 |               "      <td>966.080017</td>\n",
 451 |               "      <td>972.599976</td>\n",
 452 |               "      <td>972.599976</td>\n",
 453 |               "      <td>968400</td>\n",
 454 |               "    </tr>\n",
 455 |               "    <tr>\n",
 456 |               "      <th>3310</th>\n",
 457 |               "      <td>2017-10-11</td>\n",
 458 |               "      <td>973.719971</td>\n",
 459 |               "      <td>990.710022</td>\n",
 460 |               "      <td>972.250000</td>\n",
 461 |               "      <td>989.250000</td>\n",
 462 |               "      <td>989.250000</td>\n",
 463 |               "      <td>1693300</td>\n",
 464 |               "    </tr>\n",
 465 |               "    <tr>\n",
 466 |               "      <th>3311</th>\n",
 467 |               "      <td>2017-10-12</td>\n",
 468 |               "      <td>987.450012</td>\n",
 469 |               "      <td>994.119995</td>\n",
 470 |               "      <td>985.000000</td>\n",
 471 |               "      <td>987.830017</td>\n",
 472 |               "      <td>987.830017</td>\n",
 473 |               "      <td>1262400</td>\n",
 474 |               "    </tr>\n",
 475 |               "    <tr>\n",
 476 |               "      <th>3312</th>\n",
 477 |               "      <td>2017-10-13</td>\n",
 478 |               "      <td>992.000000</td>\n",
 479 |               "      <td>997.210022</td>\n",
 480 |               "      <td>989.000000</td>\n",
 481 |               "      <td>989.679993</td>\n",
 482 |               "      <td>989.679993</td>\n",
 483 |               "      <td>1157700</td>\n",
 484 |               "    </tr>\n",
 485 |               "  </tbody>\n",
 486 |               "</table>\n",
 487 |               "</div>"
 488 |             ],
 489 |             "text/plain": [
 490 |               "            Date        Open        High  ...       Close   Adj Close   Volume\n",
 491 |               "3308  2017-10-09  980.000000  985.424988  ...  977.000000  977.000000   891400\n",
 492 |               "3309  2017-10-10  980.000000  981.570007  ...  972.599976  972.599976   968400\n",
 493 |               "3310  2017-10-11  973.719971  990.710022  ...  989.250000  989.250000  1693300\n",
 494 |               "3311  2017-10-12  987.450012  994.119995  ...  987.830017  987.830017  1262400\n",
 495 |               "3312  2017-10-13  992.000000  997.210022  ...  989.679993  989.679993  1157700\n",
 496 |               "\n",
 497 |               "[5 rows x 7 columns]"
 498 |             ]
 499 |           },
 500 |           "metadata": {
 501 |             "tags": []
 502 |           },
 503 |           "execution_count": 7
 504 |         }
 505 |       ]
 506 |     },
 507 |     {
 508 |       "cell_type": "markdown",
 509 |       "metadata": {
 510 |         "id": "ZmCx_A_3iUVX"
 511 |       },
 512 |       "source": [
 513 |         "We can also optionally use ```.head(N)``` or ```.tail(N)``` to display the first and last N rows of data, respectively.\n",
 514 |         "\n",
 515 |         "Let's do a quick check to see whether we have any ```NaN``` values in our dataset. To do this, we will use the ```.isnull()``` method followed by the ```.any()``` method to check whether any of the columns contain ```NaN``` values."
 516 |       ]
 517 |     },
 518 |     {
 519 |       "cell_type": "code",
 520 |       "metadata": {
 521 |         "id": "KTvuj2BIiMcA",
 522 |         "outputId": "c1f9453d-6434-429f-c0b3-7947d786bef8",
 523 |         "colab": {
 524 |           "base_uri": "https://localhost:8080/",
 525 |           "height": 170
 526 |         }
 527 |       },
 528 |       "source": [
 529 |         "Google_stock.isnull().any()"
 530 |       ],
 531 |       "execution_count": 8,
 532 |       "outputs": [
 533 |         {
 534 |           "output_type": "execute_result",
 535 |           "data": {
 536 |             "text/plain": [
 537 |               "Date         False\n",
 538 |               "Open         False\n",
 539 |               "High         False\n",
 540 |               "Low          False\n",
 541 |               "Close        False\n",
 542 |               "Adj Close    False\n",
 543 |               "Volume       False\n",
 544 |               "dtype: bool"
 545 |             ]
 546 |           },
 547 |           "metadata": {
 548 |             "tags": []
 549 |           },
 550 |           "execution_count": 8
 551 |         }
 552 |       ]
 553 |     },
 554 |     {
 555 |       "cell_type": "markdown",
 556 |       "metadata": {
 557 |         "id": "zH8nfXlLio3j"
 558 |       },
 559 |       "source": [
 560 |         "We see that we have no ```NaN``` values.\n",
 561 |         "\n",
 562 |         "When dealing with large datasets, it is often useful to get statistical information from them. Pandas provides the ```.describe()``` method to get descriptive statistics on each column of the DataFrame. Let's see how this works:"
 563 |       ]
 564 |     },
 565 |     {
 566 |       "cell_type": "code",
 567 |       "metadata": {
 568 |         "id": "lXJrX81Kimgc",
 569 |         "outputId": "d43ca237-d5f1-4ce0-eac8-d2e27b0d9337",
 570 |         "colab": {
 571 |           "base_uri": "https://localhost:8080/",
 572 |           "height": 313
 573 |         }
 574 |       },
 575 |       "source": [
 576 |         "# We get descriptive statistics on our stock data\n",
 577 |         "Google_stock.describe()"
 578 |       ],
 579 |       "execution_count": 9,
 580 |       "outputs": [
 581 |         {
 582 |           "output_type": "execute_result",
 583 |           "data": {
 584 |             "text/html": [
 585 |               "<div>\n",
 586 |               "<style scoped>\n",
 587 |               "    .dataframe tbody tr th:only-of-type {\n",
 588 |               "        vertical-align: middle;\n",
 589 |               "    }\n",
 590 |               "\n",
 591 |               "    .dataframe tbody tr th {\n",
 592 |               "        vertical-align: top;\n",
 593 |               "    }\n",
 594 |               "\n",
 595 |               "    .dataframe thead th {\n",
 596 |               "        text-align: right;\n",
 597 |               "    }\n",
 598 |               "</style>\n",
 599 |               "<table border=\"1\" class=\"dataframe\">\n",
 600 |               "  <thead>\n",
 601 |               "    <tr style=\"text-align: right;\">\n",
 602 |               "      <th></th>\n",
 603 |               "      <th>Open</th>\n",
 604 |               "      <th>High</th>\n",
 605 |               "      <th>Low</th>\n",
 606 |               "      <th>Close</th>\n",
 607 |               "      <th>Adj Close</th>\n",
 608 |               "      <th>Volume</th>\n",
 609 |               "    </tr>\n",
 610 |               "  </thead>\n",
 611 |               "  <tbody>\n",
 612 |               "    <tr>\n",
 613 |               "      <th>count</th>\n",
 614 |               "      <td>3313.000000</td>\n",
 615 |               "      <td>3313.000000</td>\n",
 616 |               "      <td>3313.000000</td>\n",
 617 |               "      <td>3313.000000</td>\n",
 618 |               "      <td>3313.000000</td>\n",
 619 |               "      <td>3.313000e+03</td>\n",
 620 |               "    </tr>\n",
 621 |               "    <tr>\n",
 622 |               "      <th>mean</th>\n",
 623 |               "      <td>380.186092</td>\n",
 624 |               "      <td>383.493740</td>\n",
 625 |               "      <td>376.519309</td>\n",
 626 |               "      <td>380.072458</td>\n",
 627 |               "      <td>380.072458</td>\n",
 628 |               "      <td>8.038476e+06</td>\n",
 629 |               "    </tr>\n",
 630 |               "    <tr>\n",
 631 |               "      <th>std</th>\n",
 632 |               "      <td>223.818650</td>\n",
 633 |               "      <td>224.974534</td>\n",
 634 |               "      <td>222.473232</td>\n",
 635 |               "      <td>223.853780</td>\n",
 636 |               "      <td>223.853780</td>\n",
 637 |               "      <td>8.399521e+06</td>\n",
 638 |               "    </tr>\n",
 639 |               "    <tr>\n",
 640 |               "      <th>min</th>\n",
 641 |               "      <td>49.274517</td>\n",
 642 |               "      <td>50.541279</td>\n",
 643 |               "      <td>47.669952</td>\n",
 644 |               "      <td>49.681866</td>\n",
 645 |               "      <td>49.681866</td>\n",
 646 |               "      <td>7.900000e+03</td>\n",
 647 |               "    </tr>\n",
 648 |               "    <tr>\n",
 649 |               "      <th>25%</th>\n",
 650 |               "      <td>226.556473</td>\n",
 651 |               "      <td>228.394516</td>\n",
 652 |               "      <td>224.003082</td>\n",
 653 |               "      <td>226.407440</td>\n",
 654 |               "      <td>226.407440</td>\n",
 655 |               "      <td>2.584900e+06</td>\n",
 656 |               "    </tr>\n",
 657 |               "    <tr>\n",
 658 |               "      <th>50%</th>\n",
 659 |               "      <td>293.312286</td>\n",
 660 |               "      <td>295.433502</td>\n",
 661 |               "      <td>289.929291</td>\n",
 662 |               "      <td>293.029114</td>\n",
 663 |               "      <td>293.029114</td>\n",
 664 |               "      <td>5.281300e+06</td>\n",
 665 |               "    </tr>\n",
 666 |               "    <tr>\n",
 667 |               "      <th>75%</th>\n",
 668 |               "      <td>536.650024</td>\n",
 669 |               "      <td>540.000000</td>\n",
 670 |               "      <td>532.409973</td>\n",
 671 |               "      <td>536.690002</td>\n",
 672 |               "      <td>536.690002</td>\n",
 673 |               "      <td>1.065370e+07</td>\n",
 674 |               "    </tr>\n",
 675 |               "    <tr>\n",
 676 |               "      <th>max</th>\n",
 677 |               "      <td>992.000000</td>\n",
 678 |               "      <td>997.210022</td>\n",
 679 |               "      <td>989.000000</td>\n",
 680 |               "      <td>989.679993</td>\n",
 681 |               "      <td>989.679993</td>\n",
 682 |               "      <td>8.276810e+07</td>\n",
 683 |               "    </tr>\n",
 684 |               "  </tbody>\n",
 685 |               "</table>\n",
 686 |               "</div>"
 687 |             ],
 688 |             "text/plain": [
 689 |               "              Open         High  ...    Adj Close        Volume\n",
 690 |               "count  3313.000000  3313.000000  ...  3313.000000  3.313000e+03\n",
 691 |               "mean    380.186092   383.493740  ...   380.072458  8.038476e+06\n",
 692 |               "std     223.818650   224.974534  ...   223.853780  8.399521e+06\n",
 693 |               "min      49.274517    50.541279  ...    49.681866  7.900000e+03\n",
 694 |               "25%     226.556473   228.394516  ...   226.407440  2.584900e+06\n",
 695 |               "50%     293.312286   295.433502  ...   293.029114  5.281300e+06\n",
 696 |               "75%     536.650024   540.000000  ...   536.690002  1.065370e+07\n",
 697 |               "max     992.000000   997.210022  ...   989.679993  8.276810e+07\n",
 698 |               "\n",
 699 |               "[8 rows x 6 columns]"
 700 |             ]
 701 |           },
 702 |           "metadata": {
 703 |             "tags": []
 704 |           },
 705 |           "execution_count": 9
 706 |         }
 707 |       ]
 708 |     },
 709 |     {
 710 |       "cell_type": "markdown",
 711 |       "metadata": {
 712 |         "id": "ddXb_1Eoiwyj"
 713 |       },
 714 |       "source": [
 715 |         "If desired, we can apply the ```.describe()``` method on a single column as shown below:"
 716 |       ]
 717 |     },
 718 |     {
 719 |       "cell_type": "code",
 720 |       "metadata": {
 721 |         "id": "XVc9LUDeiulN",
 722 |         "outputId": "28f26246-9e3f-4aac-a1cd-4f8f027cd547",
 723 |         "colab": {
 724 |           "base_uri": "https://localhost:8080/",
 725 |           "height": 186
 726 |         }
 727 |       },
 728 |       "source": [
 729 |         "# We get descriptive statistics on a single column of our DataFrame\n",
 730 |         "Google_stock['Adj Close'].describe()"
 731 |       ],
 732 |       "execution_count": 10,
 733 |       "outputs": [
 734 |         {
 735 |           "output_type": "execute_result",
 736 |           "data": {
 737 |             "text/plain": [
 738 |               "count    3313.000000\n",
 739 |               "mean      380.072458\n",
 740 |               "std       223.853780\n",
 741 |               "min        49.681866\n",
 742 |               "25%       226.407440\n",
 743 |               "50%       293.029114\n",
 744 |               "75%       536.690002\n",
 745 |               "max       989.679993\n",
 746 |               "Name: Adj Close, dtype: float64"
 747 |             ]
 748 |           },
 749 |           "metadata": {
 750 |             "tags": []
 751 |           },
 752 |           "execution_count": 10
 753 |         }
 754 |       ]
 755 |     },
 756 |     {
 757 |       "cell_type": "markdown",
 758 |       "metadata": {
 759 |         "id": "Bq8yeKE6i44l"
 760 |       },
 761 |       "source": [
 762 |         "Similarly, you can also look at one statistic by using one of the many statistical functions Pandas provides. Let's look at some examples:"
 763 |       ]
 764 |     },
 765 |     {
 766 |       "cell_type": "code",
 767 |       "metadata": {
 768 |         "id": "XG5MbGkui1-r",
 769 |         "outputId": "9365a3bb-0d82-4d5b-aeeb-33854fa8729c",
 770 |         "colab": {
 771 |           "base_uri": "https://localhost:8080/",
 772 |           "height": 385
 773 |         }
 774 |       },
 775 |       "source": [
 776 |         "# We print information about our DataFrame  \n",
 777 |         "print()\n",
 778 |         "print('Maximum values of each column:\\n', Google_stock.max())\n",
 779 |         "print()\n",
 780 |         "print('Minimum Close value:', Google_stock['Close'].min())\n",
 781 |         "print()\n",
 782 |         "print('Average value of each column:\\n', Google_stock.mean())"
 783 |       ],
 784 |       "execution_count": 11,
 785 |       "outputs": [
 786 |         {
 787 |           "output_type": "stream",
 788 |           "text": [
 789 |             "\n",
 790 |             "Maximum values of each column:\n",
 791 |             " Date         2017-10-13\n",
 792 |             "Open                992\n",
 793 |             "High             997.21\n",
 794 |             "Low                 989\n",
 795 |             "Close            989.68\n",
 796 |             "Adj Close        989.68\n",
 797 |             "Volume         82768100\n",
 798 |             "dtype: object\n",
 799 |             "\n",
 800 |             "Minimum Close value: 49.681866\n",
 801 |             "\n",
 802 |             "Average value of each column:\n",
 803 |             " Open         3.801861e+02\n",
 804 |             "High         3.834937e+02\n",
 805 |             "Low          3.765193e+02\n",
 806 |             "Close        3.800725e+02\n",
 807 |             "Adj Close    3.800725e+02\n",
 808 |             "Volume       8.038476e+06\n",
 809 |             "dtype: float64\n"
 810 |           ],
 811 |           "name": "stdout"
 812 |         }
 813 |       ]
 814 |     },
 815 |     {
 816 |       "cell_type": "markdown",
 817 |       "metadata": {
 818 |         "id": "HMYy30hfjebQ"
 819 |       },
 820 |       "source": [
 821 |         "Another important statistical measure is data correlation. Data correlation can tell us, for example, if the data in different columns are correlated. We can use the .corr() method to get the correlation between different columns, as shown below:"
 822 |       ]
 823 |     },
 824 |     {
 825 |       "cell_type": "code",
 826 |       "metadata": {
 827 |         "id": "ZrVhnTUujbd-",
 828 |         "outputId": "479839ae-f718-450b-9c33-5f508d5ee40c",
 829 |         "colab": {
 830 |           "base_uri": "https://localhost:8080/",
 831 |           "height": 252
 832 |         }
 833 |       },
 834 |       "source": [
 835 |         "# We display the correlation between columns\n",
 836 |         "Google_stock.corr()"
 837 |       ],
 838 |       "execution_count": 12,
 839 |       "outputs": [
 840 |         {
 841 |           "output_type": "execute_result",
 842 |           "data": {
 843 |             "text/html": [
 844 |               "<div>\n",
 845 |               "<style scoped>\n",
 846 |               "    .dataframe tbody tr th:only-of-type {\n",
 847 |               "        vertical-align: middle;\n",
 848 |               "    }\n",
 849 |               "\n",
 850 |               "    .dataframe tbody tr th {\n",
 851 |               "        vertical-align: top;\n",
 852 |               "    }\n",
 853 |               "\n",
 854 |               "    .dataframe thead th {\n",
 855 |               "        text-align: right;\n",
 856 |               "    }\n",
 857 |               "</style>\n",
 858 |               "<table border=\"1\" class=\"dataframe\">\n",
 859 |               "  <thead>\n",
 860 |               "    <tr style=\"text-align: right;\">\n",
 861 |               "      <th></th>\n",
 862 |               "      <th>Open</th>\n",
 863 |               "      <th>High</th>\n",
 864 |               "      <th>Low</th>\n",
 865 |               "      <th>Close</th>\n",
 866 |               "      <th>Adj Close</th>\n",
 867 |               "      <th>Volume</th>\n",
 868 |               "    </tr>\n",
 869 |               "  </thead>\n",
 870 |               "  <tbody>\n",
 871 |               "    <tr>\n",
 872 |               "      <th>Open</th>\n",
 873 |               "      <td>1.000000</td>\n",
 874 |               "      <td>0.999904</td>\n",
 875 |               "      <td>0.999845</td>\n",
 876 |               "      <td>0.999745</td>\n",
 877 |               "      <td>0.999745</td>\n",
 878 |               "      <td>-0.564258</td>\n",
 879 |               "    </tr>\n",
 880 |               "    <tr>\n",
 881 |               "      <th>High</th>\n",
 882 |               "      <td>0.999904</td>\n",
 883 |               "      <td>1.000000</td>\n",
 884 |               "      <td>0.999834</td>\n",
 885 |               "      <td>0.999868</td>\n",
 886 |               "      <td>0.999868</td>\n",
 887 |               "      <td>-0.562749</td>\n",
 888 |               "    </tr>\n",
 889 |               "    <tr>\n",
 890 |               "      <th>Low</th>\n",
 891 |               "      <td>0.999845</td>\n",
 892 |               "      <td>0.999834</td>\n",
 893 |               "      <td>1.000000</td>\n",
 894 |               "      <td>0.999899</td>\n",
 895 |               "      <td>0.999899</td>\n",
 896 |               "      <td>-0.567007</td>\n",
 897 |               "    </tr>\n",
 898 |               "    <tr>\n",
 899 |               "      <th>Close</th>\n",
 900 |               "      <td>0.999745</td>\n",
 901 |               "      <td>0.999868</td>\n",
 902 |               "      <td>0.999899</td>\n",
 903 |               "      <td>1.000000</td>\n",
 904 |               "      <td>1.000000</td>\n",
 905 |               "      <td>-0.564967</td>\n",
 906 |               "    </tr>\n",
 907 |               "    <tr>\n",
 908 |               "      <th>Adj Close</th>\n",
 909 |               "      <td>0.999745</td>\n",
 910 |               "      <td>0.999868</td>\n",
 911 |               "      <td>0.999899</td>\n",
 912 |               "      <td>1.000000</td>\n",
 913 |               "      <td>1.000000</td>\n",
 914 |               "      <td>-0.564967</td>\n",
 915 |               "    </tr>\n",
 916 |               "    <tr>\n",
 917 |               "      <th>Volume</th>\n",
 918 |               "      <td>-0.564258</td>\n",
 919 |               "      <td>-0.562749</td>\n",
 920 |               "      <td>-0.567007</td>\n",
 921 |               "      <td>-0.564967</td>\n",
 922 |               "      <td>-0.564967</td>\n",
 923 |               "      <td>1.000000</td>\n",
 924 |               "    </tr>\n",
 925 |               "  </tbody>\n",
 926 |               "</table>\n",
 927 |               "</div>"
 928 |             ],
 929 |             "text/plain": [
 930 |               "               Open      High       Low     Close  Adj Close    Volume\n",
 931 |               "Open       1.000000  0.999904  0.999845  0.999745   0.999745 -0.564258\n",
 932 |               "High       0.999904  1.000000  0.999834  0.999868   0.999868 -0.562749\n",
 933 |               "Low        0.999845  0.999834  1.000000  0.999899   0.999899 -0.567007\n",
 934 |               "Close      0.999745  0.999868  0.999899  1.000000   1.000000 -0.564967\n",
 935 |               "Adj Close  0.999745  0.999868  0.999899  1.000000   1.000000 -0.564967\n",
 936 |               "Volume    -0.564258 -0.562749 -0.567007 -0.564967  -0.564967  1.000000"
 937 |             ]
 938 |           },
 939 |           "metadata": {
 940 |             "tags": []
 941 |           },
 942 |           "execution_count": 12
 943 |         }
 944 |       ]
 945 |     },
 946 |     {
 947 |       "cell_type": "markdown",
 948 |       "metadata": {
 949 |         "id": "xBzK1crrkfLt"
 950 |       },
 951 |       "source": [
 952 |         "A correlation value close to 1 (or -1) tells us there is a strong positive (or negative) correlation, and a correlation of 0 tells us that the data is not linearly correlated at all.\n",
 953 |         "\n",
 954 |         "We will end this Introduction to Pandas by taking a look at the .groupby() method. The .groupby() method allows us to group data in different ways. Let's see how we can group data to get different types of information. For the next examples we are going to load fake data about a fictitious company."
 955 |       ]
 956 |     },
 957 |     {
 958 |       "cell_type": "code",
 959 |       "metadata": {
 960 |         "id": "bbRBKoJFkc2n",
 961 |         "outputId": "d6b37673-2d87-4a0f-aa52-adf1d45f6981",
 962 |         "colab": {
 963 |           "base_uri": "https://localhost:8080/",
 964 |           "height": 343
 965 |         }
 966 |       },
 967 |       "source": [
 968 |         "# We load fake Company data in a DataFrame\n",
 969 |         "data = pd.read_csv('/fake_company.csv')\n",
 970 |         "\n",
 971 |         "data"
 972 |       ],
 973 |       "execution_count": 14,
 974 |       "outputs": [
 975 |         {
 976 |           "output_type": "execute_result",
 977 |           "data": {
 978 |             "text/html": [
 979 |               "<div>\n",
 980 |               "<style scoped>\n",
 981 |               "    .dataframe tbody tr th:only-of-type {\n",
 982 |               "        vertical-align: middle;\n",
 983 |               "    }\n",
 984 |               "\n",
 985 |               "    .dataframe tbody tr th {\n",
 986 |               "        vertical-align: top;\n",
 987 |               "    }\n",
 988 |               "\n",
 989 |               "    .dataframe thead th {\n",
 990 |               "        text-align: right;\n",
 991 |               "    }\n",
 992 |               "</style>\n",
 993 |               "<table border=\"1\" class=\"dataframe\">\n",
 994 |               "  <thead>\n",
 995 |               "    <tr style=\"text-align: right;\">\n",
 996 |               "      <th></th>\n",
 997 |               "      <th>Year</th>\n",
 998 |               "      <th>Name</th>\n",
 999 |               "      <th>Department</th>\n",
1000 |               "      <th>Age</th>\n",
1001 |               "      <th>Salary</th>\n",
1002 |               "    </tr>\n",
1003 |               "  </thead>\n",
1004 |               "  <tbody>\n",
1005 |               "    <tr>\n",
1006 |               "      <th>0</th>\n",
1007 |               "      <td>1990</td>\n",
1008 |               "      <td>Alice</td>\n",
1009 |               "      <td>HR</td>\n",
1010 |               "      <td>25</td>\n",
1011 |               "      <td>50000</td>\n",
1012 |               "    </tr>\n",
1013 |               "    <tr>\n",
1014 |               "      <th>1</th>\n",
1015 |               "      <td>1990</td>\n",
1016 |               "      <td>Bob</td>\n",
1017 |               "      <td>RD</td>\n",
1018 |               "      <td>30</td>\n",
1019 |               "      <td>48000</td>\n",
1020 |               "    </tr>\n",
1021 |               "    <tr>\n",
1022 |               "      <th>2</th>\n",
1023 |               "      <td>1990</td>\n",
1024 |               "      <td>Charlie</td>\n",
1025 |               "      <td>Admin</td>\n",
1026 |               "      <td>45</td>\n",
1027 |               "      <td>55000</td>\n",
1028 |               "    </tr>\n",
1029 |               "    <tr>\n",
1030 |               "      <th>3</th>\n",
1031 |               "      <td>1991</td>\n",
1032 |               "      <td>Alice</td>\n",
1033 |               "      <td>HR</td>\n",
1034 |               "      <td>26</td>\n",
1035 |               "      <td>52000</td>\n",
1036 |               "    </tr>\n",
1037 |               "    <tr>\n",
1038 |               "      <th>4</th>\n",
1039 |               "      <td>1991</td>\n",
1040 |               "      <td>Bob</td>\n",
1041 |               "      <td>RD</td>\n",
1042 |               "      <td>31</td>\n",
1043 |               "      <td>50000</td>\n",
1044 |               "    </tr>\n",
1045 |               "    <tr>\n",
1046 |               "      <th>5</th>\n",
1047 |               "      <td>1991</td>\n",
1048 |               "      <td>Charlie</td>\n",
1049 |               "      <td>Admin</td>\n",
1050 |               "      <td>46</td>\n",
1051 |               "      <td>60000</td>\n",
1052 |               "    </tr>\n",
1053 |               "    <tr>\n",
1054 |               "      <th>6</th>\n",
1055 |               "      <td>1992</td>\n",
1056 |               "      <td>Alice</td>\n",
1057 |               "      <td>Admin</td>\n",
1058 |               "      <td>27</td>\n",
1059 |               "      <td>60000</td>\n",
1060 |               "    </tr>\n",
1061 |               "    <tr>\n",
1062 |               "      <th>7</th>\n",
1063 |               "      <td>1992</td>\n",
1064 |               "      <td>Bob</td>\n",
1065 |               "      <td>RD</td>\n",
1066 |               "      <td>32</td>\n",
1067 |               "      <td>52000</td>\n",
1068 |               "    </tr>\n",
1069 |               "    <tr>\n",
1070 |               "      <th>8</th>\n",
1071 |               "      <td>1992</td>\n",
1072 |               "      <td>Charlie</td>\n",
1073 |               "      <td>Admin</td>\n",
1074 |               "      <td>28</td>\n",
1075 |               "      <td>62000</td>\n",
1076 |               "    </tr>\n",
1077 |               "  </tbody>\n",
1078 |               "</table>\n",
1079 |               "</div>"
1080 |             ],
1081 |             "text/plain": [
1082 |               "   Year     Name Department  Age  Salary\n",
1083 |               "0  1990    Alice         HR   25   50000\n",
1084 |               "1  1990      Bob         RD   30   48000\n",
1085 |               "2  1990  Charlie      Admin   45   55000\n",
1086 |               "3  1991    Alice         HR   26   52000\n",
1087 |               "4  1991      Bob         RD   31   50000\n",
1088 |               "5  1991  Charlie      Admin   46   60000\n",
1089 |               "6  1992    Alice      Admin   27   60000\n",
1090 |               "7  1992      Bob         RD   32   52000\n",
1091 |               "8  1992  Charlie      Admin   28   62000"
1092 |             ]
1093 |           },
1094 |           "metadata": {
1095 |             "tags": []
1096 |           },
1097 |           "execution_count": 14
1098 |         }
1099 |       ]
1100 |     },
1101 |     {
1102 |       "cell_type": "markdown",
1103 |       "metadata": {
1104 |         "id": "A_0sbu0Qo00-"
1105 |       },
1106 |       "source": [
1107 |         "We see that the data contains information for the years 1990 through 1992. For each year we see the names of the employees, the department they work for, their age, and their annual salary. Now, let's use the ```.groupby()``` method to summarize this information.\n",
1108 |         "\n",
1109 |         "Let's calculate how much money the company spent in salaries each year. To do this, we will group the data by Year using the ```.groupby()``` method and then we will add up the salaries of all the employees by using the ```.sum()``` method."
1110 |       ]
1111 |     },
1112 |     {
1113 |       "cell_type": "code",
1114 |       "metadata": {
1115 |         "id": "504rMBEiki5h",
1116 |         "outputId": "57c31bdf-e611-423e-f6ce-3fc413b0e87d",
1117 |         "colab": {
1118 |           "base_uri": "https://localhost:8080/"
1119 |         }
1120 |       },
1121 |       "source": [
1122 |         "# We display the total amount of money spent in salaries each year\n",
1123 |         "data.groupby(['Year'])['Salary'].sum()"
1124 |       ],
1125 |       "execution_count": 15,
1126 |       "outputs": [
1127 |         {
1128 |           "output_type": "execute_result",
1129 |           "data": {
1130 |             "text/plain": [
1131 |               "Year\n",
1132 |               "1990    153000\n",
1133 |               "1991    162000\n",
1134 |               "1992    174000\n",
1135 |               "Name: Salary, dtype: int64"
1136 |             ]
1137 |           },
1138 |           "metadata": {
1139 |             "tags": []
1140 |           },
1141 |           "execution_count": 15
1142 |         }
1143 |       ]
1144 |     },
1145 |     {
1146 |       "cell_type": "markdown",
1147 |       "metadata": {
1148 |         "id": "NuqQeRX2o9lJ"
1149 |       },
1150 |       "source": [
1151 |         "We see that the company spent a total of 153,000 dollars in 1990, 162,000 in 1991, and 174,000 in 1992.\n",
1152 |         "\n",
1153 |         "Now, let's suppose we want to know what the average salary was for each year. In this case, we will group the data by Year using the ```.groupby()``` method, just as we did before, and then we use the ```.mean()``` method to get the average salary. Let's see how this works."
1154 |       ]
1155 |     },
1156 |     {
1157 |       "cell_type": "code",
1158 |       "metadata": {
1159 |         "id": "51eARHI4pEg8",
1160 |         "outputId": "04c287e7-7f40-482a-96e7-e8759ccf87e9",
1161 |         "colab": {
1162 |           "base_uri": "https://localhost:8080/",
1163 |           "height": 120
1164 |         }
1165 |       },
1166 |       "source": [
1167 |         "# We display the average salary per year\n",
1168 |         "data.groupby(['Year'])['Salary'].mean()"
1169 |       ],
1170 |       "execution_count": 16,
1171 |       "outputs": [
1172 |         {
1173 |           "output_type": "execute_result",
1174 |           "data": {
1175 |             "text/plain": [
1176 |               "Year\n",
1177 |               "1990    51000\n",
1178 |               "1991    54000\n",
1179 |               "1992    58000\n",
1180 |               "Name: Salary, dtype: int64"
1181 |             ]
1182 |           },
1183 |           "metadata": {
1184 |             "tags": []
1185 |           },
1186 |           "execution_count": 16
1187 |         }
1188 |       ]
1189 |     },
1190 |     {
1191 |       "cell_type": "markdown",
1192 |       "metadata": {
1193 |         "id": "-NsUOKyPpH5X"
1194 |       },
1195 |       "source": [
1196 |         "We see that the average salary in 1990 was 51,000 dollars, 54,000 in 1991, and 58,000 in 1992.\n",
1197 |         "\n",
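1198 |         "Grouping the data by Name instead of Year would tell us, in the same way, how much each employee got paid over those three years. Now let's see what the salary distribution was per department per year. In this case, we will group the data by both Year and Department using the ```.groupby()``` method and then we will add up the salaries for each department. Let's see the result."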
1199 |       ]
1200 |     },
1201 |     {
1202 |       "cell_type": "code",
1203 |       "metadata": {
1204 |         "id": "-Rh4BaZ2pMGr",
1205 |         "outputId": "e207694a-ed45-4d3e-bb4d-e6f69598e535",
1206 |         "colab": {
1207 |           "base_uri": "https://localhost:8080/",
1208 |           "height": 203
1209 |         }
1210 |       },
1211 |       "source": [
1212 |         "# We display the salary distribution per department per year.\n",
1213 |         "data.groupby(['Year', 'Department'])['Salary'].sum()"
1214 |       ],
1215 |       "execution_count": 17,
1216 |       "outputs": [
1217 |         {
1218 |           "output_type": "execute_result",
1219 |           "data": {
1220 |             "text/plain": [
1221 |               "Year  Department\n",
1222 |               "1990  Admin          55000\n",
1223 |               "      HR             50000\n",
1224 |               "      RD             48000\n",
1225 |               "1991  Admin          60000\n",
1226 |               "      HR             52000\n",
1227 |               "      RD             50000\n",
1228 |               "1992  Admin         122000\n",
1229 |               "      RD             52000\n",
1230 |               "Name: Salary, dtype: int64"
1231 |             ]
1232 |           },
1233 |           "metadata": {
1234 |             "tags": []
1235 |           },
1236 |           "execution_count": 17
1237 |         }
1238 |       ]
1239 |     },
1240 |     {
1241 |       "cell_type": "markdown",
1242 |       "metadata": {
1243 |         "id": "pc6lM-xypU22"
1244 |       },
1245 |       "source": [
1246 |         "We see that in 1990 the Admin department paid 55,000 dollars in salaries, the HR department paid 50,000, and the RD department 48,000. In 1992, by contrast, the Admin department paid 122,000 dollars in salaries and the RD department paid 52,000."
1247 |       ]
1248 |     },
1249 |     {
1250 |       "cell_type": "code",
1251 |       "metadata": {
1252 |         "id": "0XftCH5JsnU3"
1253 |       },
1254 |       "source": [
1255 |         ""
1256 |       ],
1257 |       "execution_count": null,
1258 |       "outputs": []
1259 |     }
1260 |   ]
1261 | }


--------------------------------------------------------------------------------
/Pandas_Part_2.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |   "nbformat": 4,
   3 |   "nbformat_minor": 0,
   4 |   "metadata": {
   5 |     "colab": {
   6 |       "name": "Day_9_Pandas_2.ipynb",
   7 |       "provenance": []
   8 |     },
   9 |     "kernelspec": {
  10 |       "name": "python3",
  11 |       "display_name": "Python 3"
  12 |     }
  13 |   },
  14 |   "cells": [
  15 |     {
  16 |       "cell_type": "markdown",
  17 |       "metadata": {
  18 |         "id": "P2-S_RPdx5Nz"
  19 |       },
  20 |       "source": [
  21 |         "## **Dealing with NaN values:**\n",
  22 |         "As mentioned earlier, before we can begin training our learning algorithms with large datasets, we usually need to clean the data first. This means we need to have a method for detecting and correcting errors in our data. \n",
  23 |         "\n",
  24 |         "While any given dataset can have many types of bad data, such as outliers or incorrect values, the type of bad data we encounter most often is missing values. As we saw earlier, Pandas assigns ```NaN``` values to missing data. In this lesson we will learn how to detect and deal with ```NaN``` values.\n",
  25 |         "\n",
  26 |         "We will begin by creating a DataFrame with some ```NaN``` values in it.\n",
  27 |         "\n",
  28 |         "\n"
  29 |       ]
  30 |     },
  31 |     {
  32 |       "cell_type": "code",
  33 |       "metadata": {
  34 |         "id": "Cdbo5DqCv_fz",
  35 |         "outputId": "aa84e080-0d13-47b3-a9f0-8ccde009a7f2",
  36 |         "colab": {
  37 |           "base_uri": "https://localhost:8080/",
  38 |           "height": 160
  39 |         }
  40 |       },
  41 |       "source": [
  42 |         "import pandas as pd\n",
  43 |         "\n",
  44 |         "# We create a list of Python dictionaries\n",
  45 |         "items2 = [{'bikes': 20, 'pants': 30, 'watches': 35, 'shirts': 15, 'shoes':8, 'suits':45},\n",
  46 |         "{'watches': 10, 'glasses': 50, 'bikes': 15, 'pants':5, 'shirts': 2, 'shoes':5, 'suits':7},\n",
  47 |         "{'bikes': 20, 'pants': 30, 'watches': 35, 'glasses': 4, 'shoes':10}]\n",
  48 |         "\n",
  49 |         "# We create a DataFrame  and provide the row index\n",
  50 |         "store_items = pd.DataFrame(items2, index = ['store 1', 'store 2', 'store 3'])\n",
  51 |         "\n",
  52 |         "# We display the DataFrame\n",
  53 |         "store_items"
  54 |       ],
  55 |       "execution_count": 2,
  56 |       "outputs": [
  57 |         {
  58 |           "output_type": "execute_result",
  59 |           "data": {
  60 |             "text/html": [
  61 |               "<div>\n",
  62 |               "<style scoped>\n",
  63 |               "    .dataframe tbody tr th:only-of-type {\n",
  64 |               "        vertical-align: middle;\n",
  65 |               "    }\n",
  66 |               "\n",
  67 |               "    .dataframe tbody tr th {\n",
  68 |               "        vertical-align: top;\n",
  69 |               "    }\n",
  70 |               "\n",
  71 |               "    .dataframe thead th {\n",
  72 |               "        text-align: right;\n",
  73 |               "    }\n",
  74 |               "</style>\n",
  75 |               "<table border=\"1\" class=\"dataframe\">\n",
  76 |               "  <thead>\n",
  77 |               "    <tr style=\"text-align: right;\">\n",
  78 |               "      <th></th>\n",
  79 |               "      <th>bikes</th>\n",
  80 |               "      <th>pants</th>\n",
  81 |               "      <th>watches</th>\n",
  82 |               "      <th>shirts</th>\n",
  83 |               "      <th>shoes</th>\n",
  84 |               "      <th>suits</th>\n",
  85 |               "      <th>glasses</th>\n",
  86 |               "    </tr>\n",
  87 |               "  </thead>\n",
  88 |               "  <tbody>\n",
  89 |               "    <tr>\n",
  90 |               "      <th>store 1</th>\n",
  91 |               "      <td>20</td>\n",
  92 |               "      <td>30</td>\n",
  93 |               "      <td>35</td>\n",
  94 |               "      <td>15.0</td>\n",
  95 |               "      <td>8</td>\n",
  96 |               "      <td>45.0</td>\n",
  97 |               "      <td>NaN</td>\n",
  98 |               "    </tr>\n",
  99 |               "    <tr>\n",
 100 |               "      <th>store 2</th>\n",
 101 |               "      <td>15</td>\n",
 102 |               "      <td>5</td>\n",
 103 |               "      <td>10</td>\n",
 104 |               "      <td>2.0</td>\n",
 105 |               "      <td>5</td>\n",
 106 |               "      <td>7.0</td>\n",
 107 |               "      <td>50.0</td>\n",
 108 |               "    </tr>\n",
 109 |               "    <tr>\n",
 110 |               "      <th>store 3</th>\n",
 111 |               "      <td>20</td>\n",
 112 |               "      <td>30</td>\n",
 113 |               "      <td>35</td>\n",
 114 |               "      <td>NaN</td>\n",
 115 |               "      <td>10</td>\n",
 116 |               "      <td>NaN</td>\n",
 117 |               "      <td>4.0</td>\n",
 118 |               "    </tr>\n",
 119 |               "  </tbody>\n",
 120 |               "</table>\n",
 121 |               "</div>"
 122 |             ],
 123 |             "text/plain": [
 124 |               "         bikes  pants  watches  shirts  shoes  suits  glasses\n",
 125 |               "store 1     20     30       35    15.0      8   45.0      NaN\n",
 126 |               "store 2     15      5       10     2.0      5    7.0     50.0\n",
 127 |               "store 3     20     30       35     NaN     10    NaN      4.0"
 128 |             ]
 129 |           },
 130 |           "metadata": {
 131 |             "tags": []
 132 |           },
 133 |           "execution_count": 2
 134 |         }
 135 |       ]
 136 |     },
 137 |     {
 138 |       "cell_type": "markdown",
 139 |       "metadata": {
 140 |         "id": "WwTe9sY01ftK"
 141 |       },
 142 |       "source": [
  143 |         "In cases where we load very large datasets into a DataFrame, possibly with millions of items, the number of NaN values is not easily visualized. For these cases, we can use a combination of methods to count the number of ```NaN``` values in our data. The following examples combine the ```.isnull()``` and the ```.sum()``` methods to count the number of ```NaN``` values in our DataFrame. We start by looking at what ```.isnull()``` returns on its own:"
 144 |       ]
 145 |     },
 146 |     {
 147 |       "cell_type": "code",
 148 |       "metadata": {
 149 |         "id": "7-oMtUkO174S",
 150 |         "outputId": "7109e9f6-69a8-48df-d598-9b493261a9f7",
 151 |         "colab": {
 152 |           "base_uri": "https://localhost:8080/",
 153 |           "height": 160
 154 |         }
 155 |       },
 156 |       "source": [
 157 |         "store_items.isnull()"
 158 |       ],
 159 |       "execution_count": 6,
 160 |       "outputs": [
 161 |         {
 162 |           "output_type": "execute_result",
 163 |           "data": {
 164 |             "text/html": [
 165 |               "<div>\n",
 166 |               "<style scoped>\n",
 167 |               "    .dataframe tbody tr th:only-of-type {\n",
 168 |               "        vertical-align: middle;\n",
 169 |               "    }\n",
 170 |               "\n",
 171 |               "    .dataframe tbody tr th {\n",
 172 |               "        vertical-align: top;\n",
 173 |               "    }\n",
 174 |               "\n",
 175 |               "    .dataframe thead th {\n",
 176 |               "        text-align: right;\n",
 177 |               "    }\n",
 178 |               "</style>\n",
 179 |               "<table border=\"1\" class=\"dataframe\">\n",
 180 |               "  <thead>\n",
 181 |               "    <tr style=\"text-align: right;\">\n",
 182 |               "      <th></th>\n",
 183 |               "      <th>bikes</th>\n",
 184 |               "      <th>pants</th>\n",
 185 |               "      <th>watches</th>\n",
 186 |               "      <th>shirts</th>\n",
 187 |               "      <th>shoes</th>\n",
 188 |               "      <th>suits</th>\n",
 189 |               "      <th>glasses</th>\n",
 190 |               "    </tr>\n",
 191 |               "  </thead>\n",
 192 |               "  <tbody>\n",
 193 |               "    <tr>\n",
 194 |               "      <th>store 1</th>\n",
 195 |               "      <td>False</td>\n",
 196 |               "      <td>False</td>\n",
 197 |               "      <td>False</td>\n",
 198 |               "      <td>False</td>\n",
 199 |               "      <td>False</td>\n",
 200 |               "      <td>False</td>\n",
 201 |               "      <td>True</td>\n",
 202 |               "    </tr>\n",
 203 |               "    <tr>\n",
 204 |               "      <th>store 2</th>\n",
 205 |               "      <td>False</td>\n",
 206 |               "      <td>False</td>\n",
 207 |               "      <td>False</td>\n",
 208 |               "      <td>False</td>\n",
 209 |               "      <td>False</td>\n",
 210 |               "      <td>False</td>\n",
 211 |               "      <td>False</td>\n",
 212 |               "    </tr>\n",
 213 |               "    <tr>\n",
 214 |               "      <th>store 3</th>\n",
 215 |               "      <td>False</td>\n",
 216 |               "      <td>False</td>\n",
 217 |               "      <td>False</td>\n",
 218 |               "      <td>True</td>\n",
 219 |               "      <td>False</td>\n",
 220 |               "      <td>True</td>\n",
 221 |               "      <td>False</td>\n",
 222 |               "    </tr>\n",
 223 |               "  </tbody>\n",
 224 |               "</table>\n",
 225 |               "</div>"
 226 |             ],
 227 |             "text/plain": [
 228 |               "         bikes  pants  watches  shirts  shoes  suits  glasses\n",
 229 |               "store 1  False  False    False   False  False  False     True\n",
 230 |               "store 2  False  False    False   False  False  False    False\n",
 231 |               "store 3  False  False    False    True  False   True    False"
 232 |             ]
 233 |           },
 234 |           "metadata": {
 235 |             "tags": []
 236 |           },
 237 |           "execution_count": 6
 238 |         }
 239 |       ]
 240 |     },
 241 |     {
 242 |       "cell_type": "markdown",
 243 |       "metadata": {
 244 |         "id": "wfprzvTu2ETi"
 245 |       },
 246 |       "source": [
 247 |         "> In Pandas, logical True values have numerical value 1 and logical False values have numerical value 0. Therefore, we can count the number of NaN values by counting the number of logical True values."
 248 |       ]
 249 |     },
 250 |     {
 251 |       "cell_type": "code",
 252 |       "metadata": {
 253 |         "id": "W_AV_rKL0_kM",
 254 |         "outputId": "30028950-2b6e-4aeb-8f41-d61b10536b60",
 255 |         "colab": {
 256 |           "base_uri": "https://localhost:8080/",
 257 |           "height": 170
 258 |         }
 259 |       },
 260 |       "source": [
 261 |         "# We count the number of NaN values in the columns of store_items\n",
 262 |         "x =  store_items.isnull().sum()\n",
 263 |         "\n",
 264 |         "# We print x\n",
 265 |         "print('Number of NaN values in our DataFrame:', x)"
 266 |       ],
 267 |       "execution_count": 4,
 268 |       "outputs": [
 269 |         {
 270 |           "output_type": "stream",
 271 |           "text": [
 272 |             "Number of NaN values in our DataFrame: bikes      0\n",
 273 |             "pants      0\n",
 274 |             "watches    0\n",
 275 |             "shirts     1\n",
 276 |             "shoes      0\n",
 277 |             "suits      1\n",
 278 |             "glasses    1\n",
 279 |             "dtype: int64\n"
 280 |           ],
 281 |           "name": "stdout"
 282 |         }
 283 |       ]
 284 |     },
 285 |     {
 286 |       "cell_type": "markdown",
 287 |       "metadata": {
 288 |         "id": "eDRunbOt2N4c"
 289 |       },
 290 |       "source": [
  291 |         "> In order to count the total number of logical True values we use the .sum() method twice. We have to use it twice because the first sum returns a Pandas Series with the sums of logical True values along columns, as we saw in the output above. The next cell applies .sum() a second time to get the total:"
 292 |       ]
 293 |     },
 294 |     {
 295 |       "cell_type": "code",
 296 |       "metadata": {
 297 |         "id": "r5DUDDh-1qdC",
 298 |         "outputId": "490e8ef5-93d1-48d7-de03-aff0f2c685bc",
 299 |         "colab": {
 300 |           "base_uri": "https://localhost:8080/",
 301 |           "height": 54
 302 |         }
 303 |       },
 304 |       "source": [
 305 |         "# We count the number of NaN values in store_items\n",
 306 |         "x =  store_items.isnull().sum().sum()\n",
 307 |         "\n",
 308 |         "# We print x\n",
 309 |         "print('Number of NaN values in our DataFrame:', x)"
 310 |       ],
 311 |       "execution_count": 5,
 312 |       "outputs": [
 313 |         {
 314 |           "output_type": "stream",
 315 |           "text": [
 316 |             "Number of NaN values in our DataFrame: 3\n"
 317 |           ],
 318 |           "name": "stdout"
 319 |         }
 320 |       ]
 321 |     },
 322 |     {
 323 |       "cell_type": "markdown",
 324 |       "metadata": {
 325 |         "id": "1GoFgxMx2iBw"
 326 |       },
 327 |       "source": [
 328 |         "> The second sum will then add up the 1s in the above Pandas Series.\n",
 329 |         "\n",
  330 |         "Instead of counting the number of NaN values, we can also do the opposite and count the number of non-NaN values. We can do this by using the .count() method, as shown below:"
 331 |       ]
 332 |     },
 333 |     {
 334 |       "cell_type": "code",
 335 |       "metadata": {
 336 |         "id": "0GnX8msl14sQ",
 337 |         "outputId": "0fc11eab-19d2-4ba8-92ff-7db85dc71d20",
 338 |         "colab": {
 339 |           "base_uri": "https://localhost:8080/",
 340 |           "height": 203
 341 |         }
 342 |       },
 343 |       "source": [
 344 |         "# We print the number of non-NaN values in our DataFrame\n",
 345 |         "print()\n",
 346 |         "print('Number of non-NaN values in the columns of our DataFrame:\\n', \n",
 347 |         "        store_items.count())"
 348 |       ],
 349 |       "execution_count": 7,
 350 |       "outputs": [
 351 |         {
 352 |           "output_type": "stream",
 353 |           "text": [
 354 |             "\n",
 355 |             "Number of non-NaN values in the columns of our DataFrame:\n",
 356 |             " bikes      3\n",
 357 |             "pants      3\n",
 358 |             "watches    3\n",
 359 |             "shirts     2\n",
 360 |             "shoes      3\n",
 361 |             "suits      2\n",
 362 |             "glasses    2\n",
 363 |             "dtype: int64\n"
 364 |           ],
 365 |           "name": "stdout"
 366 |         }
 367 |       ]
 368 |     },
 369 |     {
 370 |       "cell_type": "markdown",
 371 |       "metadata": {
 372 |         "id": "TW02Ukj-3ggb"
 373 |       },
 374 |       "source": [
  375 |         "Now that we have learned how to check whether our dataset has any NaN values in it, the next step is to decide what to do with them. In general we have two options: we can either delete or replace the NaN values. In the following examples we will show you how to do both.\n",
 376 |         "\n",
  377 |         "We will start by learning how to eliminate rows or columns from our DataFrame that contain any NaN values. The .dropna(axis) method eliminates any rows with NaN values when axis = 0 is used and eliminates any columns with NaN values when axis = 1 is used. Let's see some examples:"
 378 |       ]
 379 |     },
 380 |     {
 381 |       "cell_type": "code",
 382 |       "metadata": {
 383 |         "id": "oIGm4Jft23sA",
 384 |         "outputId": "cf463378-0773-4dd5-8e4b-02450250d167",
 385 |         "colab": {
 386 |           "base_uri": "https://localhost:8080/",
 387 |           "height": 100
 388 |         }
 389 |       },
 390 |       "source": [
 391 |         "# We drop any rows with NaN values\n",
 392 |         "store_items.dropna(axis = 0)"
 393 |       ],
 394 |       "execution_count": 8,
 395 |       "outputs": [
 396 |         {
 397 |           "output_type": "execute_result",
 398 |           "data": {
 399 |             "text/html": [
 400 |               "<div>\n",
 401 |               "<style scoped>\n",
 402 |               "    .dataframe tbody tr th:only-of-type {\n",
 403 |               "        vertical-align: middle;\n",
 404 |               "    }\n",
 405 |               "\n",
 406 |               "    .dataframe tbody tr th {\n",
 407 |               "        vertical-align: top;\n",
 408 |               "    }\n",
 409 |               "\n",
 410 |               "    .dataframe thead th {\n",
 411 |               "        text-align: right;\n",
 412 |               "    }\n",
 413 |               "</style>\n",
 414 |               "<table border=\"1\" class=\"dataframe\">\n",
 415 |               "  <thead>\n",
 416 |               "    <tr style=\"text-align: right;\">\n",
 417 |               "      <th></th>\n",
 418 |               "      <th>bikes</th>\n",
 419 |               "      <th>pants</th>\n",
 420 |               "      <th>watches</th>\n",
 421 |               "      <th>shirts</th>\n",
 422 |               "      <th>shoes</th>\n",
 423 |               "      <th>suits</th>\n",
 424 |               "      <th>glasses</th>\n",
 425 |               "    </tr>\n",
 426 |               "  </thead>\n",
 427 |               "  <tbody>\n",
 428 |               "    <tr>\n",
 429 |               "      <th>store 2</th>\n",
 430 |               "      <td>15</td>\n",
 431 |               "      <td>5</td>\n",
 432 |               "      <td>10</td>\n",
 433 |               "      <td>2.0</td>\n",
 434 |               "      <td>5</td>\n",
 435 |               "      <td>7.0</td>\n",
 436 |               "      <td>50.0</td>\n",
 437 |               "    </tr>\n",
 438 |               "  </tbody>\n",
 439 |               "</table>\n",
 440 |               "</div>"
 441 |             ],
 442 |             "text/plain": [
 443 |               "         bikes  pants  watches  shirts  shoes  suits  glasses\n",
 444 |               "store 2     15      5       10     2.0      5    7.0     50.0"
 445 |             ]
 446 |           },
 447 |           "metadata": {
 448 |             "tags": []
 449 |           },
 450 |           "execution_count": 8
 451 |         }
 452 |       ]
 453 |     },
 454 |     {
 455 |       "cell_type": "code",
 456 |       "metadata": {
 457 |         "id": "5A83CBac-Kan",
 458 |         "outputId": "674a6735-6563-4267-ee6d-f045d7fb59ae",
 459 |         "colab": {
 460 |           "base_uri": "https://localhost:8080/",
 461 |           "height": 160
 462 |         }
 463 |       },
 464 |       "source": [
 465 |         "# We drop any columns with NaN values\n",
 466 |         "store_items.dropna(axis = 1)"
 467 |       ],
 468 |       "execution_count": 9,
 469 |       "outputs": [
 470 |         {
 471 |           "output_type": "execute_result",
 472 |           "data": {
 473 |             "text/html": [
 474 |               "<div>\n",
 475 |               "<style scoped>\n",
 476 |               "    .dataframe tbody tr th:only-of-type {\n",
 477 |               "        vertical-align: middle;\n",
 478 |               "    }\n",
 479 |               "\n",
 480 |               "    .dataframe tbody tr th {\n",
 481 |               "        vertical-align: top;\n",
 482 |               "    }\n",
 483 |               "\n",
 484 |               "    .dataframe thead th {\n",
 485 |               "        text-align: right;\n",
 486 |               "    }\n",
 487 |               "</style>\n",
 488 |               "<table border=\"1\" class=\"dataframe\">\n",
 489 |               "  <thead>\n",
 490 |               "    <tr style=\"text-align: right;\">\n",
 491 |               "      <th></th>\n",
 492 |               "      <th>bikes</th>\n",
 493 |               "      <th>pants</th>\n",
 494 |               "      <th>watches</th>\n",
 495 |               "      <th>shoes</th>\n",
 496 |               "    </tr>\n",
 497 |               "  </thead>\n",
 498 |               "  <tbody>\n",
 499 |               "    <tr>\n",
 500 |               "      <th>store 1</th>\n",
 501 |               "      <td>20</td>\n",
 502 |               "      <td>30</td>\n",
 503 |               "      <td>35</td>\n",
 504 |               "      <td>8</td>\n",
 505 |               "    </tr>\n",
 506 |               "    <tr>\n",
 507 |               "      <th>store 2</th>\n",
 508 |               "      <td>15</td>\n",
 509 |               "      <td>5</td>\n",
 510 |               "      <td>10</td>\n",
 511 |               "      <td>5</td>\n",
 512 |               "    </tr>\n",
 513 |               "    <tr>\n",
 514 |               "      <th>store 3</th>\n",
 515 |               "      <td>20</td>\n",
 516 |               "      <td>30</td>\n",
 517 |               "      <td>35</td>\n",
 518 |               "      <td>10</td>\n",
 519 |               "    </tr>\n",
 520 |               "  </tbody>\n",
 521 |               "</table>\n",
 522 |               "</div>"
 523 |             ],
 524 |             "text/plain": [
 525 |               "         bikes  pants  watches  shoes\n",
 526 |               "store 1     20     30       35      8\n",
 527 |               "store 2     15      5       10      5\n",
 528 |               "store 3     20     30       35     10"
 529 |             ]
 530 |           },
 531 |           "metadata": {
 532 |             "tags": []
 533 |           },
 534 |           "execution_count": 9
 535 |         }
 536 |       ]
 537 |     },
 538 |     {
 539 |       "cell_type": "markdown",
 540 |       "metadata": {
 541 |         "id": "Hu6m-KLW-O5u"
 542 |       },
 543 |       "source": [
  544 |         "Notice that the .dropna() method eliminates (drops) the rows or columns with NaN values out of place, that is, it returns a new DataFrame. This means that the original DataFrame is not modified. You can always remove the desired rows or columns in place by setting the keyword inplace = True inside the .dropna() method.\n",
 545 |         "\n",
 546 |         "Now, instead of eliminating NaN values, we can replace them with suitable values. We could choose for example to replace all NaN values with the value 0. We can do this by using the .fillna() method as shown below."
 547 |       ]
 548 |     },
 549 |     {
 550 |       "cell_type": "code",
 551 |       "metadata": {
 552 |         "id": "eCVbempI-OpA",
 553 |         "outputId": "a5062f3d-2cbc-41db-df67-38f67227f44e",
 554 |         "colab": {
 555 |           "base_uri": "https://localhost:8080/",
 556 |           "height": 160
 557 |         }
 558 |       },
 559 |       "source": [
 560 |         "# We replace all NaN values with 0\n",
 561 |         "store_items.fillna(0)"
 562 |       ],
 563 |       "execution_count": 10,
 564 |       "outputs": [
 565 |         {
 566 |           "output_type": "execute_result",
 567 |           "data": {
 568 |             "text/html": [
 569 |               "<div>\n",
 570 |               "<style scoped>\n",
 571 |               "    .dataframe tbody tr th:only-of-type {\n",
 572 |               "        vertical-align: middle;\n",
 573 |               "    }\n",
 574 |               "\n",
 575 |               "    .dataframe tbody tr th {\n",
 576 |               "        vertical-align: top;\n",
 577 |               "    }\n",
 578 |               "\n",
 579 |               "    .dataframe thead th {\n",
 580 |               "        text-align: right;\n",
 581 |               "    }\n",
 582 |               "</style>\n",
 583 |               "<table border=\"1\" class=\"dataframe\">\n",
 584 |               "  <thead>\n",
 585 |               "    <tr style=\"text-align: right;\">\n",
 586 |               "      <th></th>\n",
 587 |               "      <th>bikes</th>\n",
 588 |               "      <th>pants</th>\n",
 589 |               "      <th>watches</th>\n",
 590 |               "      <th>shirts</th>\n",
 591 |               "      <th>shoes</th>\n",
 592 |               "      <th>suits</th>\n",
 593 |               "      <th>glasses</th>\n",
 594 |               "    </tr>\n",
 595 |               "  </thead>\n",
 596 |               "  <tbody>\n",
 597 |               "    <tr>\n",
 598 |               "      <th>store 1</th>\n",
 599 |               "      <td>20</td>\n",
 600 |               "      <td>30</td>\n",
 601 |               "      <td>35</td>\n",
 602 |               "      <td>15.0</td>\n",
 603 |               "      <td>8</td>\n",
 604 |               "      <td>45.0</td>\n",
 605 |               "      <td>0.0</td>\n",
 606 |               "    </tr>\n",
 607 |               "    <tr>\n",
 608 |               "      <th>store 2</th>\n",
 609 |               "      <td>15</td>\n",
 610 |               "      <td>5</td>\n",
 611 |               "      <td>10</td>\n",
 612 |               "      <td>2.0</td>\n",
 613 |               "      <td>5</td>\n",
 614 |               "      <td>7.0</td>\n",
 615 |               "      <td>50.0</td>\n",
 616 |               "    </tr>\n",
 617 |               "    <tr>\n",
 618 |               "      <th>store 3</th>\n",
 619 |               "      <td>20</td>\n",
 620 |               "      <td>30</td>\n",
 621 |               "      <td>35</td>\n",
 622 |               "      <td>0.0</td>\n",
 623 |               "      <td>10</td>\n",
 624 |               "      <td>0.0</td>\n",
 625 |               "      <td>4.0</td>\n",
 626 |               "    </tr>\n",
 627 |               "  </tbody>\n",
 628 |               "</table>\n",
 629 |               "</div>"
 630 |             ],
 631 |             "text/plain": [
 632 |               "         bikes  pants  watches  shirts  shoes  suits  glasses\n",
 633 |               "store 1     20     30       35    15.0      8   45.0      0.0\n",
 634 |               "store 2     15      5       10     2.0      5    7.0     50.0\n",
 635 |               "store 3     20     30       35     0.0     10    0.0      4.0"
 636 |             ]
 637 |           },
 638 |           "metadata": {
 639 |             "tags": []
 640 |           },
 641 |           "execution_count": 10
 642 |         }
 643 |       ]
 644 |     },
 645 |     {
 646 |       "cell_type": "markdown",
 647 |       "metadata": {
 648 |         "id": "SGRzC9cN-ZM8"
 649 |       },
 650 |       "source": [
  651 |         "We can also use the .fillna() method to replace NaN values with previous values in the DataFrame; this is known as forward filling. When replacing NaN values with forward filling, we can use previous values taken from columns or rows. Calling .fillna(method = 'ffill', axis) uses the forward filling (ffill) method to replace NaN values with the previous known value along the given axis. (Note: in pandas 2.1 and later the method keyword of .fillna() is deprecated; .ffill(axis) is the equivalent call.) Let's see some examples:"
 652 |       ]
 653 |     },
 654 |     {
 655 |       "cell_type": "code",
 656 |       "metadata": {
 657 |         "id": "Q6d-csaI-MNs",
 658 |         "outputId": "36d474ab-4f9c-4844-c028-58964d8c161b",
 659 |         "colab": {
 660 |           "base_uri": "https://localhost:8080/",
 661 |           "height": 160
 662 |         }
 663 |       },
 664 |       "source": [
 665 |         "# We replace NaN values with the previous value in the column\n",
 666 |         "store_items.fillna(method = 'ffill', axis = 0)"
 667 |       ],
 668 |       "execution_count": 11,
 669 |       "outputs": [
 670 |         {
 671 |           "output_type": "execute_result",
 672 |           "data": {
 673 |             "text/html": [
 674 |               "<div>\n",
 675 |               "<style scoped>\n",
 676 |               "    .dataframe tbody tr th:only-of-type {\n",
 677 |               "        vertical-align: middle;\n",
 678 |               "    }\n",
 679 |               "\n",
 680 |               "    .dataframe tbody tr th {\n",
 681 |               "        vertical-align: top;\n",
 682 |               "    }\n",
 683 |               "\n",
 684 |               "    .dataframe thead th {\n",
 685 |               "        text-align: right;\n",
 686 |               "    }\n",
 687 |               "</style>\n",
 688 |               "<table border=\"1\" class=\"dataframe\">\n",
 689 |               "  <thead>\n",
 690 |               "    <tr style=\"text-align: right;\">\n",
 691 |               "      <th></th>\n",
 692 |               "      <th>bikes</th>\n",
 693 |               "      <th>pants</th>\n",
 694 |               "      <th>watches</th>\n",
 695 |               "      <th>shirts</th>\n",
 696 |               "      <th>shoes</th>\n",
 697 |               "      <th>suits</th>\n",
 698 |               "      <th>glasses</th>\n",
 699 |               "    </tr>\n",
 700 |               "  </thead>\n",
 701 |               "  <tbody>\n",
 702 |               "    <tr>\n",
 703 |               "      <th>store 1</th>\n",
 704 |               "      <td>20</td>\n",
 705 |               "      <td>30</td>\n",
 706 |               "      <td>35</td>\n",
 707 |               "      <td>15.0</td>\n",
 708 |               "      <td>8</td>\n",
 709 |               "      <td>45.0</td>\n",
 710 |               "      <td>NaN</td>\n",
 711 |               "    </tr>\n",
 712 |               "    <tr>\n",
 713 |               "      <th>store 2</th>\n",
 714 |               "      <td>15</td>\n",
 715 |               "      <td>5</td>\n",
 716 |               "      <td>10</td>\n",
 717 |               "      <td>2.0</td>\n",
 718 |               "      <td>5</td>\n",
 719 |               "      <td>7.0</td>\n",
 720 |               "      <td>50.0</td>\n",
 721 |               "    </tr>\n",
 722 |               "    <tr>\n",
 723 |               "      <th>store 3</th>\n",
 724 |               "      <td>20</td>\n",
 725 |               "      <td>30</td>\n",
 726 |               "      <td>35</td>\n",
 727 |               "      <td>2.0</td>\n",
 728 |               "      <td>10</td>\n",
 729 |               "      <td>7.0</td>\n",
 730 |               "      <td>4.0</td>\n",
 731 |               "    </tr>\n",
 732 |               "  </tbody>\n",
 733 |               "</table>\n",
 734 |               "</div>"
 735 |             ],
 736 |             "text/plain": [
 737 |               "         bikes  pants  watches  shirts  shoes  suits  glasses\n",
 738 |               "store 1     20     30       35    15.0      8   45.0      NaN\n",
 739 |               "store 2     15      5       10     2.0      5    7.0     50.0\n",
 740 |               "store 3     20     30       35     2.0     10    7.0      4.0"
 741 |             ]
 742 |           },
 743 |           "metadata": {
 744 |             "tags": []
 745 |           },
 746 |           "execution_count": 11
 747 |         }
 748 |       ]
 749 |     },
 750 |     {
 751 |       "cell_type": "markdown",
 752 |       "metadata": {
 753 |         "id": "ToUvvjSC-i2N"
 754 |       },
 755 |       "source": [
  756 |         "Notice that the NaN value in store 1 (in the glasses column) didn't get replaced. That's because there are no previous values in this column, since the NaN value is the first value in that column. However, if we forward fill along each row instead (axis = 1), so that each NaN takes the previous value in its own row (the value in the column to its left), this NaN does get replaced. Let's take a look:"
 757 |       ]
 758 |     },
 759 |     {
 760 |       "cell_type": "code",
 761 |       "metadata": {
 762 |         "id": "2AkJfUbF-b8K",
 763 |         "outputId": "60ede1a5-70e6-4ea9-f4aa-d1a49b291a4e",
 764 |         "colab": {
 765 |           "base_uri": "https://localhost:8080/",
 766 |           "height": 160
 767 |         }
 768 |       },
 769 |       "source": [
 770 |         "# We replace NaN values with the previous value in the row\n",
 771 |         "store_items.fillna(method = 'ffill', axis = 1)"
 772 |       ],
 773 |       "execution_count": 12,
 774 |       "outputs": [
 775 |         {
 776 |           "output_type": "execute_result",
 777 |           "data": {
 778 |             "text/html": [
 779 |               "<div>\n",
 780 |               "<style scoped>\n",
 781 |               "    .dataframe tbody tr th:only-of-type {\n",
 782 |               "        vertical-align: middle;\n",
 783 |               "    }\n",
 784 |               "\n",
 785 |               "    .dataframe tbody tr th {\n",
 786 |               "        vertical-align: top;\n",
 787 |               "    }\n",
 788 |               "\n",
 789 |               "    .dataframe thead th {\n",
 790 |               "        text-align: right;\n",
 791 |               "    }\n",
 792 |               "</style>\n",
 793 |               "<table border=\"1\" class=\"dataframe\">\n",
 794 |               "  <thead>\n",
 795 |               "    <tr style=\"text-align: right;\">\n",
 796 |               "      <th></th>\n",
 797 |               "      <th>bikes</th>\n",
 798 |               "      <th>pants</th>\n",
 799 |               "      <th>watches</th>\n",
 800 |               "      <th>shirts</th>\n",
 801 |               "      <th>shoes</th>\n",
 802 |               "      <th>suits</th>\n",
 803 |               "      <th>glasses</th>\n",
 804 |               "    </tr>\n",
 805 |               "  </thead>\n",
 806 |               "  <tbody>\n",
 807 |               "    <tr>\n",
 808 |               "      <th>store 1</th>\n",
 809 |               "      <td>20.0</td>\n",
 810 |               "      <td>30.0</td>\n",
 811 |               "      <td>35.0</td>\n",
 812 |               "      <td>15.0</td>\n",
 813 |               "      <td>8.0</td>\n",
 814 |               "      <td>45.0</td>\n",
 815 |               "      <td>45.0</td>\n",
 816 |               "    </tr>\n",
 817 |               "    <tr>\n",
 818 |               "      <th>store 2</th>\n",
 819 |               "      <td>15.0</td>\n",
 820 |               "      <td>5.0</td>\n",
 821 |               "      <td>10.0</td>\n",
 822 |               "      <td>2.0</td>\n",
 823 |               "      <td>5.0</td>\n",
 824 |               "      <td>7.0</td>\n",
 825 |               "      <td>50.0</td>\n",
 826 |               "    </tr>\n",
 827 |               "    <tr>\n",
 828 |               "      <th>store 3</th>\n",
 829 |               "      <td>20.0</td>\n",
 830 |               "      <td>30.0</td>\n",
 831 |               "      <td>35.0</td>\n",
 832 |               "      <td>35.0</td>\n",
 833 |               "      <td>10.0</td>\n",
 834 |               "      <td>10.0</td>\n",
 835 |               "      <td>4.0</td>\n",
 836 |               "    </tr>\n",
 837 |               "  </tbody>\n",
 838 |               "</table>\n",
 839 |               "</div>"
 840 |             ],
 841 |             "text/plain": [
 842 |               "         bikes  pants  watches  shirts  shoes  suits  glasses\n",
 843 |               "store 1   20.0   30.0     35.0    15.0    8.0   45.0     45.0\n",
 844 |               "store 2   15.0    5.0     10.0     2.0    5.0    7.0     50.0\n",
 845 |               "store 3   20.0   30.0     35.0    35.0   10.0   10.0      4.0"
 846 |             ]
 847 |           },
 848 |           "metadata": {
 849 |             "tags": []
 850 |           },
 851 |           "execution_count": 12
 852 |         }
 853 |       ]
 854 |     },
 855 |     {
 856 |       "cell_type": "markdown",
 857 |       "metadata": {
 858 |         "id": "ji9ncbCm_FPg"
 859 |       },
 860 |       "source": [
  861 |         "We see that in this case all the NaN values have been replaced with the previous value in the same row, i.e. the value in the column to their left.\n",
 862 |         "\n",
  863 |         "Similarly, you can choose to replace the NaN values with the values that go after them in the DataFrame; this is known as backward filling. Calling .fillna(method = 'backfill', axis) uses the backward filling (backfill) method to replace NaN values with the next known value along the given axis. Just like with forward filling, we can choose to use row or column values. Let's see some examples:"
 864 |       ]
 865 |     },
 866 |     {
 867 |       "cell_type": "code",
 868 |       "metadata": {
 869 |         "id": "W-wDgBVZ-7yJ",
 870 |         "outputId": "dbd2e194-533a-4c3c-d3ef-918b062d2123",
 871 |         "colab": {
 872 |           "base_uri": "https://localhost:8080/",
 873 |           "height": 160
 874 |         }
 875 |       },
 876 |       "source": [
 877 |         "# We replace NaN values with the next value in the column\n",
 878 |         "store_items.fillna(method = 'backfill', axis = 0)"
 879 |       ],
 880 |       "execution_count": 13,
 881 |       "outputs": [
 882 |         {
 883 |           "output_type": "execute_result",
 884 |           "data": {
 885 |             "text/html": [
 886 |               "<div>\n",
 887 |               "<style scoped>\n",
 888 |               "    .dataframe tbody tr th:only-of-type {\n",
 889 |               "        vertical-align: middle;\n",
 890 |               "    }\n",
 891 |               "\n",
 892 |               "    .dataframe tbody tr th {\n",
 893 |               "        vertical-align: top;\n",
 894 |               "    }\n",
 895 |               "\n",
 896 |               "    .dataframe thead th {\n",
 897 |               "        text-align: right;\n",
 898 |               "    }\n",
 899 |               "</style>\n",
 900 |               "<table border=\"1\" class=\"dataframe\">\n",
 901 |               "  <thead>\n",
 902 |               "    <tr style=\"text-align: right;\">\n",
 903 |               "      <th></th>\n",
 904 |               "      <th>bikes</th>\n",
 905 |               "      <th>pants</th>\n",
 906 |               "      <th>watches</th>\n",
 907 |               "      <th>shirts</th>\n",
 908 |               "      <th>shoes</th>\n",
 909 |               "      <th>suits</th>\n",
 910 |               "      <th>glasses</th>\n",
 911 |               "    </tr>\n",
 912 |               "  </thead>\n",
 913 |               "  <tbody>\n",
 914 |               "    <tr>\n",
 915 |               "      <th>store 1</th>\n",
 916 |               "      <td>20</td>\n",
 917 |               "      <td>30</td>\n",
 918 |               "      <td>35</td>\n",
 919 |               "      <td>15.0</td>\n",
 920 |               "      <td>8</td>\n",
 921 |               "      <td>45.0</td>\n",
 922 |               "      <td>50.0</td>\n",
 923 |               "    </tr>\n",
 924 |               "    <tr>\n",
 925 |               "      <th>store 2</th>\n",
 926 |               "      <td>15</td>\n",
 927 |               "      <td>5</td>\n",
 928 |               "      <td>10</td>\n",
 929 |               "      <td>2.0</td>\n",
 930 |               "      <td>5</td>\n",
 931 |               "      <td>7.0</td>\n",
 932 |               "      <td>50.0</td>\n",
 933 |               "    </tr>\n",
 934 |               "    <tr>\n",
 935 |               "      <th>store 3</th>\n",
 936 |               "      <td>20</td>\n",
 937 |               "      <td>30</td>\n",
 938 |               "      <td>35</td>\n",
 939 |               "      <td>NaN</td>\n",
 940 |               "      <td>10</td>\n",
 941 |               "      <td>NaN</td>\n",
 942 |               "      <td>4.0</td>\n",
 943 |               "    </tr>\n",
 944 |               "  </tbody>\n",
 945 |               "</table>\n",
 946 |               "</div>"
 947 |             ],
 948 |             "text/plain": [
 949 |               "         bikes  pants  watches  shirts  shoes  suits  glasses\n",
 950 |               "store 1     20     30       35    15.0      8   45.0     50.0\n",
 951 |               "store 2     15      5       10     2.0      5    7.0     50.0\n",
 952 |               "store 3     20     30       35     NaN     10    NaN      4.0"
 953 |             ]
 954 |           },
 955 |           "metadata": {
 956 |             "tags": []
 957 |           },
 958 |           "execution_count": 13
 959 |         }
 960 |       ]
 961 |     },
 962 |     {
 963 |       "cell_type": "code",
 964 |       "metadata": {
 965 |         "id": "00-GZ-Jg_Hr4",
 966 |         "outputId": "4ef11e3b-5218-4116-a324-bbee50a7aeaa",
 967 |         "colab": {
 968 |           "base_uri": "https://localhost:8080/",
 969 |           "height": 160
 970 |         }
 971 |       },
 972 |       "source": [
 973 |         "# We replace NaN values with the next value in the row (.bfill() supersedes the deprecated .fillna(method = 'backfill'))\n",
 974 |         "store_items.bfill(axis = 1)"
 975 |       ],
 976 |       "execution_count": 14,
 977 |       "outputs": [
 978 |         {
 979 |           "output_type": "execute_result",
 980 |           "data": {
 981 |             "text/html": [
 982 |               "<div>\n",
 983 |               "<style scoped>\n",
 984 |               "    .dataframe tbody tr th:only-of-type {\n",
 985 |               "        vertical-align: middle;\n",
 986 |               "    }\n",
 987 |               "\n",
 988 |               "    .dataframe tbody tr th {\n",
 989 |               "        vertical-align: top;\n",
 990 |               "    }\n",
 991 |               "\n",
 992 |               "    .dataframe thead th {\n",
 993 |               "        text-align: right;\n",
 994 |               "    }\n",
 995 |               "</style>\n",
 996 |               "<table border=\"1\" class=\"dataframe\">\n",
 997 |               "  <thead>\n",
 998 |               "    <tr style=\"text-align: right;\">\n",
 999 |               "      <th></th>\n",
1000 |               "      <th>bikes</th>\n",
1001 |               "      <th>pants</th>\n",
1002 |               "      <th>watches</th>\n",
1003 |               "      <th>shirts</th>\n",
1004 |               "      <th>shoes</th>\n",
1005 |               "      <th>suits</th>\n",
1006 |               "      <th>glasses</th>\n",
1007 |               "    </tr>\n",
1008 |               "  </thead>\n",
1009 |               "  <tbody>\n",
1010 |               "    <tr>\n",
1011 |               "      <th>store 1</th>\n",
1012 |               "      <td>20.0</td>\n",
1013 |               "      <td>30.0</td>\n",
1014 |               "      <td>35.0</td>\n",
1015 |               "      <td>15.0</td>\n",
1016 |               "      <td>8.0</td>\n",
1017 |               "      <td>45.0</td>\n",
1018 |               "      <td>NaN</td>\n",
1019 |               "    </tr>\n",
1020 |               "    <tr>\n",
1021 |               "      <th>store 2</th>\n",
1022 |               "      <td>15.0</td>\n",
1023 |               "      <td>5.0</td>\n",
1024 |               "      <td>10.0</td>\n",
1025 |               "      <td>2.0</td>\n",
1026 |               "      <td>5.0</td>\n",
1027 |               "      <td>7.0</td>\n",
1028 |               "      <td>50.0</td>\n",
1029 |               "    </tr>\n",
1030 |               "    <tr>\n",
1031 |               "      <th>store 3</th>\n",
1032 |               "      <td>20.0</td>\n",
1033 |               "      <td>30.0</td>\n",
1034 |               "      <td>35.0</td>\n",
1035 |               "      <td>10.0</td>\n",
1036 |               "      <td>10.0</td>\n",
1037 |               "      <td>4.0</td>\n",
1038 |               "      <td>4.0</td>\n",
1039 |               "    </tr>\n",
1040 |               "  </tbody>\n",
1041 |               "</table>\n",
1042 |               "</div>"
1043 |             ],
1044 |             "text/plain": [
1045 |               "         bikes  pants  watches  shirts  shoes  suits  glasses\n",
1046 |               "store 1   20.0   30.0     35.0    15.0    8.0   45.0      NaN\n",
1047 |               "store 2   15.0    5.0     10.0     2.0    5.0    7.0     50.0\n",
1048 |               "store 3   20.0   30.0     35.0    10.0   10.0    4.0      4.0"
1049 |             ]
1050 |           },
1051 |           "metadata": {
1052 |             "tags": []
1053 |           },
1054 |           "execution_count": 14
1055 |         }
1056 |       ]
1057 |     },
1058 |     {
1059 |       "cell_type": "markdown",
1060 |       "metadata": {
1061 |         "id": "Ci1HJ1uz_cGF"
1062 |       },
1063 |       "source": [
1064 |         "Notice that the .fillna() method replaces (fills) the NaN values out of place. This means that the original DataFrame is not modified. You can always replace the NaN values in place by setting the keyword inplace = True inside the fillna() function.\n",
1065 |         "\n",
1066 |         "We can also choose to replace NaN values by using different interpolation methods. For example, the .interpolate(method = 'linear', axis) method will use linear interpolation to replace NaN values using the values along the given axis. Let's see some examples:"
1067 |       ]
1068 |     },
1069 |     {
1070 |       "cell_type": "code",
1071 |       "metadata": {
1072 |         "id": "irAhB45W_XgS",
1073 |         "outputId": "ada45a20-541c-4f56-a194-3322db6a2885",
1074 |         "colab": {
1075 |           "base_uri": "https://localhost:8080/",
1076 |           "height": 160
1077 |         }
1078 |       },
1079 |       "source": [
1080 |         "# We replace NaN values by linear interpolation along each column\n",
1081 |         "store_items.interpolate(method = 'linear', axis = 0)"
1082 |       ],
1083 |       "execution_count": 15,
1084 |       "outputs": [
1085 |         {
1086 |           "output_type": "execute_result",
1087 |           "data": {
1088 |             "text/html": [
1089 |               "<div>\n",
1090 |               "<style scoped>\n",
1091 |               "    .dataframe tbody tr th:only-of-type {\n",
1092 |               "        vertical-align: middle;\n",
1093 |               "    }\n",
1094 |               "\n",
1095 |               "    .dataframe tbody tr th {\n",
1096 |               "        vertical-align: top;\n",
1097 |               "    }\n",
1098 |               "\n",
1099 |               "    .dataframe thead th {\n",
1100 |               "        text-align: right;\n",
1101 |               "    }\n",
1102 |               "</style>\n",
1103 |               "<table border=\"1\" class=\"dataframe\">\n",
1104 |               "  <thead>\n",
1105 |               "    <tr style=\"text-align: right;\">\n",
1106 |               "      <th></th>\n",
1107 |               "      <th>bikes</th>\n",
1108 |               "      <th>pants</th>\n",
1109 |               "      <th>watches</th>\n",
1110 |               "      <th>shirts</th>\n",
1111 |               "      <th>shoes</th>\n",
1112 |               "      <th>suits</th>\n",
1113 |               "      <th>glasses</th>\n",
1114 |               "    </tr>\n",
1115 |               "  </thead>\n",
1116 |               "  <tbody>\n",
1117 |               "    <tr>\n",
1118 |               "      <th>store 1</th>\n",
1119 |               "      <td>20</td>\n",
1120 |               "      <td>30</td>\n",
1121 |               "      <td>35</td>\n",
1122 |               "      <td>15.0</td>\n",
1123 |               "      <td>8</td>\n",
1124 |               "      <td>45.0</td>\n",
1125 |               "      <td>NaN</td>\n",
1126 |               "    </tr>\n",
1127 |               "    <tr>\n",
1128 |               "      <th>store 2</th>\n",
1129 |               "      <td>15</td>\n",
1130 |               "      <td>5</td>\n",
1131 |               "      <td>10</td>\n",
1132 |               "      <td>2.0</td>\n",
1133 |               "      <td>5</td>\n",
1134 |               "      <td>7.0</td>\n",
1135 |               "      <td>50.0</td>\n",
1136 |               "    </tr>\n",
1137 |               "    <tr>\n",
1138 |               "      <th>store 3</th>\n",
1139 |               "      <td>20</td>\n",
1140 |               "      <td>30</td>\n",
1141 |               "      <td>35</td>\n",
1142 |               "      <td>2.0</td>\n",
1143 |               "      <td>10</td>\n",
1144 |               "      <td>7.0</td>\n",
1145 |               "      <td>4.0</td>\n",
1146 |               "    </tr>\n",
1147 |               "  </tbody>\n",
1148 |               "</table>\n",
1149 |               "</div>"
1150 |             ],
1151 |             "text/plain": [
1152 |               "         bikes  pants  watches  shirts  shoes  suits  glasses\n",
1153 |               "store 1     20     30       35    15.0      8   45.0      NaN\n",
1154 |               "store 2     15      5       10     2.0      5    7.0     50.0\n",
1155 |               "store 3     20     30       35     2.0     10    7.0      4.0"
1156 |             ]
1157 |           },
1158 |           "metadata": {
1159 |             "tags": []
1160 |           },
1161 |           "execution_count": 15
1162 |         }
1163 |       ]
1164 |     },
1165 |     {
1166 |       "cell_type": "markdown",
1167 |       "metadata": {
1168 |         "id": "lt5jypg8_uBu"
1169 |       },
1170 |       "source": [
1171 |         "Notice that the two NaN values in store 3 have been replaced with linearly interpolated values; because they are the last entries in their columns, there is no later value to interpolate towards, so the last known value (from store 2) is simply carried forward. However, notice that the NaN value in store 1 didn't get replaced. That's because the NaN value is the first value in that column, and since there is no data before it, the interpolation function can't calculate a value. Now, let's interpolate using row values instead:"
1172 |       ]
1173 |     },
1174 |     {
1175 |       "cell_type": "code",
1176 |       "metadata": {
1177 |         "id": "QGnRalxo_kSA",
1178 |         "outputId": "944059c4-b41d-4453-de22-cda0c8d17e7f",
1179 |         "colab": {
1180 |           "base_uri": "https://localhost:8080/",
1181 |           "height": 160
1182 |         }
1183 |       },
1184 |       "source": [
1185 |         "# We replace NaN values by linear interpolation along each row\n",
1186 |         "store_items.interpolate(method = 'linear', axis = 1)"
1187 |       ],
1188 |       "execution_count": 16,
1189 |       "outputs": [
1190 |         {
1191 |           "output_type": "execute_result",
1192 |           "data": {
1193 |             "text/html": [
1194 |               "<div>\n",
1195 |               "<style scoped>\n",
1196 |               "    .dataframe tbody tr th:only-of-type {\n",
1197 |               "        vertical-align: middle;\n",
1198 |               "    }\n",
1199 |               "\n",
1200 |               "    .dataframe tbody tr th {\n",
1201 |               "        vertical-align: top;\n",
1202 |               "    }\n",
1203 |               "\n",
1204 |               "    .dataframe thead th {\n",
1205 |               "        text-align: right;\n",
1206 |               "    }\n",
1207 |               "</style>\n",
1208 |               "<table border=\"1\" class=\"dataframe\">\n",
1209 |               "  <thead>\n",
1210 |               "    <tr style=\"text-align: right;\">\n",
1211 |               "      <th></th>\n",
1212 |               "      <th>bikes</th>\n",
1213 |               "      <th>pants</th>\n",
1214 |               "      <th>watches</th>\n",
1215 |               "      <th>shirts</th>\n",
1216 |               "      <th>shoes</th>\n",
1217 |               "      <th>suits</th>\n",
1218 |               "      <th>glasses</th>\n",
1219 |               "    </tr>\n",
1220 |               "  </thead>\n",
1221 |               "  <tbody>\n",
1222 |               "    <tr>\n",
1223 |               "      <th>store 1</th>\n",
1224 |               "      <td>20.0</td>\n",
1225 |               "      <td>30.0</td>\n",
1226 |               "      <td>35.0</td>\n",
1227 |               "      <td>15.0</td>\n",
1228 |               "      <td>8.0</td>\n",
1229 |               "      <td>45.0</td>\n",
1230 |               "      <td>45.0</td>\n",
1231 |               "    </tr>\n",
1232 |               "    <tr>\n",
1233 |               "      <th>store 2</th>\n",
1234 |               "      <td>15.0</td>\n",
1235 |               "      <td>5.0</td>\n",
1236 |               "      <td>10.0</td>\n",
1237 |               "      <td>2.0</td>\n",
1238 |               "      <td>5.0</td>\n",
1239 |               "      <td>7.0</td>\n",
1240 |               "      <td>50.0</td>\n",
1241 |               "    </tr>\n",
1242 |               "    <tr>\n",
1243 |               "      <th>store 3</th>\n",
1244 |               "      <td>20.0</td>\n",
1245 |               "      <td>30.0</td>\n",
1246 |               "      <td>35.0</td>\n",
1247 |               "      <td>22.5</td>\n",
1248 |               "      <td>10.0</td>\n",
1249 |               "      <td>7.0</td>\n",
1250 |               "      <td>4.0</td>\n",
1251 |               "    </tr>\n",
1252 |               "  </tbody>\n",
1253 |               "</table>\n",
1254 |               "</div>"
1255 |             ],
1256 |             "text/plain": [
1257 |               "         bikes  pants  watches  shirts  shoes  suits  glasses\n",
1258 |               "store 1   20.0   30.0     35.0    15.0    8.0   45.0     45.0\n",
1259 |               "store 2   15.0    5.0     10.0     2.0    5.0    7.0     50.0\n",
1260 |               "store 3   20.0   30.0     35.0    22.5   10.0    7.0      4.0"
1261 |             ]
1262 |           },
1263 |           "metadata": {
1264 |             "tags": []
1265 |           },
1266 |           "execution_count": 16
1267 |         }
1268 |       ]
1269 |     },
1270 |     {
1271 |       "cell_type": "markdown",
1272 |       "metadata": {
1273 |         "id": "YcXh1Wzu_zBa"
1274 |       },
1275 |       "source": [
1276 |         "> Just as with the other methods we saw, the .interpolate() method replaces NaN values out of place."
1277 |       ]
1278 |     },
1279 |     {
1280 |       "cell_type": "markdown",
1281 |       "metadata": {
1282 |         "id": "JBo9YDea_57T"
1283 |       },
1284 |       "source": [
1285 |         "## **Question:**\n"
1286 |       ]
1287 |     },
1288 |     {
1289 |       "cell_type": "code",
1290 |       "metadata": {
1291 |         "id": "xDLlfzrU_wAp"
1292 |       },
1293 |       "source": [
1294 |         "import pandas as pd\n",
1295 |         "import numpy as np\n",
1296 |         "\n",
1297 |         "# Since we will be working with ratings, we display our dataframes with one decimal place.\n",
1298 |         "# The full key 'display.precision' is used because the bare pattern 'precision' matches more than one option in recent pandas.\n",
1299 |         "pd.set_option('display.precision', 1)\n",
1300 |         "\n",
1301 |         "# Create a Pandas DataFrame that contains the ratings some users have given to a\n",
1302 |         "# series of books. The ratings given are in the range from 1 to 5, with 5 being\n",
1303 |         "# the best score. The names of the books, the authors, and the ratings of each user\n",
1304 |         "# are given below:\n",
1305 |         "\n",
1306 |         "books = pd.Series(data = ['Great Expectations', 'Of Mice and Men', 'Romeo and Juliet', 'The Time Machine', 'Alice in Wonderland'])\n",
1307 |         "authors = pd.Series(data = ['Charles Dickens', 'John Steinbeck', 'William Shakespeare', 'H. G. Wells', 'Lewis Carroll'])\n",
1308 |         "\n",
1309 |         "user_1 = pd.Series(data = [3.2, np.nan, 2.5])\n",
1310 |         "user_2 = pd.Series(data = [5., 1.3, 4.0, 3.8])\n",
1311 |         "user_3 = pd.Series(data = [2.0, 2.3, np.nan, 4])\n",
1312 |         "user_4 = pd.Series(data = [4, 3.5, 4, 5, 4.2])\n",
1313 |         "\n",
1314 |         "# An np.nan value means that the user has not yet rated that book.\n",
1315 |         "# Use the data above to create a Pandas DataFrame that has the following column\n",
1316 |         "# labels: 'Author', 'Book Title', 'User 1', 'User 2', 'User 3', 'User 4'. Let Pandas\n",
1317 |         "# automatically assign numerical row indices to the DataFrame.\n",
1318 |         "\n",
1319 |         "# Create a dictionary with the data given above\n",
1320 |         "dat = \n",
1321 |         "\n",
1322 |         "# Use the dictionary to create a Pandas DataFrame\n",
1323 |         "book_ratings = \n",
1324 |         "\n",
1325 |         "# If you created the dictionary correctly you should have a Pandas DataFrame\n",
1326 |         "# that has column labels: 'Author', 'Book Title', 'User 1', 'User 2', 'User 3',\n",
1327 |         "# 'User 4' and row indices 0 through 4.\n",
1328 |         "\n",
1329 |         "# Now replace all the NaN values in your DataFrame with the average rating in\n",
1330 |         "# each column. Replace the NaN values in place. HINT: you can use the fillna()\n",
1331 |         "# function with the keyword inplace = True, to do this. Write your code below:\n"
1332 |       ],
1333 |       "execution_count": null,
1334 |       "outputs": []
1335 |     }
1336 |   ]
1337 | }
1338 | 


--------------------------------------------------------------------------------
/Day_3_Numpy_1.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |   "nbformat": 4,
   3 |   "nbformat_minor": 0,
   4 |   "metadata": {
   5 |     "colab": {
   6 |       "name": "Day_5_Numpy_Part_1.ipynb",
   7 |       "provenance": []
   8 |     },
   9 |     "kernelspec": {
  10 |       "name": "python3",
  11 |       "display_name": "Python 3"
  12 |     }
  13 |   },
  14 |   "cells": [
  15 |     {
  16 |       "cell_type": "markdown",
  17 |       "metadata": {
  18 |         "id": "yNMy_TNjUrdd",
  19 |         "colab_type": "text"
  20 |       },
  21 |       "source": [
  22 |         "## **Introduction to NumPy:**\n",
  23 |         "```NumPy``` stands for *Numerical Python* and it's a fundamental package for scientific computing in Python. NumPy provides Python with an extensive math library capable of performing numerical computations effectively and efficiently. These lessons are intended as a basic overview of NumPy and introduce some of its most important features.\n",
  24 |         "\n"
  25 |       ]
  26 |     },
  27 |     {
  28 |       "cell_type": "markdown",
  29 |       "metadata": {
  30 |         "id": "oPnpCMIeYA90",
  31 |         "colab_type": "text"
  32 |       },
  33 |       "source": [
  34 |         "## **Why Numpy:**\n",
  35 |         "You may be wondering why people use NumPy - after all, Python can handle lists, as you learned in the Intro to Python lessons.\n",
  36 |         "\n",
  37 |         "Even though Python lists are great on their own, NumPy has a number of key features that give it great advantages over Python lists. One such feature is speed. When performing operations on large arrays NumPy can often perform several orders of magnitude faster than Python lists. This speed comes from the nature of NumPy arrays being memory-efficient and from optimized algorithms used by NumPy for doing arithmetic, statistical, and linear algebra operations."
  38 |       ]
  39 |     },
  40 |     {
  41 |       "cell_type": "code",
  42 |       "metadata": {
  43 |         "id": "1D9wUejeTCUi",
  44 |         "colab_type": "code",
  45 |         "colab": {}
  46 |       },
  47 |       "source": [
  48 |         "import time\n",
  49 |         "import numpy as np"
  50 |       ],
  51 |       "execution_count": 1,
  52 |       "outputs": []
  53 |     },
  54 |     {
  55 |       "cell_type": "code",
  56 |       "metadata": {
  57 |         "id": "cx33B30cYJzv",
  58 |         "colab_type": "code",
  59 |         "colab": {}
  60 |       },
  61 |       "source": [
  62 |         "x = np.random.rand(10**9)  # one billion random floats (8 bytes each, so roughly 8 GB of RAM)"
  63 |       ],
  64 |       "execution_count": 3,
  65 |       "outputs": []
  66 |     },
  67 |     {
  68 |       "cell_type": "code",
  69 |       "metadata": {
  70 |         "id": "xXpuBxHLYOWg",
  71 |         "colab_type": "code",
  72 |         "colab": {
  73 |           "base_uri": "https://localhost:8080/",
  74 |           "height": 54
  75 |         },
  76 |         "outputId": "e72cc273-38e1-4398-8aa1-6f08f9a776f3"
  77 |       },
  78 |       "source": [
  79 |         "start = time.perf_counter()  # monotonic high-resolution clock, intended for timing code\n",
  80 |         "mean = sum(x)/len(x)  # built-in sum() walks the array one element at a time in Python\n",
  81 |         "print(time.perf_counter() - start)"
  82 |       ],
  83 |       "execution_count": 4,
  84 |       "outputs": [
  85 |         {
  86 |           "output_type": "stream",
  87 |           "text": [
  88 |             "164.07553839683533\n"
  89 |           ],
  90 |           "name": "stdout"
  91 |         }
  92 |       ]
  93 |     },
  94 |     {
  95 |       "cell_type": "code",
  96 |       "metadata": {
  97 |         "id": "zwi39jQRYkye",
  98 |         "colab_type": "code",
  99 |         "colab": {
 100 |           "base_uri": "https://localhost:8080/",
 101 |           "height": 54
 102 |         },
 103 |         "outputId": "11d53351-1df7-4e2c-e5cb-71c9ffd2185d"
 104 |       },
 105 |       "source": [
 106 |         "start = time.perf_counter()  # monotonic high-resolution clock, intended for timing code\n",
 107 |         "mean_np = np.mean(x)  # the same average, computed by NumPy's own mean routine\n",
 108 |         "print(time.perf_counter() - start)"
 109 |       ],
 110 |       "execution_count": 6,
 111 |       "outputs": [
 112 |         {
 113 |           "output_type": "stream",
 114 |           "text": [
 115 |             "0.6675674915313721\n"
 116 |           ],
 117 |           "name": "stdout"
 118 |         }
 119 |       ]
 120 |     },
 121 |     {
 122 |       "cell_type": "markdown",
 123 |       "metadata": {
 124 |         "id": "Dibh7I4LcMdb",
 125 |         "colab_type": "text"
 126 |       },
 127 |       "source": [
 128 |         "## **Creating NumPy ndarrays:**\n",
 129 |         "At the core of NumPy is the ndarray, where nd stands for n-dimensional. An ndarray is a multidimensional array of elements all of the same type. In other words, an ndarray is a grid that can take on many shapes and can hold either numbers or strings. In many Machine Learning problems you will often find yourself using ndarrays in many different ways. For instance, you might use an ndarray to hold the pixel values of an image that will be fed into a Neural Network for image classification."
 130 |       ]
 131 |     },
 132 |     {
 133 |       "cell_type": "markdown",
 134 |       "metadata": {
 135 |         "id": "6XWJHaq0cabp",
 136 |         "colab_type": "text"
 137 |       },
 138 |       "source": [
 139 |         "There are several ways to create ndarrays in NumPy. In the following lessons we will see two ways to create ndarrays:\n",
 140 |         "\n",
 141 |         "1. Using regular Python lists\n",
 142 |         "\n",
 143 |         "2. Using built-in NumPy functions"
 144 |       ]
 145 |     },
 146 |     {
 147 |       "cell_type": "markdown",
 148 |       "metadata": {
 149 |         "id": "itKv0T1acqnf",
 150 |         "colab_type": "text"
 151 |       },
 152 |       "source": [
 153 |         "In this section, we will create ndarrays by providing Python lists to the NumPy np.array() function, which is simply a function that returns an ndarray. We should note that, for the purposes of clarity, the examples throughout these lessons will use small and simple ndarrays. Let's start by creating 1-Dimensional (1D) ndarrays."
 154 |       ]
 155 |     },
 156 |     {
 157 |       "cell_type": "code",
 158 |       "metadata": {
 159 |         "id": "bUHEilP2ZN5J",
 160 |         "colab_type": "code",
 161 |         "colab": {
 162 |           "base_uri": "https://localhost:8080/",
 163 |           "height": 54
 164 |         },
 165 |         "outputId": "13235ae1-b467-40b6-9c17-18f8082e2ef8"
 166 |       },
 167 |       "source": [
 168 |         "# We import NumPy into Python\n",
 169 |         "import numpy as np\n",
 170 |         "\n",
 171 |         "# We create a 1D ndarray that contains only integers\n",
 172 |         "x = np.array([1, 2, 3, 4, 5])\n",
 173 |         "\n",
 174 |         "# Let's print the ndarray we just created using the print() function\n",
 175 |         "print('x = ', x)"
 176 |       ],
 177 |       "execution_count": 2,
 178 |       "outputs": [
 179 |         {
 180 |           "output_type": "stream",
 181 |           "text": [
 182 |             "x =  [1 2 3 4 5]\n"
 183 |           ],
 184 |           "name": "stdout"
 185 |         }
 186 |       ]
 187 |     },
 188 |     {
 189 |       "cell_type": "markdown",
 190 |       "metadata": {
 191 |         "id": "Oi4x0r2fvwSd",
 192 |         "colab_type": "text"
 193 |       },
 194 |       "source": [
 195 |         "## **Shape of an ndarray:**\n",
 196 |         "Another important property of arrays is their shape. The shape of an array is the size along each of its dimensions. As you will see, NumPy ndarrays have attributes that allow us to get information about them in a very intuitive way. For example, the shape of an ndarray can be obtained using the .shape attribute. The shape attribute returns a tuple of N positive integers that specify the size of each dimension."
 197 |       ]
 198 |     },
 199 |     {
 200 |       "cell_type": "code",
 201 |       "metadata": {
 202 |         "id": "xskCTsNRc2HK",
 203 |         "colab_type": "code",
 204 |         "colab": {
 205 |           "base_uri": "https://localhost:8080/",
 206 |           "height": 137
 207 |         },
 208 |         "outputId": "fc340f5a-ff85-47f0-bb26-306106ad797e"
 209 |       },
 210 |       "source": [
 211 |         "# We create a 1D ndarray that contains only integers\n",
 212 |         "x = np.array([1, 2, 3, 4, 5])\n",
 213 |         "\n",
 214 |         "# We print x\n",
 215 |         "print()\n",
 216 |         "print('x = ', x)\n",
 217 |         "print()\n",
 218 |         "\n",
 219 |         "# We print information about x\n",
 220 |         "print('x has dimensions:', x.shape)\n",
 221 |         "print('x is an object of type:', type(x))\n",
 222 |         "print('The elements in x are of type:', x.dtype)"
 223 |       ],
 224 |       "execution_count": 3,
 225 |       "outputs": [
 226 |         {
 227 |           "output_type": "stream",
 228 |           "text": [
 229 |             "\n",
 230 |             "x =  [1 2 3 4 5]\n",
 231 |             "\n",
 232 |             "x has dimensions: (5,)\n",
 233 |             "x is an object of type: <class 'numpy.ndarray'>\n",
 234 |             "The elements in x are of type: int64\n"
 235 |           ],
 236 |           "name": "stdout"
 237 |         }
 238 |       ]
 239 |     },
 240 |     {
 241 |       "cell_type": "markdown",
 242 |       "metadata": {
 243 |         "id": "MLYrG8oLwXb2",
 244 |         "colab_type": "text"
 245 |       },
 246 |       "source": [
 247 |         "We can see that the shape attribute returns the tuple (5,) telling us that x is of rank 1 (i.e. x only has 1 dimension) and it has 5 elements. The type() function tells us that x is indeed a NumPy ndarray. Finally, the .dtype attribute tells us that the elements of x are stored in memory as signed 64-bit integers. Another great advantage of NumPy is that it can handle more data-types than Python lists. You can check out all the data types NumPy supports [here](https://docs.scipy.org/doc/numpy-1.13.0/user/basics.types.html)."
 248 |       ]
 249 |     },
 250 |     {
 251 |       "cell_type": "markdown",
 252 |       "metadata": {
 253 |         "id": "DTCnuASsz3mq",
 254 |         "colab_type": "text"
 255 |       },
 256 |       "source": [
 257 |         "As mentioned earlier, ndarrays can also hold strings. Let's see how we can create a rank 1 ndarray of strings in the same manner as before, by providing the np.array() function a Python list of strings."
 258 |       ]
 259 |     },
 260 |     {
 261 |       "cell_type": "code",
 262 |       "metadata": {
 263 |         "id": "W3rhKGfAwAOH",
 264 |         "colab_type": "code",
 265 |         "colab": {
 266 |           "base_uri": "https://localhost:8080/",
 267 |           "height": 137
 268 |         },
 269 |         "outputId": "c7b085a7-ac97-4e3d-aa93-8757291e7ef4"
 270 |       },
 271 |       "source": [
 272 |         "# We create a rank 1 ndarray that only contains strings\n",
 273 |         "x = np.array(['Hello', 'World'])\n",
 274 |         "\n",
 275 |         "# We print x\n",
 276 |         "print()\n",
 277 |         "print('x = ', x)\n",
 278 |         "print()\n",
 279 |         "\n",
 280 |         "# We print information about x\n",
 281 |         "print('x has dimensions:', x.shape)\n",
 282 |         "print('x is an object of type:', type(x))\n",
 283 |         "print('The elements in x are of type:', x.dtype)"
 284 |       ],
 285 |       "execution_count": 4,
 286 |       "outputs": [
 287 |         {
 288 |           "output_type": "stream",
 289 |           "text": [
 290 |             "\n",
 291 |             "x =  ['Hello' 'World']\n",
 292 |             "\n",
 293 |             "x has dimensions: (2,)\n",
 294 |             "x is an object of type: <class 'numpy.ndarray'>\n",
 295 |             "The elements in x are of type: <U5\n"
 296 |           ],
 297 |           "name": "stdout"
 298 |         }
 299 |       ]
 300 |     },
 301 |     {
 302 |       "cell_type": "markdown",
 303 |       "metadata": {
 304 |         "id": "XaJ8fHUx0Ey7",
 305 |         "colab_type": "text"
 306 |       },
 307 |       "source": [
 308 |         "It is important to remember that one big difference between Python lists and ndarrays, is that unlike Python lists, all the elements of an ndarray must be of the same type. So, while we can create Python lists with both integers and strings, we can't mix types in ndarrays. If you provide the np.array() function with a Python list that has both integers and strings, NumPy will interpret all elements as strings. We can see this in the next example:"
 309 |       ]
 310 |     },
 311 |     {
 312 |       "cell_type": "code",
 313 |       "metadata": {
 314 |         "id": "ACE0Mqcrz5-E",
 315 |         "colab_type": "code",
 316 |         "colab": {
 317 |           "base_uri": "https://localhost:8080/",
 318 |           "height": 137
 319 |         },
 320 |         "outputId": "f20b73e7-fd02-47f1-d9ed-20923c0da42d"
 321 |       },
 322 |       "source": [
 323 |         "# We create a rank 1 ndarray from a Python list that contains integers and strings\n",
 324 |         "x = np.array([1, 2, 'World'])\n",
 325 |         "\n",
 326 |         "# We print the ndarray\n",
 327 |         "print()\n",
 328 |         "print('x = ', x)\n",
 329 |         "print()\n",
 330 |         "\n",
 331 |         "# We print information about x\n",
 332 |         "print('x has dimensions:', x.shape)\n",
 333 |         "print('x is an object of type:', type(x))\n",
 334 |         "print('The elements in x are of type:', x.dtype)"
 335 |       ],
 336 |       "execution_count": 5,
 337 |       "outputs": [
 338 |         {
 339 |           "output_type": "stream",
 340 |           "text": [
 341 |             "\n",
 342 |             "x =  ['1' '2' 'World']\n",
 343 |             "\n",
 344 |             "x has dimensions: (3,)\n",
 345 |             "x is an object of type: <class 'numpy.ndarray'>\n",
 346 |             "The elements in x are of type: <U21\n"
 347 |           ],
 348 |           "name": "stdout"
 349 |         }
 350 |       ]
 351 |     },
 352 |     {
 353 |       "cell_type": "markdown",
 354 |       "metadata": {
 355 |         "id": "PFK0xNh-1hkE",
 356 |         "colab_type": "text"
 357 |       },
 358 |       "source": [
 359 |         "Let us now look at how we can create a rank 2 ndarray from a nested Python list."
 360 |       ]
 361 |     },
 362 |     {
 363 |       "cell_type": "code",
 364 |       "metadata": {
 365 |         "id": "mCEls7XS0IDI",
 366 |         "colab_type": "code",
 367 |         "colab": {
 368 |           "base_uri": "https://localhost:8080/",
 369 |           "height": 220
 370 |         },
 371 |         "outputId": "600e0e6e-35cc-4bea-e8c5-85cfc3fca72a"
 372 |       },
 373 |       "source": [
 374 |         "# We create a rank 2 ndarray that only contains integers\n",
 375 |         "Y = np.array([[1,2,3],[4,5,6],[7,8,9], [10,11,12]])\n",
 376 |         "\n",
 377 |         "# We print Y\n",
 378 |         "print()\n",
 379 |         "print('Y = \\n', Y)\n",
 380 |         "print()\n",
 381 |         "\n",
 382 |         "# We print information about Y\n",
 383 |         "print('Y has dimensions:', Y.shape)\n",
 384 |         "print('Y has a total of', Y.size, 'elements')\n",
 385 |         "print('Y is an object of type:', type(Y))\n",
 386 |         "print('The elements in Y are of type:', Y.dtype)"
 387 |       ],
 388 |       "execution_count": 6,
 389 |       "outputs": [
 390 |         {
 391 |           "output_type": "stream",
 392 |           "text": [
 393 |             "\n",
 394 |             "Y = \n",
 395 |             " [[ 1  2  3]\n",
 396 |             " [ 4  5  6]\n",
 397 |             " [ 7  8  9]\n",
 398 |             " [10 11 12]]\n",
 399 |             "\n",
 400 |             "Y has dimensions: (4, 3)\n",
 401 |             "Y has a total of 12 elements\n",
 402 |             "Y is an object of type: <class 'numpy.ndarray'>\n",
 403 |             "The elements in Y are of type: int64\n"
 404 |           ],
 405 |           "name": "stdout"
 406 |         }
 407 |       ]
 408 |     },
 409 |     {
 410 |       "cell_type": "markdown",
 411 |       "metadata": {
 412 |         "id": "BEGW5NcV1uK5",
 413 |         "colab_type": "text"
 414 |       },
 415 |       "source": [
 416 |         "Up to now, we have only created ndarrays with integers and strings. We saw that when we create an ndarray with only integers, NumPy will automatically assign the dtype int64 to its elements. Let's see what happens when we create ndarrays with floats and integers."
 417 |       ]
 418 |     },
 419 |     {
 420 |       "cell_type": "code",
 421 |       "metadata": {
 422 |         "id": "bb26oQPg1jty",
 423 |         "colab_type": "code",
 424 |         "colab": {
 425 |           "base_uri": "https://localhost:8080/",
 426 |           "height": 87
 427 |         },
 428 |         "outputId": "9a62d4b3-72e7-4ace-c6c0-e9c518b0e34f"
 429 |       },
 430 |       "source": [
 431 |         "# We create a rank 1 ndarray that contains integers\n",
 432 |         "x = np.array([1,2,3])\n",
 433 |         "\n",
 434 |         "# We create a rank 1 ndarray that contains floats\n",
 435 |         "y = np.array([1.0,2.0,3.0])\n",
 436 |         "\n",
 437 |         "# We create a rank 1 ndarray that contains integers and floats\n",
 438 |         "z = np.array([1, 2.5, 4])\n",
 439 |         "\n",
 440 |         "# We print the dtype of each ndarray\n",
 441 |         "print('The elements in x are of type:', x.dtype)\n",
 442 |         "print('The elements in y are of type:', y.dtype)\n",
 443 |         "print('The elements in z are of type:', z.dtype)"
 444 |       ],
 445 |       "execution_count": 7,
 446 |       "outputs": [
 447 |         {
 448 |           "output_type": "stream",
 449 |           "text": [
 450 |             "The elements in x are of type: int64\n",
 451 |             "The elements in y are of type: float64\n",
 452 |             "The elements in z are of type: float64\n"
 453 |           ],
 454 |           "name": "stdout"
 455 |         }
 456 |       ]
 457 |     },
 458 |     {
 459 |       "cell_type": "markdown",
 460 |       "metadata": {
 461 |         "id": "0z7cLmFD17Kw",
 462 |         "colab_type": "text"
 463 |       },
 464 |       "source": [
 465 |         "Notice that when we create an ndarray with both floats and integers, as we did with the z ndarray above, NumPy assigns its elements a *float64* dtype as well. This is called upcasting. Since all the elements of an ndarray must be of the same type, in this case NumPy upcasts the integers in z to floats in order to avoid losing precision in numerical computations."
 466 |       ]
 467 |     },
 468 |     {
 469 |       "cell_type": "markdown",
 470 |       "metadata": {
 471 |         "id": "XUmzWrvr2AVp",
 472 |         "colab_type": "text"
 473 |       },
 474 |       "source": [
 475 |         "Even though NumPy automatically selects the dtype of the ndarray, NumPy also allows you to specify the particular dtype you want to assign to the elements of the ndarray. You can specify the dtype when you create the ndarray using the keyword dtype in the np.array() function. Let's see an example:"
 476 |       ]
 477 |     },
 478 |     {
 479 |       "cell_type": "code",
 480 |       "metadata": {
 481 |         "id": "_BSEAZgQ1wsx",
 482 |         "colab_type": "code",
 483 |         "colab": {
 484 |           "base_uri": "https://localhost:8080/",
 485 |           "height": 104
 486 |         },
 487 |         "outputId": "da136eb8-2dd9-4c3b-f048-1fa9b9e5a740"
 488 |       },
 489 |       "source": [
 490 |         "# We create a rank 1 ndarray of floats but set the dtype to int64\n",
 491 |         "x = np.array([1.5, 2.2, 3.7, 4.0, 5.9], dtype = np.int64)\n",
 492 |         "\n",
 493 |         "# We print x\n",
 494 |         "print()\n",
 495 |         "print('x = ', x)\n",
 496 |         "print()\n",
 497 |         "\n",
 498 |         "# We print the dtype of x\n",
 499 |         "print('The elements in x are of type:', x.dtype)"
 500 |       ],
 501 |       "execution_count": 8,
 502 |       "outputs": [
 503 |         {
 504 |           "output_type": "stream",
 505 |           "text": [
 506 |             "\n",
 507 |             "x =  [1 2 3 4 5]\n",
 508 |             "\n",
 509 |             "The elements in x are of type: int64\n"
 510 |           ],
 511 |           "name": "stdout"
 512 |         }
 513 |       ]
 514 |     },
 515 |     {
 516 |       "cell_type": "markdown",
 517 |       "metadata": {
 518 |         "id": "IIo6BbvW23A3",
 519 |         "colab_type": "text"
 520 |       },
 521 |       "source": [
 522 |         "Once you create an ndarray, you may want to save it to a file to be read later or to be used by another program. NumPy provides a way to save the arrays into files for later use - let's see how this is done."
 523 |       ]
 524 |     },
 525 |     {
 526 |       "cell_type": "code",
 527 |       "metadata": {
 528 |         "id": "heCxnnFC2UcA",
 529 |         "colab_type": "code",
 530 |         "colab": {}
 531 |       },
 532 |       "source": [
 533 |         "# We create a rank 1 ndarray\n",
 534 |         "x = np.array([1, 2, 3, 4, 5])\n",
 535 |         "\n",
 536 |         "# We save x into the current directory as my_array.npy\n",
 537 |         "np.save('my_array', x)"
 538 |       ],
 539 |       "execution_count": 9,
 540 |       "outputs": []
 541 |     },
 542 |     {
 543 |       "cell_type": "markdown",
 544 |       "metadata": {
 545 |         "id": "2zclAmaE28Ki",
 546 |         "colab_type": "text"
 547 |       },
 548 |       "source": [
 549 |         "The above saves the x ndarray into a file named my_array.npy. You can load the saved ndarray into a variable by using the load() function."
 550 |       ]
 551 |     },
 552 |     {
 553 |       "cell_type": "markdown",
 554 |       "metadata": {
 555 |         "id": "fqT_YREU3Bug",
 556 |         "colab_type": "text"
 557 |       },
 558 |       "source": [
 559 |         "> When loading an array from a file, make sure you include the name of the file together with the extension .npy, otherwise you will get an error."
 560 |       ]
 561 |     },
 562 |     {
 563 |       "cell_type": "code",
 564 |       "metadata": {
 565 |         "id": "A0LqlywX26AG",
 566 |         "colab_type": "code",
 567 |         "colab": {
 568 |           "base_uri": "https://localhost:8080/"
 569 |         },
 570 |         "outputId": "8971e9de-9229-4e21-aab8-53b548734066"
 571 |       },
 572 |       "source": [
 573 |         "# We load the saved array from our current directory into variable y\n",
 574 |         "y = np.load('my_array.npy')\n",
 575 |         "\n",
 576 |         "# We print y\n",
 577 |         "print()\n",
 578 |         "print('y = ', y)\n",
 579 |         "print()\n",
 580 |         "\n",
 581 |         "# We print information about the ndarray we loaded\n",
 582 |         "print('y is an object of type:', type(y))\n",
 583 |         "print('The elements in y are of type:', y.dtype)"
 584 |       ],
 585 |       "execution_count": 10,
 586 |       "outputs": [
 587 |         {
 588 |           "output_type": "stream",
 589 |           "text": [
 590 |             "\n",
 591 |             "y =  [1 2 3 4 5]\n",
 592 |             "\n",
 593 |             "y is an object of type: <class 'numpy.ndarray'>\n",
 594 |             "The elements in y are of type: int64\n"
 595 |           ],
 596 |           "name": "stdout"
 597 |         }
 598 |       ]
 599 |     },
 600 |     {
 601 |       "cell_type": "markdown",
 602 |       "metadata": {
 603 |         "id": "rotqBT2MFNux",
 604 |         "colab_type": "text"
 605 |       },
 606 |       "source": [
 607 |         "## **Specialized ndarrays:**"
 608 |       ]
 609 |     },
 610 |     {
 611 |       "cell_type": "markdown",
 612 |       "metadata": {
 613 |         "id": "BeRM9Yu7FKVR",
 614 |         "colab_type": "text"
 615 |       },
 616 |       "source": [
 617 |         "One great time-saving feature of NumPy is its ability to create ndarrays using built-in functions. These functions allow us to create certain kinds of ndarrays with just one line of code. Below we will see a few of the most useful built-in functions for creating ndarrays that you will come across when doing AI programming.\n",
 618 |         "\n",
 619 |         "### **np.zeros():**\n",
 620 |         "\n",
 621 |         "Let's start by creating an ndarray with a specified shape that is full of zeros. We can do this by using the np.zeros() function. The function np.zeros(shape) creates an ndarray full of zeros with the given shape. So, for example, if you wanted to create a rank 2 array with 3 rows and 4 columns, you will pass the shape to the function in the form of (rows, columns), as in the example below:"
 622 |       ]
 623 |     },
 624 |     {
 625 |       "cell_type": "code",
 626 |       "metadata": {
 627 |         "id": "w97eI9UE2_CQ",
 628 |         "colab_type": "code",
 629 |         "colab": {
 630 |           "base_uri": "https://localhost:8080/"
 631 |         },
 632 |         "outputId": "ca316a4f-5f98-4e1b-a04f-0104403b3864"
 633 |       },
 634 |       "source": [
 635 |         "# We create a 3 x 4 ndarray full of zeros. \n",
 636 |         "X = np.zeros((3,4))\n",
 637 |         "\n",
 638 |         "# We print X\n",
 639 |         "print()\n",
 640 |         "print('X = \\n', X)\n",
 641 |         "print()\n",
 642 |         "\n",
 643 |         "# We print information about X\n",
 644 |         "print('X has dimensions:', X.shape)\n",
 645 |         "print('X is an object of type:', type(X))\n",
 646 |         "print('The elements in X are of type:', X.dtype)\n"
 647 |       ],
 648 |       "execution_count": 11,
 649 |       "outputs": [
 650 |         {
 651 |           "output_type": "stream",
 652 |           "text": [
 653 |             "\n",
 654 |             "X = \n",
 655 |             " [[0. 0. 0. 0.]\n",
 656 |             " [0. 0. 0. 0.]\n",
 657 |             " [0. 0. 0. 0.]]\n",
 658 |             "\n",
 659 |             "X has dimensions: (3, 4)\n",
 660 |             "X is an object of type: <class 'numpy.ndarray'>\n",
 661 |             "The elements in X are of type: float64\n"
 662 |           ],
 663 |           "name": "stdout"
 664 |         }
 665 |       ]
 666 |     },
 667 |     {
 668 |       "cell_type": "markdown",
 669 |       "metadata": {
 670 |         "id": "eriuCEtcFah5",
 671 |         "colab_type": "text"
 672 |       },
 673 |       "source": [
 674 |         "> As we can see, the np.zeros() function creates by default an array with dtype float64. If desired, the data type can be changed by using the keyword dtype.\n",
 675 |         "\n",
 676 |         "### **np.ones():**\n",
 677 |         "\n",
 678 |         "Similarly, we can create an ndarray with a specified shape that is full of ones. We can do this by using the np.ones() function. Just like the np.zeros() function, the np.ones() function takes as an argument the shape of the ndarray you want to make. Let's see an example:"
 679 |       ]
 680 |     },
 681 |     {
 682 |       "cell_type": "code",
 683 |       "metadata": {
 684 |         "id": "iupUtBcYFXWM",
 685 |         "colab_type": "code",
 686 |         "colab": {
 687 |           "base_uri": "https://localhost:8080/",
 688 |           "height": 186
 689 |         },
 690 |         "outputId": "5eacb580-002e-44bd-eca3-a234b9b15c4a"
 691 |       },
 692 |       "source": [
 693 |         "# We create a 3 x 2 ndarray full of ones. \n",
 694 |         "X = np.ones((3,2))\n",
 695 |         "\n",
 696 |         "# We print X\n",
 697 |         "print()\n",
 698 |         "print('X = \\n', X)\n",
 699 |         "print()\n",
 700 |         "\n",
 701 |         "# We print information about X\n",
 702 |         "print('X has dimensions:', X.shape)\n",
 703 |         "print('X is an object of type:', type(X))\n",
 704 |         "print('The elements in X are of type:', X.dtype) "
 705 |       ],
 706 |       "execution_count": 12,
 707 |       "outputs": [
 708 |         {
 709 |           "output_type": "stream",
 710 |           "text": [
 711 |             "\n",
 712 |             "X = \n",
 713 |             " [[1. 1.]\n",
 714 |             " [1. 1.]\n",
 715 |             " [1. 1.]]\n",
 716 |             "\n",
 717 |             "X has dimensions: (3, 2)\n",
 718 |             "X is an object of type: <class 'numpy.ndarray'>\n",
 719 |             "The elements in X are of type: float64\n"
 720 |           ],
 721 |           "name": "stdout"
 722 |         }
 723 |       ]
 724 |     },
 725 |     {
 726 |       "cell_type": "markdown",
 727 |       "metadata": {
 728 |         "id": "XVoiNeI3GJx6",
 729 |         "colab_type": "text"
 730 |       },
 731 |       "source": [
 732 |         "### **np.full():**\n",
 733 |         "We can also create an ndarray with a specified shape that is full of any number we want. We can do this by using the np.full() function. The np.full(shape, constant value) function takes two arguments. The first argument is the shape of the ndarray you want to make and the second is the constant value you want to populate the array with. Let's see an example:"
 734 |       ]
 735 |     },
 736 |     {
 737 |       "cell_type": "code",
 738 |       "metadata": {
 739 |         "id": "91DRW1O1Fy9A",
 740 |         "colab_type": "code",
 741 |         "colab": {
 742 |           "base_uri": "https://localhost:8080/",
 743 |           "height": 170
 744 |         },
 745 |         "outputId": "0da40471-a0eb-4de4-f9bf-a60de030bae6"
 746 |       },
 747 |       "source": [
 748 |         "# We create a 2 x 3 ndarray full of fives. \n",
 749 |         "X = np.full((2,3), 5) \n",
 750 |         "\n",
 751 |         "# We print X\n",
 752 |         "print()\n",
 753 |         "print('X = \\n', X)\n",
 754 |         "print()\n",
 755 |         "\n",
 756 |         "# We print information about X\n",
 757 |         "print('X has dimensions:', X.shape)\n",
 758 |         "print('X is an object of type:', type(X))\n",
 759 |         "print('The elements in X are of type:', X.dtype)"
 760 |       ],
 761 |       "execution_count": 13,
 762 |       "outputs": [
 763 |         {
 764 |           "output_type": "stream",
 765 |           "text": [
 766 |             "\n",
 767 |             "X = \n",
 768 |             " [[5 5 5]\n",
 769 |             " [5 5 5]]\n",
 770 |             "\n",
 771 |             "X has dimensions: (2, 3)\n",
 772 |             "X is an object of type: <class 'numpy.ndarray'>\n",
 773 |             "The elements in X are of type: int64\n"
 774 |           ],
 775 |           "name": "stdout"
 776 |         }
 777 |       ]
 778 |     },
 779 |     {
 780 |       "cell_type": "markdown",
 781 |       "metadata": {
 782 |         "id": "Iq62WNEdGcds",
 783 |         "colab_type": "text"
 784 |       },
 785 |       "source": [
 786 |         "### **np.eye():**\n",
 787 |         "An Identity matrix is a square matrix that has only 1s in its main diagonal and zeros everywhere else. The function np.eye(N) creates a square N x N ndarray corresponding to the Identity matrix. Since all Identity Matrices are square, the np.eye() function only needs a single integer as an argument. Let's see an example:"
 788 |       ]
 789 |     },
 790 |     {
 791 |       "cell_type": "code",
 792 |       "metadata": {
 793 |         "id": "MqrDmyhxGcIO",
 794 |         "colab_type": "code",
 795 |         "colab": {
 796 |           "base_uri": "https://localhost:8080/",
 797 |           "height": 220
 798 |         },
 799 |         "outputId": "87689d57-208e-43d2-9c2a-16b663980a32"
 800 |       },
 801 |       "source": [
 802 |         "# We create a 5 x 5 Identity matrix. \n",
 803 |         "X = np.eye(5)\n",
 804 |         "\n",
 805 |         "# We print X\n",
 806 |         "print()\n",
 807 |         "print('X = \\n', X)\n",
 808 |         "print()\n",
 809 |         "\n",
 810 |         "# We print information about X\n",
 811 |         "print('X has dimensions:', X.shape)\n",
 812 |         "print('X is an object of type:', type(X))\n",
 813 |         "print('The elements in X are of type:', X.dtype)  "
 814 |       ],
 815 |       "execution_count": 14,
 816 |       "outputs": [
 817 |         {
 818 |           "output_type": "stream",
 819 |           "text": [
 820 |             "\n",
 821 |             "X = \n",
 822 |             " [[1. 0. 0. 0. 0.]\n",
 823 |             " [0. 1. 0. 0. 0.]\n",
 824 |             " [0. 0. 1. 0. 0.]\n",
 825 |             " [0. 0. 0. 1. 0.]\n",
 826 |             " [0. 0. 0. 0. 1.]]\n",
 827 |             "\n",
 828 |             "X has dimensions: (5, 5)\n",
 829 |             "X is an object of type: <class 'numpy.ndarray'>\n",
 830 |             "The elements in X are of type: float64\n"
 831 |           ],
 832 |           "name": "stdout"
 833 |         }
 834 |       ]
 835 |     },
 836 |     {
 837 |       "cell_type": "markdown",
 838 |       "metadata": {
 839 |         "id": "28i2JuXSGtyZ",
 840 |         "colab_type": "text"
 841 |       },
 842 |       "source": [
 843 |         "### **np.diag():**\n",
 844 |         "We can also create diagonal matrices by using the np.diag() function. A diagonal matrix is a square matrix that only has non-zero values in its main diagonal. The np.diag() function creates an ndarray corresponding to a diagonal matrix, as shown in the example below:"
 845 |       ]
 846 |     },
 847 |     {
 848 |       "cell_type": "code",
 849 |       "metadata": {
 850 |         "id": "kZ8T3h4pGR29",
 851 |         "colab_type": "code",
 852 |         "colab": {
 853 |           "base_uri": "https://localhost:8080/"
 854 |         },
 855 |         "outputId": "14c149c9-8ac3-4378-da44-4943c08ff295"
 856 |       },
 857 |       "source": [
 858 |         "# Create a 4 x 4 diagonal matrix that contains the numbers 10, 20, 30, and 50\n",
 859 |         "# on its main diagonal\n",
 860 |         "X = np.diag([10,20,30,50])\n",
 861 |         "\n",
 862 |         "# We print X\n",
 863 |         "print()\n",
 864 |         "print('X = \\n', X)\n",
 865 |         "print()"
 866 |       ],
 867 |       "execution_count": 15,
 868 |       "outputs": [
 869 |         {
 870 |           "output_type": "stream",
 871 |           "text": [
 872 |             "\n",
 873 |             "X = \n",
 874 |             " [[10  0  0  0]\n",
 875 |             " [ 0 20  0  0]\n",
 876 |             " [ 0  0 30  0]\n",
 877 |             " [ 0  0  0 50]]\n",
 878 |             "\n"
 879 |           ],
 880 |           "name": "stdout"
 881 |         }
 882 |       ]
 883 |     },
 884 |     {
 885 |       "cell_type": "markdown",
 886 |       "metadata": {
 887 |         "id": "7mcJx5xHHY0Z",
 888 |         "colab_type": "text"
 889 |       },
 890 |       "source": [
 891 |         "### **np.arange():**\n",
 892 |         "NumPy also allows you to create ndarrays that have evenly spaced values within a given interval. NumPy's np.arange() function is very versatile and can be used with either one, two, or three arguments. Below we will see examples of each case and how they are used to create different kinds of ndarrays.\n",
 893 |         "\n",
 894 |         "Let's start by using np.arange() with only one argument. When used with only one argument, np.arange(N) will create a rank 1 ndarray with consecutive integers between 0 and N - 1. "
 895 |       ]
 896 |     },
 897 |     {
 898 |       "cell_type": "code",
 899 |       "metadata": {
 900 |         "id": "i4ZA3x60Hjpe",
 901 |         "colab_type": "code",
 902 |         "colab": {
 903 |           "base_uri": "https://localhost:8080/",
 904 |           "height": 137
 905 |         },
 906 |         "outputId": "06d5088a-103a-40f6-a508-ec1da572041f"
 907 |       },
 908 |       "source": [
 909 |         "# We create a rank 1 ndarray that has sequential integers from 0 to 9\n",
 910 |         "x = np.arange(10)\n",
 911 |         "\n",
 912 |         "# We print the ndarray\n",
 913 |         "print()\n",
 914 |         "print('x = ', x)\n",
 915 |         "print()\n",
 916 |         "\n",
 917 |         "# We print information about the ndarray\n",
 918 |         "print('x has dimensions:', x.shape)\n",
 919 |         "print('x is an object of type:', type(x))\n",
 920 |         "print('The elements in x are of type:', x.dtype) "
 921 |       ],
 922 |       "execution_count": 16,
 923 |       "outputs": [
 924 |         {
 925 |           "output_type": "stream",
 926 |           "text": [
 927 |             "\n",
 928 |             "x =  [0 1 2 3 4 5 6 7 8 9]\n",
 929 |             "\n",
 930 |             "x has dimensions: (10,)\n",
 931 |             "x is an object of type: <class 'numpy.ndarray'>\n",
 932 |             "The elements in x are of type: int64\n"
 933 |           ],
 934 |           "name": "stdout"
 935 |         }
 936 |       ]
 937 |     },
 938 |     {
 939 |       "cell_type": "markdown",
 940 |       "metadata": {
 941 |         "id": "mYqdyfZTHxqP",
 942 |         "colab_type": "text"
 943 |       },
 944 |       "source": [
 945 |         "When used with two arguments, np.arange(start,stop) will create a rank 1 ndarray with evenly spaced values within the half-open interval [start, stop). This means the evenly spaced numbers will include start but exclude stop. Let's see an example:"
 946 |       ]
 947 |     },
 948 |     {
 949 |       "cell_type": "code",
 950 |       "metadata": {
 951 |         "id": "UBcnbmquHrgl",
 952 |         "colab_type": "code",
 953 |         "colab": {
 954 |           "base_uri": "https://localhost:8080/",
 955 |           "height": 137
 956 |         },
 957 |         "outputId": "3a181743-e07d-4347-ac5d-03a49d53302e"
 958 |       },
 959 |       "source": [
 960 |         "# We create a rank 1 ndarray that has sequential integers from 4 to 9. \n",
 961 |         "x = np.arange(4,10)\n",
 962 |         "\n",
 963 |         "# We print the ndarray\n",
 964 |         "print()\n",
 965 |         "print('x = ', x)\n",
 966 |         "print()\n",
 967 |         "\n",
 968 |         "# We print information about the ndarray\n",
 969 |         "print('x has dimensions:', x.shape)\n",
 970 |         "print('x is an object of type:', type(x))\n",
 971 |         "print('The elements in x are of type:', x.dtype) "
 972 |       ],
 973 |       "execution_count": 17,
 974 |       "outputs": [
 975 |         {
 976 |           "output_type": "stream",
 977 |           "text": [
 978 |             "\n",
 979 |             "x =  [4 5 6 7 8 9]\n",
 980 |             "\n",
 981 |             "x has dimensions: (6,)\n",
 982 |             "x is an object of type: <class 'numpy.ndarray'>\n",
 983 |             "The elements in x are of type: int64\n"
 984 |           ],
 985 |           "name": "stdout"
 986 |         }
 987 |       ]
 988 |     },
 989 |     {
 990 |       "cell_type": "markdown",
 991 |       "metadata": {
 992 |         "id": "-k99cyrkH7EE",
 993 |         "colab_type": "text"
 994 |       },
 995 |       "source": [
 996 |         "Finally, when used with three arguments, np.arange(start,stop,step) will create a rank 1 ndarray with evenly spaced values within the half-open interval [start, stop) with step being the distance between two adjacent values. Let's see an example:"
 997 |       ]
 998 |     },
 999 |     {
1000 |       "cell_type": "code",
1001 |       "metadata": {
1002 |         "id": "7AnrqDCQH34G",
1003 |         "colab_type": "code",
1004 |         "colab": {
1005 |           "base_uri": "https://localhost:8080/"
1006 |         },
1007 |         "outputId": "f83bba5c-73cf-49ea-9666-dd1b9a5d1d60"
1008 |       },
1009 |       "source": [
1010 |         "# We create a rank 1 ndarray that has evenly spaced \n",
1011 |         "# integers from 1 to 13 in steps of 3.\n",
1012 |         "x = np.arange(1,14,3)\n",
1013 |         "\n",
1014 |         "# We print the ndarray\n",
1015 |         "print()\n",
1016 |         "print('x = ', x)\n",
1017 |         "print()\n",
1018 |         "\n",
1019 |         "# We print information about the ndarray\n",
1020 |         "print('x has dimensions:', x.shape)\n",
1021 |         "print('x is an object of type:', type(x))\n",
1022 |         "print('The elements in x are of type:', x.dtype) "
1023 |       ],
1024 |       "execution_count": 18,
1025 |       "outputs": [
1026 |         {
1027 |           "output_type": "stream",
1028 |           "text": [
1029 |             "\n",
1030 |             "x =  [ 1  4  7 10 13]\n",
1031 |             "\n",
1032 |             "x has dimensions: (5,)\n",
1033 |             "x is an object of type: <class 'numpy.ndarray'>\n",
1034 |             "The elements in x are of type: int64\n"
1035 |           ],
1036 |           "name": "stdout"
1037 |         }
1038 |       ]
1039 |     },
1040 |     {
1041 |       "cell_type": "markdown",
1042 |       "metadata": {
1043 |         "id": "8ViXZqneuL7i",
1044 |         "colab_type": "text"
1045 |       },
1046 |       "source": [
1047 |         "### **np.linspace():**\n",
1048 |         "\n",
1049 |         "Even though the np.arange() function allows for non-integer steps, such as 0.3, the output is usually inconsistent, due to the finite floating point precision. For this reason, in the cases where non-integer steps are required, it is usually better to use the function np.linspace(). The np.linspace(start, stop, N) function returns N evenly spaced numbers over the closed interval [start, stop]. This means that both the start and the stop values are included. We should also note the np.linspace() function needs to be called with at least two arguments in the form np.linspace(start,stop). In this case, the default number of elements in the specified interval will be N = 50. The reason np.linspace() works better than the np.arange() function, is that np.linspace() uses the number of elements we want in a particular interval, instead of the step between values. Let's see some examples:"
1050 |       ]
1051 |     },
1052 |     {
1053 |       "cell_type": "code",
1054 |       "metadata": {
1055 |         "id": "rVOpuq8zIBhQ",
1056 |         "colab_type": "code",
1057 |         "colab": {
1058 |           "base_uri": "https://localhost:8080/",
1059 |           "height": 170
1060 |         },
1061 |         "outputId": "80ccf81a-0bf7-4619-f190-5acb958abf5e"
1062 |       },
1063 |       "source": [
1064 |         "# We create a rank 1 ndarray that has 10 evenly spaced numbers between 0 and 25.\n",
1065 |         "x = np.linspace(0,25,10)\n",
1066 |         "\n",
1067 |         "# We print the ndarray\n",
1068 |         "print()\n",
1069 |         "print('x = \\n', x)\n",
1070 |         "print()\n",
1071 |         "\n",
1072 |         "# We print information about the ndarray\n",
1073 |         "print('x has dimensions:', x.shape)\n",
1074 |         "print('x is an object of type:', type(x))\n",
1075 |         "print('The elements in x are of type:', x.dtype)"
1076 |       ],
1077 |       "execution_count": 19,
1078 |       "outputs": [
1079 |         {
1080 |           "output_type": "stream",
1081 |           "text": [
1082 |             "\n",
1083 |             "x = \n",
1084 |             " [ 0.          2.77777778  5.55555556  8.33333333 11.11111111 13.88888889\n",
1085 |             " 16.66666667 19.44444444 22.22222222 25.        ]\n",
1086 |             "\n",
1087 |             "x has dimensions: (10,)\n",
1088 |             "x is an object of type: <class 'numpy.ndarray'>\n",
1089 |             "The elements in x are of type: float64\n"
1090 |           ],
1091 |           "name": "stdout"
1092 |         }
1093 |       ]
1094 |     },
1095 |     {
1096 |       "cell_type": "markdown",
1097 |       "metadata": {
1098 |         "id": "DDgc6lKNv5m8",
1099 |         "colab_type": "text"
1100 |       },
1101 |       "source": [
1102 |         "As we can see from the above example, the function np.linspace(0,25,10) returns an ndarray with 10 evenly spaced numbers in the closed interval [0, 25]. We can also see that both the start and end points, 0 and 25 in this case, are included. However, you can let the endpoint of the interval be excluded (just like in the np.arange() function) by setting the keyword endpoint = False in the np.linspace() function. Let's create the same x ndarray we created above but now with the endpoint excluded:"
1103 |       ]
1104 |     },
1105 |     {
1106 |       "cell_type": "code",
1107 |       "metadata": {
1108 |         "id": "cakMmQmdvy1t",
1109 |         "colab_type": "code",
1110 |         "colab": {
1111 |           "base_uri": "https://localhost:8080/",
1112 |           "height": 137
1113 |         },
1114 |         "outputId": "eeda1f25-0941-451c-c3ba-b99c12b17355"
1115 |       },
1116 |       "source": [
1117 |         "# We create a rank 1 ndarray that has 10 numbers evenly spaced between 0 and 25,\n",
1118 |         "# with 25 excluded.\n",
1119 |         "x = np.linspace(0,25,10, endpoint = False)\n",
1120 |         "\n",
1121 |         "# We print the ndarray\n",
1122 |         "print()\n",
1123 |         "print('x = ', x)\n",
1124 |         "print()\n",
1125 |         "\n",
1126 |         "# We print information about the ndarray\n",
1127 |         "print('x has dimensions:', x.shape)\n",
1128 |         "print('x is an object of type:', type(x))\n",
1129 |         "print('The elements in x are of type:', x.dtype)"
1130 |       ],
1131 |       "execution_count": 20,
1132 |       "outputs": [
1133 |         {
1134 |           "output_type": "stream",
1135 |           "text": [
1136 |             "\n",
1137 |             "x =  [ 0.   2.5  5.   7.5 10.  12.5 15.  17.5 20.  22.5]\n",
1138 |             "\n",
1139 |             "x has dimensions: (10,)\n",
1140 |             "x is an object of type: <class 'numpy.ndarray'>\n",
1141 |             "The elements in x are of type: float64\n"
1142 |           ],
1143 |           "name": "stdout"
1144 |         }
1145 |       ]
1146 |     },
1147 |     {
1148 |       "cell_type": "markdown",
1149 |       "metadata": {
1150 |         "id": "xHA3hotizZoq",
1151 |         "colab_type": "text"
1152 |       },
1153 |       "source": [
1154 |         "## **np.reshape():**\n",
1155 |         "So far, we have only used the built-in functions np.arange() and np.linspace() to create rank 1 ndarrays. However, we can use these functions to create rank 2 ndarrays of any shape by combining them with the np.reshape() function. The np.reshape(ndarray, new_shape) function converts the given ndarray into the specified new_shape. It is important to note that the new_shape should be compatible with the number of elements in the given ndarray."
1156 |       ]
1157 |     },
1158 |     {
1159 |       "cell_type": "code",
1160 |       "metadata": {
1161 |         "id": "CBGX8P2Bzaf_",
1162 |         "colab_type": "code",
1163 |         "colab": {
1164 |           "base_uri": "https://localhost:8080/",
1165 |           "height": 253
1166 |         },
1167 |         "outputId": "da911b43-4adb-40e9-bc4d-7ab97a5863ea"
1168 |       },
1169 |       "source": [
1170 |         "# We create a rank 1 ndarray with sequential integers from 0 to 19\n",
1171 |         "x = np.arange(20)\n",
1172 |         "\n",
1173 |         "# We print x\n",
1174 |         "print()\n",
1175 |         "print('Original x = ', x)\n",
1176 |         "print()\n",
1177 |         "\n",
1178 |         "# We reshape x into a 4 x 5 ndarray \n",
1179 |         "x = np.reshape(x, (4,5))\n",
1180 |         "\n",
1181 |         "# We print the reshaped x\n",
1182 |         "print()\n",
1183 |         "print('Reshaped x = \\n', x)\n",
1184 |         "print()\n",
1185 |         "\n",
1186 |         "# We print information about the reshaped x\n",
1187 |         "print('x has dimensions:', x.shape)\n",
1188 |         "print('x is an object of type:', type(x))\n",
1189 |         "print('The elements in x are of type:', x.dtype) "
1190 |       ],
1191 |       "execution_count": 21,
1192 |       "outputs": [
1193 |         {
1194 |           "output_type": "stream",
1195 |           "text": [
1196 |             "\n",
1197 |             "Original x =  [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]\n",
1198 |             "\n",
1199 |             "\n",
1200 |             "Reshaped x = \n",
1201 |             " [[ 0  1  2  3  4]\n",
1202 |             " [ 5  6  7  8  9]\n",
1203 |             " [10 11 12 13 14]\n",
1204 |             " [15 16 17 18 19]]\n",
1205 |             "\n",
1206 |             "x has dimensions: (4, 5)\n",
1207 |             "x is an object of type: <class 'numpy.ndarray'>\n",
1208 |             "The elements in x are of type: int64\n"
1209 |           ],
1210 |           "name": "stdout"
1211 |         }
1212 |       ]
1213 |     },
1214 |     {
1215 |       "cell_type": "markdown",
1216 |       "metadata": {
1217 |         "id": "EZHoy2duz-v6",
1218 |         "colab_type": "text"
1219 |       },
1220 |       "source": [
1221 |         "One great feature of NumPy is that some functions can also be applied as methods. This allows us to apply different functions in sequence in just one line of code. ndarray methods are similar to ndarray attributes in that they are both applied using dot notation (.). Let's see how we can accomplish the same result as in the above example, but in just one line of code:"
1222 |       ]
1223 |     },
1224 |     {
1225 |       "cell_type": "code",
1226 |       "metadata": {
1227 |         "id": "vm5wTd5oze11",
1228 |         "colab_type": "code",
1229 |         "colab": {
1230 |           "base_uri": "https://localhost:8080/",
1231 |           "height": 203
1232 |         },
1233 |         "outputId": "8f12e4a7-53e8-48bf-9e62-93b0487aba85"
1234 |       },
1235 |       "source": [
1236 |         "# We create a rank 1 ndarray with sequential integers from 0 to 19 and\n",
1237 |         "# reshape it to a 4 x 5 array \n",
1238 |         "Y = np.arange(20).reshape(4, 5)\n",
1239 |         "\n",
1240 |         "# We print Y\n",
1241 |         "print()\n",
1242 |         "print('Y = \\n', Y)\n",
1243 |         "print()\n",
1244 |         "\n",
1245 |         "# We print information about Y\n",
1246 |         "print('Y has dimensions:', Y.shape)\n",
1247 |         "print('Y is an object of type:', type(Y))\n",
1248 |         "print('The elements in Y are of type:', Y.dtype)"
1249 |       ],
1250 |       "execution_count": 23,
1251 |       "outputs": [
1252 |         {
1253 |           "output_type": "stream",
1254 |           "text": [
1255 |             "\n",
1256 |             "Y = \n",
1257 |             " [[ 0  1  2  3  4]\n",
1258 |             " [ 5  6  7  8  9]\n",
1259 |             " [10 11 12 13 14]\n",
1260 |             " [15 16 17 18 19]]\n",
1261 |             "\n",
1262 |             "Y has dimensions: (4, 5)\n",
1263 |             "Y is an object of type: <class 'numpy.ndarray'>\n",
1264 |             "The elements in Y are of type: int64\n"
1265 |           ],
1266 |           "name": "stdout"
1267 |         }
1268 |       ]
1269 |     },
1270 |     {
1271 |       "cell_type": "markdown",
1272 |       "metadata": {
1273 |         "id": "T2vboYR30a__",
1274 |         "colab_type": "text"
1275 |       },
1276 |       "source": [
1277 |         "In the same manner, we can also combine reshape() with np.linspace() to create rank 2 arrays, as shown in the next example."
1278 |       ]
1279 |     },
1280 |     {
1281 |       "cell_type": "code",
1282 |       "metadata": {
1283 |         "id": "uZgWrDH10b6b",
1284 |         "colab_type": "code",
1285 |         "colab": {
1286 |           "base_uri": "https://localhost:8080/",
1287 |           "height": 220
1288 |         },
1289 |         "outputId": "81263851-5635-42b7-84c6-15ed65de6ce4"
1290 |       },
1291 |       "source": [
1292 |         "# We create a rank 1 ndarray with 10 numbers evenly spaced between 0 and 50,\n",
1293 |         "# with 50 excluded. We then reshape it to a 5 x 2 ndarray\n",
1294 |         "X = np.linspace(0,50,10, endpoint=False).reshape(5,2)\n",
1295 |         "\n",
1296 |         "# We print X\n",
1297 |         "print()\n",
1298 |         "print('X = \\n', X)\n",
1299 |         "print()\n",
1300 |         "\n",
1301 |         "# We print information about X\n",
1302 |         "print('X has dimensions:', X.shape)\n",
1303 |         "print('X is an object of type:', type(X))\n",
1304 |         "print('The elements in X are of type:', X.dtype)"
1305 |       ],
1306 |       "execution_count": 22,
1307 |       "outputs": [
1308 |         {
1309 |           "output_type": "stream",
1310 |           "text": [
1311 |             "\n",
1312 |             "X = \n",
1313 |             " [[ 0.  5.]\n",
1314 |             " [10. 15.]\n",
1315 |             " [20. 25.]\n",
1316 |             " [30. 35.]\n",
1317 |             " [40. 45.]]\n",
1318 |             "\n",
1319 |             "X has dimensions: (5, 2)\n",
1320 |             "X is an object of type: <class 'numpy.ndarray'>\n",
1321 |             "The elements in X are of type: float64\n"
1322 |           ],
1323 |           "name": "stdout"
1324 |         }
1325 |       ]
1326 |     },
1327 |     {
1328 |       "cell_type": "markdown",
1329 |       "metadata": {
1330 |         "id": "krYChE6J0vVv",
1331 |         "colab_type": "text"
1332 |       },
1333 |       "source": [
1334 |         "## **Random:**\n",
1335 |         "The last type of ndarrays we are going to create are random ndarrays. Random ndarrays are arrays that contain random numbers. Often in Machine Learning, you need to create random matrices, for example, when initializing the weights of a Neural Network. NumPy offers a variety of random functions to help us create random ndarrays of any shape."
1336 |       ]
1337 |     },
1338 |     {
1339 |       "cell_type": "code",
1340 |       "metadata": {
1341 |         "id": "ve4OSL3J0fFq",
1342 |         "colab_type": "code",
1343 |         "colab": {
1344 |           "base_uri": "https://localhost:8080/",
1345 |           "height": 186
1346 |         },
1347 |         "outputId": "fdc37494-1104-4b50-dec4-233aacc89929"
1348 |       },
1349 |       "source": [
1350 |         "# We create a 3 x 3 ndarray with random floats in the half-open interval [0.0, 1.0).\n",
1351 |         "X = np.random.random((3,3))\n",
1352 |         "\n",
1353 |         "# We print X\n",
1354 |         "print()\n",
1355 |         "print('X = \\n', X)\n",
1356 |         "print()\n",
1357 |         "\n",
1358 |         "# We print information about X\n",
1359 |         "print('X has dimensions:', X.shape)\n",
1360 |         "print('X is an object of type:', type(X))\n",
1361 |         "print('The elements in x are of type:', X.dtype)"
1362 |       ],
1363 |       "execution_count": 24,
1364 |       "outputs": [
1365 |         {
1366 |           "output_type": "stream",
1367 |           "text": [
1368 |             "\n",
1369 |             "X = \n",
1370 |             " [[0.70511948 0.40550946 0.58073781]\n",
1371 |             " [0.67423937 0.03346497 0.12587685]\n",
1372 |             " [0.31964513 0.15787991 0.39406672]]\n",
1373 |             "\n",
1374 |             "X has dimensions: (3, 3)\n",
1375 |             "X is an object of type: <class 'numpy.ndarray'>\n",
1376 |             "The elements in x are of type: float64\n"
1377 |           ],
1378 |           "name": "stdout"
1379 |         }
1380 |       ]
1381 |     },
1382 |     {
1383 |       "cell_type": "markdown",
1384 |       "metadata": {
1385 |         "id": "wOgmWsMW2lMA",
1386 |         "colab_type": "text"
1387 |       },
1388 |       "source": [
1389 |         "NumPy also allows us to create ndarrays with random integers within a particular interval. The function np.random.randint(start, stop, size = shape) creates an ndarray of the given shape with random integers in the half-open interval [start, stop). Let's see an example:"
1390 |       ]
1391 |     },
1392 |     {
1393 |       "cell_type": "code",
1394 |       "metadata": {
1395 |         "id": "8JluLn5s2huf",
1396 |         "colab_type": "code",
1397 |         "colab": {
1398 |           "base_uri": "https://localhost:8080/",
1399 |           "height": 186
1400 |         },
1401 |         "outputId": "3ba3833a-2ac5-40ae-834d-995fb1e6217c"
1402 |       },
1403 |       "source": [
1404 |         "# We create a 3 x 2 ndarray with random integers in the half-open interval [4, 15).\n",
1405 |         "X = np.random.randint(4,15,size=(3,2))\n",
1406 |         "\n",
1407 |         "# We print X\n",
1408 |         "print()\n",
1409 |         "print('X = \\n', X)\n",
1410 |         "print()\n",
1411 |         "\n",
1412 |         "# We print information about X\n",
1413 |         "print('X has dimensions:', X.shape)\n",
1414 |         "print('X is an object of type:', type(X))\n",
1415 |         "print('The elements in X are of type:', X.dtype)"
1416 |       ],
1417 |       "execution_count": 25,
1418 |       "outputs": [
1419 |         {
1420 |           "output_type": "stream",
1421 |           "text": [
1422 |             "\n",
1423 |             "X = \n",
1424 |             " [[ 8  5]\n",
1425 |             " [ 4 12]\n",
1426 |             " [ 7  8]]\n",
1427 |             "\n",
1428 |             "X has dimensions: (3, 2)\n",
1429 |             "X is an object of type: <class 'numpy.ndarray'>\n",
1430 |             "The elements in X are of type: int64\n"
1431 |           ],
1432 |           "name": "stdout"
1433 |         }
1434 |       ]
1435 |     },
1436 |     {
1437 |       "cell_type": "markdown",
1438 |       "metadata": {
1439 |         "id": "Kh37ENWG5NG2",
1440 |         "colab_type": "text"
1441 |       },
1442 |       "source": [
1443 |         "In some cases, you may need to create ndarrays with random numbers that satisfy certain statistical properties. For example, you may want the random numbers in the ndarray to have an average of 0. NumPy allows you to create random ndarrays with numbers drawn from various probability distributions. The function np.random.normal(mean, standard deviation, size=shape), for example, creates an ndarray with the given shape that contains random numbers picked from a normal (Gaussian) distribution with the given mean and standard deviation."
1444 |       ]
1445 |     },
1446 |     {
1447 |       "cell_type": "code",
1448 |       "metadata": {
1449 |         "id": "XyqTesTi2pIf",
1450 |         "colab_type": "code",
1451 |         "colab": {
1452 |           "base_uri": "https://localhost:8080/",
1453 |           "height": 434
1454 |         },
1455 |         "outputId": "d1b19885-6ae1-4f58-828d-e51a7d65f321"
1456 |       },
1457 |       "source": [
1458 |         "# We create a 1000 x 1000 ndarray of random floats drawn from normal (Gaussian) distribution\n",
1459 |         "# with a mean of zero and a standard deviation of 0.1.\n",
1460 |         "X = np.random.normal(0, 0.1, size=(1000,1000))\n",
1461 |         "\n",
1462 |         "# We print X\n",
1463 |         "print()\n",
1464 |         "print('X = \\n', X)\n",
1465 |         "print()\n",
1466 |         "\n",
1467 |         "# We print information about X\n",
1468 |         "print('X has dimensions:', X.shape)\n",
1469 |         "print('X is an object of type:', type(X))\n",
1470 |         "print('The elements in X are of type:', X.dtype)\n",
1471 |         "print('The elements in X have a mean of:', X.mean())\n",
1472 |         "print('The maximum value in X is:', X.max())\n",
1473 |         "print('The minimum value in X is:', X.min())\n",
1474 |         "print('X has', (X < 0).sum(), 'negative numbers')\n",
1475 |         "print('X has', (X > 0).sum(), 'positive numbers')"
1476 |       ],
1477 |       "execution_count": 26,
1478 |       "outputs": [
1479 |         {
1480 |           "output_type": "stream",
1481 |           "text": [
1482 |             "\n",
1483 |             "X = \n",
1484 |             " [[ 0.06038575  0.07058648  0.00651944 ... -0.11510515  0.03274108\n",
1485 |             "  -0.04948388]\n",
1486 |             " [-0.0427396   0.01963788 -0.08193541 ...  0.22104453  0.00997958\n",
1487 |             "  -0.17775136]\n",
1488 |             " [ 0.08125654  0.01597223 -0.00085445 ... -0.1888355  -0.0522047\n",
1489 |             "  -0.03611404]\n",
1490 |             " ...\n",
1491 |             " [-0.05934912 -0.0268068   0.13427054 ... -0.0735276  -0.04742756\n",
1492 |             "  -0.07159635]\n",
1493 |             " [ 0.1307405  -0.07065583 -0.17023093 ...  0.03329357 -0.06680952\n",
1494 |             "  -0.06895992]\n",
1495 |             " [ 0.04490569  0.1388507   0.06680477 ... -0.0169742  -0.04751616\n",
1496 |             "   0.142401  ]]\n",
1497 |             "\n",
1498 |             "X has dimensions: (1000, 1000)\n",
1499 |             "X is an object of type: <class 'numpy.ndarray'>\n",
1500 |             "The elements in X are of type: float64\n",
1501 |             "The elements in X have a mean of: 0.00010345992650649735\n",
1502 |             "The maximum value in X is: 0.49171050961052176\n",
1503 |             "The minimum value in X is: -0.4572065155534312\n",
1504 |             "X has 499752 negative numbers\n",
1505 |             "X has 500248 positive numbers\n"
1506 |           ],
1507 |           "name": "stdout"
1508 |         }
1509 |       ]
1510 |     },
1511 |     {
1512 |       "cell_type": "markdown",
1513 |       "metadata": {
1514 |         "id": "OGKlN5Ui5ZC0",
1515 |         "colab_type": "text"
1516 |       },
1517 |       "source": [
1518 |         "## **Question:**\n",
1519 |         "Using the Built-in functions you learned about in the\n",
1520 |         "previous lesson, create a 4 x 4 ndarray that only\n",
1521 |         "contains consecutive even numbers from 2 to 32 (inclusive)\n"
1522 |       ]
1523 |     },
1524 |     {
1525 |       "cell_type": "code",
1526 |       "metadata": {
1527 |         "id": "fFpdoCad5S37",
1528 |         "colab_type": "code",
1529 |         "colab": {}
1530 |       },
1531 |       "source": [
1532 |         "import numpy as np\n",
1533 |         "\n",
1534 |         "X = "
1535 |       ],
1536 |       "execution_count": null,
1537 |       "outputs": []
1538 |     },
1539 |     {
1540 |       "cell_type": "markdown",
1541 |       "metadata": {
1542 |         "id": "B2q9JDTa5kuw",
1543 |         "colab_type": "text"
1544 |       },
1545 |       "source": [
1546 |         "## **Question:**\n",
1547 |         "Try creating the same array using the np.linspace() function."
1548 |       ]
1549 |     },
1550 |     {
1551 |       "cell_type": "code",
1552 |       "metadata": {
1553 |         "id": "U9LPnzMm5qJL",
1554 |         "colab_type": "code",
1555 |         "colab": {}
1556 |       },
1557 |       "source": [
1558 |         "import numpy as np\n",
1559 |         "\n",
1560 |         "X = "
1561 |       ],
1562 |       "execution_count": null,
1563 |       "outputs": []
1564 |     },
1565 |     {
1566 |       "cell_type": "markdown",
1567 |       "metadata": {
1568 |         "id": "wIbAPL5A7V1N",
1569 |         "colab_type": "text"
1570 |       },
1571 |       "source": [
1572 |         "## **Accessing Elements in ndarrays:**\n",
1573 |         "Elements can be accessed using indices inside square brackets, [ ]. NumPy allows you to use both positive and negative indices to access elements in the ndarray. Positive indices are used to access elements from the beginning of the array, while negative indices are used to access elements from the end of the array. "
1574 |       ]
1575 |     },
1576 |     {
1577 |       "cell_type": "code",
1578 |       "metadata": {
1579 |         "id": "ZfjHHvWs7vYO",
1580 |         "colab_type": "code",
1581 |         "colab": {}
1582 |       },
1583 |       "source": [
1584 |         "# We create a rank 1 ndarray that contains integers from 1 to 5\n",
1585 |         "x = np.array([1, 2, 3, 4, 5])\n",
1586 |         "\n",
1587 |         "# We print x\n",
1588 |         "print()\n",
1589 |         "print('x = ', x)\n",
1590 |         "print()\n",
1591 |         "\n",
1592 |         "# Let's access some elements with positive indices\n",
1593 |         "print('This is First Element in x:', x[0]) \n",
1594 |         "print('This is Second Element in x:', x[1])\n",
1595 |         "print('This is Fifth (Last) Element in x:', x[4])\n",
1596 |         "print()\n",
1597 |         "\n",
1598 |         "# Let's access the same elements with negative indices\n",
1599 |         "print('This is First Element in x:', x[-5])\n",
1600 |         "print('This is Second Element in x:', x[-4])\n",
1601 |         "print('This is Fifth (Last) Element in x:', x[-1])"
1602 |       ],
1603 |       "execution_count": null,
1604 |       "outputs": []
1605 |     },
1606 |     {
1607 |       "cell_type": "markdown",
1608 |       "metadata": {
1609 |         "id": "bbYr59qc8arb",
1610 |         "colab_type": "text"
1611 |       },
1612 |       "source": [
1613 |         "## Modifying ndarrays:\n",
1614 |         "Now let's see how we can change the elements in rank 1 ndarrays. We do this by accessing the element we want to change and then using the = sign to assign the new value:"
1615 |       ]
1616 |     },
1617 |     {
1618 |       "cell_type": "code",
1619 |       "metadata": {
1620 |         "id": "yQwCZwSD85Q5",
1621 |         "colab_type": "code",
1622 |         "colab": {
1623 |           "base_uri": "https://localhost:8080/",
1624 |           "height": 137
1625 |         },
1626 |         "outputId": "c7855e58-b67e-46cc-b966-f39ca0180d0a"
1627 |       },
1628 |       "source": [
1629 |         "# We create a rank 1 ndarray that contains integers from 1 to 5\n",
1630 |         "x = np.array([1, 2, 3, 4, 5])\n",
1631 |         "\n",
1632 |         "# We print the original x\n",
1633 |         "print()\n",
1634 |         "print('Original:\\n x = ', x)\n",
1635 |         "print()\n",
1636 |         "\n",
1637 |         "# We change the fourth element in x from 4 to 20\n",
1638 |         "x[3] = 20\n",
1639 |         "\n",
1640 |         "# We print x after it was modified \n",
1641 |         "print('Modified:\\n x = ', x)\n"
1642 |       ],
1643 |       "execution_count": 27,
1644 |       "outputs": [
1645 |         {
1646 |           "output_type": "stream",
1647 |           "text": [
1648 |             "\n",
1649 |             "Original:\n",
1650 |             " x =  [1 2 3 4 5]\n",
1651 |             "\n",
1652 |             "Modified:\n",
1653 |             " x =  [ 1  2  3 20  5]\n"
1654 |           ],
1655 |           "name": "stdout"
1656 |         }
1657 |       ]
1658 |     },
1659 |     {
1660 |       "cell_type": "markdown",
1661 |       "metadata": {
1662 |         "id": "W-G-2CNy8-Kn",
1663 |         "colab_type": "text"
1664 |       },
1665 |       "source": [
1666 |         "Similarly, we can also access and modify specific elements of rank 2 ndarrays. To access elements in rank 2 ndarrays we need to provide 2 indices in the form [row, column]. Let's see some examples:"
1667 |       ]
1668 |     },
1669 |     {
1670 |       "cell_type": "code",
1671 |       "metadata": {
1672 |         "id": "fHV8Q1Ik863v",
1673 |         "colab_type": "code",
1674 |         "colab": {
1675 |           "base_uri": "https://localhost:8080/",
1676 |           "height": 186
1677 |         },
1678 |         "outputId": "e580a229-0b25-40ff-f7a8-20749eb84ea3"
1679 |       },
1680 |       "source": [
1681 |         "# We create a 3 x 3 rank 2 ndarray that contains integers from 1 to 9\n",
1682 |         "X = np.array([[1,2,3],[4,5,6],[7,8,9]])\n",
1683 |         "\n",
1684 |         "# We print X\n",
1685 |         "print()\n",
1686 |         "print('X = \\n', X)\n",
1687 |         "print()\n",
1688 |         "\n",
1689 |         "# Let's access some elements in X\n",
1690 |         "print('This is (0,0) Element in X:', X[0,0])\n",
1691 |         "print('This is (0,1) Element in X:', X[0,1])\n",
1692 |         "print('This is (2,2) Element in X:', X[2,2])"
1693 |       ],
1694 |       "execution_count": 28,
1695 |       "outputs": [
1696 |         {
1697 |           "output_type": "stream",
1698 |           "text": [
1699 |             "\n",
1700 |             "X = \n",
1701 |             " [[1 2 3]\n",
1702 |             " [4 5 6]\n",
1703 |             " [7 8 9]]\n",
1704 |             "\n",
1705 |             "This is (0,0) Element in X: 1\n",
1706 |             "This is (0,1) Element in X: 2\n",
1707 |             "This is (2,2) Element in X: 9\n"
1708 |           ],
1709 |           "name": "stdout"
1710 |         }
1711 |       ]
1712 |     },
1713 |     {
1714 |       "cell_type": "markdown",
1715 |       "metadata": {
1716 |         "id": "kgQ4DUxZ9FPK",
1717 |         "colab_type": "text"
1718 |       },
1719 |       "source": [
1720 |         "Elements in rank 2 ndarrays can be modified in the same way as with rank 1 ndarrays. Let's see an example:"
1721 |       ]
1722 |     },
1723 |     {
1724 |       "cell_type": "code",
1725 |       "metadata": {
1726 |         "id": "uMP5cJ-P9A1P",
1727 |         "colab_type": "code",
1728 |         "colab": {
1729 |           "base_uri": "https://localhost:8080/",
1730 |           "height": 236
1731 |         },
1732 |         "outputId": "b0faf293-8a41-49b6-fe30-6f498201538e"
1733 |       },
1734 |       "source": [
1735 |         "# We create a 3 x 3 rank 2 ndarray that contains integers from 1 to 9\n",
1736 |         "X = np.array([[1,2,3],[4,5,6],[7,8,9]])\n",
1737 |         "\n",
1738 |         "# We print the original X\n",
1739 |         "print()\n",
1740 |         "print('Original:\\n X = \\n', X)\n",
1741 |         "print()\n",
1742 |         "\n",
1743 |         "# We change the (0,0) element in X from 1 to 20\n",
1744 |         "X[0,0] = 20\n",
1745 |         "\n",
1746 |         "# We print X after it was modified \n",
1747 |         "print('Modified:\\n X = \\n', X)\n"
1748 |       ],
1749 |       "execution_count": 29,
1750 |       "outputs": [
1751 |         {
1752 |           "output_type": "stream",
1753 |           "text": [
1754 |             "\n",
1755 |             "Original:\n",
1756 |             " X = \n",
1757 |             " [[1 2 3]\n",
1758 |             " [4 5 6]\n",
1759 |             " [7 8 9]]\n",
1760 |             "\n",
1761 |             "Modified:\n",
1762 |             " X = \n",
1763 |             " [[20  2  3]\n",
1764 |             " [ 4  5  6]\n",
1765 |             " [ 7  8  9]]\n"
1766 |           ],
1767 |           "name": "stdout"
1768 |         }
1769 |       ]
1770 |     },
1771 |     {
1772 |       "cell_type": "markdown",
1773 |       "metadata": {
1774 |         "id": "ZWSEPc159LEG",
1775 |         "colab_type": "text"
1776 |       },
1777 |       "source": [
1778 |         "## **Adding and Deleting elements:**\n",
1779 |         "Now, let's take a look at how we can add and delete elements from ndarrays. We can delete elements using the np.delete(ndarray, indices, axis) function. This function deletes the elements at the given list of indices from the given ndarray along the specified axis. For rank 1 ndarrays the axis keyword is not required. For rank 2 ndarrays, axis = 0 is used to select rows, and axis = 1 is used to select columns. Let's see some examples:"
1780 |       ]
1781 |     },
1782 |     {
1783 |       "cell_type": "code",
1784 |       "metadata": {
1785 |         "id": "DRB_V6dd9HlM",
1786 |         "colab_type": "code",
1787 |         "colab": {
1788 |           "base_uri": "https://localhost:8080/",
1789 |           "height": 335
1790 |         },
1791 |         "outputId": "b266340c-159d-42ee-d627-a109d0ccb75b"
1792 |       },
1793 |       "source": [
1794 |         "# We create a rank 1 ndarray \n",
1795 |         "x = np.array([1, 2, 3, 4, 5])\n",
1796 |         "\n",
1797 |         "# We create a rank 2 ndarray\n",
1798 |         "Y = np.array([[1,2,3],[4,5,6],[7,8,9]])\n",
1799 |         "\n",
1800 |         "# We print x\n",
1801 |         "print()\n",
1802 |         "print('Original x = ', x)\n",
1803 |         "\n",
1804 |         "# We delete the first and last element of x\n",
1805 |         "x = np.delete(x, [0,4])\n",
1806 |         "\n",
1807 |         "# We print x with the first and last element deleted\n",
1808 |         "print()\n",
1809 |         "print('Modified x = ', x)\n",
1810 |         "\n",
1811 |         "# We print Y\n",
1812 |         "print()\n",
1813 |         "print('Original Y = \\n', Y)\n",
1814 |         "\n",
1815 |         "# We delete the first row of Y\n",
1816 |         "w = np.delete(Y, 0, axis=0)\n",
1817 |         "\n",
1818 |         "# We delete the first and last columns of Y\n",
1819 |         "v = np.delete(Y, [0,2], axis=1)\n",
1820 |         "\n",
1821 |         "# We print w\n",
1822 |         "print()\n",
1823 |         "print('w = \\n', w)\n",
1824 |         "\n",
1825 |         "# We print v\n",
1826 |         "print()\n",
1827 |         "print('v = \\n', v)"
1828 |       ],
1829 |       "execution_count": 30,
1830 |       "outputs": [
1831 |         {
1832 |           "output_type": "stream",
1833 |           "text": [
1834 |             "\n",
1835 |             "Original x =  [1 2 3 4 5]\n",
1836 |             "\n",
1837 |             "Modified x =  [2 3 4]\n",
1838 |             "\n",
1839 |             "Original Y = \n",
1840 |             " [[1 2 3]\n",
1841 |             " [4 5 6]\n",
1842 |             " [7 8 9]]\n",
1843 |             "\n",
1844 |             "w = \n",
1845 |             " [[4 5 6]\n",
1846 |             " [7 8 9]]\n",
1847 |             "\n",
1848 |             "v = \n",
1849 |             " [[2]\n",
1850 |             " [5]\n",
1851 |             " [8]]\n"
1852 |           ],
1853 |           "name": "stdout"
1854 |         }
1855 |       ]
1856 |     },
1857 |     {
1858 |       "cell_type": "markdown",
1859 |       "metadata": {
1860 |         "id": "Nqqwxir99XFI",
1861 |         "colab_type": "text"
1862 |       },
1863 |       "source": [
1864 |         "We can append values to ndarrays using the np.append(ndarray, elements, axis) function. This function appends the given list of elements to ndarray along the specified axis. Let's see some examples:"
1865 |       ]
1866 |     },
1867 |     {
1868 |       "cell_type": "code",
1869 |       "metadata": {
1870 |         "id": "Jv73CPgz9UYw",
1871 |         "colab_type": "code",
1872 |         "colab": {
1873 |           "base_uri": "https://localhost:8080/",
1874 |           "height": 352
1875 |         },
1876 |         "outputId": "06adb68e-fe58-42eb-ee4e-56c8b2958ca8"
1877 |       },
1878 |       "source": [
1879 |         "# We create a rank 1 ndarray \n",
1880 |         "x = np.array([1, 2, 3, 4, 5])\n",
1881 |         "\n",
1882 |         "# We create a rank 2 ndarray \n",
1883 |         "Y = np.array([[1,2,3],[4,5,6]])\n",
1884 |         "\n",
1885 |         "# We print x\n",
1886 |         "print()\n",
1887 |         "print('Original x = ', x)\n",
1888 |         "\n",
1889 |         "# We append the integer 6 to x\n",
1890 |         "x = np.append(x, 6)\n",
1891 |         "\n",
1892 |         "# We print x\n",
1893 |         "print()\n",
1894 |         "print('x = ', x)\n",
1895 |         "\n",
1896 |         "# We append the integers 7 and 8 to x\n",
1897 |         "x = np.append(x, [7,8])\n",
1898 |         "\n",
1899 |         "# We print x\n",
1900 |         "print()\n",
1901 |         "print('x = ', x)\n",
1902 |         "\n",
1903 |         "# We print Y\n",
1904 |         "print()\n",
1905 |         "print('Original Y = \\n', Y)\n",
1906 |         "\n",
1907 |         "# We append a new row containing 7, 8, 9 to Y\n",
1908 |         "v = np.append(Y, [[7,8,9]], axis=0)\n",
1909 |         "\n",
1910 |         "# We append a new column containing 9 and 10 to Y\n",
1911 |         "q = np.append(Y,[[9],[10]], axis=1)\n",
1912 |         "\n",
1913 |         "# We print v\n",
1914 |         "print()\n",
1915 |         "print('v = \\n', v)\n",
1916 |         "\n",
1917 |         "# We print q\n",
1918 |         "print()\n",
1919 |         "print('q = \\n', q)"
1920 |       ],
1921 |       "execution_count": 31,
1922 |       "outputs": [
1923 |         {
1924 |           "output_type": "stream",
1925 |           "text": [
1926 |             "\n",
1927 |             "Original x =  [1 2 3 4 5]\n",
1928 |             "\n",
1929 |             "x =  [1 2 3 4 5 6]\n",
1930 |             "\n",
1931 |             "x =  [1 2 3 4 5 6 7 8]\n",
1932 |             "\n",
1933 |             "Original Y = \n",
1934 |             " [[1 2 3]\n",
1935 |             " [4 5 6]]\n",
1936 |             "\n",
1937 |             "v = \n",
1938 |             " [[1 2 3]\n",
1939 |             " [4 5 6]\n",
1940 |             " [7 8 9]]\n",
1941 |             "\n",
1942 |             "q = \n",
1943 |             " [[ 1  2  3  9]\n",
1944 |             " [ 4  5  6 10]]\n"
1945 |           ],
1946 |           "name": "stdout"
1947 |         }
1948 |       ]
1949 |     },
1950 |     {
1951 |       "cell_type": "markdown",
1952 |       "metadata": {
1953 |         "id": "WsIxKWUN9gRr",
1954 |         "colab_type": "text"
1955 |       },
1956 |       "source": [
1957 |         "Now let's see how we can insert values into ndarrays. We can insert values using the np.insert(ndarray, index, elements, axis) function. This function inserts the given list of elements into ndarray right before the given index along the specified axis. Let's see some examples:"
1958 |       ]
1959 |     },
1960 |     {
1961 |       "cell_type": "code",
1962 |       "metadata": {
1963 |         "id": "7oFcymIm9bTR",
1964 |         "colab_type": "code",
1965 |         "colab": {}
1966 |       },
1967 |       "source": [
1968 |         "# We create a rank 1 ndarray \n",
1969 |         "x = np.array([1, 2, 5, 6, 7])\n",
1970 |         "\n",
1971 |         "# We create a rank 2 ndarray \n",
1972 |         "Y = np.array([[1,2,3],[7,8,9]])\n",
1973 |         "\n",
1974 |         "# We print x\n",
1975 |         "print()\n",
1976 |         "print('Original x = ', x)\n",
1977 |         "\n",
1978 |         "# We insert the integers 3 and 4 between 2 and 5 in x. \n",
1979 |         "x = np.insert(x,2,[3,4])\n",
1980 |         "\n",
1981 |         "# We print x with the inserted elements\n",
1982 |         "print()\n",
1983 |         "print('x = ', x)\n",
1984 |         "\n",
1985 |         "# We print Y\n",
1986 |         "print()\n",
1987 |         "print('Original Y = \\n', Y)\n",
1988 |         "\n",
1989 |         "# We insert a row between the first and last row of Y\n",
1990 |         "w = np.insert(Y,1,[4,5,6],axis=0)\n",
1991 |         "\n",
1992 |         "# We insert a column full of 5s between the first and second column of Y\n",
1993 |         "v = np.insert(Y,1,5, axis=1)\n",
1994 |         "\n",
1995 |         "# We print w\n",
1996 |         "print()\n",
1997 |         "print('w = \\n', w)\n",
1998 |         "\n",
1999 |         "# We print v\n",
2000 |         "print()\n",
2001 |         "print('v = \\n', v)"
2002 |       ],
2003 |       "execution_count": null,
2004 |       "outputs": []
2005 |     },
2006 |     {
2007 |       "cell_type": "markdown",
2008 |       "metadata": {
2009 |         "id": "S0z5oy379mKd",
2010 |         "colab_type": "text"
2011 |       },
2012 |       "source": [
2013 |         "NumPy also allows us to stack ndarrays on top of each other, or to stack them side by side. The stacking is done using either the np.vstack() function for vertical stacking, or the np.hstack() function for horizontal stacking. It is important to note that in order to stack ndarrays, their shapes must be compatible: for vertical stacking the ndarrays must have the same number of columns, and for horizontal stacking they must have the same number of rows. Let's see some examples:"
2014 |       ]
2015 |     },
2016 |     {
2017 |       "cell_type": "code",
2018 |       "metadata": {
2019 |         "id": "J4iQxGR_9n9A",
2020 |         "colab_type": "code",
2021 |         "colab": {
2022 |           "base_uri": "https://localhost:8080/",
2023 |           "height": 286
2024 |         },
2025 |         "outputId": "ae6d63e0-d787-4128-b42d-850bb8cfe617"
2026 |       },
2027 |       "source": [
2028 |         "# We create a rank 1 ndarray \n",
2029 |         "x = np.array([1,2])\n",
2030 |         "\n",
2031 |         "# We create a rank 2 ndarray \n",
2032 |         "Y = np.array([[3,4],[5,6]])\n",
2033 |         "\n",
2034 |         "# We print x\n",
2035 |         "print()\n",
2036 |         "print('x = ', x)\n",
2037 |         "\n",
2038 |         "# We print Y\n",
2039 |         "print()\n",
2040 |         "print('Y = \\n', Y)\n",
2041 |         "\n",
2042 |         "# We stack x on top of Y\n",
2043 |         "z = np.vstack((x,Y))\n",
2044 |         "\n",
2045 |         "# We stack x on the right of Y. We need to reshape x in order to stack it on the right of Y. \n",
2046 |         "w = np.hstack((Y,x.reshape(2,1)))\n",
2047 |         "\n",
2048 |         "# We print z\n",
2049 |         "print()\n",
2050 |         "print('z = \\n', z)\n",
2051 |         "\n",
2052 |         "# We print w\n",
2053 |         "print()\n",
2054 |         "print('w = \\n', w)"
2055 |       ],
2056 |       "execution_count": 32,
2057 |       "outputs": [
2058 |         {
2059 |           "output_type": "stream",
2060 |           "text": [
2061 |             "\n",
2062 |             "x =  [1 2]\n",
2063 |             "\n",
2064 |             "Y = \n",
2065 |             " [[3 4]\n",
2066 |             " [5 6]]\n",
2067 |             "\n",
2068 |             "z = \n",
2069 |             " [[1 2]\n",
2070 |             " [3 4]\n",
2071 |             " [5 6]]\n",
2072 |             "\n",
2073 |             "w = \n",
2074 |             " [[3 4 1]\n",
2075 |             " [5 6 2]]\n"
2076 |           ],
2077 |           "name": "stdout"
2078 |         }
2079 |       ]
2080 |     },
2081 |     {
2082 |       "cell_type": "code",
2083 |       "metadata": {
2084 |         "id": "NoFIMQQu9ptw",
2085 |         "colab_type": "code",
2086 |         "colab": {}
2087 |       },
2088 |       "source": [
2089 |         ""
2090 |       ],
2091 |       "execution_count": null,
2092 |       "outputs": []
2093 |     }
2094 |   ]
2095 | }


--------------------------------------------------------------------------------