├── .gitignore ├── images ├── scipy.png ├── ndarray.png ├── df_inside.png ├── df_outside.png ├── reference.png ├── iris_setosa.jpg ├── storage_index.png ├── df_inside_numpy.png ├── euroscipy_logo.png ├── iris_versicolor.jpg ├── iris_virginica.jpg ├── storage_simple.png └── ndarray_with_details.png ├── requirements.txt ├── LICENSE ├── README.md ├── 00_tutorial_intro.ipynb ├── 06_numpy_internals.ipynb ├── 04_sparse_matrices.ipynb ├── 05_memmapping.ipynb ├── extra_torch_tensor.ipynb ├── 03_numpy_io_matlab.ipynb └── 01_numpy_basics.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints/* 2 | *.pyc 3 | 4 | -------------------------------------------------------------------------------- /images/scipy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/numpy-euroscipy/master/images/scipy.png -------------------------------------------------------------------------------- /images/ndarray.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/numpy-euroscipy/master/images/ndarray.png -------------------------------------------------------------------------------- /images/df_inside.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/numpy-euroscipy/master/images/df_inside.png -------------------------------------------------------------------------------- /images/df_outside.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/numpy-euroscipy/master/images/df_outside.png -------------------------------------------------------------------------------- /images/reference.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/leriomaggio/numpy-euroscipy/master/images/reference.png -------------------------------------------------------------------------------- /images/iris_setosa.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/numpy-euroscipy/master/images/iris_setosa.jpg -------------------------------------------------------------------------------- /images/storage_index.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/numpy-euroscipy/master/images/storage_index.png -------------------------------------------------------------------------------- /images/df_inside_numpy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/numpy-euroscipy/master/images/df_inside_numpy.png -------------------------------------------------------------------------------- /images/euroscipy_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/numpy-euroscipy/master/images/euroscipy_logo.png -------------------------------------------------------------------------------- /images/iris_versicolor.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/numpy-euroscipy/master/images/iris_versicolor.jpg -------------------------------------------------------------------------------- /images/iris_virginica.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/numpy-euroscipy/master/images/iris_virginica.jpg -------------------------------------------------------------------------------- /images/storage_simple.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/leriomaggio/numpy-euroscipy/master/images/storage_simple.png -------------------------------------------------------------------------------- /images/ndarray_with_details.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/numpy-euroscipy/master/images/ndarray_with_details.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ipykernel==5.1.2 2 | jupyter-client==5.3.1 3 | jupyter-console==6.0.0 4 | jupyter-core==4.4.0 5 | notebook==6.4.12 6 | numpy==1.22.0 7 | # requirements for the "advanced" part 8 | scipy==1.3.1 9 | torch==1.2.0 10 | 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Valerio Maggio 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Never get in battle of bits without ammunitions 2 | 3 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/leriomaggio/numpy-euroscipy/master) 4 | 5 | **Title Credits**: Gentle reference to the homonymous [talk](https://pyvideo.org/europython-2013/never-get-in-a-battle-of-bits-without-ammunition.html) presented at 6 | **EuroPython 2013** in Florence by my friend **riko** (_a.k.a._ Enrico Franchi ). 7 | 8 | ## Abstract 9 | 10 | The `numpy` package takes a central role in Python scientific ecosystem. 11 | This is mainly because `numpy` code has been designed with 12 | high performance in mind. 13 | 14 | This tutorial will provide materials for the most essential concepts 15 | to become confident with `numpy` and `ndarray` in (a matter of) `90 mins`. 
16 | 17 | # Outline 18 | 19 | **Part I** Numpy Basics 20 | 21 | - Introduction to NumPy Arrays 22 | - numpy internals schematics 23 | - Reshaping and Resizing 24 | - Numerical Data Types 25 | - Record Array 26 | 27 | 28 | **Part II** Indexing and Slicing 29 | 30 | - Indexing numpy arrays 31 | - fancy indexing 32 | - array masking 33 | - Slicing & Stacking 34 | - Vectorization & Broadcasting 35 | 36 | **Part III** "Advanced NumPy" 37 | 38 | - Serialisation & I/O 39 | - `.mat` files 40 | - Array and Matrix 41 | - Matlab compatibility 42 | - Memmap 43 | - Bits of Data Science with NumPy 44 | - NumPy beyond `numpy` 45 | 46 | ### Python version 47 | 48 | The minimum recommended version of Python to use for this tutorial is **Python 3.5**, although 49 | Python 2.7 should be fine, as well as previous versions of Python 3. 50 | 51 | Py3.5+ is recommended due to a reference to the `@` operator in the linear algebra notebook. 52 | 53 | ## License and Sharing Material 54 | 55 | Creative Commons License
This work is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License. 56 | -------------------------------------------------------------------------------- /00_tutorial_intro.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# NumPy Tutorial @ EuroSciPy 2019\n", 8 | "\n", 9 | "\n", 10 | "\n", 11 | " " 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "## Goal of this Tutorial\n", 19 | "\n", 20 | "- Introduce the basics of scientific and numerical computation in Python using **Numpy**\n", 21 | "- Understand why `numpy` has a central role in the Python scientific ecosystem\n", 22 | " " 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "# Outline\n", 30 | "\n", 31 | "**11:00 - 11:45** (_45 mins_) Numpy Basics\n", 32 | "\n", 33 | "- Introduction to NumPy Arrays\n", 34 | " - numpy internals schematics\n", 35 | " - Reshaping and Resizing\n", 36 | "- Numerical Data Types\n", 37 | " - Record Array\n", 38 | " \n", 39 | "**11:50 - 12:30** (_40 mins_) Indexing and Slicing\n", 40 | " \n", 41 | "- Indexing numpy arrays\n", 42 | " - fancy indexing\n", 43 | " - array masking\n", 44 | "- Slicing & Stacking\n", 45 | "- Vectorization & Broadcasting\n", 46 | "\n", 47 | "**Follow up** \"Advanced NumPy: Bits of Data Science with NumPy\n", 48 | "\n", 49 | "- Serialisation & I/O\n", 50 | " - `.mat` files\n", 51 | "- Array and Matrix\n", 52 | " - Matlab compatibility\n", 53 | "- Sparse Matrices\n", 54 | "- Memmap \n", 55 | "- Ubiquitous NumPy: NumPy beyond `numpy`" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "# Requirements" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "This tutorial has one main requirement: `numpy`.\n", 70 | "\n", 71 | 
"Materials are provided as Jupyter notebooks, so IPython notebook (`pip install notebook`) is also required.\n", 72 | "\n", 73 | "#### Advanced Part\n", 74 | "\n", 75 | "This part has more dependencies: `scipy`, `scikit-learn`, `matplotlib`, `torch`.\n", 76 | "All these dependencies have been collected in the `requirements.txt` file:\n", 77 | "\n", 78 | "```\n", 79 | "$ pip install -r requirements.txt\n", 80 | "```\n", 81 | "\n", 82 | "\n", 83 | "### Python version\n", 84 | "\n", 85 | "The minimum recommended version of Python to use for this tutorial is **Python 3.5**, although \n", 86 | "Python 2.7 should be fine, as well as previous versions of Python 3. \n", 87 | "\n", 88 | "Py3.5+ is recommended due to a reference to the `@` operator in the linear algebra notebook.\n", 89 | "\n", 90 | "\n" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "# MyBinder\n", 98 | "\n", 99 | "If you don't want to bother setting up the environment on your own computer, you can use MyBinder\n", 100 | "\n", 101 | "(**Note**: recommended only with a proper Wi-Fi connection)" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/leriomaggio/numpy-euroscipy/master)" 109 | ] 110 | } 111 | ], 112 | "metadata": { 113 | "kernelspec": { 114 | "display_name": "Python 3.7 (NumPy EuroSciPy)", 115 | "language": "python", 116 | "name": "numpy-euroscipy" 117 | }, 118 | "language_info": { 119 | "codemirror_mode": { 120 | "name": "ipython", 121 | "version": 3 122 | }, 123 | "file_extension": ".py", 124 | "mimetype": "text/x-python", 125 | "name": "python", 126 | "nbconvert_exporter": "python", 127 | "pygments_lexer": "ipython3", 128 | "version": "3.7.3" 129 | } 130 | }, 131 | "nbformat": 4, 132 | "nbformat_minor": 4 133 | } 134 | -------------------------------------------------------------------------------- 
/06_numpy_internals.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Understanding NumPy Internals\n", 8 | "\n", 9 | "We can achieve significant performance speed enhancement with NumPy over native Python code, particularly when our computations follow the **Single Instruction, Multiple Data (SIMD)** paradigm. " 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy as np" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": { 24 | "slideshow": { 25 | "slide_type": "slide" 26 | } 27 | }, 28 | "source": [ 29 | "## Copy and \"deep copy\"" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": { 35 | "slideshow": { 36 | "slide_type": "subslide" 37 | } 38 | }, 39 | "source": [ 40 | "To achieve high performance, assignments in Python usually do not copy the underlying objects. \n", 41 | "\n", 42 | "This is important for example when objects are passed between functions, to avoid an excessive amount of memory copying when it is not necessary (technical term: **pass by reference**).\n", 43 | "\n", 44 | "" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "First, we need a way to check whether two arrays share the same underlying data buffer in memory. 
\n", 52 | "\n", 53 | "Let's define a function `aid()` that returns the memory location of the underlying data buffer:" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 3, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "def aid(x):\n", 63 | " # This function returns the memory\n", 64 | " # block address of an array.\n", 65 | " return x.__array_interface__['data'][0]" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "Two arrays with the same data location (as returned by `aid()`) share the same underlying data buffer. \n", 73 | "\n", 74 | "However, the opposite is true only if the arrays have the same offset (meaning that they have the same first element). " 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 4, 80 | "metadata": { 81 | "collapsed": false, 82 | "jupyter": { 83 | "outputs_hidden": false 84 | }, 85 | "slideshow": { 86 | "slide_type": "subslide" 87 | } 88 | }, 89 | "outputs": [ 90 | { 91 | "data": { 92 | "text/plain": [ 93 | "array([[1, 2],\n", 94 | " [3, 4]])" 95 | ] 96 | }, 97 | "execution_count": 4, 98 | "metadata": {}, 99 | "output_type": "execute_result" 100 | } 101 | ], 102 | "source": [ 103 | "A = np.array([[1, 2], [3, 4]])\n", 104 | "\n", 105 | "A" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 5, 111 | "metadata": { 112 | "collapsed": false, 113 | "jupyter": { 114 | "outputs_hidden": false 115 | }, 116 | "slideshow": { 117 | "slide_type": "fragment" 118 | } 119 | }, 120 | "outputs": [], 121 | "source": [ 122 | "# now B is referring to the same array data as A \n", 123 | "B = A " 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 6, 129 | "metadata": {}, 130 | "outputs": [ 131 | { 132 | "data": { 133 | "text/plain": [ 134 | "True" 135 | ] 136 | }, 137 | "execution_count": 6, 138 | "metadata": {}, 139 | "output_type": "execute_result" 140 | } 141 | ], 142 | "source": [ 143 | "aid(A) == 
aid(B)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 7, 149 | "metadata": { 150 | "collapsed": false, 151 | "jupyter": { 152 | "outputs_hidden": false 153 | }, 154 | "slideshow": { 155 | "slide_type": "fragment" 156 | } 157 | }, 158 | "outputs": [ 159 | { 160 | "data": { 161 | "text/plain": [ 162 | "array([[10, 2],\n", 163 | " [ 3, 4]])" 164 | ] 165 | }, 166 | "execution_count": 7, 167 | "metadata": {}, 168 | "output_type": "execute_result" 169 | } 170 | ], 171 | "source": [ 172 | "# changing B affects A\n", 173 | "B[0,0] = 10\n", 174 | "\n", 175 | "B" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 8, 181 | "metadata": { 182 | "collapsed": false, 183 | "jupyter": { 184 | "outputs_hidden": false 185 | }, 186 | "slideshow": { 187 | "slide_type": "fragment" 188 | } 189 | }, 190 | "outputs": [ 191 | { 192 | "data": { 193 | "text/plain": [ 194 | "array([[10, 2],\n", 195 | " [ 3, 4]])" 196 | ] 197 | }, 198 | "execution_count": 8, 199 | "metadata": {}, 200 | "output_type": "execute_result" 201 | } 202 | ], 203 | "source": [ 204 | "A" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": { 210 | "slideshow": { 211 | "slide_type": "subslide" 212 | } 213 | }, 214 | "source": [ 215 | "* If we want to **avoid** this behavior, so that when we get a new completely independent object `B` copied from `A`, then we need to do a so-called **deep copy** using the function `np.copy`:" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 9, 221 | "metadata": { 222 | "collapsed": false, 223 | "jupyter": { 224 | "outputs_hidden": false 225 | }, 226 | "slideshow": { 227 | "slide_type": "fragment" 228 | } 229 | }, 230 | "outputs": [], 231 | "source": [ 232 | "B = np.copy(A)" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": 10, 238 | "metadata": { 239 | "collapsed": false, 240 | "jupyter": { 241 | "outputs_hidden": false 242 | }, 243 | "slideshow": { 244 | 
"slide_type": "fragment" 245 | } 246 | }, 247 | "outputs": [ 248 | { 249 | "data": { 250 | "text/plain": [ 251 | "array([[-5, 2],\n", 252 | " [ 3, 4]])" 253 | ] 254 | }, 255 | "execution_count": 10, 256 | "metadata": {}, 257 | "output_type": "execute_result" 258 | } 259 | ], 260 | "source": [ 261 | "# now, if we modify B, A is not affected\n", 262 | "B[0,0] = -5\n", 263 | "\n", 264 | "B" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": 11, 270 | "metadata": { 271 | "collapsed": false, 272 | "jupyter": { 273 | "outputs_hidden": false 274 | }, 275 | "slideshow": { 276 | "slide_type": "fragment" 277 | } 278 | }, 279 | "outputs": [ 280 | { 281 | "data": { 282 | "text/plain": [ 283 | "array([[10, 2],\n", 284 | " [ 3, 4]])" 285 | ] 286 | }, 287 | "execution_count": 11, 288 | "metadata": {}, 289 | "output_type": "execute_result" 290 | } 291 | ], 292 | "source": [ 293 | "A" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": 13, 299 | "metadata": {}, 300 | "outputs": [ 301 | { 302 | "data": { 303 | "text/plain": [ 304 | "False" 305 | ] 306 | }, 307 | "execution_count": 13, 308 | "metadata": {}, 309 | "output_type": "execute_result" 310 | } 311 | ], 312 | "source": [ 313 | "aid(A) == aid(B)" 314 | ] 315 | }, 316 | { 317 | "cell_type": "markdown", 318 | "metadata": {}, 319 | "source": [ 320 | "---" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": null, 326 | "metadata": {}, 327 | "outputs": [], 328 | "source": [] 329 | } 330 | ], 331 | "metadata": { 332 | "kernelspec": { 333 | "display_name": "Python 3.7 (NumPy EuroSciPy)", 334 | "language": "python", 335 | "name": "numpy-euroscipy" 336 | }, 337 | "language_info": { 338 | "codemirror_mode": { 339 | "name": "ipython", 340 | "version": 3 341 | }, 342 | "file_extension": ".py", 343 | "mimetype": "text/x-python", 344 | "name": "python", 345 | "nbconvert_exporter": "python", 346 | "pygments_lexer": "ipython3", 347 | "version": "3.7.3" 348 | } 349 | 
}, 350 | "nbformat": 4, 351 | "nbformat_minor": 4 352 | } 353 | -------------------------------------------------------------------------------- /04_sparse_matrices.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "![scipy](images/scipy.png)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": { 13 | "slideshow": { 14 | "slide_type": "slide" 15 | } 16 | }, 17 | "source": [ 18 | "# Scipy Sparse Matrices" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": { 24 | "slideshow": { 25 | "slide_type": "subslide" 26 | } 27 | }, 28 | "source": [ 29 | "**Sparse Matrices** are very nice in some situations. \n", 30 | "\n", 31 | "For example, in some machine learning tasks, especially those associated\n", 32 | "with textual analysis, the data may be mostly zeros. \n", 33 | "\n", 34 | "Storing all these zeros is very inefficient. \n", 35 | "\n", 36 | "We can create and manipulate sparse matrices as follows:" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 1, 42 | "metadata": { 43 | "slideshow": { 44 | "slide_type": "skip" 45 | } 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "import numpy as np" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 2, 55 | "metadata": { 56 | "collapsed": false, 57 | "jupyter": { 58 | "outputs_hidden": false 59 | }, 60 | "slideshow": { 61 | "slide_type": "subslide" 62 | } 63 | }, 64 | "outputs": [ 65 | { 66 | "name": "stdout", 67 | "output_type": "stream", 68 | "text": [ 69 | "[[0.52508939 0.55969684 0.38059541 0.14994033 0.3561533 ]\n", 70 | " [0.94612104 0.20796991 0.18345058 0.03266521 0.71642811]\n", 71 | " [0.76801146 0.18143891 0.44346617 0.3509763 0.70771478]\n", 72 | " [0.96785438 0.64010409 0.20666769 0.99005094 0.42858088]\n", 73 | " [0.24971981 0.88585392 0.1683662 0.70119483 0.48374682]\n", 74 | " [0.01736319 0.87369042 
0.19830546 0.56395574 0.20060824]\n", 75 | " [0.11881578 0.65524562 0.21570217 0.02114718 0.8527528 ]\n", 76 | " [0.7722977 0.44208694 0.01126588 0.80556187 0.07607147]\n", 77 | " [0.75409907 0.78761663 0.41863968 0.30373673 0.63332945]\n", 78 | " [0.99874432 0.37336682 0.14359151 0.76142434 0.1988419 ]]\n" 79 | ] 80 | } 81 | ], 82 | "source": [ 83 | "# Create a random array with a lot of zeros\n", 84 | "X = np.random.random((10, 5))\n", 85 | "print(X)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 4, 91 | "metadata": { 92 | "collapsed": false, 93 | "jupyter": { 94 | "outputs_hidden": false 95 | }, 96 | "slideshow": { 97 | "slide_type": "subslide" 98 | } 99 | }, 100 | "outputs": [ 101 | { 102 | "name": "stdout", 103 | "output_type": "stream", 104 | "text": [ 105 | "[[0. 0. 0. 0. 0. ]\n", 106 | " [0.94612104 0. 0. 0. 0.71642811]\n", 107 | " [0.76801146 0. 0. 0. 0.70771478]\n", 108 | " [0.96785438 0. 0. 0.99005094 0. ]\n", 109 | " [0. 0.88585392 0. 0.70119483 0. ]\n", 110 | " [0. 0.87369042 0. 0. 0. ]\n", 111 | " [0. 0. 0. 0. 0.8527528 ]\n", 112 | " [0.7722977 0. 0. 0.80556187 0. ]\n", 113 | " [0.75409907 0.78761663 0. 0. 0. ]\n", 114 | " [0.99874432 0. 0. 0.76142434 0. 
]]\n" 115 | ] 116 | } 117 | ], 118 | "source": [ 119 | "X[X < 0.7] = 0 # note: fancy indexing\n", 120 | "print(X)" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 5, 126 | "metadata": { 127 | "collapsed": false, 128 | "jupyter": { 129 | "outputs_hidden": false 130 | }, 131 | "slideshow": { 132 | "slide_type": "subslide" 133 | } 134 | }, 135 | "outputs": [ 136 | { 137 | "name": "stdout", 138 | "output_type": "stream", 139 | "text": [ 140 | " (1, 0)\t0.9461210440608149\n", 141 | " (1, 4)\t0.7164281142304602\n", 142 | " (2, 0)\t0.7680114556976801\n", 143 | " (2, 4)\t0.7077147754658187\n", 144 | " (3, 0)\t0.9678543752795629\n", 145 | " (3, 3)\t0.9900509407165115\n", 146 | " (4, 1)\t0.8858539179438214\n", 147 | " (4, 3)\t0.7011948276939008\n", 148 | " (5, 1)\t0.8736904234085155\n", 149 | " (6, 4)\t0.8527528049269587\n", 150 | " (7, 0)\t0.7722977020522017\n", 151 | " (7, 3)\t0.8055618728634483\n", 152 | " (8, 0)\t0.7540990714791828\n", 153 | " (8, 1)\t0.7876166309534933\n", 154 | " (9, 0)\t0.9987443167367364\n", 155 | " (9, 3)\t0.7614243372618548\n" 156 | ] 157 | } 158 | ], 159 | "source": [ 160 | "from scipy import sparse\n", 161 | "\n", 162 | "# turn X into a csr (Compressed-Sparse-Row) matrix\n", 163 | "X_csr = sparse.csr_matrix(X)\n", 164 | "print(X_csr)" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 6, 170 | "metadata": { 171 | "collapsed": false, 172 | "jupyter": { 173 | "outputs_hidden": false 174 | }, 175 | "slideshow": { 176 | "slide_type": "subslide" 177 | } 178 | }, 179 | "outputs": [ 180 | { 181 | "name": "stdout", 182 | "output_type": "stream", 183 | "text": [ 184 | "[[0. 0. 0. 0. 0. ]\n", 185 | " [0.94612104 0. 0. 0. 0.71642811]\n", 186 | " [0.76801146 0. 0. 0. 0.70771478]\n", 187 | " [0.96785438 0. 0. 0.99005094 0. ]\n", 188 | " [0. 0.88585392 0. 0.70119483 0. ]\n", 189 | " [0. 0.87369042 0. 0. 0. ]\n", 190 | " [0. 0. 0. 0. 0.8527528 ]\n", 191 | " [0.7722977 0. 0. 0.80556187 0. 
]\n", 192 | " [0.75409907 0.78761663 0. 0. 0. ]\n", 193 | " [0.99874432 0. 0. 0.76142434 0. ]]\n" 194 | ] 195 | } 196 | ], 197 | "source": [ 198 | "# convert the sparse matrix to a dense array\n", 199 | "print(X_csr.toarray())" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 7, 205 | "metadata": { 206 | "collapsed": false, 207 | "jupyter": { 208 | "outputs_hidden": false 209 | }, 210 | "slideshow": { 211 | "slide_type": "subslide" 212 | } 213 | }, 214 | "outputs": [ 215 | { 216 | "data": { 217 | "text/plain": [ 218 | "True" 219 | ] 220 | }, 221 | "execution_count": 7, 222 | "metadata": {}, 223 | "output_type": "execute_result" 224 | } 225 | ], 226 | "source": [ 227 | "# Sparse matrices support linear algebra:\n", 228 | "y = np.random.random(X_csr.shape[1])\n", 229 | "z1 = X_csr.dot(y)\n", 230 | "z2 = X.dot(y)\n", 231 | "np.allclose(z1, z2)" 232 | ] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "metadata": { 237 | "slideshow": { 238 | "slide_type": "subslide" 239 | } 240 | }, 241 | "source": [ 242 | "* The CSR representation can be very efficient for computations, but it is not as good for adding elements. \n", 243 | "\n", 244 | "* For that, the **LIL** (List-In-List) representation is better:" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": 8, 250 | "metadata": { 251 | "collapsed": false, 252 | "jupyter": { 253 | "outputs_hidden": false 254 | }, 255 | "slideshow": { 256 | "slide_type": "fragment" 257 | } 258 | }, 259 | "outputs": [ 260 | { 261 | "name": "stdout", 262 | "output_type": "stream", 263 | "text": [ 264 | " (0, 1)\t1.0\n", 265 | " (0, 2)\t2.0\n", 266 | " (1, 1)\t2.0\n", 267 | " (1, 3)\t4.0\n", 268 | " (2, 0)\t2.0\n", 269 | " (2, 1)\t3.0\n", 270 | " (2, 2)\t4.0\n", 271 | " (2, 3)\t5.0\n", 272 | " (3, 0)\t3.0\n", 273 | " (4, 0)\t4.0\n", 274 | " (4, 1)\t5.0\n", 275 | " (4, 2)\t6.0\n", 276 | "[[0. 1. 2. 0. 0.]\n", 277 | " [0. 2. 0. 4. 0.]\n", 278 | " [2. 3. 4. 5. 0.]\n", 279 | " [3. 0. 0. 0. 
0.]\n", 280 | " [4. 5. 6. 0. 0.]]\n" 281 | ] 282 | } 283 | ], 284 | "source": [ 285 | "# Create an empty LIL matrix and add some items\n", 286 | "X_lil = sparse.lil_matrix((5, 5))\n", 287 | "\n", 288 | "for i, j in np.random.randint(0, 5, (15, 2)):\n", 289 | " X_lil[i, j] = i + j\n", 290 | "\n", 291 | "print(X_lil)\n", 292 | "print(X_lil.toarray())" 293 | ] 294 | }, 295 | { 296 | "cell_type": "markdown", 297 | "metadata": { 298 | "slideshow": { 299 | "slide_type": "subslide" 300 | } 301 | }, 302 | "source": [ 303 | "* Often, once an LIL matrix is created, it is useful to convert it to a CSR format \n", 304 | " * **Note**: many scikit-learn algorithms require CSR or CSC format" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 9, 310 | "metadata": { 311 | "collapsed": false, 312 | "jupyter": { 313 | "outputs_hidden": false 314 | }, 315 | "slideshow": { 316 | "slide_type": "fragment" 317 | } 318 | }, 319 | "outputs": [ 320 | { 321 | "name": "stdout", 322 | "output_type": "stream", 323 | "text": [ 324 | " (0, 1)\t1.0\n", 325 | " (0, 2)\t2.0\n", 326 | " (1, 1)\t2.0\n", 327 | " (1, 3)\t4.0\n", 328 | " (2, 0)\t2.0\n", 329 | " (2, 1)\t3.0\n", 330 | " (2, 2)\t4.0\n", 331 | " (2, 3)\t5.0\n", 332 | " (3, 0)\t3.0\n", 333 | " (4, 0)\t4.0\n", 334 | " (4, 1)\t5.0\n", 335 | " (4, 2)\t6.0\n" 336 | ] 337 | } 338 | ], 339 | "source": [ 340 | "X_csr = X_lil.tocsr()\n", 341 | "print(X_csr)" 342 | ] 343 | }, 344 | { 345 | "cell_type": "markdown", 346 | "metadata": { 347 | "slideshow": { 348 | "slide_type": "subslide" 349 | } 350 | }, 351 | "source": [ 352 | "There are several other sparse formats that can be useful for various problems:\n", 353 | "\n", 354 | "- `CSC` (compressed sparse column)\n", 355 | "- `BSR` (block sparse row)\n", 356 | "- `COO` (coordinate)\n", 357 | "- `DIA` (diagonal)\n", 358 | "- `DOK` (dictionary of keys)" 359 | ] 360 | }, 361 | { 362 | "cell_type": "markdown", 363 | "metadata": { 364 | "slideshow": { 365 | "slide_type": "slide" 366 
| } 367 | }, 368 | "source": [ 369 | "## CSC - Compressed Sparse Column\n", 370 | "\n", 371 | "**Advantages of the CSC format**\n", 372 | "\n", 373 | " * efficient arithmetic operations CSC + CSC, CSC * CSC, etc.\n", 374 | " * efficient column slicing\n", 375 | " * fast matrix vector products (CSR, BSR may be faster)\n", 376 | "\n", 377 | "**Disadvantages of the CSC format**\n", 378 | "\n", 379 | " * slow row slicing operations (consider CSR)\n", 380 | " * changes to the sparsity structure are expensive (consider LIL or DOK)" 381 | ] 382 | }, 383 | { 384 | "cell_type": "markdown", 385 | "metadata": { 386 | "slideshow": { 387 | "slide_type": "subslide" 388 | } 389 | }, 390 | "source": [ 391 | "### BSR - Block Sparse Row\n", 392 | "\n", 393 | "The Block Compressed Row (`BSR`) format is very similar to the Compressed Sparse Row (`CSR`) format. \n", 394 | "\n", 395 | "BSR is appropriate for sparse matrices with *dense sub matrices* like the example below. \n", 396 | "\n", 397 | "Block matrices often arise in *vector-valued* finite element discretizations. \n", 398 | "\n", 399 | "In such cases, BSR is **considerably more efficient** than CSR and CSC for many sparse arithmetic operations." 
400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": 10, 405 | "metadata": { 406 | "collapsed": false, 407 | "jupyter": { 408 | "outputs_hidden": false 409 | }, 410 | "slideshow": { 411 | "slide_type": "subslide" 412 | } 413 | }, 414 | "outputs": [ 415 | { 416 | "data": { 417 | "text/plain": [ 418 | "array([[1, 1, 0, 0, 2, 2],\n", 419 | "       [1, 1, 0, 0, 2, 2],\n", 420 | "       [0, 0, 0, 0, 3, 3],\n", 421 | "       [0, 0, 0, 0, 3, 3],\n", 422 | "       [4, 4, 5, 5, 6, 6],\n", 423 | "       [4, 4, 5, 5, 6, 6]])" 424 | ] 425 | }, 426 | "execution_count": 10, 427 | "metadata": {}, 428 | "output_type": "execute_result" 429 | } 430 | ], 431 | "source": [ 432 | "from scipy.sparse import bsr_matrix\n", 433 | "\n", 434 | "indptr = np.array([0, 2, 3, 6])\n", 435 | "indices = np.array([0, 2, 2, 0, 1, 2])\n", 436 | "data = np.array([1, 2, 3, 4, 5, 6]).repeat(4).reshape(6, 2, 2)\n", 437 | "bsr_matrix((data,indices,indptr), shape=(6, 6)).toarray()" 438 | ] 439 | }, 440 | { 441 | "cell_type": "markdown", 442 | "metadata": { 443 | "slideshow": { 444 | "slide_type": "slide" 445 | } 446 | }, 447 | "source": [ 448 | "## COO - Coordinate Sparse Matrix\n", 449 | "\n", 450 | "**Advantages of the COO format**\n", 451 | "\n", 452 | " * facilitates fast conversion among sparse formats\n", 453 | " * permits duplicate entries (see example)\n", 454 | " * very fast conversion to and from CSR/CSC formats\n", 455 | "\n", 456 | "**Disadvantages of the COO format**\n", 457 | "\n", 458 | " * does not directly support arithmetic operations and slicing\n", 459 | " \n", 460 | "**Intended Usage**\n", 461 | "\n", 462 | " * COO is a fast format for constructing sparse matrices\n", 463 | " * Once a matrix has been constructed, convert to CSR or CSC format for fast arithmetic and matrix vector\n", 464 | " operations\n", 465 | " * By default when converting to CSR or CSC format, duplicate (i,j) entries will be summed together. 
\n", 466 | " This facilitates efficient construction of finite element matrices and the like.\n" 467 | ] 468 | }, 469 | { 470 | "cell_type": "markdown", 471 | "metadata": { 472 | "slideshow": { 473 | "slide_type": "slide" 474 | } 475 | }, 476 | "source": [ 477 | "## DOK - Dictionary of Keys\n", 478 | "\n", 479 | "Sparse matrices can be used in arithmetic operations: they support addition, subtraction, multiplication, division, and matrix power.\n", 480 | "\n", 481 | "Allows for efficient O(1) access of individual elements. Duplicates are not allowed. Can be efficiently converted to a coo_matrix once constructed." 482 | ] 483 | }, 484 | { 485 | "cell_type": "code", 486 | "execution_count": 11, 487 | "metadata": { 488 | "collapsed": false, 489 | "jupyter": { 490 | "outputs_hidden": false 491 | }, 492 | "slideshow": { 493 | "slide_type": "subslide" 494 | } 495 | }, 496 | "outputs": [ 497 | { 498 | "data": { 499 | "text/plain": [ 500 | "array([[0., 1., 2., 3., 4.],\n", 501 | " [0., 2., 3., 4., 5.],\n", 502 | " [0., 0., 4., 5., 6.],\n", 503 | " [0., 0., 0., 6., 7.],\n", 504 | " [0., 0., 0., 0., 8.]], dtype=float32)" 505 | ] 506 | }, 507 | "execution_count": 11, 508 | "metadata": {}, 509 | "output_type": "execute_result" 510 | } 511 | ], 512 | "source": [ 513 | "from scipy.sparse import dok_matrix\n", 514 | "S = dok_matrix((5, 5), dtype=np.float32)\n", 515 | "for i in range(5):\n", 516 | " for j in range(i, 5):\n", 517 | " S[i,j] = i+j\n", 518 | " \n", 519 | "S.toarray()" 520 | ] 521 | }, 522 | { 523 | "cell_type": "markdown", 524 | "metadata": { 525 | "slideshow": { 526 | "slide_type": "subslide" 527 | } 528 | }, 529 | "source": [ 530 | "The ``scipy.sparse`` submodule also has a lot of functions for sparse matrices\n", 531 | "including linear algebra, sparse solvers, graph algorithms, and much more." 
532 | ] 533 | } 534 | ], 535 | "metadata": { 536 | "celltoolbar": "Slideshow", 537 | "kernelspec": { 538 | "display_name": "Python 3.7 (NumPy EuroSciPy)", 539 | "language": "python", 540 | "name": "numpy-euroscipy" 541 | }, 542 | "language_info": { 543 | "codemirror_mode": { 544 | "name": "ipython", 545 | "version": 3 546 | }, 547 | "file_extension": ".py", 548 | "mimetype": "text/x-python", 549 | "name": "python", 550 | "nbconvert_exporter": "python", 551 | "pygments_lexer": "ipython3", 552 | "version": "3.7.3" 553 | } 554 | }, 555 | "nbformat": 4, 556 | "nbformat_minor": 4 557 | } 558 | -------------------------------------------------------------------------------- /05_memmapping.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "# Processing large NumPy arrays with memory mapping\n", 12 | "\n", 13 | "\n", 14 | "**Reference**: _IPython Interactive Computing and Visualization Cookbook - Second Edition, by Cyrille Rossant_" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "---" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "Sometimes, we need to deal with NumPy arrays that are too big to fit in the system memory. \n", 29 | "\n", 30 | "A common solution is to use memory mapping and implement **out-of-core** computations. \n", 31 | "\n", 32 | "The array is stored in a file on the hard drive, and we create a **memory-mapped** object to this file that can be used as a regular NumPy array. \n", 33 | "\n", 34 | "Accessing a portion of the array results in the corresponding data being automatically fetched from the hard drive. Therefore, we only consume what we use." 
35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 1, 40 | "metadata": { 41 | "collapsed": false, 42 | "jupyter": { 43 | "outputs_hidden": false 44 | }, 45 | "slideshow": { 46 | "slide_type": "subslide" 47 | } 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "import numpy as np" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 2, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "# Let's create a Memory-Mapped Array in write mode\n", 61 | "\n", 62 | "nrows, ncols = 1000000, 100\n", 63 | "f = np.memmap('memmapped.dat', dtype=np.float32, mode='w+', shape=(nrows, ncols))" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "Let's feed the array with random values, one column at a time because our system's memory is limited!" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 3, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "for i in range(ncols):\n", 80 | " f[:, i] = np.random.rand(nrows)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "Save the last column of the Array" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 4, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "x = f[:, -1]" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "Now, we flush memory changes to disk by deleting the object:" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 5, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "del f" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "Reading a memory-mapped array from disk involves the same memmap() function. 
The data type and the shape need to be specified again, as this information is not stored in the file:" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 8, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "f = np.memmap('memmapped.dat', dtype=np.float32,\n", 129 | " shape=(nrows, ncols))" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 9, 135 | "metadata": {}, 136 | "outputs": [ 137 | { 138 | "data": { 139 | "text/plain": [ 140 | "True" 141 | ] 142 | }, 143 | "execution_count": 9, 144 | "metadata": {}, 145 | "output_type": "execute_result" 146 | } 147 | ], 148 | "source": [ 149 | "np.array_equal(f[:, -1], x)" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 10, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "del f" 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | "source": [ 165 | "**Note**:\n", 166 | "\n", 167 | ">This method is not adapted for long-term storage of data and data sharing. \n", 168 | ">A better file format for this specific case will be the **HDF5**." 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": {}, 174 | "source": [ 175 | "## How `memmap` works" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "Memory mapping lets you work with huge arrays almost as if they were regular arrays. Python code that accepts a NumPy array as input will also accept a `memmap` array. However, we need to ensure that the array is used efficiently. That is, the array is never loaded as a whole (otherwise, it would waste system memory and would obviate any advantage of the technique)." 183 | ] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "metadata": {}, 188 | "source": [ 189 | "Memory mapping is also useful when you have a huge file containing raw data in a homogeneous binary format with a known **data type and shape**. 
\n", 190 | "\n", 191 | "In this case, an alternative solution is to use NumPy's `fromfile()` function with a file handle created with Python's native `open()` function. \n", 192 | "\n", 193 | "Using `f.seek()` lets you position the cursor at any location and load a given number of bytes into a NumPy array." 194 | ] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "metadata": { 199 | "slideshow": { 200 | "slide_type": "subslide" 201 | } 202 | }, 203 | "source": [ 204 | "The numpy package makes it possible to memory map large contiguous chunks of binary files as shared memory for all the Python processes running on a given host:" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": { 210 | "slideshow": { 211 | "slide_type": "slide" 212 | } 213 | }, 214 | "source": [ 215 | "### Memmap Operations" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 11, 221 | "metadata": { 222 | "collapsed": false, 223 | "jupyter": { 224 | "outputs_hidden": false 225 | }, 226 | "slideshow": { 227 | "slide_type": "subslide" 228 | } 229 | }, 230 | "outputs": [ 231 | { 232 | "name": "stdout", 233 | "output_type": "stream", 234 | "text": [ 235 | "[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n" 236 | ] 237 | } 238 | ], 239 | "source": [ 240 | "mm_w = np.memmap('small_test.mmap', shape=10, dtype=np.float32, mode='w+')\n", 241 | "print(mm_w)" 242 | ] 243 | }, 244 | { 245 | "cell_type": "markdown", 246 | "metadata": { 247 | "slideshow": { 248 | "slide_type": "subslide" 249 | } 250 | }, 251 | "source": [ 252 | "* This binary file can then be mapped as a new numpy array by all the engines having access to the same filesystem. 
\n", 253 | "* The `mode='r+'` opens this shared memory area in read write mode:" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": 12, 259 | "metadata": { 260 | "collapsed": false, 261 | "jupyter": { 262 | "outputs_hidden": false 263 | }, 264 | "slideshow": { 265 | "slide_type": "subslide" 266 | } 267 | }, 268 | "outputs": [ 269 | { 270 | "name": "stdout", 271 | "output_type": "stream", 272 | "text": [ 273 | "[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n" 274 | ] 275 | } 276 | ], 277 | "source": [ 278 | "mm_r = np.memmap('small_test.mmap', dtype=np.float32, mode='r+')\n", 279 | "print(mm_r)" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": 13, 285 | "metadata": { 286 | "collapsed": false, 287 | "jupyter": { 288 | "outputs_hidden": false 289 | }, 290 | "slideshow": { 291 | "slide_type": "fragment" 292 | } 293 | }, 294 | "outputs": [ 295 | { 296 | "name": "stdout", 297 | "output_type": "stream", 298 | "text": [ 299 | "[42. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n" 300 | ] 301 | } 302 | ], 303 | "source": [ 304 | "mm_w[0] = 42\n", 305 | "print(mm_w)" 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": 14, 311 | "metadata": { 312 | "collapsed": false, 313 | "jupyter": { 314 | "outputs_hidden": false 315 | }, 316 | "slideshow": { 317 | "slide_type": "fragment" 318 | } 319 | }, 320 | "outputs": [ 321 | { 322 | "name": "stdout", 323 | "output_type": "stream", 324 | "text": [ 325 | "[42. 0. 0. 0. 0. 0. 0. 0. 0. 
0.]\n" 326 | ] 327 | } 328 | ], 329 | "source": [ 330 | "print(mm_r)" 331 | ] 332 | }, 333 | { 334 | "cell_type": "markdown", 335 | "metadata": { 336 | "slideshow": { 337 | "slide_type": "subslide" 338 | } 339 | }, 340 | "source": [ 341 | "* Memory mapped arrays created with `mode='r+'` can be modified and the modifications are shared \n", 342 | " - in case of multiple processes" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": 15, 348 | "metadata": { 349 | "collapsed": false, 350 | "jupyter": { 351 | "outputs_hidden": false 352 | }, 353 | "slideshow": { 354 | "slide_type": "fragment" 355 | } 356 | }, 357 | "outputs": [], 358 | "source": [ 359 | "mm_r[1] = 43" 360 | ] 361 | }, 362 | { 363 | "cell_type": "code", 364 | "execution_count": 16, 365 | "metadata": { 366 | "collapsed": false, 367 | "jupyter": { 368 | "outputs_hidden": false 369 | }, 370 | "slideshow": { 371 | "slide_type": "fragment" 372 | } 373 | }, 374 | "outputs": [ 375 | { 376 | "name": "stdout", 377 | "output_type": "stream", 378 | "text": [ 379 | "[42. 43. 0. 0. 0. 0. 0. 0. 0. 
0.]\n" 380 | ] 381 | } 382 | ], 383 | "source": [ 384 | "print(mm_r)" 385 | ] 386 | }, 387 | { 388 | "cell_type": "markdown", 389 | "metadata": { 390 | "slideshow": { 391 | "slide_type": "subslide" 392 | } 393 | }, 394 | "source": [ 395 | "Memmap arrays generally behave very much like regular in-memory numpy arrays:" 396 | ] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "execution_count": 17, 401 | "metadata": { 402 | "collapsed": false, 403 | "jupyter": { 404 | "outputs_hidden": false 405 | }, 406 | "slideshow": { 407 | "slide_type": "subslide" 408 | } 409 | }, 410 | "outputs": [ 411 | { 412 | "name": "stdout", 413 | "output_type": "stream", 414 | "text": [ 415 | "85.0\n", 416 | "sum=85.0, mean=8.5, std=17.0014705657959\n" 417 | ] 418 | } 419 | ], 420 | "source": [ 421 | "print(mm_r.sum())\n", 422 | "print(\"sum={0}, mean={1}, std={2}\".format(mm_r.sum(), \n", 423 | " np.mean(mm_r), np.std(mm_r)))" 424 | ] 425 | }, 426 | { 427 | "cell_type": "markdown", 428 | "metadata": { 429 | "slideshow": { 430 | "slide_type": "subslide" 431 | } 432 | }, 433 | "source": [ 434 | "Before allocating more data let us define a couple of utility functions from the previous exercise (and more) to monitor what is used by which engine and what is still free on the cluster as a whole:" 435 | ] 436 | }, 437 | { 438 | "cell_type": "markdown", 439 | "metadata": { 440 | "slideshow": { 441 | "slide_type": "subslide" 442 | } 443 | }, 444 | "source": [ 445 | "* Let's allocate a 80MB memmap array:" 446 | ] 447 | }, 448 | { 449 | "cell_type": "code", 450 | "execution_count": 18, 451 | "metadata": { 452 | "collapsed": false, 453 | "jupyter": { 454 | "outputs_hidden": false 455 | }, 456 | "slideshow": { 457 | "slide_type": "fragment" 458 | } 459 | }, 460 | "outputs": [ 461 | { 462 | "data": { 463 | "text/plain": [ 464 | "memmap([0., 0., 0., ..., 0., 0., 0.])" 465 | ] 466 | }, 467 | "execution_count": 18, 468 | "metadata": {}, 469 | "output_type": "execute_result" 470 | } 471 | ], 472 | 
"source": [ 473 | "np.memmap('bigger_test.mmap', shape=10 * int(1e6), dtype=np.float64, mode='w+')" 474 | ] 475 | }, 476 | { 477 | "cell_type": "markdown", 478 | "metadata": { 479 | "slideshow": { 480 | "slide_type": "subslide" 481 | } 482 | }, 483 | "source": [ 484 | "No significant memory was used in this operation as we just asked the OS to allocate the buffer on the hard drive and just maintain a virtual memory area as a cheap reference to this buffer.\n", 485 | "\n", 486 | "Let's open new references to the same buffer from all the engines at once:" 487 | ] 488 | }, 489 | { 490 | "cell_type": "code", 491 | "execution_count": 19, 492 | "metadata": { 493 | "collapsed": false, 494 | "jupyter": { 495 | "outputs_hidden": false 496 | }, 497 | "slideshow": { 498 | "slide_type": "subslide" 499 | } 500 | }, 501 | "outputs": [ 502 | { 503 | "name": "stdout", 504 | "output_type": "stream", 505 | "text": [ 506 | "CPU times: user 616 µs, sys: 778 µs, total: 1.39 ms\n", 507 | "Wall time: 17.3 ms\n" 508 | ] 509 | } 510 | ], 511 | "source": [ 512 | "%time big_mmap = np.memmap('bigger_test.mmap', dtype=np.float64, mode='r+')" 513 | ] 514 | }, 515 | { 516 | "cell_type": "code", 517 | "execution_count": 20, 518 | "metadata": { 519 | "collapsed": false, 520 | "jupyter": { 521 | "outputs_hidden": false 522 | }, 523 | "slideshow": { 524 | "slide_type": "subslide" 525 | } 526 | }, 527 | "outputs": [ 528 | { 529 | "data": { 530 | "text/plain": [ 531 | "memmap([0., 0., 0., ..., 0., 0., 0.])" 532 | ] 533 | }, 534 | "execution_count": 20, 535 | "metadata": {}, 536 | "output_type": "execute_result" 537 | } 538 | ], 539 | "source": [ 540 | "big_mmap" 541 | ] 542 | }, 543 | { 544 | "cell_type": "markdown", 545 | "metadata": { 546 | "slideshow": { 547 | "slide_type": "subslide" 548 | } 549 | }, 550 | "source": [ 551 | "* Let's trigger an actual load of the data from the drive into the in-memory disk cache of the OS, this can take some time depending on the speed of the hard drive (on the 
order of 100MB/s to 300MB/s hence 3s to 8s for this dataset):" 552 | ] 553 | }, 554 | { 555 | "cell_type": "code", 556 | "execution_count": 21, 557 | "metadata": { 558 | "collapsed": false, 559 | "jupyter": { 560 | "outputs_hidden": false 561 | }, 562 | "slideshow": { 563 | "slide_type": "subslide" 564 | } 565 | }, 566 | "outputs": [ 567 | { 568 | "name": "stdout", 569 | "output_type": "stream", 570 | "text": [ 571 | "CPU times: user 20.5 ms, sys: 32.9 ms, total: 53.5 ms\n", 572 | "Wall time: 54.3 ms\n" 573 | ] 574 | }, 575 | { 576 | "data": { 577 | "text/plain": [ 578 | "0.0" 579 | ] 580 | }, 581 | "execution_count": 21, 582 | "metadata": {}, 583 | "output_type": "execute_result" 584 | } 585 | ], 586 | "source": [ 587 | "%time np.sum(big_mmap)" 588 | ] 589 | }, 590 | { 591 | "cell_type": "markdown", 592 | "metadata": { 593 | "slideshow": { 594 | "slide_type": "subslide" 595 | } 596 | }, 597 | "source": [ 598 | "* Now back into memory" 599 | ] 600 | }, 601 | { 602 | "cell_type": "code", 603 | "execution_count": 22, 604 | "metadata": { 605 | "collapsed": false, 606 | "jupyter": { 607 | "outputs_hidden": false 608 | }, 609 | "slideshow": { 610 | "slide_type": "fragment" 611 | } 612 | }, 613 | "outputs": [ 614 | { 615 | "name": "stdout", 616 | "output_type": "stream", 617 | "text": [ 618 | "CPU times: user 15 ms, sys: 1.36 ms, total: 16.4 ms\n", 619 | "Wall time: 14.7 ms\n" 620 | ] 621 | }, 622 | { 623 | "data": { 624 | "text/plain": [ 625 | "0.0" 626 | ] 627 | }, 628 | "execution_count": 22, 629 | "metadata": {}, 630 | "output_type": "execute_result" 631 | } 632 | ], 633 | "source": [ 634 | "%time np.sum(big_mmap)" 635 | ] 636 | } 637 | ], 638 | "metadata": { 639 | "celltoolbar": "Slideshow", 640 | "kernelspec": { 641 | "display_name": "Python 3.7 (NumPy EuroSciPy)", 642 | "language": "python", 643 | "name": "numpy-euroscipy" 644 | }, 645 | "language_info": { 646 | "codemirror_mode": { 647 | "name": "ipython", 648 | "version": 3 649 | }, 650 | "file_extension": 
".py", 651 | "mimetype": "text/x-python", 652 | "name": "python", 653 | "nbconvert_exporter": "python", 654 | "pygments_lexer": "ipython3", 655 | "version": "3.7.3" 656 | } 657 | }, 658 | "nbformat": 4, 659 | "nbformat_minor": 4 660 | } 661 | -------------------------------------------------------------------------------- /extra_torch_tensor.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Original Notebook\n", 8 | "\n", 9 | "### Introduction to PyTorch Tensor\n", 10 | "\n", 11 | "**Reference**: [\"What is PyTorch?\"](https://pytorch.org/tutorials/beginner/blitz/tensor_tutorial.html#sphx-glr-beginner-blitz-tensor-tutorial-py) by [Soumith Chintala](http://soumith.ch)" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "\n", 19 | "What is PyTorch?\n", 20 | "================\n", 21 | "\n", 22 | "It’s a Python-based scientific computing package targeted at two sets of\n", 23 | "audiences:\n", 24 | "\n", 25 | "- A replacement for NumPy to use the power of GPUs\n", 26 | "- a deep learning research platform that provides maximum flexibility\n", 27 | " and speed\n", 28 | "\n", 29 | "Getting Started\n", 30 | "---------------\n", 31 | "\n", 32 | "Tensors\n", 33 | "^^^^^^^\n", 34 | "\n", 35 | "Tensors are similar to NumPy’s ndarrays, with the addition being that\n", 36 | "Tensors can also be used on a GPU to accelerate computing.\n", 37 | "\n" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 1, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "import torch" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "

Note

An uninitialized matrix is declared,\n", 54 | " but does not contain definite known\n", 55 | " values before it is used. When an\n", 56 | " uninitialized matrix is created,\n", 57 | " whatever values were in the allocated\n", 58 | " memory at the time will appear as the initial values.

\n", 59 | "\n" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "Construct a 5x3 matrix, uninitialized:\n", 67 | "\n" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 2, 73 | "metadata": {}, 74 | "outputs": [ 75 | { 76 | "name": "stdout", 77 | "output_type": "stream", 78 | "text": [ 79 | "tensor([[0., 0., 0.],\n", 80 | " [0., 0., 0.],\n", 81 | " [0., 0., 0.],\n", 82 | " [0., 0., 0.],\n", 83 | " [0., 0., 0.]])\n" 84 | ] 85 | } 86 | ], 87 | "source": [ 88 | "x = torch.empty(5, 3)\n", 89 | "print(x)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 3, 95 | "metadata": {}, 96 | "outputs": [ 97 | { 98 | "data": { 99 | "text/plain": [ 100 | "torch.Tensor" 101 | ] 102 | }, 103 | "execution_count": 3, 104 | "metadata": {}, 105 | "output_type": "execute_result" 106 | } 107 | ], 108 | "source": [ 109 | "type(x)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "Construct a randomly initialized matrix:\n", 117 | "\n" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 4, 123 | "metadata": {}, 124 | "outputs": [ 125 | { 126 | "name": "stdout", 127 | "output_type": "stream", 128 | "text": [ 129 | "tensor([[0.1698, 0.9210, 0.6316],\n", 130 | " [0.3320, 0.5131, 0.0979],\n", 131 | " [0.7772, 0.0237, 0.9043],\n", 132 | " [0.8486, 0.5272, 0.0556],\n", 133 | " [0.6724, 0.9832, 0.3684]])\n" 134 | ] 135 | } 136 | ], 137 | "source": [ 138 | "x = torch.rand(5, 3)\n", 139 | "print(x)" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": {}, 145 | "source": [ 146 | "Construct a matrix filled zeros and of dtype long:\n", 147 | "\n" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 5, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "name": "stdout", 157 | "output_type": "stream", 158 | "text": [ 159 | "tensor([[0, 0, 0],\n", 160 | " [0, 0, 0],\n", 161 | " [0, 0, 0],\n", 
162 | " [0, 0, 0],\n", 163 | " [0, 0, 0]])\n" 164 | ] 165 | } 166 | ], 167 | "source": [ 168 | "x = torch.zeros(5, 3, dtype=torch.long)\n", 169 | "print(x)" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "Construct a tensor directly from data:\n", 177 | "\n" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": 6, 183 | "metadata": {}, 184 | "outputs": [ 185 | { 186 | "name": "stdout", 187 | "output_type": "stream", 188 | "text": [ 189 | "tensor([5.5000, 3.0000])\n" 190 | ] 191 | } 192 | ], 193 | "source": [ 194 | "x = torch.tensor([5.5, 3])\n", 195 | "print(x)" 196 | ] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "metadata": {}, 201 | "source": [ 202 | "or create a tensor based on an existing tensor. These methods\n", 203 | "will reuse properties of the input tensor, e.g. dtype, unless\n", 204 | "new values are provided by user\n", 205 | "\n" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 7, 211 | "metadata": {}, 212 | "outputs": [ 213 | { 214 | "name": "stdout", 215 | "output_type": "stream", 216 | "text": [ 217 | "tensor([[1., 1., 1.],\n", 218 | " [1., 1., 1.],\n", 219 | " [1., 1., 1.],\n", 220 | " [1., 1., 1.],\n", 221 | " [1., 1., 1.]], dtype=torch.float64)\n", 222 | "tensor([[ 0.5752, 1.0416, 1.3851],\n", 223 | " [ 1.3198, 0.5191, 0.9312],\n", 224 | " [ 0.7598, -1.3322, -0.6987],\n", 225 | " [-1.6433, 0.9256, -0.1461],\n", 226 | " [-0.0300, -0.1440, 0.7353]])\n" 227 | ] 228 | } 229 | ], 230 | "source": [ 231 | "x = x.new_ones(5, 3, dtype=torch.double) # new_* methods take in sizes\n", 232 | "print(x)\n", 233 | "\n", 234 | "x = torch.randn_like(x, dtype=torch.float) # override dtype!\n", 235 | "print(x) # result has the same size" 236 | ] 237 | }, 238 | { 239 | "cell_type": "markdown", 240 | "metadata": {}, 241 | "source": [ 242 | "Get its size:\n", 243 | "\n" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": 8, 249 | 
"metadata": {}, 250 | "outputs": [ 251 | { 252 | "name": "stdout", 253 | "output_type": "stream", 254 | "text": [ 255 | "torch.Size([5, 3])\n" 256 | ] 257 | } 258 | ], 259 | "source": [ 260 | "print(x.size())" 261 | ] 262 | }, 263 | { 264 | "cell_type": "markdown", 265 | "metadata": {}, 266 | "source": [ 267 | "

Note

``torch.Size`` is in fact a tuple, so it supports all tuple operations.

\n", 268 | "\n", 269 | "Operations\n", 270 | "^^^^^^^^^^\n", 271 | "There are multiple syntaxes for operations. In the following\n", 272 | "example, we will take a look at the addition operation.\n", 273 | "\n", 274 | "Addition: syntax 1\n", 275 | "\n" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": 9, 281 | "metadata": {}, 282 | "outputs": [ 283 | { 284 | "name": "stdout", 285 | "output_type": "stream", 286 | "text": [ 287 | "tensor([[ 1.1527, 1.2403, 1.4093],\n", 288 | " [ 1.7279, 0.7558, 1.0488],\n", 289 | " [ 0.9757, -0.4742, -0.0138],\n", 290 | " [-1.4919, 1.5009, 0.1834],\n", 291 | " [ 0.2617, 0.2694, 1.3259]])\n" 292 | ] 293 | } 294 | ], 295 | "source": [ 296 | "y = torch.rand(5, 3)\n", 297 | "print(x + y)" 298 | ] 299 | }, 300 | { 301 | "cell_type": "markdown", 302 | "metadata": {}, 303 | "source": [ 304 | "Addition: syntax 2\n", 305 | "\n" 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": 10, 311 | "metadata": {}, 312 | "outputs": [ 313 | { 314 | "name": "stdout", 315 | "output_type": "stream", 316 | "text": [ 317 | "tensor([[ 1.1527, 1.2403, 1.4093],\n", 318 | " [ 1.7279, 0.7558, 1.0488],\n", 319 | " [ 0.9757, -0.4742, -0.0138],\n", 320 | " [-1.4919, 1.5009, 0.1834],\n", 321 | " [ 0.2617, 0.2694, 1.3259]])\n" 322 | ] 323 | } 324 | ], 325 | "source": [ 326 | "print(torch.add(x, y))" 327 | ] 328 | }, 329 | { 330 | "cell_type": "markdown", 331 | "metadata": {}, 332 | "source": [ 333 | "Addition: providing an output tensor as argument\n", 334 | "\n" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": 11, 340 | "metadata": {}, 341 | "outputs": [ 342 | { 343 | "name": "stdout", 344 | "output_type": "stream", 345 | "text": [ 346 | "tensor([[ 1.1527, 1.2403, 1.4093],\n", 347 | " [ 1.7279, 0.7558, 1.0488],\n", 348 | " [ 0.9757, -0.4742, -0.0138],\n", 349 | " [-1.4919, 1.5009, 0.1834],\n", 350 | " [ 0.2617, 0.2694, 1.3259]])\n" 351 | ] 352 | } 353 | ], 354 | "source": [ 355 | "result = 
torch.empty(5, 3)\n", 356 | "torch.add(x, y, out=result)\n", 357 | "print(result)" 358 | ] 359 | }, 360 | { 361 | "cell_type": "markdown", 362 | "metadata": {}, 363 | "source": [ 364 | "Addition: in-place\n", 365 | "\n" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": 12, 371 | "metadata": {}, 372 | "outputs": [ 373 | { 374 | "name": "stdout", 375 | "output_type": "stream", 376 | "text": [ 377 | "tensor([[ 1.1527, 1.2403, 1.4093],\n", 378 | " [ 1.7279, 0.7558, 1.0488],\n", 379 | " [ 0.9757, -0.4742, -0.0138],\n", 380 | " [-1.4919, 1.5009, 0.1834],\n", 381 | " [ 0.2617, 0.2694, 1.3259]])\n" 382 | ] 383 | } 384 | ], 385 | "source": [ 386 | "# adds x to y\n", 387 | "y.add_(x)\n", 388 | "print(y)" 389 | ] 390 | }, 391 | { 392 | "cell_type": "markdown", 393 | "metadata": {}, 394 | "source": [ 395 | "

Note

Any operation that mutates a tensor in-place is post-fixed with an ``_``.\n", 396 | " For example: ``x.copy_(y)``, ``x.t_()``, will change ``x``.

\n", 397 | "\n", 398 | "You can use standard NumPy-like indexing with all bells and whistles!\n", 399 | "\n" 400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": 13, 405 | "metadata": {}, 406 | "outputs": [ 407 | { 408 | "name": "stdout", 409 | "output_type": "stream", 410 | "text": [ 411 | "tensor([ 1.0416, 0.5191, -1.3322, 0.9256, -0.1440])\n" 412 | ] 413 | } 414 | ], 415 | "source": [ 416 | "print(x[:, 1])" 417 | ] 418 | }, 419 | { 420 | "cell_type": "markdown", 421 | "metadata": {}, 422 | "source": [ 423 | "Resizing: If you want to resize/reshape tensor, you can use ``torch.view``:\n", 424 | "\n" 425 | ] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": 14, 430 | "metadata": {}, 431 | "outputs": [ 432 | { 433 | "name": "stdout", 434 | "output_type": "stream", 435 | "text": [ 436 | "torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])\n" 437 | ] 438 | } 439 | ], 440 | "source": [ 441 | "x = torch.randn(4, 4)\n", 442 | "y = x.view(16)\n", 443 | "z = x.view(-1, 8) # the size -1 is inferred from other dimensions\n", 444 | "print(x.size(), y.size(), z.size())" 445 | ] 446 | }, 447 | { 448 | "cell_type": "markdown", 449 | "metadata": {}, 450 | "source": [ 451 | "If you have a one element tensor, use ``.item()`` to get the value as a\n", 452 | "Python number\n", 453 | "\n" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": 15, 459 | "metadata": {}, 460 | "outputs": [ 461 | { 462 | "name": "stdout", 463 | "output_type": "stream", 464 | "text": [ 465 | "tensor([1.0556])\n", 466 | "1.0555715560913086\n" 467 | ] 468 | } 469 | ], 470 | "source": [ 471 | "x = torch.randn(1)\n", 472 | "print(x)\n", 473 | "print(x.item())" 474 | ] 475 | }, 476 | { 477 | "cell_type": "markdown", 478 | "metadata": {}, 479 | "source": [ 480 | "**Read later:**\n", 481 | "\n", 482 | "\n", 483 | " 100+ Tensor operations, including transposing, indexing, slicing,\n", 484 | " mathematical operations, linear algebra, random 
numbers, etc.,\n", 485 | " are described\n", 486 | " `here `_.\n", 487 | "\n", 488 | "NumPy Bridge\n", 489 | "------------\n", 490 | "\n", 491 | "Converting a Torch Tensor to a NumPy array and vice versa is a breeze.\n", 492 | "\n", 493 | "The Torch Tensor and NumPy array will share their underlying memory\n", 494 | "locations (if the Torch Tensor is on CPU), and changing one will change\n", 495 | "the other.\n", 496 | "\n", 497 | "Converting a Torch Tensor to a NumPy Array\n", 498 | "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", 499 | "\n" 500 | ] 501 | }, 502 | { 503 | "cell_type": "code", 504 | "execution_count": 16, 505 | "metadata": {}, 506 | "outputs": [ 507 | { 508 | "name": "stdout", 509 | "output_type": "stream", 510 | "text": [ 511 | "tensor([1., 1., 1., 1., 1.])\n" 512 | ] 513 | } 514 | ], 515 | "source": [ 516 | "a = torch.ones(5)\n", 517 | "print(a)" 518 | ] 519 | }, 520 | { 521 | "cell_type": "code", 522 | "execution_count": 17, 523 | "metadata": {}, 524 | "outputs": [ 525 | { 526 | "name": "stdout", 527 | "output_type": "stream", 528 | "text": [ 529 | "[1. 1. 1. 1. 1.]\n" 530 | ] 531 | } 532 | ], 533 | "source": [ 534 | "b = a.numpy()\n", 535 | "print(b)" 536 | ] 537 | }, 538 | { 539 | "cell_type": "markdown", 540 | "metadata": {}, 541 | "source": [ 542 | "See how the numpy array changed in value.\n", 543 | "\n" 544 | ] 545 | }, 546 | { 547 | "cell_type": "code", 548 | "execution_count": 18, 549 | "metadata": {}, 550 | "outputs": [ 551 | { 552 | "name": "stdout", 553 | "output_type": "stream", 554 | "text": [ 555 | "tensor([2., 2., 2., 2., 2.])\n", 556 | "[2. 2. 2. 2. 
2.]\n" 557 | ] 558 | } 559 | ], 560 | "source": [ 561 | "a.add_(1)\n", 562 | "print(a)\n", 563 | "print(b)" 564 | ] 565 | }, 566 | { 567 | "cell_type": "markdown", 568 | "metadata": {}, 569 | "source": [ 570 | "Converting NumPy Array to Torch Tensor\n", 571 | "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", 572 | "See how changing the np array changed the Torch Tensor automatically\n", 573 | "\n" 574 | ] 575 | }, 576 | { 577 | "cell_type": "code", 578 | "execution_count": 19, 579 | "metadata": {}, 580 | "outputs": [ 581 | { 582 | "name": "stdout", 583 | "output_type": "stream", 584 | "text": [ 585 | "[2. 2. 2. 2. 2.]\n", 586 | "tensor([2., 2., 2., 2., 2.], dtype=torch.float64)\n" 587 | ] 588 | } 589 | ], 590 | "source": [ 591 | "import numpy as np\n", 592 | "a = np.ones(5)\n", 593 | "b = torch.from_numpy(a)\n", 594 | "np.add(a, 1, out=a)\n", 595 | "print(a)\n", 596 | "print(b)" 597 | ] 598 | }, 599 | { 600 | "cell_type": "markdown", 601 | "metadata": {}, 602 | "source": [ 603 | "All the Tensors on the CPU except a CharTensor support converting to\n", 604 | "NumPy and back.\n", 605 | "\n", 606 | "CUDA Tensors\n", 607 | "------------\n", 608 | "\n", 609 | "Tensors can be moved onto any device using the ``.to`` method.\n", 610 | "\n" 611 | ] 612 | }, 613 | { 614 | "cell_type": "code", 615 | "execution_count": 20, 616 | "metadata": {}, 617 | "outputs": [], 618 | "source": [ 619 | "# let us run this cell only if CUDA is available\n", 620 | "# We will use ``torch.device`` objects to move tensors in and out of GPU\n", 621 | "if torch.cuda.is_available():\n", 622 | " device = torch.device(\"cuda\") # a CUDA device object\n", 623 | " y = torch.ones_like(x, device=device) # directly create a tensor on GPU\n", 624 | " x = x.to(device) # or just use strings ``.to(\"cuda\")``\n", 625 | " z = x + y\n", 626 | " print(z)\n", 627 | " print(z.to(\"cpu\", torch.double)) # ``.to`` can also change dtype together!" 
628 | ] 629 | } 630 | ], 631 | "metadata": { 632 | "kernelspec": { 633 | "display_name": "Python 3.7 (NumPy EuroSciPy)", 634 | "language": "python", 635 | "name": "numpy-euroscipy" 636 | }, 637 | "language_info": { 638 | "codemirror_mode": { 639 | "name": "ipython", 640 | "version": 3 641 | }, 642 | "file_extension": ".py", 643 | "mimetype": "text/x-python", 644 | "name": "python", 645 | "nbconvert_exporter": "python", 646 | "pygments_lexer": "ipython3", 647 | "version": "3.7.3" 648 | } 649 | }, 650 | "nbformat": 4, 651 | "nbformat_minor": 1 652 | } 653 | -------------------------------------------------------------------------------- /03_numpy_io_matlab.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# NumPy Serialisation and I/O\n", 8 | "\n", 9 | "In this notebook we will focus on NumPy built-in support for **Serialisation** and **I/O**. In other words, we will learn how to save and load NumPy `ndarray` objects in native (binary) format for easy sharing. Moreover we are going to discover how NumPy can load data from external files." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy as np" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": { 24 | "slideshow": { 25 | "slide_type": "subslide" 26 | } 27 | }, 28 | "source": [ 29 | "## Comma-separated values (CSV)" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": { 35 | "slideshow": { 36 | "slide_type": "subslide" 37 | } 38 | }, 39 | "source": [ 40 | "A very common file format for data files are the comma-separated values (CSV), or related format such as TSV (tab-separated values). \n", 41 | "\n", 42 | "To read data from such file into Numpy arrays we can use the `numpy.genfromtxt` function." 
43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 2, 48 | "metadata": { 49 | "jupyter": { 50 | "outputs_hidden": false 51 | }, 52 | "slideshow": { 53 | "slide_type": "subslide" 54 | } 55 | }, 56 | "outputs": [ 57 | { 58 | "name": "stdout", 59 | "output_type": "stream", 60 | "text": [ 61 | "Year Month Day T_6 T12 T18 Valid \r\n", 62 | "1800 1 1 -6.1 -6.1 -6.1 1\r\n", 63 | "1800 1 2 -15.4 -15.4 -15.4 1\r\n", 64 | "1800 1 3 -15.0 -15.0 -15.0 1\r\n", 65 | "1800 1 4 -19.3 -19.3 -19.3 1\r\n", 66 | "1800 1 5 -16.8 -16.8 -16.8 1\r\n", 67 | "1800 1 6 -11.4 -11.4 -11.4 1\r\n", 68 | "1800 1 7 -7.6 -7.6 -7.6 1\r\n", 69 | "1800 1 8 -7.1 -7.1 -7.1 1\r\n", 70 | "1800 1 9 -10.1 -10.1 -10.1 1\r\n" 71 | ] 72 | } 73 | ], 74 | "source": [ 75 | "# In Jupyter, all commands starting with ! are mapped as SHELL commands\n", 76 | "!head stockholm_td_adj.dat" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 3, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "np.genfromtxt?" 
86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 4, 91 | "metadata": { 92 | "jupyter": { 93 | "outputs_hidden": false 94 | }, 95 | "slideshow": { 96 | "slide_type": "subslide" 97 | } 98 | }, 99 | "outputs": [], 100 | "source": [ 101 | "st_temperatures = np.genfromtxt('stockholm_td_adj.dat', \n", 102 | " skip_header=1)" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 5, 108 | "metadata": { 109 | "jupyter": { 110 | "outputs_hidden": false 111 | }, 112 | "slideshow": { 113 | "slide_type": "fragment" 114 | } 115 | }, 116 | "outputs": [ 117 | { 118 | "data": { 119 | "text/plain": [ 120 | "(77431, 7)" 121 | ] 122 | }, 123 | "execution_count": 5, 124 | "metadata": {}, 125 | "output_type": "execute_result" 126 | } 127 | ], 128 | "source": [ 129 | "st_temperatures.shape" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "### DYI\n", 137 | "\n", 138 | "Let's play a bit with the data loaded `st_temperatures` to combine **fancy indexing** (i.e. 
defining conditions to get subset of data) and very simple statistics.\n", 139 | "\n", 140 | "For example:" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 6, 146 | "metadata": {}, 147 | "outputs": [ 148 | { 149 | "data": { 150 | "text/plain": [ 151 | "array([[ 1.80e+03, 1.00e+00, 1.00e+00, -6.10e+00, -6.10e+00, -6.10e+00,\n", 152 | " 1.00e+00],\n", 153 | " [ 1.80e+03, 1.00e+00, 2.00e+00, -1.54e+01, -1.54e+01, -1.54e+01,\n", 154 | " 1.00e+00],\n", 155 | " [ 1.80e+03, 1.00e+00, 3.00e+00, -1.50e+01, -1.50e+01, -1.50e+01,\n", 156 | " 1.00e+00],\n", 157 | " [ 1.80e+03, 1.00e+00, 4.00e+00, -1.93e+01, -1.93e+01, -1.93e+01,\n", 158 | " 1.00e+00],\n", 159 | " [ 1.80e+03, 1.00e+00, 5.00e+00, -1.68e+01, -1.68e+01, -1.68e+01,\n", 160 | " 1.00e+00],\n", 161 | " [ 1.80e+03, 1.00e+00, 6.00e+00, -1.14e+01, -1.14e+01, -1.14e+01,\n", 162 | " 1.00e+00],\n", 163 | " [ 1.80e+03, 1.00e+00, 7.00e+00, -7.60e+00, -7.60e+00, -7.60e+00,\n", 164 | " 1.00e+00],\n", 165 | " [ 1.80e+03, 1.00e+00, 8.00e+00, -7.10e+00, -7.10e+00, -7.10e+00,\n", 166 | " 1.00e+00],\n", 167 | " [ 1.80e+03, 1.00e+00, 9.00e+00, -1.01e+01, -1.01e+01, -1.01e+01,\n", 168 | " 1.00e+00],\n", 169 | " [ 1.80e+03, 1.00e+00, 1.00e+01, -9.50e+00, -9.50e+00, -9.50e+00,\n", 170 | " 1.00e+00]])" 171 | ] 172 | }, 173 | "execution_count": 6, 174 | "metadata": {}, 175 | "output_type": "execute_result" 176 | } 177 | ], 178 | "source": [ 179 | "st_temperatures[:10, ]" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 7, 185 | "metadata": {}, 186 | "outputs": [ 187 | { 188 | "data": { 189 | "text/plain": [ 190 | "dtype('float64')" 191 | ] 192 | }, 193 | "execution_count": 7, 194 | "metadata": {}, 195 | "output_type": "execute_result" 196 | } 197 | ], 198 | "source": [ 199 | "st_temperatures.dtype" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 8, 205 | "metadata": {}, 206 | "outputs": [ 207 | { 208 | "data": { 209 | "text/plain": [ 210 | "(array([1800, 
1801, 1802, 1803, 1804, 1805, 1806, 1807, 1808, 1809, 1810,\n", 211 | " 1811, 1812, 1813, 1814, 1815, 1816, 1817, 1818, 1819, 1820, 1821,\n", 212 | " 1822, 1823, 1824, 1825, 1826, 1827, 1828, 1829, 1830, 1831, 1832,\n", 213 | " 1833, 1834, 1835, 1836, 1837, 1838, 1839, 1840, 1841, 1842, 1843,\n", 214 | " 1844, 1845, 1846, 1847, 1848, 1849, 1850, 1851, 1852, 1853, 1854,\n", 215 | " 1855, 1856, 1857, 1858, 1859, 1860, 1861, 1862, 1863, 1864, 1865,\n", 216 | " 1866, 1867, 1868, 1869, 1870, 1871, 1872, 1873, 1874, 1875, 1876,\n", 217 | " 1877, 1878, 1879, 1880, 1881, 1882, 1883, 1884, 1885, 1886, 1887,\n", 218 | " 1888, 1889, 1890, 1891, 1892, 1893, 1894, 1895, 1896, 1897, 1898,\n", 219 | " 1899, 1900, 1901, 1902, 1903, 1904, 1905, 1906, 1907, 1908, 1909,\n", 220 | " 1910, 1911, 1912, 1913, 1914, 1915, 1916, 1917, 1918, 1919, 1920,\n", 221 | " 1921, 1922, 1923, 1924, 1925, 1926, 1927, 1928, 1929, 1930, 1931,\n", 222 | " 1932, 1933, 1934, 1935, 1936, 1937, 1938, 1939, 1940, 1941, 1942,\n", 223 | " 1943, 1944, 1945, 1946, 1947, 1948, 1949, 1950, 1951, 1952, 1953,\n", 224 | " 1954, 1955, 1956, 1957, 1958, 1959, 1960, 1961, 1962, 1963, 1964,\n", 225 | " 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975,\n", 226 | " 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986,\n", 227 | " 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997,\n", 228 | " 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,\n", 229 | " 2009, 2010, 2011]), 212)" 230 | ] 231 | }, 232 | "execution_count": 8, 233 | "metadata": {}, 234 | "output_type": "execute_result" 235 | } 236 | ], 237 | "source": [ 238 | "## Calculate which and how many years we have in our data\n", 239 | "years = np.unique(st_temperatures[:, 0]).astype(np.int)\n", 240 | "years, len(years)" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 10, 246 | "metadata": {}, 247 | "outputs": [ 248 | { 249 | "data": { 250 | "text/plain": [ 251 | "(1800, 2011)" 252 | 
] 253 | }, 254 | "execution_count": 10, 255 | "metadata": {}, 256 | "output_type": "execute_result" 257 | } 258 | ], 259 | "source": [ 260 | "years.min(), years.max()" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": 11, 266 | "metadata": {}, 267 | "outputs": [ 268 | { 269 | "name": "stdout", 270 | "output_type": "stream", 271 | "text": [ 272 | "Year Month Day T_6 T12 T18 Valid \r\n", 273 | "1800 1 1 -6.1 -6.1 -6.1 1\r\n", 274 | "1800 1 2 -15.4 -15.4 -15.4 1\r\n", 275 | "1800 1 3 -15.0 -15.0 -15.0 1\r\n", 276 | "1800 1 4 -19.3 -19.3 -19.3 1\r\n", 277 | "1800 1 5 -16.8 -16.8 -16.8 1\r\n", 278 | "1800 1 6 -11.4 -11.4 -11.4 1\r\n", 279 | "1800 1 7 -7.6 -7.6 -7.6 1\r\n", 280 | "1800 1 8 -7.1 -7.1 -7.1 1\r\n", 281 | "1800 1 9 -10.1 -10.1 -10.1 1\r\n" 282 | ] 283 | } 284 | ], 285 | "source": [ 286 | "!head stockholm_td_adj.dat" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": 12, 292 | "metadata": {}, 293 | "outputs": [], 294 | "source": [ 295 | "mask_year = st_temperatures[:, 0] == 1984" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": 24, 301 | "metadata": {}, 302 | "outputs": [], 303 | "source": [ 304 | "mask_feb = st_temperatures[:, 1] == 2" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 25, 310 | "metadata": {}, 311 | "outputs": [ 312 | { 313 | "data": { 314 | "text/plain": [ 315 | "(77431,)" 316 | ] 317 | }, 318 | "execution_count": 25, 319 | "metadata": {}, 320 | "output_type": "execute_result" 321 | } 322 | ], 323 | "source": [ 324 | "mask_feb.shape" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": 26, 330 | "metadata": {}, 331 | "outputs": [ 332 | { 333 | "data": { 334 | "text/plain": [ 335 | "dtype('bool')" 336 | ] 337 | }, 338 | "execution_count": 26, 339 | "metadata": {}, 340 | "output_type": "execute_result" 341 | } 342 | ], 343 | "source": [ 344 | "mask_year.dtype" 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 
349 | "execution_count": 27, 350 | "metadata": {}, 351 | "outputs": [ 352 | { 353 | "data": { 354 | "text/plain": [ 355 | "numpy.ndarray" 356 | ] 357 | }, 358 | "execution_count": 27, 359 | "metadata": {}, 360 | "output_type": "execute_result" 361 | } 362 | ], 363 | "source": [ 364 | "type(mask_year)" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": 28, 370 | "metadata": {}, 371 | "outputs": [], 372 | "source": [ 373 | "## Calculate the mean temperature of mid-days on February in 1984\n", 374 | "feb_noon_temps = st_temperatures[(mask_year & mask_feb), 4]" 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": 29, 380 | "metadata": {}, 381 | "outputs": [ 382 | { 383 | "data": { 384 | "text/plain": [ 385 | "numpy.ndarray" 386 | ] 387 | }, 388 | "execution_count": 29, 389 | "metadata": {}, 390 | "output_type": "execute_result" 391 | } 392 | ], 393 | "source": [ 394 | "type(feb_noon_temps)" 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "execution_count": 30, 400 | "metadata": {}, 401 | "outputs": [ 402 | { 403 | "data": { 404 | "text/plain": [ 405 | "dtype('float64')" 406 | ] 407 | }, 408 | "execution_count": 30, 409 | "metadata": {}, 410 | "output_type": "execute_result" 411 | } 412 | ], 413 | "source": [ 414 | "feb_noon_temps.dtype" 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": 31, 420 | "metadata": {}, 421 | "outputs": [ 422 | { 423 | "data": { 424 | "text/plain": [ 425 | "-1.7344827586206901" 426 | ] 427 | }, 428 | "execution_count": 31, 429 | "metadata": {}, 430 | "output_type": "execute_result" 431 | } 432 | ], 433 | "source": [ 434 | "feb_noon_temps.mean()" 435 | ] 436 | }, 437 | { 438 | "cell_type": "code", 439 | "execution_count": 21, 440 | "metadata": {}, 441 | "outputs": [], 442 | "source": [ 443 | "## ...." 
444 | ] 445 | }, 446 | { 447 | "cell_type": "markdown", 448 | "metadata": { 449 | "slideshow": { 450 | "slide_type": "slide" 451 | } 452 | }, 453 | "source": [ 454 | "## Numpy's native file format" 455 | ] 456 | }, 457 | { 458 | "cell_type": "markdown", 459 | "metadata": { 460 | "slideshow": { 461 | "slide_type": "subslide" 462 | } 463 | }, 464 | "source": [ 465 | "* Useful when storing and reading back numpy array data. \n", 466 | "\n", 467 | "* Use the functions `np.save` and `np.load`:" 468 | ] 469 | }, 470 | { 471 | "cell_type": "markdown", 472 | "metadata": { 473 | "slideshow": { 474 | "slide_type": "subslide" 475 | } 476 | }, 477 | "source": [ 478 | "### `np.save`" 479 | ] 480 | }, 481 | { 482 | "cell_type": "code", 483 | "execution_count": 22, 484 | "metadata": { 485 | "jupyter": { 486 | "outputs_hidden": false 487 | }, 488 | "slideshow": { 489 | "slide_type": "fragment" 490 | } 491 | }, 492 | "outputs": [], 493 | "source": [ 494 | "np.save(\"st_temperatures.npy\", st_temperatures)" 495 | ] 496 | }, 497 | { 498 | "cell_type": "markdown", 499 | "metadata": {}, 500 | "source": [ 501 | "**See also**:\n", 502 | "\n", 503 | "- `np.savez` : save several NumPy arrays into one single file\n", 504 | "- `np.savez_compressed`\n", 505 | "- `np.savetxt`" 506 | ] 507 | }, 508 | { 509 | "cell_type": "markdown", 510 | "metadata": { 511 | "slideshow": { 512 | "slide_type": "subslide" 513 | } 514 | }, 515 | "source": [ 516 | "### `np.load`" 517 | ] 518 | }, 519 | { 520 | "cell_type": "code", 521 | "execution_count": 23, 522 | "metadata": { 523 | "jupyter": { 524 | "outputs_hidden": false 525 | }, 526 | "slideshow": { 527 | "slide_type": "fragment" 528 | } 529 | }, 530 | "outputs": [ 531 | { 532 | "name": "stdout", 533 | "output_type": "stream", 534 | "text": [ 535 | "(77431, 7) float64\n" 536 | ] 537 | } 538 | ], 539 | "source": [ 540 | "T = np.load(\"st_temperatures.npy\")\n", 541 | "print(T.shape, T.dtype)" 542 | ] 543 | }, 544 | { 545 | "cell_type": "markdown", 546 | 
"metadata": {}, 547 | "source": [ 548 | "---" 549 | ] 550 | }, 551 | { 552 | "cell_type": "markdown", 553 | "metadata": { 554 | "slideshow": { 555 | "slide_type": "subslide" 556 | } 557 | }, 558 | "source": [ 559 | "## NumPy for Matlab Users (really?)\n", 560 | "\n", 561 | "\n", 562 | "If you are a MATLAB® user I do recommend to read [Numpy for MATLAB Users](https://docs.scipy.org/doc/numpy-1.15.0/user/numpy-for-matlab-users.html)." 563 | ] 564 | }, 565 | { 566 | "cell_type": "markdown", 567 | "metadata": {}, 568 | "source": [ 569 | "### Numpy can load and save native MATLAB® files:" 570 | ] 571 | }, 572 | { 573 | "cell_type": "markdown", 574 | "metadata": {}, 575 | "source": [ 576 | "---" 577 | ] 578 | }, 579 | { 580 | "cell_type": "markdown", 581 | "metadata": { 582 | "slideshow": { 583 | "slide_type": "slide" 584 | } 585 | }, 586 | "source": [ 587 | "### The `Matrix` Array Type" 588 | ] 589 | }, 590 | { 591 | "cell_type": "markdown", 592 | "metadata": { 593 | "slideshow": { 594 | "slide_type": "subslide" 595 | } 596 | }, 597 | "source": [ 598 | "In addition to the `numpy.ndarray` type, NumPy also support a very specific data type called `Matrix`. \n", 599 | "\n", 600 | "This special type of object has been introduced to allow for API and programming compatibility with\n", 601 | "MATLAB®. \n", 602 | "\n", 603 | "**Note**: The most relevant feature of this new _array type_ is the behavior of the standard arithmetic operators `+, -, *` to use matrix algebra, which work as they would in MATLAB." 
604 | ] 605 | }, 606 | { 607 | "cell_type": "code", 608 | "execution_count": 2, 609 | "metadata": { 610 | "slideshow": { 611 | "slide_type": "subslide" 612 | } 613 | }, 614 | "outputs": [], 615 | "source": [ 616 | "from numpy import matrix" 617 | ] 618 | }, 619 | { 620 | "cell_type": "code", 621 | "execution_count": 3, 622 | "metadata": {}, 623 | "outputs": [], 624 | "source": [ 625 | "a = np.arange(0, 5)\n", 626 | "A = np.array([[n+m*10 for n in range(5)] for m in range(5)])" 627 | ] 628 | }, 629 | { 630 | "cell_type": "code", 631 | "execution_count": 4, 632 | "metadata": {}, 633 | "outputs": [ 634 | { 635 | "data": { 636 | "text/plain": [ 637 | "array([0, 1, 2, 3, 4])" 638 | ] 639 | }, 640 | "execution_count": 4, 641 | "metadata": {}, 642 | "output_type": "execute_result" 643 | } 644 | ], 645 | "source": [ 646 | "a" 647 | ] 648 | }, 649 | { 650 | "cell_type": "code", 651 | "execution_count": 5, 652 | "metadata": {}, 653 | "outputs": [ 654 | { 655 | "data": { 656 | "text/plain": [ 657 | "array([[ 0, 1, 2, 3, 4],\n", 658 | " [10, 11, 12, 13, 14],\n", 659 | " [20, 21, 22, 23, 24],\n", 660 | " [30, 31, 32, 33, 34],\n", 661 | " [40, 41, 42, 43, 44]])" 662 | ] 663 | }, 664 | "execution_count": 5, 665 | "metadata": {}, 666 | "output_type": "execute_result" 667 | } 668 | ], 669 | "source": [ 670 | "A" 671 | ] 672 | }, 673 | { 674 | "cell_type": "code", 675 | "execution_count": 6, 676 | "metadata": { 677 | "jupyter": { 678 | "outputs_hidden": false 679 | }, 680 | "slideshow": { 681 | "slide_type": "fragment" 682 | } 683 | }, 684 | "outputs": [], 685 | "source": [ 686 | "M = matrix(A)\n", 687 | "v = matrix(a).T # make it a column vector" 688 | ] 689 | }, 690 | { 691 | "cell_type": "code", 692 | "execution_count": 7, 693 | "metadata": { 694 | "jupyter": { 695 | "outputs_hidden": false 696 | }, 697 | "slideshow": { 698 | "slide_type": "fragment" 699 | } 700 | }, 701 | "outputs": [ 702 | { 703 | "data": { 704 | "text/plain": [ 705 | "array([0, 1, 2, 3, 4])" 706 | ] 707 | }, 
708 | "execution_count": 7, 709 | "metadata": {}, 710 | "output_type": "execute_result" 711 | } 712 | ], 713 | "source": [ 714 | "a" 715 | ] 716 | }, 717 | { 718 | "cell_type": "code", 719 | "execution_count": 8, 720 | "metadata": { 721 | "jupyter": { 722 | "outputs_hidden": false 723 | }, 724 | "slideshow": { 725 | "slide_type": "subslide" 726 | } 727 | }, 728 | "outputs": [ 729 | { 730 | "data": { 731 | "text/plain": [ 732 | "matrix([[ 300, 310, 320, 330, 340],\n", 733 | " [1300, 1360, 1420, 1480, 1540],\n", 734 | " [2300, 2410, 2520, 2630, 2740],\n", 735 | " [3300, 3460, 3620, 3780, 3940],\n", 736 | " [4300, 4510, 4720, 4930, 5140]])" 737 | ] 738 | }, 739 | "execution_count": 8, 740 | "metadata": {}, 741 | "output_type": "execute_result" 742 | } 743 | ], 744 | "source": [ 745 | "M * M" 746 | ] 747 | }, 748 | { 749 | "cell_type": "code", 750 | "execution_count": 9, 751 | "metadata": {}, 752 | "outputs": [ 753 | { 754 | "data": { 755 | "text/plain": [ 756 | "array([[ 300, 310, 320, 330, 340],\n", 757 | " [1300, 1360, 1420, 1480, 1540],\n", 758 | " [2300, 2410, 2520, 2630, 2740],\n", 759 | " [3300, 3460, 3620, 3780, 3940],\n", 760 | " [4300, 4510, 4720, 4930, 5140]])" 761 | ] 762 | }, 763 | "execution_count": 9, 764 | "metadata": {}, 765 | "output_type": "execute_result" 766 | } 767 | ], 768 | "source": [ 769 | "A @ A # @ operator equivalent to np.dot(A, A)" 770 | ] 771 | }, 772 | { 773 | "cell_type": "code", 774 | "execution_count": 10, 775 | "metadata": {}, 776 | "outputs": [ 777 | { 778 | "data": { 779 | "text/plain": [ 780 | "array([[ 0, 1, 4, 9, 16],\n", 781 | " [ 100, 121, 144, 169, 196],\n", 782 | " [ 400, 441, 484, 529, 576],\n", 783 | " [ 900, 961, 1024, 1089, 1156],\n", 784 | " [1600, 1681, 1764, 1849, 1936]])" 785 | ] 786 | }, 787 | "execution_count": 10, 788 | "metadata": {}, 789 | "output_type": "execute_result" 790 | } 791 | ], 792 | "source": [ 793 | "# Element wise multiplication in NumPy\n", 794 | "A * A" 795 | ] 796 | }, 797 | { 798 | "cell_type": 
"code", 799 | "execution_count": 11, 800 | "metadata": { 801 | "jupyter": { 802 | "outputs_hidden": false 803 | }, 804 | "slideshow": { 805 | "slide_type": "subslide" 806 | } 807 | }, 808 | "outputs": [ 809 | { 810 | "data": { 811 | "text/plain": [ 812 | "matrix([[ 30],\n", 813 | " [130],\n", 814 | " [230],\n", 815 | " [330],\n", 816 | " [430]])" 817 | ] 818 | }, 819 | "execution_count": 11, 820 | "metadata": {}, 821 | "output_type": "execute_result" 822 | } 823 | ], 824 | "source": [ 825 | "M * v" 826 | ] 827 | }, 828 | { 829 | "cell_type": "code", 830 | "execution_count": 12, 831 | "metadata": {}, 832 | "outputs": [ 833 | { 834 | "data": { 835 | "text/plain": [ 836 | "array([[ 0, 1, 4, 9, 16],\n", 837 | " [ 0, 11, 24, 39, 56],\n", 838 | " [ 0, 21, 44, 69, 96],\n", 839 | " [ 0, 31, 64, 99, 136],\n", 840 | " [ 0, 41, 84, 129, 176]])" 841 | ] 842 | }, 843 | "execution_count": 12, 844 | "metadata": {}, 845 | "output_type": "execute_result" 846 | } 847 | ], 848 | "source": [ 849 | "A * a" 850 | ] 851 | }, 852 | { 853 | "cell_type": "code", 854 | "execution_count": 13, 855 | "metadata": { 856 | "jupyter": { 857 | "outputs_hidden": false 858 | }, 859 | "slideshow": { 860 | "slide_type": "subslide" 861 | } 862 | }, 863 | "outputs": [ 864 | { 865 | "data": { 866 | "text/plain": [ 867 | "matrix([[30]])" 868 | ] 869 | }, 870 | "execution_count": 13, 871 | "metadata": {}, 872 | "output_type": "execute_result" 873 | } 874 | ], 875 | "source": [ 876 | "# inner product\n", 877 | "v.T * v" 878 | ] 879 | }, 880 | { 881 | "cell_type": "code", 882 | "execution_count": 14, 883 | "metadata": { 884 | "jupyter": { 885 | "outputs_hidden": false 886 | }, 887 | "slideshow": { 888 | "slide_type": "fragment" 889 | } 890 | }, 891 | "outputs": [ 892 | { 893 | "data": { 894 | "text/plain": [ 895 | "matrix([[ 30],\n", 896 | " [131],\n", 897 | " [232],\n", 898 | " [333],\n", 899 | " [434]])" 900 | ] 901 | }, 902 | "execution_count": 14, 903 | "metadata": {}, 904 | "output_type": "execute_result" 
905 | } 906 | ], 907 | "source": [ 908 | "# with matrix objects, standard matrix algebra applies\n", 909 | "v + M*v" 910 | ] 911 | }, 912 | { 913 | "cell_type": "markdown", 914 | "metadata": { 915 | "slideshow": { 916 | "slide_type": "subslide" 917 | } 918 | }, 919 | "source": [ 920 | "If we try to add, subtract or multiply objects with incomplatible shapes we get an error:" 921 | ] 922 | }, 923 | { 924 | "cell_type": "code", 925 | "execution_count": 15, 926 | "metadata": { 927 | "jupyter": { 928 | "outputs_hidden": false 929 | }, 930 | "slideshow": { 931 | "slide_type": "fragment" 932 | } 933 | }, 934 | "outputs": [], 935 | "source": [ 936 | "v_incompat = matrix(list(range(1, 7))).T" 937 | ] 938 | }, 939 | { 940 | "cell_type": "code", 941 | "execution_count": 16, 942 | "metadata": { 943 | "jupyter": { 944 | "outputs_hidden": false 945 | }, 946 | "slideshow": { 947 | "slide_type": "fragment" 948 | } 949 | }, 950 | "outputs": [ 951 | { 952 | "data": { 953 | "text/plain": [ 954 | "((5, 5), (6, 1))" 955 | ] 956 | }, 957 | "execution_count": 16, 958 | "metadata": {}, 959 | "output_type": "execute_result" 960 | } 961 | ], 962 | "source": [ 963 | "M.shape, v_incompat.shape" 964 | ] 965 | }, 966 | { 967 | "cell_type": "code", 968 | "execution_count": 17, 969 | "metadata": { 970 | "jupyter": { 971 | "outputs_hidden": false 972 | }, 973 | "slideshow": { 974 | "slide_type": "subslide" 975 | } 976 | }, 977 | "outputs": [ 978 | { 979 | "ename": "ValueError", 980 | "evalue": "shapes (5,5) and (6,1) not aligned: 5 (dim 1) != 6 (dim 0)", 981 | "output_type": "error", 982 | "traceback": [ 983 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 984 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", 985 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mM\u001b[0m \u001b[0;34m*\u001b[0m 
\u001b[0mv_incompat\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 986 | "\u001b[0;32m~/anaconda3/envs/numpy-euroscipy/lib/python3.7/site-packages/numpy/matrixlib/defmatrix.py\u001b[0m in \u001b[0;36m__mul__\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m 218\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mother\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mN\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndarray\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtuple\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 219\u001b[0m \u001b[0;31m# This promotes 1-D vectors to row vectors\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 220\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mN\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0masmatrix\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mother\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 221\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misscalar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mother\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mother\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'__rmul__'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 222\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mN\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 987 | "\u001b[0;32m<__array_function__ 
internals>\u001b[0m in \u001b[0;36mdot\u001b[0;34m(*args, **kwargs)\u001b[0m\n", 988 | "\u001b[0;31mValueError\u001b[0m: shapes (5,5) and (6,1) not aligned: 5 (dim 1) != 6 (dim 0)" 989 | ] 990 | } 991 | ], 992 | "source": [ 993 | "M * v_incompat" 994 | ] 995 | }, 996 | { 997 | "cell_type": "markdown", 998 | "metadata": { 999 | "slideshow": { 1000 | "slide_type": "subslide" 1001 | } 1002 | }, 1003 | "source": [ 1004 | "See also the related functions: `inner`, `outer`, `cross`, `kron`, `tensordot`. \n", 1005 | "\n", 1006 | "Try for example `help(inner)`." 1007 | ] 1008 | }, 1009 | { 1010 | "cell_type": "markdown", 1011 | "metadata": {}, 1012 | "source": [ 1013 | "---" 1014 | ] 1015 | }, 1016 | { 1017 | "cell_type": "markdown", 1018 | "metadata": {}, 1019 | "source": [ 1020 | "## Loading and Saving `.mat` file" 1021 | ] 1022 | }, 1023 | { 1024 | "cell_type": "markdown", 1025 | "metadata": {}, 1026 | "source": [ 1027 | "Let's create a `numpy.ndarray` object" 1028 | ] 1029 | }, 1030 | { 1031 | "cell_type": "code", 1032 | "execution_count": 21, 1033 | "metadata": { 1034 | "slideshow": { 1035 | "slide_type": "fragment" 1036 | } 1037 | }, 1038 | "outputs": [], 1039 | "source": [ 1040 | "A = np.random.rand(10000, 300, 50) # note: this may take a while" 1041 | ] 1042 | }, 1043 | { 1044 | "cell_type": "code", 1045 | "execution_count": 22, 1046 | "metadata": { 1047 | "jupyter": { 1048 | "outputs_hidden": false 1049 | }, 1050 | "slideshow": { 1051 | "slide_type": "subslide" 1052 | } 1053 | }, 1054 | "outputs": [ 1055 | { 1056 | "data": { 1057 | "text/plain": [ 1058 | "array([[[0.30788845, 0.60569692, 0.74159203, ..., 0.99513856,\n", 1059 | " 0.86615676, 0.65581839],\n", 1060 | " [0.29972906, 0.1727805 , 0.73877596, ..., 0.57321798,\n", 1061 | " 0.52657155, 0.15148499],\n", 1062 | " [0.91677054, 0.30289045, 0.47086303, ..., 0.91076997,\n", 1063 | " 0.15659756, 0.74502433],\n", 1064 | " ...,\n", 1065 | " [0.16246413, 0.57601666, 0.64519549, ..., 0.04166688,\n", 1066 | " 
0.71115738, 0.75984878],\n", 1067 | " [0.99626814, 0.89529207, 0.89520696, ..., 0.927474 ,\n", 1068 | " 0.46998733, 0.809978 ],\n", 1069 | " [0.52545775, 0.42922203, 0.40999633, ..., 0.7497839 ,\n", 1070 | " 0.26582518, 0.68821719]],\n", 1071 | "\n", 1072 | " [[0.93763072, 0.68660253, 0.03060252, ..., 0.08489496,\n", 1073 | " 0.3368953 , 0.0040575 ],\n", 1074 | " [0.17680589, 0.44922269, 0.32552186, ..., 0.49081397,\n", 1075 | " 0.7718607 , 0.91216332],\n", 1076 | " [0.48935017, 0.28293444, 0.57762148, ..., 0.64988995,\n", 1077 | " 0.96036063, 0.62395338],\n", 1078 | " ...,\n", 1079 | " [0.77554755, 0.23174591, 0.80126054, ..., 0.34982511,\n", 1080 | " 0.13648038, 0.63953428],\n", 1081 | " [0.4502637 , 0.74376194, 0.47531237, ..., 0.94077276,\n", 1082 | " 0.64544446, 0.20241967],\n", 1083 | " [0.65158873, 0.93520847, 0.1153165 , ..., 0.92607143,\n", 1084 | " 0.42194542, 0.49231582]],\n", 1085 | "\n", 1086 | " [[0.60652634, 0.55707594, 0.7861307 , ..., 0.49618863,\n", 1087 | " 0.26073645, 0.57230289],\n", 1088 | " [0.33445447, 0.51254754, 0.89760192, ..., 0.20161607,\n", 1089 | " 0.54935607, 0.97355349],\n", 1090 | " [0.82742407, 0.13811956, 0.77549593, ..., 0.97417726,\n", 1091 | " 0.75828111, 0.20726388],\n", 1092 | " ...,\n", 1093 | " [0.89885131, 0.95168761, 0.04908857, ..., 0.26560786,\n", 1094 | " 0.19828306, 0.34056713],\n", 1095 | " [0.37462286, 0.00294645, 0.46417234, ..., 0.98287275,\n", 1096 | " 0.63560479, 0.37498829],\n", 1097 | " [0.80824186, 0.77414402, 0.27137252, ..., 0.97397635,\n", 1098 | " 0.73792667, 0.47235421]],\n", 1099 | "\n", 1100 | " ...,\n", 1101 | "\n", 1102 | " [[0.79534194, 0.19495982, 0.69419483, ..., 0.98484659,\n", 1103 | " 0.07524489, 0.35898295],\n", 1104 | " [0.75246125, 0.1448565 , 0.31596133, ..., 0.97989236,\n", 1105 | " 0.66466035, 0.09253075],\n", 1106 | " [0.13218267, 0.24674062, 0.93687433, ..., 0.26530807,\n", 1107 | " 0.64653497, 0.25848279],\n", 1108 | " ...,\n", 1109 | " [0.01839164, 0.4127106 , 0.36428583, ..., 
0.97212349,\n", 1110 | " 0.867556 , 0.58971199],\n", 1111 | " [0.49075206, 0.80264193, 0.82420669, ..., 0.13249282,\n", 1112 | " 0.70465219, 0.97575252],\n", 1113 | " [0.2735621 , 0.37780973, 0.19581884, ..., 0.55415141,\n", 1114 | " 0.33630774, 0.62376131]],\n", 1115 | "\n", 1116 | " [[0.95740591, 0.6409855 , 0.29668168, ..., 0.85582114,\n", 1117 | " 0.02653775, 0.07433918],\n", 1118 | " [0.97968508, 0.7192658 , 0.96627464, ..., 0.25708965,\n", 1119 | " 0.60037787, 0.8001345 ],\n", 1120 | " [0.98598865, 0.7660025 , 0.05743886, ..., 0.84864957,\n", 1121 | " 0.5717346 , 0.48107095],\n", 1122 | " ...,\n", 1123 | " [0.04048004, 0.24279597, 0.43556563, ..., 0.74962769,\n", 1124 | " 0.71872639, 0.08429666],\n", 1125 | " [0.09697323, 0.51034331, 0.6199531 , ..., 0.95157892,\n", 1126 | " 0.52082535, 0.36331146],\n", 1127 | " [0.91967882, 0.47842183, 0.55403126, ..., 0.99053768,\n", 1128 | " 0.68606411, 0.4186365 ]],\n", 1129 | "\n", 1130 | " [[0.83101977, 0.7800826 , 0.52552153, ..., 0.45411436,\n", 1131 | " 0.96688267, 0.14787061],\n", 1132 | " [0.76365986, 0.97841123, 0.99583821, ..., 0.96043423,\n", 1133 | " 0.72406206, 0.97100977],\n", 1134 | " [0.92772653, 0.01373546, 0.59448744, ..., 0.64587074,\n", 1135 | " 0.13641851, 0.40625453],\n", 1136 | " ...,\n", 1137 | " [0.24169963, 0.22511255, 0.85599095, ..., 0.75448232,\n", 1138 | " 0.42633244, 0.31373371],\n", 1139 | " [0.28480721, 0.83815003, 0.77828307, ..., 0.52597019,\n", 1140 | " 0.88834579, 0.09847287],\n", 1141 | " [0.32613764, 0.67313394, 0.82862416, ..., 0.87137257,\n", 1142 | " 0.13503096, 0.0888404 ]]])" 1143 | ] 1144 | }, 1145 | "execution_count": 22, 1146 | "metadata": {}, 1147 | "output_type": "execute_result" 1148 | } 1149 | ], 1150 | "source": [ 1151 | "A" 1152 | ] 1153 | }, 1154 | { 1155 | "cell_type": "markdown", 1156 | "metadata": {}, 1157 | "source": [ 1158 | "### Introducing SciPy (ecosystem)" 1159 | ] 1160 | }, 1161 | { 1162 | "cell_type": "markdown", 1163 | "metadata": {}, 1164 | "source": [ 1165 
| "![scipy](images/scipy.png)" 1166 | ] 1167 | }, 1168 | { 1169 | "cell_type": "markdown", 1170 | "metadata": {}, 1171 | "source": [ 1172 | "### `scipy.io`" 1173 | ] 1174 | }, 1175 | { 1176 | "cell_type": "code", 1177 | "execution_count": 20, 1178 | "metadata": { 1179 | "jupyter": { 1180 | "outputs_hidden": false 1181 | }, 1182 | "slideshow": { 1183 | "slide_type": "fragment" 1184 | } 1185 | }, 1186 | "outputs": [], 1187 | "source": [ 1188 | "from scipy import io as spio" 1189 | ] 1190 | }, 1191 | { 1192 | "cell_type": "markdown", 1193 | "metadata": {}, 1194 | "source": [ 1195 | "### NumPy $\\mapsto$ MATLAB : `scipy.io.savemat`" 1196 | ] 1197 | }, 1198 | { 1199 | "cell_type": "code", 1200 | "execution_count": 23, 1201 | "metadata": {}, 1202 | "outputs": [], 1203 | "source": [ 1204 | "spio.savemat('numpy_to.mat', {'A': A}, oned_as='row') # savemat expects a dictionary" 1205 | ] 1206 | }, 1207 | { 1208 | "cell_type": "markdown", 1209 | "metadata": {}, 1210 | "source": [ 1211 | "MATLAB $\\mapsto$ NumPy: `scipy.io.loadmat`" 1212 | ] 1213 | }, 1214 | { 1215 | "cell_type": "code", 1216 | "execution_count": 24, 1217 | "metadata": {}, 1218 | "outputs": [], 1219 | "source": [ 1220 | "data_dictionary = spio.loadmat('numpy_to.mat')\n" 1221 | ] 1222 | }, 1223 | { 1224 | "cell_type": "code", 1225 | "execution_count": 25, 1226 | "metadata": {}, 1227 | "outputs": [ 1228 | { 1229 | "data": { 1230 | "text/plain": [ 1231 | "['__header__', '__version__', '__globals__', 'A']" 1232 | ] 1233 | }, 1234 | "execution_count": 25, 1235 | "metadata": {}, 1236 | "output_type": "execute_result" 1237 | } 1238 | ], 1239 | "source": [ 1240 | "list(data_dictionary.keys())" 1241 | ] 1242 | }, 1243 | { 1244 | "cell_type": "code", 1245 | "execution_count": 26, 1246 | "metadata": {}, 1247 | "outputs": [ 1248 | { 1249 | "data": { 1250 | "text/plain": [ 1251 | "array([[[0.30788845, 0.60569692, 0.74159203, ..., 0.99513856,\n", 1252 | " 0.86615676, 0.65581839],\n", 1253 | " [0.29972906, 0.1727805 , 
0.73877596, ..., 0.57321798,\n", 1254 | " 0.52657155, 0.15148499],\n", 1255 | " [0.91677054, 0.30289045, 0.47086303, ..., 0.91076997,\n", 1256 | " 0.15659756, 0.74502433],\n", 1257 | " ...,\n", 1258 | " [0.16246413, 0.57601666, 0.64519549, ..., 0.04166688,\n", 1259 | " 0.71115738, 0.75984878],\n", 1260 | " [0.99626814, 0.89529207, 0.89520696, ..., 0.927474 ,\n", 1261 | " 0.46998733, 0.809978 ],\n", 1262 | " [0.52545775, 0.42922203, 0.40999633, ..., 0.7497839 ,\n", 1263 | " 0.26582518, 0.68821719]],\n", 1264 | "\n", 1265 | " [[0.93763072, 0.68660253, 0.03060252, ..., 0.08489496,\n", 1266 | " 0.3368953 , 0.0040575 ],\n", 1267 | " [0.17680589, 0.44922269, 0.32552186, ..., 0.49081397,\n", 1268 | " 0.7718607 , 0.91216332],\n", 1269 | " [0.48935017, 0.28293444, 0.57762148, ..., 0.64988995,\n", 1270 | " 0.96036063, 0.62395338],\n", 1271 | " ...,\n", 1272 | " [0.77554755, 0.23174591, 0.80126054, ..., 0.34982511,\n", 1273 | " 0.13648038, 0.63953428],\n", 1274 | " [0.4502637 , 0.74376194, 0.47531237, ..., 0.94077276,\n", 1275 | " 0.64544446, 0.20241967],\n", 1276 | " [0.65158873, 0.93520847, 0.1153165 , ..., 0.92607143,\n", 1277 | " 0.42194542, 0.49231582]],\n", 1278 | "\n", 1279 | " [[0.60652634, 0.55707594, 0.7861307 , ..., 0.49618863,\n", 1280 | " 0.26073645, 0.57230289],\n", 1281 | " [0.33445447, 0.51254754, 0.89760192, ..., 0.20161607,\n", 1282 | " 0.54935607, 0.97355349],\n", 1283 | " [0.82742407, 0.13811956, 0.77549593, ..., 0.97417726,\n", 1284 | " 0.75828111, 0.20726388],\n", 1285 | " ...,\n", 1286 | " [0.89885131, 0.95168761, 0.04908857, ..., 0.26560786,\n", 1287 | " 0.19828306, 0.34056713],\n", 1288 | " [0.37462286, 0.00294645, 0.46417234, ..., 0.98287275,\n", 1289 | " 0.63560479, 0.37498829],\n", 1290 | " [0.80824186, 0.77414402, 0.27137252, ..., 0.97397635,\n", 1291 | " 0.73792667, 0.47235421]],\n", 1292 | "\n", 1293 | " ...,\n", 1294 | "\n", 1295 | " [[0.79534194, 0.19495982, 0.69419483, ..., 0.98484659,\n", 1296 | " 0.07524489, 0.35898295],\n", 1297 | " 
[0.75246125, 0.1448565 , 0.31596133, ..., 0.97989236,\n", 1298 | " 0.66466035, 0.09253075],\n", 1299 | " [0.13218267, 0.24674062, 0.93687433, ..., 0.26530807,\n", 1300 | " 0.64653497, 0.25848279],\n", 1301 | " ...,\n", 1302 | " [0.01839164, 0.4127106 , 0.36428583, ..., 0.97212349,\n", 1303 | " 0.867556 , 0.58971199],\n", 1304 | " [0.49075206, 0.80264193, 0.82420669, ..., 0.13249282,\n", 1305 | " 0.70465219, 0.97575252],\n", 1306 | " [0.2735621 , 0.37780973, 0.19581884, ..., 0.55415141,\n", 1307 | " 0.33630774, 0.62376131]],\n", 1308 | "\n", 1309 | " [[0.95740591, 0.6409855 , 0.29668168, ..., 0.85582114,\n", 1310 | " 0.02653775, 0.07433918],\n", 1311 | " [0.97968508, 0.7192658 , 0.96627464, ..., 0.25708965,\n", 1312 | " 0.60037787, 0.8001345 ],\n", 1313 | " [0.98598865, 0.7660025 , 0.05743886, ..., 0.84864957,\n", 1314 | " 0.5717346 , 0.48107095],\n", 1315 | " ...,\n", 1316 | " [0.04048004, 0.24279597, 0.43556563, ..., 0.74962769,\n", 1317 | " 0.71872639, 0.08429666],\n", 1318 | " [0.09697323, 0.51034331, 0.6199531 , ..., 0.95157892,\n", 1319 | " 0.52082535, 0.36331146],\n", 1320 | " [0.91967882, 0.47842183, 0.55403126, ..., 0.99053768,\n", 1321 | " 0.68606411, 0.4186365 ]],\n", 1322 | "\n", 1323 | " [[0.83101977, 0.7800826 , 0.52552153, ..., 0.45411436,\n", 1324 | " 0.96688267, 0.14787061],\n", 1325 | " [0.76365986, 0.97841123, 0.99583821, ..., 0.96043423,\n", 1326 | " 0.72406206, 0.97100977],\n", 1327 | " [0.92772653, 0.01373546, 0.59448744, ..., 0.64587074,\n", 1328 | " 0.13641851, 0.40625453],\n", 1329 | " ...,\n", 1330 | " [0.24169963, 0.22511255, 0.85599095, ..., 0.75448232,\n", 1331 | " 0.42633244, 0.31373371],\n", 1332 | " [0.28480721, 0.83815003, 0.77828307, ..., 0.52597019,\n", 1333 | " 0.88834579, 0.09847287],\n", 1334 | " [0.32613764, 0.67313394, 0.82862416, ..., 0.87137257,\n", 1335 | " 0.13503096, 0.0888404 ]]])" 1336 | ] 1337 | }, 1338 | "execution_count": 26, 1339 | "metadata": {}, 1340 | "output_type": "execute_result" 1341 | } 1342 | ], 1343 | 
"source": [ 1344 | "data_dictionary['A']" 1345 | ] 1346 | }, 1347 | { 1348 | "cell_type": "code", 1349 | "execution_count": 27, 1350 | "metadata": {}, 1351 | "outputs": [], 1352 | "source": [ 1353 | "A_load = data_dictionary['A']" 1354 | ] 1355 | }, 1356 | { 1357 | "cell_type": "code", 1358 | "execution_count": 28, 1359 | "metadata": {}, 1360 | "outputs": [ 1361 | { 1362 | "data": { 1363 | "text/plain": [ 1364 | "True" 1365 | ] 1366 | }, 1367 | "execution_count": 28, 1368 | "metadata": {}, 1369 | "output_type": "execute_result" 1370 | } 1371 | ], 1372 | "source": [ 1373 | "np.all(A == A_load)" 1374 | ] 1375 | }, 1376 | { 1377 | "cell_type": "code", 1378 | "execution_count": 30, 1379 | "metadata": {}, 1380 | "outputs": [ 1381 | { 1382 | "data": { 1383 | "text/plain": [ 1384 | "numpy.ndarray" 1385 | ] 1386 | }, 1387 | "execution_count": 30, 1388 | "metadata": {}, 1389 | "output_type": "execute_result" 1390 | } 1391 | ], 1392 | "source": [ 1393 | "type(A_load)" 1394 | ] 1395 | } 1396 | ], 1397 | "metadata": { 1398 | "kernelspec": { 1399 | "display_name": "Python 3.7 (NumPy EuroSciPy)", 1400 | "language": "python", 1401 | "name": "numpy-euroscipy" 1402 | }, 1403 | "language_info": { 1404 | "codemirror_mode": { 1405 | "name": "ipython", 1406 | "version": 3 1407 | }, 1408 | "file_extension": ".py", 1409 | "mimetype": "text/x-python", 1410 | "name": "python", 1411 | "nbconvert_exporter": "python", 1412 | "pygments_lexer": "ipython3", 1413 | "version": "3.7.3" 1414 | } 1415 | }, 1416 | "nbformat": 4, 1417 | "nbformat_minor": 4 1418 | } 1419 | -------------------------------------------------------------------------------- /01_numpy_basics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "# What is Numpy" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": { 17 | 
"slideshow": { 18 | "slide_type": "slide" 19 | } 20 | }, 21 | "source": [ 22 | "NumPy is the fundamental package for scientific computing with Python. \n", 23 | "It is a package that provide high-performance vector, matrix and higher-dimensional data structures for Python. \n", 24 | "It is implemented in C and Fortran so when calculations are **vectorized**, performance is very good.\n", 25 | "\n", 26 | "So, in a nutshell:\n", 27 | "\n", 28 | "* a powerful Python extension for N-dimensional array\n", 29 | "* a tool for integrating C/C++ and Fortran code\n", 30 | "* designed for scientific computation: linear algebra and Signal Analysis\n", 31 | "\n", 32 | "If you are a MATLAB® user I do recommend to read [Numpy for MATLAB Users](https://docs.scipy.org/doc/numpy-1.15.0/user/numpy-for-matlab-users.html). \n", 33 | "\n", 34 | "I'm a supporter of the **Open Science Movement**, thus I humbly suggest you to take a look at the [Science Code Manifesto](http://sciencecodemanifesto.org/)" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": { 40 | "slideshow": { 41 | "slide_type": "slide" 42 | } 43 | }, 44 | "source": [ 45 | "# Getting Started with Numpy Arrays" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": { 51 | "slideshow": { 52 | "slide_type": "slide" 53 | } 54 | }, 55 | "source": [ 56 | "NumPy's main object is the **homogeneous** ***multidimensional array***. It is a table of elements (usually numbers), all of the same type. \n", 57 | "\n", 58 | "In Numpy dimensions are called **axes**. \n", 59 | "\n", 60 | "The number of axes is called **rank**. \n", 61 | "\n", 62 | "The most important attributes of an ndarray object are:\n", 63 | "\n", 64 | "* **ndarray.ndim** - the number of axes (dimensions) of the array. \n", 65 | "* **ndarray.shape** - the dimensions of the array. For a matrix with n rows and m columns, shape will be (n,m). \n", 66 | "* **ndarray.size** - the total number of elements of the array. 
\n", 67 | "* **ndarray.dtype** - numpy.int32, numpy.int16, and numpy.float64 are some examples. \n", 68 | "* **ndarray.itemsize** - the size in bytes of elements of the array. For example, elements of type float64 have itemsize 8 (=64/8) " 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": { 74 | "slideshow": { 75 | "slide_type": "slide" 76 | } 77 | }, 78 | "source": [ 79 | "To use `numpy` we need to import the module, for example:" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 2, 85 | "metadata": { 86 | "slideshow": { 87 | "slide_type": "fragment" 88 | } 89 | }, 90 | "outputs": [], 91 | "source": [ 92 | "import numpy as np # naming import convention" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": { 98 | "slideshow": { 99 | "slide_type": "slide" 100 | } 101 | }, 102 | "source": [ 103 | "### Terminology Assumption" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": { 109 | "slideshow": { 110 | "slide_type": "-" 111 | } 112 | }, 113 | "source": [ 114 | "In the `numpy` package the terminology used for vectors, matrices and higher-dimensional data sets is *array*. " 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": { 120 | "slideshow": { 121 | "slide_type": "slide" 122 | } 123 | }, 124 | "source": [ 125 | "### Reference Documentation" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": { 131 | "slideshow": { 132 | "slide_type": "subslide" 133 | } 134 | }, 135 | "source": [ 136 | "* On the web: [http://docs.scipy.org](http://docs.scipy.org)/\n", 137 | "\n", 138 | "* Interactive help:" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": { 145 | "slideshow": { 146 | "slide_type": "fragment" 147 | } 148 | }, 149 | "outputs": [], 150 | "source": [ 151 | "np.array?"
152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": { 157 | "slideshow": { 158 | "slide_type": "subslide" 159 | } 160 | }, 161 | "source": [ 162 | "If you're looking for something specific, use `np.lookfor` to search the NumPy docstrings." 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": { 168 | "slideshow": { 169 | "slide_type": "slide" 170 | } 171 | }, 172 | "source": [ 173 | "# Creating `numpy` arrays\n", 174 | "\n", 175 | "### Get acquainted with NumPy" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": { 181 | "slideshow": { 182 | "slide_type": "subslide" 183 | } 184 | }, 185 | "source": [ 186 | "Let's start by creating some `numpy.array` objects in order to get our hands into the very details of **numpy basic data structure**.\n", 187 | "\n", 188 | "NumPy is a very flexible library, and provides many ways to create (and initialize) new numpy arrays. \n", 189 | "\n", 190 | "One way is **using specific functions dedicated to generate numpy arrays** \n", 191 | "(usually, *array of numbers*)\\[+\\]\n", 192 | "\n", 193 | "\n", 194 | "\n", 195 | "\\[+\\] More on data types, later on !-)\n", 196 | "\n" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": { 202 | "slideshow": { 203 | "slide_type": "slide" 204 | } 205 | }, 206 | "source": [ 207 | "# First `numpy array` example: array of numbers" 208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "metadata": { 213 | "slideshow": { 214 | "slide_type": "subslide" 215 | } 216 | }, 217 | "source": [ 218 | "NumPy provides many functions to generate arrays with specific properties (e.g. `size` or `shape`).\n", 219 | "\n", 220 | "We will see later examples in which we will generate `ndarray` using explicit Python lists. \n", 221 | "\n", 222 | "However, for larger arrays, using Python lists is simply impractical. 
" 223 | ] 224 | }, 225 | { 226 | "cell_type": "markdown", 227 | "metadata": { 228 | "slideshow": { 229 | "slide_type": "subslide" 230 | } 231 | }, 232 | "source": [ 233 | "### `np.arange`" 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": {}, 239 | "source": [ 240 | "In standard Python, we use the `range` function to generate an **iterable** object of **integers** within a specific range (at a specified `step`, default: `1`)" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 3, 246 | "metadata": {}, 247 | "outputs": [ 248 | { 249 | "name": "stdout", 250 | "output_type": "stream", 251 | "text": [ 252 | "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n", 253 | "\n" 254 | ] 255 | } 256 | ], 257 | "source": [ 258 | "r = range(10)\n", 259 | "print(list(r))\n", 260 | "\n", 261 | "print(type(r)) # NOTE: if this print will return a it means you're using Py2.7" 262 | ] 263 | }, 264 | { 265 | "cell_type": "markdown", 266 | "metadata": {}, 267 | "source": [ 268 | "Similarly, in numpy there is the `arange` function which instead generates a `numpy.ndarray`" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": 4, 274 | "metadata": { 275 | "slideshow": { 276 | "slide_type": "subslide" 277 | } 278 | }, 279 | "outputs": [ 280 | { 281 | "name": "stdout", 282 | "output_type": "stream", 283 | "text": [ 284 | "[0 1 2 3 4 5 6 7 8 9]\n", 285 | "\n" 286 | ] 287 | } 288 | ], 289 | "source": [ 290 | "ra = np.arange(10) \n", 291 | "print(ra)\n", 292 | "\n", 293 | "print(type(ra))" 294 | ] 295 | }, 296 | { 297 | "cell_type": "markdown", 298 | "metadata": {}, 299 | "source": [ 300 | "However, we are working with the **Numerical Python** library, so we should expect more when it comes to numbers.\n", 301 | "\n", 302 | "In fact, we can create an array within a _floating point step-wise range_:" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": 5, 308 | "metadata": { 309 | "slideshow": { 310 | "slide_type": 
"fragment" 311 | } 312 | }, 313 | "outputs": [ 314 | { 315 | "name": "stdout", 316 | "output_type": "stream", 317 | "text": [ 318 | "[-1.00000000e+00 -9.00000000e-01 -8.00000000e-01 -7.00000000e-01\n", 319 | " -6.00000000e-01 -5.00000000e-01 -4.00000000e-01 -3.00000000e-01\n", 320 | " -2.00000000e-01 -1.00000000e-01 -2.22044605e-16 1.00000000e-01\n", 321 | " 2.00000000e-01 3.00000000e-01 4.00000000e-01 5.00000000e-01\n", 322 | " 6.00000000e-01 7.00000000e-01 8.00000000e-01 9.00000000e-01]\n" 323 | ] 324 | } 325 | ], 326 | "source": [ 327 | "# floating point step-wise range generatation\n", 328 | "raf = np.arange(-1, 1, 0.1) \n", 329 | "print(raf)" 330 | ] 331 | }, 332 | { 333 | "cell_type": "markdown", 334 | "metadata": {}, 335 | "source": [ 336 | "### Properties of `numpy array`" 337 | ] 338 | }, 339 | { 340 | "cell_type": "markdown", 341 | "metadata": {}, 342 | "source": [ 343 | "Apart from the actual content, which is of course different because specified ranges are different, the `ra` and `raf` arrays differ by their **`dtype`**:" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": 6, 349 | "metadata": {}, 350 | "outputs": [ 351 | { 352 | "name": "stdout", 353 | "output_type": "stream", 354 | "text": [ 355 | "dtype of 'ra': int64, dtype of 'raf': float64\n" 356 | ] 357 | } 358 | ], 359 | "source": [ 360 | "print(f\"dtype of 'ra': {ra.dtype}, dtype of 'raf': {raf.dtype}\")" 361 | ] 362 | }, 363 | { 364 | "cell_type": "markdown", 365 | "metadata": { 366 | "slideshow": { 367 | "slide_type": "subslide" 368 | } 369 | }, 370 | "source": [ 371 | "#### More properties of the `numpy array`" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": 7, 377 | "metadata": { 378 | "slideshow": { 379 | "slide_type": "fragment" 380 | } 381 | }, 382 | "outputs": [ 383 | { 384 | "data": { 385 | "text/plain": [ 386 | "8" 387 | ] 388 | }, 389 | "execution_count": 7, 390 | "metadata": {}, 391 | "output_type": "execute_result" 392 | } 393 
| ], 394 | "source": [ 395 | "ra.itemsize # bytes per element" 396 | ] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "execution_count": 8, 401 | "metadata": { 402 | "slideshow": { 403 | "slide_type": "fragment" 404 | } 405 | }, 406 | "outputs": [ 407 | { 408 | "data": { 409 | "text/plain": [ 410 | "80" 411 | ] 412 | }, 413 | "execution_count": 8, 414 | "metadata": {}, 415 | "output_type": "execute_result" 416 | } 417 | ], 418 | "source": [ 419 | "ra.nbytes # number of bytes" 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": 9, 425 | "metadata": { 426 | "slideshow": { 427 | "slide_type": "fragment" 428 | } 429 | }, 430 | "outputs": [ 431 | { 432 | "data": { 433 | "text/plain": [ 434 | "1" 435 | ] 436 | }, 437 | "execution_count": 9, 438 | "metadata": {}, 439 | "output_type": "execute_result" 440 | } 441 | ], 442 | "source": [ 443 | "ra.ndim # number of dimensions" 444 | ] 445 | }, 446 | { 447 | "cell_type": "code", 448 | "execution_count": 10, 449 | "metadata": {}, 450 | "outputs": [ 451 | { 452 | "data": { 453 | "text/plain": [ 454 | "(10,)" 455 | ] 456 | }, 457 | "execution_count": 10, 458 | "metadata": {}, 459 | "output_type": "execute_result" 460 | } 461 | ], 462 | "source": [ 463 | "ra.shape # shape, i.e. number of elements per-dimension/axis" 464 | ] 465 | }, 466 | { 467 | "cell_type": "code", 468 | "execution_count": null, 469 | "metadata": {}, 470 | "outputs": [], 471 | "source": [ 472 | "## please replicate the same set of operations here for `raf`\n" 473 | ] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": null, 478 | "metadata": {}, 479 | "outputs": [], 480 | "source": [ 481 | "# your code here" 482 | ] 483 | }, 484 | { 485 | "cell_type": "markdown", 486 | "metadata": {}, 487 | "source": [ 488 | "**Q**: Do you notice any relevant difference?" 
489 | ] 490 | }, 491 | { 492 | "cell_type": "markdown", 493 | "metadata": { 494 | "slideshow": { 495 | "slide_type": "subslide" 496 | } 497 | }, 498 | "source": [ 499 | "### `np.linspace` and `np.logspace`" 500 | ] 501 | }, 502 | { 503 | "cell_type": "markdown", 504 | "metadata": {}, 505 | "source": [ 506 | "Like `np.arange`, in numpy there are other two \"similar\" functions: \n", 507 | "\n", 508 | "- np.linspace\n", 509 | "- np.logspace\n", 510 | "\n", 511 | "Looking at the examples below, can you spot the difference?" 512 | ] 513 | }, 514 | { 515 | "cell_type": "code", 516 | "execution_count": 11, 517 | "metadata": { 518 | "slideshow": { 519 | "slide_type": "subslide" 520 | } 521 | }, 522 | "outputs": [ 523 | { 524 | "data": { 525 | "text/plain": [ 526 | "array([ 0. , 0.52631579, 1.05263158, 1.57894737, 2.10526316,\n", 527 | " 2.63157895, 3.15789474, 3.68421053, 4.21052632, 4.73684211,\n", 528 | " 5.26315789, 5.78947368, 6.31578947, 6.84210526, 7.36842105,\n", 529 | " 7.89473684, 8.42105263, 8.94736842, 9.47368421, 10. 
])" 530 | ] 531 | }, 532 | "execution_count": 11, 533 | "metadata": {}, 534 | "output_type": "execute_result" 535 | } 536 | ], 537 | "source": [ 538 | "np.linspace(0, 10, 20)" 539 | ] 540 | }, 541 | { 542 | "cell_type": "code", 543 | "execution_count": 12, 544 | "metadata": { 545 | "slideshow": { 546 | "slide_type": "fragment" 547 | } 548 | }, 549 | "outputs": [ 550 | { 551 | "data": { 552 | "text/plain": [ 553 | "array([1.00000000e+00, 2.27278564e+00, 5.16555456e+00, 1.17401982e+01,\n", 554 | " 2.66829540e+01, 6.06446346e+01, 1.37832255e+02, 3.13263169e+02,\n", 555 | " 7.11980032e+02, 1.61817799e+03])" 556 | ] 557 | }, 558 | "execution_count": 12, 559 | "metadata": {}, 560 | "output_type": "execute_result" 561 | } 562 | ], 563 | "source": [ 564 | "np.logspace(0, np.e**2, 10, base=np.e)" 565 | ] 566 | }, 567 | { 568 | "cell_type": "markdown", 569 | "metadata": { 570 | "slideshow": { 571 | "slide_type": "subslide" 572 | } 573 | }, 574 | "source": [ 575 | "## Random Number Generation\n", 576 | "\n", 577 | "### `np.random.rand` & `np.random.randn`" 578 | ] 579 | }, 580 | { 581 | "cell_type": "code", 582 | "execution_count": 13, 583 | "metadata": { 584 | "slideshow": { 585 | "slide_type": "subslide" 586 | } 587 | }, 588 | "outputs": [], 589 | "source": [ 590 | "# uniform random numbers in [0,1]\n", 591 | "ru = np.random.rand(10)" 592 | ] 593 | }, 594 | { 595 | "cell_type": "code", 596 | "execution_count": 14, 597 | "metadata": {}, 598 | "outputs": [ 599 | { 600 | "data": { 601 | "text/plain": [ 602 | "array([0.06629061, 0.56102955, 0.81081042, 0.80936217, 0.19182628,\n", 603 | " 0.78609316, 0.88379009, 0.45329187, 0.84304588, 0.56232631])" 604 | ] 605 | }, 606 | "execution_count": 14, 607 | "metadata": {}, 608 | "output_type": "execute_result" 609 | } 610 | ], 611 | "source": [ 612 | "ru" 613 | ] 614 | }, 615 | { 616 | "cell_type": "markdown", 617 | "metadata": {}, 618 | "source": [ 619 | "_Note: numbers and the content of the array may vary_" 620 | ] 621 | }, 622 | { 
623 | "cell_type": "code", 624 | "execution_count": 15, 625 | "metadata": { 626 | "slideshow": { 627 | "slide_type": "subslide" 628 | } 629 | }, 630 | "outputs": [], 631 | "source": [ 632 | "# standard normal distributed random numbers\n", 633 | "rs = np.random.randn(10)" 634 | ] 635 | }, 636 | { 637 | "cell_type": "code", 638 | "execution_count": 16, 639 | "metadata": {}, 640 | "outputs": [ 641 | { 642 | "data": { 643 | "text/plain": [ 644 | "array([ 0.45052791, -0.80566857, -0.10401981, 0.91948746, -0.0329787 ,\n", 645 | " -0.71872119, 1.42738938, -0.63292836, 0.5397375 , 0.89186053])" 646 | ] 647 | }, 648 | "execution_count": 16, 649 | "metadata": {}, 650 | "output_type": "execute_result" 651 | } 652 | ], 653 | "source": [ 654 | "rs" 655 | ] 656 | }, 657 | { 658 | "cell_type": "markdown", 659 | "metadata": {}, 660 | "source": [ 661 | "_Note: numbers and the content of the array may vary_" 662 | ] 663 | }, 664 | { 665 | "cell_type": "markdown", 666 | "metadata": {}, 667 | "source": [ 668 | "**Q**: What if I ask you to generate random numbers in a way that we both obtain the __very same__ numbers? (_Provided we share the same CPU architecture_)" 669 | ] 670 | }, 671 | { 672 | "cell_type": "markdown", 673 | "metadata": { 674 | "slideshow": { 675 | "slide_type": "subslide" 676 | } 677 | }, 678 | "source": [ 679 | "## Zeros and Ones (or Empty)\n", 680 | "\n", 681 | "### `np.zeros`, `np.ones`, `np.empty`\n", 682 | "\n", 683 | "Sometimes it may be required to initialise arrays of `zeros`, or of all `ones` or finally just `rubbish` (i.e. `empty`) of a specific shape:" 684 | ] 685 | }, 686 | { 687 | "cell_type": "code", 688 | "execution_count": 17, 689 | "metadata": { 690 | "slideshow": { 691 | "slide_type": "fragment" 692 | } 693 | }, 694 | "outputs": [ 695 | { 696 | "name": "stdout", 697 | "output_type": "stream", 698 | "text": [ 699 | "[[0. 0. 0.]\n", 700 | " [0. 0. 0.]\n", 701 | " [0. 0. 
0.]]\n" 702 | ] 703 | } 704 | ], 705 | "source": [ 706 | "Z = np.zeros((3,3))\n", 707 | "\n", 708 | "print(Z)" 709 | ] 710 | }, 711 | { 712 | "cell_type": "code", 713 | "execution_count": 18, 714 | "metadata": { 715 | "slideshow": { 716 | "slide_type": "subslide" 717 | } 718 | }, 719 | "outputs": [ 720 | { 721 | "name": "stdout", 722 | "output_type": "stream", 723 | "text": [ 724 | "[[1. 1. 1.]\n", 725 | " [1. 1. 1.]\n", 726 | " [1. 1. 1.]]\n" 727 | ] 728 | } 729 | ], 730 | "source": [ 731 | "O = np.ones((3, 3))\n", 732 | "print(O)" 733 | ] 734 | }, 735 | { 736 | "cell_type": "code", 737 | "execution_count": 19, 738 | "metadata": {}, 739 | "outputs": [ 740 | { 741 | "name": "stdout", 742 | "output_type": "stream", 743 | "text": [ 744 | "[0.45052791 0.80566857 0.10401981 0.91948746 0.0329787 0.71872119\n", 745 | " 1.42738938 0.63292836 0.5397375 0.89186053]\n" 746 | ] 747 | } 748 | ], 749 | "source": [ 750 | "E = np.empty(10)\n", 751 | "\n", 752 | "print(E)" 753 | ] 754 | }, 755 | { 756 | "cell_type": "code", 757 | "execution_count": null, 758 | "metadata": {}, 759 | "outputs": [], 760 | "source": [ 761 | "# TRY THIS!\n", 762 | "\n", 763 | "np.empty(9)" 764 | ] 765 | }, 766 | { 767 | "cell_type": "markdown", 768 | "metadata": { 769 | "slideshow": { 770 | "slide_type": "subslide" 771 | } 772 | }, 773 | "source": [ 774 | "# Other specialised Functions\n", 775 | "\n", 776 | "## Diagonal Matrices\n", 777 | "\n", 778 | "### 1. 
`np.diag`" 779 | ] 780 | }, 781 | { 782 | "cell_type": "code", 783 | "execution_count": 20, 784 | "metadata": { 785 | "slideshow": { 786 | "slide_type": "fragment" 787 | } 788 | }, 789 | "outputs": [ 790 | { 791 | "data": { 792 | "text/plain": [ 793 | "array([[1, 0, 0],\n", 794 | " [0, 2, 0],\n", 795 | " [0, 0, 3]])" 796 | ] 797 | }, 798 | "execution_count": 20, 799 | "metadata": {}, 800 | "output_type": "execute_result" 801 | } 802 | ], 803 | "source": [ 804 | "# a diagonal matrix\n", 805 | "np.diag([1,2,3])" 806 | ] 807 | }, 808 | { 809 | "cell_type": "code", 810 | "execution_count": 21, 811 | "metadata": { 812 | "slideshow": { 813 | "slide_type": "fragment" 814 | } 815 | }, 816 | "outputs": [ 817 | { 818 | "data": { 819 | "text/plain": [ 820 | "array([[0, 0, 3, 0],\n", 821 | " [0, 2, 0, 0],\n", 822 | " [1, 0, 0, 0],\n", 823 | " [0, 0, 0, 0]])" 824 | ] 825 | }, 826 | "execution_count": 21, 827 | "metadata": {}, 828 | "output_type": "execute_result" 829 | } 830 | ], 831 | "source": [ 832 | "# diagonal with offset from the main diagonal\n", 833 | "np.diag([1,2,3], k=1)" 834 | ] 835 | }, 836 | { 837 | "cell_type": "markdown", 838 | "metadata": { 839 | "slideshow": { 840 | "slide_type": "subslide" 841 | } 842 | }, 843 | "source": [ 844 | "### Identity Matrix $\\mathrm{I} \\mapsto$ `np.eye`" 845 | ] 846 | }, 847 | { 848 | "cell_type": "code", 849 | "execution_count": 22, 850 | "metadata": { 851 | "slideshow": { 852 | "slide_type": "fragment" 853 | } 854 | }, 855 | "outputs": [ 856 | { 857 | "data": { 858 | "text/plain": [ 859 | "array([[1, 0, 0],\n", 860 | " [0, 1, 0],\n", 861 | " [0, 0, 1]])" 862 | ] 863 | }, 864 | "execution_count": 22, 865 | "metadata": {}, 866 | "output_type": "execute_result" 867 | } 868 | ], 869 | "source": [ 870 | "# a diagonal matrix with ones on the main diagonal\n", 871 | "np.eye(3, dtype='int') # 3 is the " 872 | ] 873 | }, 874 | { 875 | "cell_type": "markdown", 876 | "metadata": {}, 877 | "source": [ 878 | "---" 879 | ] 880 | }, 881 | { 
882 | "cell_type": "markdown", 883 | "metadata": { 884 | "slideshow": { 885 | "slide_type": "subslide" 886 | } 887 | }, 888 | "source": [ 889 | "# Create `numpy.ndarray` from `list`" 890 | ] 891 | }, 892 | { 893 | "cell_type": "markdown", 894 | "metadata": { 895 | "slideshow": { 896 | "slide_type": "fragment" 897 | } 898 | }, 899 | "source": [ 900 | "To create new vector or matrix arrays from Python lists we can use the \n", 901 | "`numpy.array` constructor function:" 902 | ] 903 | }, 904 | { 905 | "cell_type": "code", 906 | "execution_count": 23, 907 | "metadata": { 908 | "slideshow": { 909 | "slide_type": "fragment" 910 | } 911 | }, 912 | "outputs": [ 913 | { 914 | "data": { 915 | "text/plain": [ 916 | "array([1, 2, 3, 4])" 917 | ] 918 | }, 919 | "execution_count": 23, 920 | "metadata": {}, 921 | "output_type": "execute_result" 922 | } 923 | ], 924 | "source": [ 925 | "v = np.array([1,2,3,4])\n", 926 | "v" 927 | ] 928 | }, 929 | { 930 | "cell_type": "code", 931 | "execution_count": 24, 932 | "metadata": {}, 933 | "outputs": [ 934 | { 935 | "name": "stdout", 936 | "output_type": "stream", 937 | "text": [ 938 | "\n" 939 | ] 940 | } 941 | ], 942 | "source": [ 943 | "print(type(v))" 944 | ] 945 | }, 946 | { 947 | "cell_type": "markdown", 948 | "metadata": {}, 949 | "source": [ 950 | "**Alternatively** there is also the `np.asarray` function which easily convert a Python list into a numpy array:\n", 951 | "\n" 952 | ] 953 | }, 954 | { 955 | "cell_type": "code", 956 | "execution_count": 25, 957 | "metadata": {}, 958 | "outputs": [ 959 | { 960 | "data": { 961 | "text/plain": [ 962 | "array([1, 2, 3, 4])" 963 | ] 964 | }, 965 | "execution_count": 25, 966 | "metadata": {}, 967 | "output_type": "execute_result" 968 | } 969 | ], 970 | "source": [ 971 | "v = np.asarray([1, 2, 3, 4])\n", 972 | "v" 973 | ] 974 | }, 975 | { 976 | "cell_type": "code", 977 | "execution_count": 26, 978 | "metadata": {}, 979 | "outputs": [ 980 | { 981 | "name": "stdout", 982 | "output_type": 
"stream", 983 | "text": [ 984 | "<class 'numpy.ndarray'>\n" 985 | ] 986 | } 987 | ], 988 | "source": [ 989 | "print(type(v))" 990 | ] 991 | }, 992 | { 993 | "cell_type": "markdown", 994 | "metadata": {}, 995 | "source": [ 996 | "We can use the very same strategy for higher-dimensional arrays.\n", 997 | "\n", 998 | "E.g. Let's create a matrix from a list of lists:" 999 | ] 1000 | }, 1001 | { 1002 | "cell_type": "code", 1003 | "execution_count": 27, 1004 | "metadata": { 1005 | "slideshow": { 1006 | "slide_type": "fragment" 1007 | } 1008 | }, 1009 | "outputs": [ 1010 | { 1011 | "data": { 1012 | "text/plain": [ 1013 | "array([[1, 2],\n", 1014 | "       [3, 4]])" 1015 | ] 1016 | }, 1017 | "execution_count": 27, 1018 | "metadata": {}, 1019 | "output_type": "execute_result" 1020 | } 1021 | ], 1022 | "source": [ 1023 | "M = np.array([[1, 2], [3, 4]])\n", 1024 | "M" 1025 | ] 1026 | }, 1027 | { 1028 | "cell_type": "code", 1029 | "execution_count": 28, 1030 | "metadata": { 1031 | "slideshow": { 1032 | "slide_type": "fragment" 1033 | } 1034 | }, 1035 | "outputs": [ 1036 | { 1037 | "data": { 1038 | "text/plain": [ 1039 | "((4,), (2, 2))" 1040 | ] 1041 | }, 1042 | "execution_count": 28, 1043 | "metadata": {}, 1044 | "output_type": "execute_result" 1045 | } 1046 | ], 1047 | "source": [ 1048 | "v.shape, M.shape" 1049 | ] 1050 | }, 1051 | { 1052 | "cell_type": "markdown", 1053 | "metadata": { 1054 | "slideshow": { 1055 | "slide_type": "slide" 1056 | } 1057 | }, 1058 | "source": [ 1059 | "## So, why is it useful then?" 1060 | ] 1061 | }, 1062 | { 1063 | "cell_type": "markdown", 1064 | "metadata": { 1065 | "slideshow": { 1066 | "slide_type": "subslide" 1067 | } 1068 | }, 1069 | "source": [ 1070 | "So far the `numpy.ndarray` looks awfully much like a Python **list** (or **nested list**). 
\n", 1071 | "\n", 1072 | "*Why not simply use Python lists for computations instead of creating a new array type?*" 1073 | ] 1074 | }, 1075 | { 1076 | "cell_type": "markdown", 1077 | "metadata": { 1078 | "slideshow": { 1079 | "slide_type": "subslide" 1080 | } 1081 | }, 1082 | "source": [ 1083 | "There are several reasons:\n", 1084 | "\n", 1085 | "* Python lists are very general. \n", 1086 | " - They can contain any kind of object. \n", 1087 | " - They are dynamically typed. \n", 1088 | " - They do not support mathematical functions such as matrix and dot multiplications, etc. \n", 1089 | " - Implementing such functions for Python lists would not be very efficient because of the dynamic typing.\n", 1090 | " \n", 1091 | " \n", 1092 | "* Numpy arrays are **statically typed** and **homogeneous**. \n", 1093 | " - The type of the elements is determined when array is created.\n", 1094 | " \n", 1095 | " \n", 1096 | "* Numpy arrays are memory efficient.\n", 1097 | " - Because of the static typing, fast implementation of mathematical functions such as multiplication and addition of `numpy` arrays can be implemented in a compiled language (C and Fortran is used)." 1098 | ] 1099 | }, 1100 | { 1101 | "cell_type": "code", 1102 | "execution_count": 29, 1103 | "metadata": { 1104 | "slideshow": { 1105 | "slide_type": "subslide" 1106 | } 1107 | }, 1108 | "outputs": [], 1109 | "source": [ 1110 | "L = range(100000)" 1111 | ] 1112 | }, 1113 | { 1114 | "cell_type": "code", 1115 | "execution_count": 30, 1116 | "metadata": { 1117 | "slideshow": { 1118 | "slide_type": "fragment" 1119 | } 1120 | }, 1121 | "outputs": [ 1122 | { 1123 | "name": "stdout", 1124 | "output_type": "stream", 1125 | "text": [ 1126 | "41.7 ms ± 14.3 ms per loop (mean ± std. dev. 
of 7 runs, 10 loops each)\n" 1127 | ] 1128 | } 1129 | ], 1130 | "source": [ 1131 | "%timeit [i**2 for i in L]" 1132 | ] 1133 | }, 1134 | { 1135 | "cell_type": "code", 1136 | "execution_count": 31, 1137 | "metadata": { 1138 | "slideshow": { 1139 | "slide_type": "fragment" 1140 | } 1141 | }, 1142 | "outputs": [], 1143 | "source": [ 1144 | "a = np.arange(100000)" 1145 | ] 1146 | }, 1147 | { 1148 | "cell_type": "code", 1149 | "execution_count": 32, 1150 | "metadata": { 1151 | "slideshow": { 1152 | "slide_type": "fragment" 1153 | } 1154 | }, 1155 | "outputs": [ 1156 | { 1157 | "name": "stdout", 1158 | "output_type": "stream", 1159 | "text": [ 1160 | "92.9 µs ± 10.1 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n" 1161 | ] 1162 | } 1163 | ], 1164 | "source": [ 1165 | "%timeit a**2 # This operation is called Broadcasting - more on this later!" 1166 | ] 1167 | }, 1168 | { 1169 | "cell_type": "code", 1170 | "execution_count": 33, 1171 | "metadata": {}, 1172 | "outputs": [ 1173 | { 1174 | "name": "stdout", 1175 | "output_type": "stream", 1176 | "text": [ 1177 | "48.4 ms ± 18.7 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" 1178 | ] 1179 | } 1180 | ], 1181 | "source": [ 1182 | "%timeit [element**2 for element in a]" 1183 | ] 1184 | }, 1185 | { 1186 | "cell_type": "markdown", 1187 | "metadata": {}, 1188 | "source": [ 1189 | "---" 1190 | ] 1191 | }, 1192 | { 1193 | "cell_type": "markdown", 1194 | "metadata": { 1195 | "slideshow": { 1196 | "slide_type": "slide" 1197 | } 1198 | }, 1199 | "source": [ 1200 | "## Exercises: DIY" 1201 | ] 1202 | }, 1203 | { 1204 | "cell_type": "markdown", 1205 | "metadata": { 1206 | "slideshow": { 1207 | "slide_type": "-" 1208 | } 1209 | }, 1210 | "source": [ 1211 | "### Simple arrays" 1212 | ] 1213 | }, 1214 | { 1215 | "cell_type": "markdown", 1216 | "metadata": { 1217 | "slideshow": { 1218 | "slide_type": "-" 1219 | } 1220 | }, 1221 | "source": [ 1222 | "* Create simple one and two dimensional arrays. 
First, redo the examples\n", 1223 | "from above. And then create your own.\n", 1224 | "\n", 1225 | "* Use the functions `len`, `shape` and `ndim` on some of those arrays and\n", 1226 | "observe their output." 1227 | ] 1228 | }, 1229 | { 1230 | "cell_type": "code", 1231 | "execution_count": null, 1232 | "metadata": { 1233 | "slideshow": { 1234 | "slide_type": "skip" 1235 | } 1236 | }, 1237 | "outputs": [], 1238 | "source": [] 1239 | }, 1240 | { 1241 | "cell_type": "markdown", 1242 | "metadata": { 1243 | "slideshow": { 1244 | "slide_type": "subslide" 1245 | } 1246 | }, 1247 | "source": [ 1248 | "### Creating arrays using functions" 1249 | ] 1250 | }, 1251 | { 1252 | "cell_type": "markdown", 1253 | "metadata": { 1254 | "slideshow": { 1255 | "slide_type": "fragment" 1256 | } 1257 | }, 1258 | "source": [ 1259 | "* Experiment with `arange`, `linspace`, `ones`, `zeros`, `eye` and `diag`.\n", 1260 | "\n", 1261 | "* Create different kinds of arrays with random numbers.\n", 1262 | "\n", 1263 | "* Try setting the seed before creating an array with random values \n", 1264 | " - *hint*: use `np.random.seed`\n" 1265 | ] 1266 | }, 1267 | { 1268 | "cell_type": "code", 1269 | "execution_count": null, 1270 | "metadata": { 1271 | "collapsed": true, 1272 | "jupyter": { 1273 | "outputs_hidden": true 1274 | }, 1275 | "slideshow": { 1276 | "slide_type": "skip" 1277 | } 1278 | }, 1279 | "outputs": [], 1280 | "source": [] 1281 | }, 1282 | { 1283 | "cell_type": "markdown", 1284 | "metadata": {}, 1285 | "source": [ 1286 | "---" 1287 | ] 1288 | }, 1289 | { 1290 | "cell_type": "markdown", 1291 | "metadata": { 1292 | "slideshow": { 1293 | "slide_type": "slide" 1294 | } 1295 | }, 1296 | "source": [ 1297 | "## Numpy Array Object" 1298 | ] 1299 | }, 1300 | { 1301 | "cell_type": "markdown", 1302 | "metadata": { 1303 | "slideshow": { 1304 | "slide_type": "subslide" 1305 | } 1306 | }, 1307 | "source": [ 1308 | "`NumPy` has a multidimensional array object called ndarray. 
It consists of two parts as follows:\n", 1309 | " \n", 1310 | " * The actual data\n", 1311 | " * Some metadata describing the data\n", 1312 | " \n", 1313 | " \n", 1314 | "The majority of array operations leave the raw data untouched. The only aspect that changes is the metadata." 1315 | ] 1316 | }, 1317 | { 1318 | "cell_type": "markdown", 1319 | "metadata": { 1320 | "slideshow": { 1321 | "slide_type": "subslide" 1322 | } 1323 | }, 1324 | "source": [ 1325 | "" 1326 | ] 1327 | }, 1328 | { 1329 | "cell_type": "markdown", 1330 | "metadata": {}, 1331 | "source": [ 1332 | "## Data vs Metadata (Attributes)" 1333 | ] 1334 | }, 1335 | { 1336 | "cell_type": "markdown", 1337 | "metadata": { 1338 | "slideshow": { 1339 | "slide_type": "subslide" 1340 | } 1341 | }, 1342 | "source": [ 1343 | "This internal separation between actual data (i.e. the content of the array --> the `memory`) and metadata (i.e. properties and attributes of the data), allows for example for an efficient memory management.\n", 1344 | "\n", 1345 | "For example, the shape of an Numpy array **can be modified without copying and/or affecting** the actual data, which makes it a fast operation even for large arrays." 
1346 | ] 1347 | }, 1348 | { 1349 | "cell_type": "code", 1350 | "execution_count": 34, 1351 | "metadata": { 1352 | "collapsed": false, 1353 | "jupyter": { 1354 | "outputs_hidden": false 1355 | }, 1356 | "slideshow": { 1357 | "slide_type": "fragment" 1358 | } 1359 | }, 1360 | "outputs": [ 1361 | { 1362 | "data": { 1363 | "text/plain": [ 1364 | "array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", 1365 | " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,\n", 1366 | " 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44])" 1367 | ] 1368 | }, 1369 | "execution_count": 34, 1370 | "metadata": {}, 1371 | "output_type": "execute_result" 1372 | } 1373 | ], 1374 | "source": [ 1375 | "a = np.arange(45)\n", 1376 | "\n", 1377 | "a" 1378 | ] 1379 | }, 1380 | { 1381 | "cell_type": "code", 1382 | "execution_count": 35, 1383 | "metadata": {}, 1384 | "outputs": [ 1385 | { 1386 | "data": { 1387 | "text/plain": [ 1388 | "(45,)" 1389 | ] 1390 | }, 1391 | "execution_count": 35, 1392 | "metadata": {}, 1393 | "output_type": "execute_result" 1394 | } 1395 | ], 1396 | "source": [ 1397 | "a.shape" 1398 | ] 1399 | }, 1400 | { 1401 | "cell_type": "code", 1402 | "execution_count": 36, 1403 | "metadata": {}, 1404 | "outputs": [ 1405 | { 1406 | "data": { 1407 | "text/plain": [ 1408 | "array([[ 0, 1, 2, 3, 4],\n", 1409 | " [ 5, 6, 7, 8, 9],\n", 1410 | " [10, 11, 12, 13, 14],\n", 1411 | " [15, 16, 17, 18, 19],\n", 1412 | " [20, 21, 22, 23, 24],\n", 1413 | " [25, 26, 27, 28, 29],\n", 1414 | " [30, 31, 32, 33, 34],\n", 1415 | " [35, 36, 37, 38, 39],\n", 1416 | " [40, 41, 42, 43, 44]])" 1417 | ] 1418 | }, 1419 | "execution_count": 36, 1420 | "metadata": {}, 1421 | "output_type": "execute_result" 1422 | } 1423 | ], 1424 | "source": [ 1425 | "A = a.reshape(9, 5)\n", 1426 | "\n", 1427 | "A" 1428 | ] 1429 | }, 1430 | { 1431 | "cell_type": "code", 1432 | "execution_count": 37, 1433 | "metadata": { 1434 | "collapsed": false, 1435 | "jupyter": { 1436 | "outputs_hidden": false 1437 | 
}, 1438 | "slideshow": { 1439 | "slide_type": "subslide" 1440 | } 1441 | }, 1442 | "outputs": [], 1443 | "source": [ 1444 | "n, m = A.shape" 1445 | ] 1446 | }, 1447 | { 1448 | "cell_type": "code", 1449 | "execution_count": 38, 1450 | "metadata": { 1451 | "collapsed": false, 1452 | "jupyter": { 1453 | "outputs_hidden": false 1454 | }, 1455 | "slideshow": { 1456 | "slide_type": "fragment" 1457 | } 1458 | }, 1459 | "outputs": [ 1460 | { 1461 | "data": { 1462 | "text/plain": [ 1463 | "array([[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,\n", 1464 | " 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,\n", 1465 | " 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44]])" 1466 | ] 1467 | }, 1468 | "execution_count": 38, 1469 | "metadata": {}, 1470 | "output_type": "execute_result" 1471 | } 1472 | ], 1473 | "source": [ 1474 | "B = A.reshape((1,n*m))\n", 1475 | "B" 1476 | ] 1477 | }, 1478 | { 1479 | "cell_type": "markdown", 1480 | "metadata": {}, 1481 | "source": [ 1482 | "**Q**: What is the difference (in terms of shape) between `B` and the original `a`?" 1483 | ] 1484 | }, 1485 | { 1486 | "cell_type": "markdown", 1487 | "metadata": { 1488 | "slideshow": { 1489 | "slide_type": "slide" 1490 | } 1491 | }, 1492 | "source": [ 1493 | "### Flattening\n", 1494 | "\n", 1495 | "Another (quite common) reshaping operation you will end up performing on n-dimensional arrays is **flattening**.\n", 1496 | "\n", 1497 | "Flattening means _collapsing all the axis into a unique one_" 1498 | ] 1499 | }, 1500 | { 1501 | "cell_type": "markdown", 1502 | "metadata": { 1503 | "slideshow": { 1504 | "slide_type": "subslide" 1505 | } 1506 | }, 1507 | "source": [ 1508 | "### `np.ravel`\n", 1509 | "\n", 1510 | "`numpy.ndarray` objects have a `ravel` method that generates a new version of the array as a `1D` vector. \n", 1511 | "\n", 1512 | "Also this time, the original memory is unaffected, and a pointer with different metadata is returned." 
1513 | ] 1514 | }, 1515 | { 1516 | "cell_type": "code", 1517 | "execution_count": 39, 1518 | "metadata": { 1519 | "collapsed": false, 1520 | "jupyter": { 1521 | "outputs_hidden": false 1522 | }, 1523 | "slideshow": { 1524 | "slide_type": "fragment" 1525 | } 1526 | }, 1527 | "outputs": [ 1528 | { 1529 | "data": { 1530 | "text/plain": [ 1531 | "array([1, 2, 3, 4, 5, 6])" 1532 | ] 1533 | }, 1534 | "execution_count": 39, 1535 | "metadata": {}, 1536 | "output_type": "execute_result" 1537 | } 1538 | ], 1539 | "source": [ 1540 | "A = np.array([[1, 2, 3], [4, 5, 6]])\n", 1541 | "A.ravel()" 1542 | ] 1543 | }, 1544 | { 1545 | "cell_type": "markdown", 1546 | "metadata": {}, 1547 | "source": [ 1548 | "By default, `np.ravel` performs the operation _row-wise_, à la C. NumPy also supports a Fortran-style order of indices (i.e. _column-major_ indexing)" 1549 | ] 1550 | }, 1551 | { 1552 | "cell_type": "code", 1553 | "execution_count": 40, 1554 | "metadata": {}, 1555 | "outputs": [ 1556 | { 1557 | "data": { 1558 | "text/plain": [ 1559 | "array([1, 4, 2, 5, 3, 6])" 1560 | ] 1561 | }, 1562 | "execution_count": 40, 1563 | "metadata": {}, 1564 | "output_type": "execute_result" 1565 | } 1566 | ], 1567 | "source": [ 1568 | "A.ravel('F') # order F (Fortran) is column-major, C (default) row-major" 1569 | ] 1570 | }, 1571 | { 1572 | "cell_type": "markdown", 1573 | "metadata": { 1574 | "slideshow": { 1575 | "slide_type": "subslide" 1576 | } 1577 | }, 1578 | "source": [ 1579 | "**Alternatively**, we can also use the `flatten` method of `ndarray` to turn a higher-dimensional array into a vector. Unlike `ravel`, this method always creates a copy of the data." 
1580 | ] 1581 | }, 1582 | { 1583 | "cell_type": "markdown", 1584 | "metadata": {}, 1585 | "source": [ 1586 | "### Transpose\n", 1587 | "\n", 1588 | "Similarly, we can transpose a matrix" 1589 | ] 1590 | }, 1591 | { 1592 | "cell_type": "code", 1593 | "execution_count": 41, 1594 | "metadata": { 1595 | "collapsed": false, 1596 | "jupyter": { 1597 | "outputs_hidden": false 1598 | }, 1599 | "slideshow": { 1600 | "slide_type": "subslide" 1601 | } 1602 | }, 1603 | "outputs": [ 1604 | { 1605 | "data": { 1606 | "text/plain": [ 1607 | "array([[1, 4],\n", 1608 | " [2, 5],\n", 1609 | " [3, 6]])" 1610 | ] 1611 | }, 1612 | "execution_count": 41, 1613 | "metadata": {}, 1614 | "output_type": "execute_result" 1615 | } 1616 | ], 1617 | "source": [ 1618 | "A.T" 1619 | ] 1620 | }, 1621 | { 1622 | "cell_type": "code", 1623 | "execution_count": 42, 1624 | "metadata": { 1625 | "collapsed": false, 1626 | "jupyter": { 1627 | "outputs_hidden": false 1628 | }, 1629 | "slideshow": { 1630 | "slide_type": "subslide" 1631 | } 1632 | }, 1633 | "outputs": [ 1634 | { 1635 | "data": { 1636 | "text/plain": [ 1637 | "array([1, 4, 2, 5, 3, 6])" 1638 | ] 1639 | }, 1640 | "execution_count": 42, 1641 | "metadata": {}, 1642 | "output_type": "execute_result" 1643 | } 1644 | ], 1645 | "source": [ 1646 | "A.T.ravel()" 1647 | ] 1648 | }, 1649 | { 1650 | "cell_type": "markdown", 1651 | "metadata": {}, 1652 | "source": [ 1653 | "## Introducing `np.newaxis`\n", 1654 | "\n", 1655 | "In addition to shape, we can also manipulate the axis of an array." 
1656 | ] 1657 | }, 1658 | { 1659 | "cell_type": "markdown", 1660 | "metadata": {}, 1661 | "source": [ 1662 | "**(1)** We can always add as many axes as we want:" 1663 | ] 1664 | }, 1665 | { 1666 | "cell_type": "code", 1667 | "execution_count": 43, 1668 | "metadata": {}, 1669 | "outputs": [ 1670 | { 1671 | "name": "stdout", 1672 | "output_type": "stream", 1673 | "text": [ 1674 | "(1, 10, 2)\n" 1675 | ] 1676 | } 1677 | ], 1678 | "source": [ 1679 | "A = np.arange(20).reshape(10, 2)\n", 1680 | "A = A[np.newaxis, ...] # this is called ellipsis\n", 1681 | "\n", 1682 | "print(A.shape)" 1683 | ] 1684 | }, 1685 | { 1686 | "cell_type": "markdown", 1687 | "metadata": {}, 1688 | "source": [ 1689 | "**(2)** We can also _permute_ axes:" 1690 | ] 1691 | }, 1692 | { 1693 | "cell_type": "code", 1694 | "execution_count": 44, 1695 | "metadata": {}, 1696 | "outputs": [ 1697 | { 1698 | "name": "stdout", 1699 | "output_type": "stream", 1700 | "text": [ 1701 | "(2, 10, 1)\n" 1702 | ] 1703 | } 1704 | ], 1705 | "source": [ 1706 | "A = A.swapaxes(0, 2) # swap axis 0 with axis 2 --> new shape: (2, 10, 1)\n", 1707 | "\n", 1708 | "print(A.shape)" 1709 | ] 1710 | }, 1711 | { 1712 | "cell_type": "markdown", 1713 | "metadata": {}, 1714 | "source": [ 1715 | "Again, changing and manipulating the `axes` will not touch the memory; it will just change the parameters (i.e. `strides` and `offset`) used to navigate the data." 1716 | ] 1717 | }, 1718 | { 1719 | "cell_type": "markdown", 1720 | "metadata": {}, 1721 | "source": [ 1722 | "---" 1723 | ] 1724 | }, 1725 | { 1726 | "cell_type": "markdown", 1727 | "metadata": { 1728 | "slideshow": { 1729 | "slide_type": "slide" 1730 | } 1731 | }, 1732 | "source": [ 1733 | "## Numerical Types and Precision\n", 1734 | "\n", 1735 | "In NumPy, talking about `int` or `float` does not make \"real sense\". 
This is mainly for two reasons:\n", 1736 | "\n", 1737 | "(a) `int` or `float` are assumed at the maximum precision available on your machine (presumably `int64` and \n", 1738 | "`float64`, respectively.\n", 1739 | "\n", 1740 | "(b) Different precision imply different numerical ranges, and so different memory size (i.e. _number of bytes_ required to represent all the numbers in the corresponding numerical range).\n", 1741 | "\n", 1742 | "Numpy support the following numerical types:" 1743 | ] 1744 | }, 1745 | { 1746 | "cell_type": "markdown", 1747 | "metadata": { 1748 | "slideshow": { 1749 | "slide_type": "subslide" 1750 | } 1751 | }, 1752 | "source": [ 1753 | " bool | This stores boolean (True or False) as a bit\n", 1754 | "\n", 1755 | " int0 | This is a platform integer (normally either int32 or int64)\n", 1756 | " int8 | This is an integer ranging from -128 to 127\n", 1757 | " int16 | This is an integer ranging from -32768 to 32767\n", 1758 | " int32 | This is an integer ranging from -2 ** 31 to 2 ** 31 -1\n", 1759 | " int64 | This is an integer ranging from -2 ** 63 to 2 ** 63 -1\n", 1760 | " \n", 1761 | " uint8 | This is an unsigned integer ranging from 0 to 255\n", 1762 | " uint16 | This is an unsigned integer ranging from 0 to 65535\n", 1763 | " uint32 | This is an unsigned integer ranging from 0 to 2 ** 32 - 1\n", 1764 | " uint64 | This is an unsigned integer ranging from 0 to 2 ** 64 - 1\n", 1765 | "\n", 1766 | " float16 | This is a half precision float with sign bit, 5 bits exponent, and 10 bits mantissa\n", 1767 | " float32 | This is a single precision float with sign bit, 8 bits exponent, and 23 bits mantissa\n", 1768 | " float64 or float | This is a double precision float with sign bit, 11 bits exponent, and 52 bits mantissa\n", 1769 | " complex64 | This is a complex number represented by two 32-bit floats (real and imaginary components)\n", 1770 | " complex128 | This is a complex number represented by two 64-bit floats (real and imaginary 
components)\n", 1771 | " (or complex)\n" 1772 | ] 1773 | }, 1774 | { 1775 | "cell_type": "markdown", 1776 | "metadata": { 1777 | "slideshow": { 1778 | "slide_type": "slide" 1779 | } 1780 | }, 1781 | "source": [ 1782 | "### Numerical Types and Representation" 1783 | ] 1784 | }, 1785 | { 1786 | "cell_type": "markdown", 1787 | "metadata": { 1788 | "slideshow": { 1789 | "slide_type": "subslide" 1790 | } 1791 | }, 1792 | "source": [ 1793 | "The **numerical dtype** of an array should be selected very carefully, as it directly affects the numerical representation of elements, that is: \n", 1794 | "\n", 1795 | " * the number of **bytes** used; \n", 1796 | " * the *numerical range*" 1797 | ] 1798 | }, 1799 | { 1800 | "cell_type": "markdown", 1801 | "metadata": {}, 1802 | "source": [ 1803 | "We can **always specify** the `dtype` of an array when we create one. If we do not, the `dtype` of the array will be inferred, namely `np.int_` or `np.float_` depending on the case." 1804 | ] 1805 | }, 1806 | { 1807 | "cell_type": "code", 1808 | "execution_count": 45, 1809 | "metadata": {}, 1810 | "outputs": [ 1811 | { 1812 | "name": "stdout", 1813 | "output_type": "stream", 1814 | "text": [ 1815 | "[0 1 2 3 4 5 6 7 8 9]\n", 1816 | "int64\n" 1817 | ] 1818 | } 1819 | ], 1820 | "source": [ 1821 | "a = np.arange(10)\n", 1822 | "print(a)\n", 1823 | "\n", 1824 | "print(a.dtype)" 1825 | ] 1826 | }, 1827 | { 1828 | "cell_type": "code", 1829 | "execution_count": 46, 1830 | "metadata": {}, 1831 | "outputs": [ 1832 | { 1833 | "name": "stdout", 1834 | "output_type": "stream", 1835 | "text": [ 1836 | "[0 1 2 3 4 5 6 7 8 9]\n", 1837 | "uint8\n" 1838 | ] 1839 | } 1840 | ], 1841 | "source": [ 1842 | "au = np.arange(10, dtype=np.uint8)\n", 1843 | "print(au)\n", 1844 | "\n", 1845 | "print(au.dtype)" 1846 | ] 1847 | }, 1848 | { 1849 | "cell_type": "markdown", 1850 | "metadata": { 1851 | "slideshow": { 1852 | "slide_type": "subslide" 1853 | } 1854 | }, 1855 | "source": [ 1856 | "So, then: **What happens if 
I try to represent a number that is Out of range?**\n", 1857 | "\n", 1858 | "Let's have a go with **integers**, i.e., `int8` and `uint8`" 1859 | ] 1860 | }, 1861 | { 1862 | "cell_type": "code", 1863 | "execution_count": 47, 1864 | "metadata": { 1865 | "slideshow": { 1866 | "slide_type": "fragment" 1867 | } 1868 | }, 1869 | "outputs": [ 1870 | { 1871 | "data": { 1872 | "text/plain": [ 1873 | "array([0, 0, 0, 0], dtype=int8)" 1874 | ] 1875 | }, 1876 | "execution_count": 47, 1877 | "metadata": {}, 1878 | "output_type": "execute_result" 1879 | } 1880 | ], 1881 | "source": [ 1882 | "x = np.zeros(4, 'int8') # Integer ranging from -128 to 127\n", 1883 | "x" 1884 | ] 1885 | }, 1886 | { 1887 | "cell_type": "markdown", 1888 | "metadata": {}, 1889 | "source": [ 1890 | ">__Spoiler Alert__: _very simple example of indexing in NumPy_\n", 1891 | ">\n", 1892 | "> _Well...it works as expected, doesn't it?_" 1893 | ] 1894 | }, 1895 | { 1896 | "cell_type": "code", 1897 | "execution_count": 48, 1898 | "metadata": { 1899 | "slideshow": { 1900 | "slide_type": "subslide" 1901 | } 1902 | }, 1903 | "outputs": [ 1904 | { 1905 | "data": { 1906 | "text/plain": [ 1907 | "array([127, 0, 0, 0], dtype=int8)" 1908 | ] 1909 | }, 1910 | "execution_count": 48, 1911 | "metadata": {}, 1912 | "output_type": "execute_result" 1913 | } 1914 | ], 1915 | "source": [ 1916 | "x[0] = 127\n", 1917 | "x" 1918 | ] 1919 | }, 1920 | { 1921 | "cell_type": "code", 1922 | "execution_count": 49, 1923 | "metadata": { 1924 | "slideshow": { 1925 | "slide_type": "fragment" 1926 | } 1927 | }, 1928 | "outputs": [ 1929 | { 1930 | "data": { 1931 | "text/plain": [ 1932 | "array([-128, 0, 0, 0], dtype=int8)" 1933 | ] 1934 | }, 1935 | "execution_count": 49, 1936 | "metadata": {}, 1937 | "output_type": "execute_result" 1938 | } 1939 | ], 1940 | "source": [ 1941 | "x[0] = 128\n", 1942 | "x" 1943 | ] 1944 | }, 1945 | { 1946 | "cell_type": "code", 1947 | "execution_count": 50, 1948 | "metadata": { 1949 | "slideshow": { 1950 | 
"slide_type": "fragment" 1951 | } 1952 | }, 1953 | "outputs": [ 1954 | { 1955 | "data": { 1956 | "text/plain": [ 1957 | "array([-128, -127, 0, 0], dtype=int8)" 1958 | ] 1959 | }, 1960 | "execution_count": 50, 1961 | "metadata": {}, 1962 | "output_type": "execute_result" 1963 | } 1964 | ], 1965 | "source": [ 1966 | "x[1] = 129\n", 1967 | "x" 1968 | ] 1969 | }, 1970 | { 1971 | "cell_type": "code", 1972 | "execution_count": 51, 1973 | "metadata": { 1974 | "slideshow": { 1975 | "slide_type": "fragment" 1976 | } 1977 | }, 1978 | "outputs": [ 1979 | { 1980 | "data": { 1981 | "text/plain": [ 1982 | "array([-128, -127, 1, 0], dtype=int8)" 1983 | ] 1984 | }, 1985 | "execution_count": 51, 1986 | "metadata": {}, 1987 | "output_type": "execute_result" 1988 | } 1989 | ], 1990 | "source": [ 1991 | "x[2] = 257 # i.e. (128 x 2) + 1\n", 1992 | "x" 1993 | ] 1994 | }, 1995 | { 1996 | "cell_type": "code", 1997 | "execution_count": 52, 1998 | "metadata": { 1999 | "slideshow": { 2000 | "slide_type": "subslide" 2001 | } 2002 | }, 2003 | "outputs": [ 2004 | { 2005 | "data": { 2006 | "text/plain": [ 2007 | "array([0, 0, 0, 0], dtype=uint8)" 2008 | ] 2009 | }, 2010 | "execution_count": 52, 2011 | "metadata": {}, 2012 | "output_type": "execute_result" 2013 | } 2014 | ], 2015 | "source": [ 2016 | "ux = np.zeros(4, 'uint8') # Integer ranging from 0 to 255, dtype also as string!\n", 2017 | "ux" 2018 | ] 2019 | }, 2020 | { 2021 | "cell_type": "code", 2022 | "execution_count": 53, 2023 | "metadata": { 2024 | "slideshow": { 2025 | "slide_type": "subslide" 2026 | } 2027 | }, 2028 | "outputs": [ 2029 | { 2030 | "data": { 2031 | "text/plain": [ 2032 | "array([255, 0, 1, 1], dtype=uint8)" 2033 | ] 2034 | }, 2035 | "execution_count": 53, 2036 | "metadata": {}, 2037 | "output_type": "execute_result" 2038 | } 2039 | ], 2040 | "source": [ 2041 | "ux[0] = 255\n", 2042 | "ux[1] = 256\n", 2043 | "ux[2] = 257\n", 2044 | "ux[3] = 513 # (256 x 2) + 1\n", 2045 | "ux" 2046 | ] 2047 | }, 2048 | { 2049 | 
"cell_type": "markdown", 2050 | "metadata": {}, 2051 | "source": [ 2052 | "### Machine Info and Supported Numerical Representation" 2053 | ] 2054 | }, 2055 | { 2056 | "cell_type": "markdown", 2057 | "metadata": {}, 2058 | "source": [ 2059 | "Numpy provides two functions to inspect the information of supported integer and floating-point types, namely `np.iinfo` and `np.finfo`:" 2060 | ] 2061 | }, 2062 | { 2063 | "cell_type": "code", 2064 | "execution_count": 54, 2065 | "metadata": {}, 2066 | "outputs": [ 2067 | { 2068 | "data": { 2069 | "text/plain": [ 2070 | "iinfo(min=-2147483648, max=2147483647, dtype=int32)" 2071 | ] 2072 | }, 2073 | "execution_count": 54, 2074 | "metadata": {}, 2075 | "output_type": "execute_result" 2076 | } 2077 | ], 2078 | "source": [ 2079 | "np.iinfo(np.int32)" 2080 | ] 2081 | }, 2082 | { 2083 | "cell_type": "code", 2084 | "execution_count": 55, 2085 | "metadata": {}, 2086 | "outputs": [ 2087 | { 2088 | "data": { 2089 | "text/plain": [ 2090 | "finfo(resolution=0.001, min=-6.55040e+04, max=6.55040e+04, dtype=float16)" 2091 | ] 2092 | }, 2093 | "execution_count": 55, 2094 | "metadata": {}, 2095 | "output_type": "execute_result" 2096 | } 2097 | ], 2098 | "source": [ 2099 | "np.finfo(np.float16)" 2100 | ] 2101 | }, 2102 | { 2103 | "cell_type": "markdown", 2104 | "metadata": {}, 2105 | "source": [ 2106 | "In addition, the `MachAr` class will provide information on the current machine : " 2107 | ] 2108 | }, 2109 | { 2110 | "cell_type": "code", 2111 | "execution_count": 56, 2112 | "metadata": {}, 2113 | "outputs": [], 2114 | "source": [ 2115 | "machine_info = np.MachAr()" 2116 | ] 2117 | }, 2118 | { 2119 | "cell_type": "code", 2120 | "execution_count": 57, 2121 | "metadata": {}, 2122 | "outputs": [ 2123 | { 2124 | "data": { 2125 | "text/plain": [ 2126 | "2.220446049250313e-16" 2127 | ] 2128 | }, 2129 | "execution_count": 57, 2130 | "metadata": {}, 2131 | "output_type": "execute_result" 2132 | } 2133 | ], 2134 | "source": [ 2135 | 
"machine_info.epsilon" 2136 | ] 2137 | }, 2138 | { 2139 | "cell_type": "code", 2140 | "execution_count": 58, 2141 | "metadata": {}, 2142 | "outputs": [ 2143 | { 2144 | "data": { 2145 | "text/plain": [ 2146 | "1.7976931348623157e+308" 2147 | ] 2148 | }, 2149 | "execution_count": 58, 2150 | "metadata": {}, 2151 | "output_type": "execute_result" 2152 | } 2153 | ], 2154 | "source": [ 2155 | "machine_info.huge" 2156 | ] 2157 | }, 2158 | { 2159 | "cell_type": "code", 2160 | "execution_count": 59, 2161 | "metadata": {}, 2162 | "outputs": [ 2163 | { 2164 | "data": { 2165 | "text/plain": [ 2166 | "True" 2167 | ] 2168 | }, 2169 | "execution_count": 59, 2170 | "metadata": {}, 2171 | "output_type": "execute_result" 2172 | } 2173 | ], 2174 | "source": [ 2175 | "np.finfo(np.float64).max == machine_info.huge" 2176 | ] 2177 | }, 2178 | { 2179 | "cell_type": "code", 2180 | "execution_count": null, 2181 | "metadata": {}, 2182 | "outputs": [], 2183 | "source": [ 2184 | "# TRY THIS!\n", 2185 | "\n", 2186 | "help(machine_info)" 2187 | ] 2188 | }, 2189 | { 2190 | "cell_type": "markdown", 2191 | "metadata": { 2192 | "slideshow": { 2193 | "slide_type": "slide" 2194 | } 2195 | }, 2196 | "source": [ 2197 | "# Data Type Object" 2198 | ] 2199 | }, 2200 | { 2201 | "cell_type": "markdown", 2202 | "metadata": { 2203 | "slideshow": { 2204 | "slide_type": "subslide" 2205 | } 2206 | }, 2207 | "source": [ 2208 | "**Data type objects** are instances of the `numpy.dtype` class. \n", 2209 | "\n", 2210 | "Once again, arrays have a data type. \n", 2211 | "
\n", 2212 | "To be precise, *every element* in a NumPy array has the same data type. \n", 2213 | "\n", 2214 | "The data type object can tell you the `size` of the data in bytes.\n", 2215 | "
\n", 2216 | "(**Recall**: The size in bytes is given by the `itemsize` attribute of the dtype class)" 2217 | ] 2218 | }, 2219 | { 2220 | "cell_type": "code", 2221 | "execution_count": 60, 2222 | "metadata": { 2223 | "collapsed": false, 2224 | "jupyter": { 2225 | "outputs_hidden": false 2226 | }, 2227 | "slideshow": { 2228 | "slide_type": "subslide" 2229 | } 2230 | }, 2231 | "outputs": [ 2232 | { 2233 | "name": "stdout", 2234 | "output_type": "stream", 2235 | "text": [ 2236 | "a itemsize: 2\n", 2237 | "a.dtype.itemsize: 2\n" 2238 | ] 2239 | } 2240 | ], 2241 | "source": [ 2242 | "a = np.arange(7, dtype=np.uint16)\n", 2243 | "print('a itemsize: ', a.itemsize)\n", 2244 | "print('a.dtype.itemsize: ', a.dtype.itemsize)" 2245 | ] 2246 | }, 2247 | { 2248 | "cell_type": "markdown", 2249 | "metadata": { 2250 | "slideshow": { 2251 | "slide_type": "slide" 2252 | } 2253 | }, 2254 | "source": [ 2255 | "### Character Codes\n", 2256 | "\n", 2257 | "Character codes are included for backward compatibility with **Numeric**. \n", 2258 | "
\n", 2259 | "Numeric is the predecessor of NumPy. Their use is not recommended, but these codes pop up in several places. \n", 2260 | "\n", 2261 | "Btw, You should instead use the **dtype** objects. \n", 2262 | "\n", 2263 | " integer i\n", 2264 | " Unsigned integer u\n", 2265 | " Single precision float f\n", 2266 | " Double precision float d\n", 2267 | " bool b\n", 2268 | " complex D\n", 2269 | " string S\n", 2270 | " unicode U" 2271 | ] 2272 | }, 2273 | { 2274 | "cell_type": "markdown", 2275 | "metadata": { 2276 | "slideshow": { 2277 | "slide_type": "subslide" 2278 | } 2279 | }, 2280 | "source": [ 2281 | "### `dtype` contructors" 2282 | ] 2283 | }, 2284 | { 2285 | "cell_type": "code", 2286 | "execution_count": 61, 2287 | "metadata": { 2288 | "collapsed": false, 2289 | "jupyter": { 2290 | "outputs_hidden": false 2291 | }, 2292 | "slideshow": { 2293 | "slide_type": "fragment" 2294 | } 2295 | }, 2296 | "outputs": [ 2297 | { 2298 | "data": { 2299 | "text/plain": [ 2300 | "dtype('float64')" 2301 | ] 2302 | }, 2303 | "execution_count": 61, 2304 | "metadata": {}, 2305 | "output_type": "execute_result" 2306 | } 2307 | ], 2308 | "source": [ 2309 | "np.dtype(float)" 2310 | ] 2311 | }, 2312 | { 2313 | "cell_type": "code", 2314 | "execution_count": 62, 2315 | "metadata": { 2316 | "collapsed": false, 2317 | "jupyter": { 2318 | "outputs_hidden": false 2319 | }, 2320 | "slideshow": { 2321 | "slide_type": "fragment" 2322 | } 2323 | }, 2324 | "outputs": [ 2325 | { 2326 | "data": { 2327 | "text/plain": [ 2328 | "dtype('float32')" 2329 | ] 2330 | }, 2331 | "execution_count": 62, 2332 | "metadata": {}, 2333 | "output_type": "execute_result" 2334 | } 2335 | ], 2336 | "source": [ 2337 | "np.dtype('f')" 2338 | ] 2339 | }, 2340 | { 2341 | "cell_type": "code", 2342 | "execution_count": 63, 2343 | "metadata": { 2344 | "collapsed": false, 2345 | "jupyter": { 2346 | "outputs_hidden": false 2347 | }, 2348 | "slideshow": { 2349 | "slide_type": "fragment" 2350 | } 2351 | }, 2352 | "outputs": [ 
2353 | { 2354 | "data": { 2355 | "text/plain": [ 2356 | "dtype('float64')" 2357 | ] 2358 | }, 2359 | "execution_count": 63, 2360 | "metadata": {}, 2361 | "output_type": "execute_result" 2362 | } 2363 | ], 2364 | "source": [ 2365 | "np.dtype('d')" 2366 | ] 2367 | }, 2368 | { 2369 | "cell_type": "code", 2370 | "execution_count": 64, 2371 | "metadata": { 2372 | "collapsed": false, 2373 | "jupyter": { 2374 | "outputs_hidden": false 2375 | }, 2376 | "slideshow": { 2377 | "slide_type": "fragment" 2378 | } 2379 | }, 2380 | "outputs": [ 2381 | { 2382 | "data": { 2383 | "text/plain": [ 2384 | "dtype('float64')" 2385 | ] 2386 | }, 2387 | "execution_count": 64, 2388 | "metadata": {}, 2389 | "output_type": "execute_result" 2390 | } 2391 | ], 2392 | "source": [ 2393 | "np.dtype('f8')" 2394 | ] 2395 | }, 2396 | { 2397 | "cell_type": "code", 2398 | "execution_count": 65, 2399 | "metadata": { 2400 | "collapsed": false, 2401 | "jupyter": { 2402 | "outputs_hidden": false 2403 | }, 2404 | "slideshow": { 2405 | "slide_type": "fragment" 2406 | } 2407 | }, 2408 | "outputs": [ 2409 | { 2410 | "data": { 2411 | "text/plain": [ 2412 | "dtype('