├── .gitignore ├── images ├── scipy.png ├── ndarray.png ├── df_inside.png ├── df_outside.png ├── reference.png ├── iris_setosa.jpg ├── storage_index.png ├── df_inside_numpy.png ├── euroscipy_logo.png ├── iris_versicolor.jpg ├── iris_virginica.jpg ├── storage_simple.png └── ndarray_with_details.png ├── requirements.txt ├── LICENSE ├── README.md ├── 00_tutorial_intro.ipynb ├── 06_numpy_internals.ipynb ├── 04_sparse_matrices.ipynb ├── 05_memmapping.ipynb ├── extra_torch_tensor.ipynb ├── 03_numpy_io_matlab.ipynb └── 01_numpy_basics.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints/* 2 | *.pyc 3 | 4 | -------------------------------------------------------------------------------- /images/scipy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/numpy-euroscipy/master/images/scipy.png -------------------------------------------------------------------------------- /images/ndarray.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/numpy-euroscipy/master/images/ndarray.png -------------------------------------------------------------------------------- /images/df_inside.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/numpy-euroscipy/master/images/df_inside.png -------------------------------------------------------------------------------- /images/df_outside.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/numpy-euroscipy/master/images/df_outside.png -------------------------------------------------------------------------------- /images/reference.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/leriomaggio/numpy-euroscipy/master/images/reference.png -------------------------------------------------------------------------------- /images/iris_setosa.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/numpy-euroscipy/master/images/iris_setosa.jpg -------------------------------------------------------------------------------- /images/storage_index.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/numpy-euroscipy/master/images/storage_index.png -------------------------------------------------------------------------------- /images/df_inside_numpy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/numpy-euroscipy/master/images/df_inside_numpy.png -------------------------------------------------------------------------------- /images/euroscipy_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/numpy-euroscipy/master/images/euroscipy_logo.png -------------------------------------------------------------------------------- /images/iris_versicolor.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/numpy-euroscipy/master/images/iris_versicolor.jpg -------------------------------------------------------------------------------- /images/iris_virginica.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/numpy-euroscipy/master/images/iris_virginica.jpg -------------------------------------------------------------------------------- /images/storage_simple.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/leriomaggio/numpy-euroscipy/master/images/storage_simple.png -------------------------------------------------------------------------------- /images/ndarray_with_details.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/numpy-euroscipy/master/images/ndarray_with_details.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ipykernel==5.1.2 2 | jupyter-client==5.3.1 3 | jupyter-console==6.0.0 4 | jupyter-core==4.4.0 5 | notebook==6.4.12 6 | numpy==1.22.0 7 | # requirements for the "advanced" part 8 | scipy==1.3.1 9 | torch==1.2.0 10 | 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Valerio Maggio 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Never get in battle of bits without ammunitions 2 | 3 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/leriomaggio/numpy-euroscipy/master) 4 | 5 | **Title Credits**: Gentle reference to the homonymous [talk](https://pyvideo.org/europython-2013/never-get-in-a-battle-of-bits-without-ammunition.html) presented at 6 | **EuroPython 2013** in Florence by my friend **riko** (_a.k.a._ Enrico Franchi ). 7 | 8 | ## Abstract 9 | 10 | The `numpy` package takes a central role in Python scientific ecosystem. 11 | This is mainly because `numpy` code has been designed with 12 | high performance in mind. 13 | 14 | This tutorial will provide materials for the most essential concepts 15 | to become confident with `numpy` and `ndarray` in (a matter of) `90 mins`. 
16 | 17 | # Outline 18 | 19 | **Part I** Numpy Basics 20 | 21 | - Introduction to NumPy Arrays 22 | - numpy internals schematics 23 | - Reshaping and Resizing 24 | - Numerical Data Types 25 | - Record Array 26 | 27 | 28 | **Part II** Indexing and Slicing 29 | 30 | - Indexing numpy arrays 31 | - fancy indexing 32 | - array masking 33 | - Slicing & Stacking 34 | - Vectorization & Broadcasting 35 | 36 | **Part III** "Advanced NumPy" 37 | 38 | - Serialisation & I/O 39 | - `.mat` files 40 | - Array and Matrix 41 | - Matlab compatibility 42 | - Memmap 43 | - Bits of Data Science with NumPy 44 | - NumPy beyond `numpy` 45 | 46 | ### Python version 47 | 48 | The minimum recommended version of Python to use for this tutorial is **Python 3.5**, although 49 | Python 2.7 should be fine, as well as previous versions of Python 3. 50 | 51 | Py3.5+ is recommended due to a reference to the `@` operator in the linear algebra notebook. 52 | 53 | ## License and Sharing Material 54 | 55 | Creative Commons License
This work is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License. 56 | -------------------------------------------------------------------------------- /00_tutorial_intro.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# NumPy Tutorial @ EuroSciPy 2019\n", 8 | "\n", 9 | "\n", 10 | "\n", 11 | " " 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "## Goal of this Tutorial\n", 19 | "\n", 20 | "- Introduce the basics of scientific and numerical computation in Python using **Numpy**\n", 21 | "- Understand why `numpy` has a central role in the Python scientific ecosystem\n", 22 | " " 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "# Outline\n", 30 | "\n", 31 | "**11:00 - 11:45** (_45 mins_) Numpy Basics\n", 32 | "\n", 33 | "- Introduction to NumPy Arrays\n", 34 | " - numpy internals schematics\n", 35 | " - Reshaping and Resizing\n", 36 | "- Numerical Data Types\n", 37 | " - Record Array\n", 38 | " \n", 39 | "**11:50 - 12:30** (_40 mins_) Indexing and Slicing\n", 40 | " \n", 41 | "- Indexing numpy arrays\n", 42 | " - fancy indexing\n", 43 | " - array masking\n", 44 | "- Slicing & Stacking\n", 45 | "- Vectorization & Broadcasting\n", 46 | "\n", 47 | "**Follow up** \"Advanced NumPy: Bits of Data Science with NumPy\n", 48 | "\n", 49 | "- Serialisation & I/O\n", 50 | " - `.mat` files\n", 51 | "- Array and Matrix\n", 52 | " - Matlab compatibility\n", 53 | "- Sparse Matrices\n", 54 | "- Memmap \n", 55 | "- Ubiquitous NumPy: NumPy beyond `numpy`" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "# Requirements" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "This tutorial has one main requirement: `numpy`.\n", 70 | "\n", 71 | 
"Materials are provided as Jupyter notebooks, so IPython notebook (`pip install notebook`) is also required.\n", 72 | "\n", 73 | "#### Advanced Part\n", 74 | "\n", 75 | "This part has more dependencies: `scipy`, `scikit-learn`, `matplotlib`, `torch`.\n", 76 | "All these dependencies have been collected in the `requirements.txt` file:\n", 77 | "\n", 78 | "```\n", 79 | "$ pip install -r requirements.txt\n", 80 | "```\n", 81 | "\n", 82 | "\n", 83 | "### Python version\n", 84 | "\n", 85 | "The minimum recommended version of Python to use for this tutorial is **Python 3.5**, although \n", 86 | "Python 2.7 should be fine, as well as previous versions of Python 3. \n", 87 | "\n", 88 | "Py3.5+ is recommended due to a reference to the `@` operator in the linear algebra notebook.\n", 89 | "\n", 90 | "\n" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "# MyBinder\n", 98 | "\n", 99 | "If you don't want to bother setting up the environment on your own computer, you can use MyBinder\n", 100 | "\n", 101 | "(**Note**: recommended only with a proper Wi-Fi connection)" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/leriomaggio/numpy-euroscipy/master)" 109 | ] 110 | } 111 | ], 112 | "metadata": { 113 | "kernelspec": { 114 | "display_name": "Python 3.7 (NumPy EuroSciPy)", 115 | "language": "python", 116 | "name": "numpy-euroscipy" 117 | }, 118 | "language_info": { 119 | "codemirror_mode": { 120 | "name": "ipython", 121 | "version": 3 122 | }, 123 | "file_extension": ".py", 124 | "mimetype": "text/x-python", 125 | "name": "python", 126 | "nbconvert_exporter": "python", 127 | "pygments_lexer": "ipython3", 128 | "version": "3.7.3" 129 | } 130 | }, 131 | "nbformat": 4, 132 | "nbformat_minor": 4 133 | } 134 | -------------------------------------------------------------------------------- 
/06_numpy_internals.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Understanding NumPy Internals\n", 8 | "\n", 9 | "We can achieve significant performance speed enhancement with NumPy over native Python code, particularly when our computations follow the **Single Instruction, Multiple Data (SIMD)** paradigm. " 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy as np" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": { 24 | "slideshow": { 25 | "slide_type": "slide" 26 | } 27 | }, 28 | "source": [ 29 | "## Copy and \"deep copy\"" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": { 35 | "slideshow": { 36 | "slide_type": "subslide" 37 | } 38 | }, 39 | "source": [ 40 | "To achieve high performance, assignments in Python usually do not copy the underlying objects. \n", 41 | "\n", 42 | "This is important for example when objects are passed between functions, to avoid an excessive amount of memory copying when it is not necessary (technical term: **pass by reference**).\n", 43 | "\n", 44 | "" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "First, we need a way to check whether two arrays share the same underlying data buffer in memory. 
\n", 52 | "\n", 53 | "Let's define a function `aid()` that returns the memory location of the underlying data buffer:" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 3, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "def aid(x):\n", 63 | " # This function returns the memory\n", 64 | " # block address of an array.\n", 65 | " return x.__array_interface__['data'][0]" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "Two arrays with the same data location (as returned by `aid()`) share the same underlying data buffer. \n", 73 | "\n", 74 | "However, the opposite is true only if the arrays have the same offset (meaning that they have the same first element). " 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 4, 80 | "metadata": { 81 | "collapsed": false, 82 | "jupyter": { 83 | "outputs_hidden": false 84 | }, 85 | "slideshow": { 86 | "slide_type": "subslide" 87 | } 88 | }, 89 | "outputs": [ 90 | { 91 | "data": { 92 | "text/plain": [ 93 | "array([[1, 2],\n", 94 | " [3, 4]])" 95 | ] 96 | }, 97 | "execution_count": 4, 98 | "metadata": {}, 99 | "output_type": "execute_result" 100 | } 101 | ], 102 | "source": [ 103 | "A = np.array([[1, 2], [3, 4]])\n", 104 | "\n", 105 | "A" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 5, 111 | "metadata": { 112 | "collapsed": false, 113 | "jupyter": { 114 | "outputs_hidden": false 115 | }, 116 | "slideshow": { 117 | "slide_type": "fragment" 118 | } 119 | }, 120 | "outputs": [], 121 | "source": [ 122 | "# now B is referring to the same array data as A \n", 123 | "B = A " 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 6, 129 | "metadata": {}, 130 | "outputs": [ 131 | { 132 | "data": { 133 | "text/plain": [ 134 | "True" 135 | ] 136 | }, 137 | "execution_count": 6, 138 | "metadata": {}, 139 | "output_type": "execute_result" 140 | } 141 | ], 142 | "source": [ 143 | "aid(A) == 
aid(B)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 7, 149 | "metadata": { 150 | "collapsed": false, 151 | "jupyter": { 152 | "outputs_hidden": false 153 | }, 154 | "slideshow": { 155 | "slide_type": "fragment" 156 | } 157 | }, 158 | "outputs": [ 159 | { 160 | "data": { 161 | "text/plain": [ 162 | "array([[10, 2],\n", 163 | " [ 3, 4]])" 164 | ] 165 | }, 166 | "execution_count": 7, 167 | "metadata": {}, 168 | "output_type": "execute_result" 169 | } 170 | ], 171 | "source": [ 172 | "# changing B affects A\n", 173 | "B[0,0] = 10\n", 174 | "\n", 175 | "B" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 8, 181 | "metadata": { 182 | "collapsed": false, 183 | "jupyter": { 184 | "outputs_hidden": false 185 | }, 186 | "slideshow": { 187 | "slide_type": "fragment" 188 | } 189 | }, 190 | "outputs": [ 191 | { 192 | "data": { 193 | "text/plain": [ 194 | "array([[10, 2],\n", 195 | " [ 3, 4]])" 196 | ] 197 | }, 198 | "execution_count": 8, 199 | "metadata": {}, 200 | "output_type": "execute_result" 201 | } 202 | ], 203 | "source": [ 204 | "A" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": { 210 | "slideshow": { 211 | "slide_type": "subslide" 212 | } 213 | }, 214 | "source": [ 215 | "* If we want to **avoid** this behavior, so that when we get a new completely independent object `B` copied from `A`, then we need to do a so-called **deep copy** using the function `np.copy`:" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 9, 221 | "metadata": { 222 | "collapsed": false, 223 | "jupyter": { 224 | "outputs_hidden": false 225 | }, 226 | "slideshow": { 227 | "slide_type": "fragment" 228 | } 229 | }, 230 | "outputs": [], 231 | "source": [ 232 | "B = np.copy(A)" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": 10, 238 | "metadata": { 239 | "collapsed": false, 240 | "jupyter": { 241 | "outputs_hidden": false 242 | }, 243 | "slideshow": { 244 | 
"slide_type": "fragment" 245 | } 246 | }, 247 | "outputs": [ 248 | { 249 | "data": { 250 | "text/plain": [ 251 | "array([[-5, 2],\n", 252 | " [ 3, 4]])" 253 | ] 254 | }, 255 | "execution_count": 10, 256 | "metadata": {}, 257 | "output_type": "execute_result" 258 | } 259 | ], 260 | "source": [ 261 | "# now, if we modify B, A is not affected\n", 262 | "B[0,0] = -5\n", 263 | "\n", 264 | "B" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": 11, 270 | "metadata": { 271 | "collapsed": false, 272 | "jupyter": { 273 | "outputs_hidden": false 274 | }, 275 | "slideshow": { 276 | "slide_type": "fragment" 277 | } 278 | }, 279 | "outputs": [ 280 | { 281 | "data": { 282 | "text/plain": [ 283 | "array([[10, 2],\n", 284 | " [ 3, 4]])" 285 | ] 286 | }, 287 | "execution_count": 11, 288 | "metadata": {}, 289 | "output_type": "execute_result" 290 | } 291 | ], 292 | "source": [ 293 | "A" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": 13, 299 | "metadata": {}, 300 | "outputs": [ 301 | { 302 | "data": { 303 | "text/plain": [ 304 | "False" 305 | ] 306 | }, 307 | "execution_count": 13, 308 | "metadata": {}, 309 | "output_type": "execute_result" 310 | } 311 | ], 312 | "source": [ 313 | "aid(A) == aid(B)" 314 | ] 315 | }, 316 | { 317 | "cell_type": "markdown", 318 | "metadata": {}, 319 | "source": [ 320 | "---" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": null, 326 | "metadata": {}, 327 | "outputs": [], 328 | "source": [] 329 | } 330 | ], 331 | "metadata": { 332 | "kernelspec": { 333 | "display_name": "Python 3.7 (NumPy EuroSciPy)", 334 | "language": "python", 335 | "name": "numpy-euroscipy" 336 | }, 337 | "language_info": { 338 | "codemirror_mode": { 339 | "name": "ipython", 340 | "version": 3 341 | }, 342 | "file_extension": ".py", 343 | "mimetype": "text/x-python", 344 | "name": "python", 345 | "nbconvert_exporter": "python", 346 | "pygments_lexer": "ipython3", 347 | "version": "3.7.3" 348 | } 349 | 
}, 350 | "nbformat": 4, 351 | "nbformat_minor": 4 352 | } 353 | -------------------------------------------------------------------------------- /04_sparse_matrices.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "![scipy](images/scipy.png)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": { 13 | "slideshow": { 14 | "slide_type": "slide" 15 | } 16 | }, 17 | "source": [ 18 | "# Scipy Sparse Matrices" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": { 24 | "slideshow": { 25 | "slide_type": "subslide" 26 | } 27 | }, 28 | "source": [ 29 | "**Sparse Matrices** are very nice in some situations. \n", 30 | "\n", 31 | "For example, in some machine learning tasks, especially those associated\n", 32 | "with textual analysis, the data may be mostly zeros. \n", 33 | "\n", 34 | "Storing all these zeros is very inefficient. \n", 35 | "\n", 36 | "We can create and manipulate sparse matrices as follows:" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 1, 42 | "metadata": { 43 | "slideshow": { 44 | "slide_type": "skip" 45 | } 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "import numpy as np" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 2, 55 | "metadata": { 56 | "collapsed": false, 57 | "jupyter": { 58 | "outputs_hidden": false 59 | }, 60 | "slideshow": { 61 | "slide_type": "subslide" 62 | } 63 | }, 64 | "outputs": [ 65 | { 66 | "name": "stdout", 67 | "output_type": "stream", 68 | "text": [ 69 | "[[0.52508939 0.55969684 0.38059541 0.14994033 0.3561533 ]\n", 70 | " [0.94612104 0.20796991 0.18345058 0.03266521 0.71642811]\n", 71 | " [0.76801146 0.18143891 0.44346617 0.3509763 0.70771478]\n", 72 | " [0.96785438 0.64010409 0.20666769 0.99005094 0.42858088]\n", 73 | " [0.24971981 0.88585392 0.1683662 0.70119483 0.48374682]\n", 74 | " [0.01736319 0.87369042 
0.19830546 0.56395574 0.20060824]\n", 75 | " [0.11881578 0.65524562 0.21570217 0.02114718 0.8527528 ]\n", 76 | " [0.7722977 0.44208694 0.01126588 0.80556187 0.07607147]\n", 77 | " [0.75409907 0.78761663 0.41863968 0.30373673 0.63332945]\n", 78 | " [0.99874432 0.37336682 0.14359151 0.76142434 0.1988419 ]]\n" 79 | ] 80 | } 81 | ], 82 | "source": [ 83 | "# Create a random array with a lot of zeros\n", 84 | "X = np.random.random((10, 5))\n", 85 | "print(X)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 4, 91 | "metadata": { 92 | "collapsed": false, 93 | "jupyter": { 94 | "outputs_hidden": false 95 | }, 96 | "slideshow": { 97 | "slide_type": "subslide" 98 | } 99 | }, 100 | "outputs": [ 101 | { 102 | "name": "stdout", 103 | "output_type": "stream", 104 | "text": [ 105 | "[[0. 0. 0. 0. 0. ]\n", 106 | " [0.94612104 0. 0. 0. 0.71642811]\n", 107 | " [0.76801146 0. 0. 0. 0.70771478]\n", 108 | " [0.96785438 0. 0. 0.99005094 0. ]\n", 109 | " [0. 0.88585392 0. 0.70119483 0. ]\n", 110 | " [0. 0.87369042 0. 0. 0. ]\n", 111 | " [0. 0. 0. 0. 0.8527528 ]\n", 112 | " [0.7722977 0. 0. 0.80556187 0. ]\n", 113 | " [0.75409907 0.78761663 0. 0. 0. ]\n", 114 | " [0.99874432 0. 0. 0.76142434 0. 
]]\n" 115 | ] 116 | } 117 | ], 118 | "source": [ 119 | "X[X < 0.7] = 0 # note: fancy indexing\n", 120 | "print(X)" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 5, 126 | "metadata": { 127 | "collapsed": false, 128 | "jupyter": { 129 | "outputs_hidden": false 130 | }, 131 | "slideshow": { 132 | "slide_type": "subslide" 133 | } 134 | }, 135 | "outputs": [ 136 | { 137 | "name": "stdout", 138 | "output_type": "stream", 139 | "text": [ 140 | " (1, 0)\t0.9461210440608149\n", 141 | " (1, 4)\t0.7164281142304602\n", 142 | " (2, 0)\t0.7680114556976801\n", 143 | " (2, 4)\t0.7077147754658187\n", 144 | " (3, 0)\t0.9678543752795629\n", 145 | " (3, 3)\t0.9900509407165115\n", 146 | " (4, 1)\t0.8858539179438214\n", 147 | " (4, 3)\t0.7011948276939008\n", 148 | " (5, 1)\t0.8736904234085155\n", 149 | " (6, 4)\t0.8527528049269587\n", 150 | " (7, 0)\t0.7722977020522017\n", 151 | " (7, 3)\t0.8055618728634483\n", 152 | " (8, 0)\t0.7540990714791828\n", 153 | " (8, 1)\t0.7876166309534933\n", 154 | " (9, 0)\t0.9987443167367364\n", 155 | " (9, 3)\t0.7614243372618548\n" 156 | ] 157 | } 158 | ], 159 | "source": [ 160 | "from scipy import sparse\n", 161 | "\n", 162 | "# turn X into a csr (Compressed-Sparse-Row) matrix\n", 163 | "X_csr = sparse.csr_matrix(X)\n", 164 | "print(X_csr)" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 6, 170 | "metadata": { 171 | "collapsed": false, 172 | "jupyter": { 173 | "outputs_hidden": false 174 | }, 175 | "slideshow": { 176 | "slide_type": "subslide" 177 | } 178 | }, 179 | "outputs": [ 180 | { 181 | "name": "stdout", 182 | "output_type": "stream", 183 | "text": [ 184 | "[[0. 0. 0. 0. 0. ]\n", 185 | " [0.94612104 0. 0. 0. 0.71642811]\n", 186 | " [0.76801146 0. 0. 0. 0.70771478]\n", 187 | " [0.96785438 0. 0. 0.99005094 0. ]\n", 188 | " [0. 0.88585392 0. 0.70119483 0. ]\n", 189 | " [0. 0.87369042 0. 0. 0. ]\n", 190 | " [0. 0. 0. 0. 0.8527528 ]\n", 191 | " [0.7722977 0. 0. 0.80556187 0. 
]\n", 192 | " [0.75409907 0.78761663 0. 0. 0. ]\n", 193 | " [0.99874432 0. 0. 0.76142434 0. ]]\n" 194 | ] 195 | } 196 | ], 197 | "source": [ 198 | "# convert the sparse matrix to a dense array\n", 199 | "print(X_csr.toarray())" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 7, 205 | "metadata": { 206 | "collapsed": false, 207 | "jupyter": { 208 | "outputs_hidden": false 209 | }, 210 | "slideshow": { 211 | "slide_type": "subslide" 212 | } 213 | }, 214 | "outputs": [ 215 | { 216 | "data": { 217 | "text/plain": [ 218 | "True" 219 | ] 220 | }, 221 | "execution_count": 7, 222 | "metadata": {}, 223 | "output_type": "execute_result" 224 | } 225 | ], 226 | "source": [ 227 | "# Sparse matrices support linear algebra:\n", 228 | "y = np.random.random(X_csr.shape[1])\n", 229 | "z1 = X_csr.dot(y)\n", 230 | "z2 = X.dot(y)\n", 231 | "np.allclose(z1, z2)" 232 | ] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "metadata": { 237 | "slideshow": { 238 | "slide_type": "subslide" 239 | } 240 | }, 241 | "source": [ 242 | "* The CSR representation can be very efficient for computations, but it is not as good for adding elements. \n", 243 | "\n", 244 | "* For that, the **LIL** (List-In-List) representation is better:" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": 8, 250 | "metadata": { 251 | "collapsed": false, 252 | "jupyter": { 253 | "outputs_hidden": false 254 | }, 255 | "slideshow": { 256 | "slide_type": "fragment" 257 | } 258 | }, 259 | "outputs": [ 260 | { 261 | "name": "stdout", 262 | "output_type": "stream", 263 | "text": [ 264 | " (0, 1)\t1.0\n", 265 | " (0, 2)\t2.0\n", 266 | " (1, 1)\t2.0\n", 267 | " (1, 3)\t4.0\n", 268 | " (2, 0)\t2.0\n", 269 | " (2, 1)\t3.0\n", 270 | " (2, 2)\t4.0\n", 271 | " (2, 3)\t5.0\n", 272 | " (3, 0)\t3.0\n", 273 | " (4, 0)\t4.0\n", 274 | " (4, 1)\t5.0\n", 275 | " (4, 2)\t6.0\n", 276 | "[[0. 1. 2. 0. 0.]\n", 277 | " [0. 2. 0. 4. 0.]\n", 278 | " [2. 3. 4. 5. 0.]\n", 279 | " [3. 0. 0. 0. 
0.]\n", 280 | " [4. 5. 6. 0. 0.]]\n" 281 | ] 282 | } 283 | ], 284 | "source": [ 285 | "# Create an empty LIL matrix and add some items\n", 286 | "X_lil = sparse.lil_matrix((5, 5))\n", 287 | "\n", 288 | "for i, j in np.random.randint(0, 5, (15, 2)):\n", 289 | " X_lil[i, j] = i + j\n", 290 | "\n", 291 | "print(X_lil)\n", 292 | "print(X_lil.toarray())" 293 | ] 294 | }, 295 | { 296 | "cell_type": "markdown", 297 | "metadata": { 298 | "slideshow": { 299 | "slide_type": "subslide" 300 | } 301 | }, 302 | "source": [ 303 | "* Often, once an LIL matrix is created, it is useful to convert it to a CSR format \n", 304 | " * **Note**: many scikit-learn algorithms require CSR or CSC format" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 9, 310 | "metadata": { 311 | "collapsed": false, 312 | "jupyter": { 313 | "outputs_hidden": false 314 | }, 315 | "slideshow": { 316 | "slide_type": "fragment" 317 | } 318 | }, 319 | "outputs": [ 320 | { 321 | "name": "stdout", 322 | "output_type": "stream", 323 | "text": [ 324 | " (0, 1)\t1.0\n", 325 | " (0, 2)\t2.0\n", 326 | " (1, 1)\t2.0\n", 327 | " (1, 3)\t4.0\n", 328 | " (2, 0)\t2.0\n", 329 | " (2, 1)\t3.0\n", 330 | " (2, 2)\t4.0\n", 331 | " (2, 3)\t5.0\n", 332 | " (3, 0)\t3.0\n", 333 | " (4, 0)\t4.0\n", 334 | " (4, 1)\t5.0\n", 335 | " (4, 2)\t6.0\n" 336 | ] 337 | } 338 | ], 339 | "source": [ 340 | "X_csr = X_lil.tocsr()\n", 341 | "print(X_csr)" 342 | ] 343 | }, 344 | { 345 | "cell_type": "markdown", 346 | "metadata": { 347 | "slideshow": { 348 | "slide_type": "subslide" 349 | } 350 | }, 351 | "source": [ 352 | "There are several other sparse formats that can be useful for various problems:\n", 353 | "\n", 354 | "- `CSC` (compressed sparse column)\n", 355 | "- `BSR` (block sparse row)\n", 356 | "- `COO` (coordinate)\n", 357 | "- `DIA` (diagonal)\n", 358 | "- `DOK` (dictionary of keys)" 359 | ] 360 | }, 361 | { 362 | "cell_type": "markdown", 363 | "metadata": { 364 | "slideshow": { 365 | "slide_type": "slide" 366 
| } 367 | }, 368 | "source": [ 369 | "## CSC - Compressed Sparse Column\n", 370 | "\n", 371 | "**Advantages of the CSC format**\n", 372 | "\n", 373 | " * efficient arithmetic operations CSC + CSC, CSC * CSC, etc.\n", 374 | " * efficient column slicing\n", 375 | " * fast matrix vector products (CSR, BSR may be faster)\n", 376 | "\n", 377 | "**Disadvantages of the CSC format**\n", 378 | "\n", 379 | " * slow row slicing operations (consider CSR)\n", 380 | " * changes to the sparsity structure are expensive (consider LIL or DOK)" 381 | ] 382 | }, 383 | { 384 | "cell_type": "markdown", 385 | "metadata": { 386 | "slideshow": { 387 | "slide_type": "subslide" 388 | } 389 | }, 390 | "source": [ 391 | "### BSR - Block Sparse Row\n", 392 | "\n", 393 | "The Block Compressed Row (`BSR`) format is very similar to the Compressed Sparse Row (`CSR`) format. \n", 394 | "\n", 395 | "BSR is appropriate for sparse matrices with *dense sub matrices* like the example below. \n", 396 | "\n", 397 | "Block matrices often arise in *vector-valued* finite element discretizations. \n", 398 | "\n", 399 | "In such cases, BSR is **considerably more efficient** than CSR and CSC for many sparse arithmetic operations." 
400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": 10, 405 | "metadata": { 406 | "collapsed": false, 407 | "jupyter": { 408 | "outputs_hidden": false 409 | }, 410 | "slideshow": { 411 | "slide_type": "subslide" 412 | } 413 | }, 414 | "outputs": [ 415 | { 416 | "data": { 417 | "text/plain": [ 418 | "array([[1, 1, 0, 0, 2, 2],\n", 419 | "       [1, 1, 0, 0, 2, 2],\n", 420 | "       [0, 0, 0, 0, 3, 3],\n", 421 | "       [0, 0, 0, 0, 3, 3],\n", 422 | "       [4, 4, 5, 5, 6, 6],\n", 423 | "       [4, 4, 5, 5, 6, 6]])" 424 | ] 425 | }, 426 | "execution_count": 10, 427 | "metadata": {}, 428 | "output_type": "execute_result" 429 | } 430 | ], 431 | "source": [ 432 | "from scipy.sparse import bsr_matrix\n", 433 | "\n", 434 | "indptr = np.array([0, 2, 3, 6])\n", 435 | "indices = np.array([0, 2, 2, 0, 1, 2])\n", 436 | "data = np.array([1, 2, 3, 4, 5, 6]).repeat(4).reshape(6, 2, 2)\n", 437 | "bsr_matrix((data,indices,indptr), shape=(6, 6)).toarray()" 438 | ] 439 | }, 440 | { 441 | "cell_type": "markdown", 442 | "metadata": { 443 | "slideshow": { 444 | "slide_type": "slide" 445 | } 446 | }, 447 | "source": [ 448 | "## COO - Coordinate Sparse Matrix\n", 449 | "\n", 450 | "**Advantages of the COO format**\n", 451 | "\n", 452 | " * facilitates fast conversion among sparse formats\n", 453 | " * permits duplicate entries (see example)\n", 454 | " * very fast conversion to and from CSR/CSC formats\n", 455 | "\n", 456 | "**Disadvantages of the COO format**\n", 457 | "\n", 458 | " * does not directly support arithmetic operations and slicing\n", 459 | " \n", 460 | "**Intended Usage**\n", 461 | "\n", 462 | " * COO is a fast format for constructing sparse matrices\n", 463 | " * Once a matrix has been constructed, convert to CSR or CSC format for fast arithmetic and matrix vector\n", 464 | " operations\n", 465 | " * By default when converting to CSR or CSC format, duplicate (i,j) entries will be summed together. 
\n", 466 | " This facilitates efficient construction of finite element matrices and the like.\n" 467 | ] 468 | }, 469 | { 470 | "cell_type": "markdown", 471 | "metadata": { 472 | "slideshow": { 473 | "slide_type": "slide" 474 | } 475 | }, 476 | "source": [ 477 | "## DOK - Dictionary of Keys\n", 478 | "\n", 479 | "Sparse matrices can be used in arithmetic operations: they support addition, subtraction, multiplication, division, and matrix power.\n", 480 | "\n", 481 | "Allows for efficient O(1) access of individual elements. Duplicates are not allowed. Can be efficiently converted to a coo_matrix once constructed." 482 | ] 483 | }, 484 | { 485 | "cell_type": "code", 486 | "execution_count": 11, 487 | "metadata": { 488 | "collapsed": false, 489 | "jupyter": { 490 | "outputs_hidden": false 491 | }, 492 | "slideshow": { 493 | "slide_type": "subslide" 494 | } 495 | }, 496 | "outputs": [ 497 | { 498 | "data": { 499 | "text/plain": [ 500 | "array([[0., 1., 2., 3., 4.],\n", 501 | " [0., 2., 3., 4., 5.],\n", 502 | " [0., 0., 4., 5., 6.],\n", 503 | " [0., 0., 0., 6., 7.],\n", 504 | " [0., 0., 0., 0., 8.]], dtype=float32)" 505 | ] 506 | }, 507 | "execution_count": 11, 508 | "metadata": {}, 509 | "output_type": "execute_result" 510 | } 511 | ], 512 | "source": [ 513 | "from scipy.sparse import dok_matrix\n", 514 | "S = dok_matrix((5, 5), dtype=np.float32)\n", 515 | "for i in range(5):\n", 516 | " for j in range(i, 5):\n", 517 | " S[i,j] = i+j\n", 518 | " \n", 519 | "S.toarray()" 520 | ] 521 | }, 522 | { 523 | "cell_type": "markdown", 524 | "metadata": { 525 | "slideshow": { 526 | "slide_type": "subslide" 527 | } 528 | }, 529 | "source": [ 530 | "The ``scipy.sparse`` submodule also has a lot of functions for sparse matrices\n", 531 | "including linear algebra, sparse solvers, graph algorithms, and much more." 
532 | ] 533 | } 534 | ], 535 | "metadata": { 536 | "celltoolbar": "Slideshow", 537 | "kernelspec": { 538 | "display_name": "Python 3.7 (NumPy EuroSciPy)", 539 | "language": "python", 540 | "name": "numpy-euroscipy" 541 | }, 542 | "language_info": { 543 | "codemirror_mode": { 544 | "name": "ipython", 545 | "version": 3 546 | }, 547 | "file_extension": ".py", 548 | "mimetype": "text/x-python", 549 | "name": "python", 550 | "nbconvert_exporter": "python", 551 | "pygments_lexer": "ipython3", 552 | "version": "3.7.3" 553 | } 554 | }, 555 | "nbformat": 4, 556 | "nbformat_minor": 4 557 | } 558 | -------------------------------------------------------------------------------- /05_memmapping.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "# Processing large NumPy arrays with memory mapping\n", 12 | "\n", 13 | "\n", 14 | "**Reference**: _IPython Interactive Computing and Visualization Cookbook - Second Edition, by Cyrille Rossant_" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "---" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "Sometimes, we need to deal with NumPy arrays that are too big to fit in the system memory. \n", 29 | "\n", 30 | "A common solution is to use memory mapping and implement **out-of-core** computations. \n", 31 | "\n", 32 | "The array is stored in a file on the hard drive, and we create a **memory-mapped** object to this file that can be used as a regular NumPy array. \n", 33 | "\n", 34 | "Accessing a portion of the array results in the corresponding data being automatically fetched from the hard drive. Therefore, we only consume what we use." 
35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 1, 40 | "metadata": { 41 | "collapsed": false, 42 | "jupyter": { 43 | "outputs_hidden": false 44 | }, 45 | "slideshow": { 46 | "slide_type": "subslide" 47 | } 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "import numpy as np" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 2, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "# Let's create a Memory-Mapped Array in write mode\n", 61 | "\n", 62 | "nrows, ncols = 1000000, 100\n", 63 | "f = np.memmap('memmapped.dat', dtype=np.float32, mode='w+', shape=(nrows, ncols))" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "Let's feed the array with random values, one column at a time because our system's memory is limited!" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 3, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "for i in range(ncols):\n", 80 | " f[:, i] = np.random.rand(nrows)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "Save the last column of the Array" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 4, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "x = f[:, -1]" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "Now, we flush memory changes to disk by deleting the object:" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 5, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "del f" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "Reading a memory-mapped array from disk involves the same memmap() function. 
The data type and the shape need to be specified again, as this information is not stored in the file:" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 8, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "f = np.memmap('memmapped.dat', dtype=np.float32,\n", 129 | " shape=(nrows, ncols))" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 9, 135 | "metadata": {}, 136 | "outputs": [ 137 | { 138 | "data": { 139 | "text/plain": [ 140 | "True" 141 | ] 142 | }, 143 | "execution_count": 9, 144 | "metadata": {}, 145 | "output_type": "execute_result" 146 | } 147 | ], 148 | "source": [ 149 | "np.array_equal(f[:, -1], x)" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 10, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "del f" 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | "source": [ 165 | "**Note**:\n", 166 | "\n", 167 | ">This method is not adapted for long-term storage of data and data sharing. \n", 168 | ">A better file format for this specific case will be the **HDF5**." 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": {}, 174 | "source": [ 175 | "## How `memmap` works" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "Memory mapping lets you work with huge arrays almost as if they were regular arrays. Python code that accepts a NumPy array as input will also accept a `memmap` array. However, we need to ensure that the array is used efficiently. That is, the array is never loaded as a whole (otherwise, it would waste system memory and would obviate any advantage of the technique)." 183 | ] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "metadata": {}, 188 | "source": [ 189 | "Memory mapping is also useful when you have a huge file containing raw data in a homogeneous binary format with a known **data type and shape**. 
\n", 190 | "\n", 191 | "In this case, an alternative solution is to use NumPy's `fromfile()` function with a file handle created with Python's native `open()` function. \n", 192 | "\n", 193 | "Using `f.seek()` lets you position the cursor at any location and load a given number of bytes into a NumPy array." 194 | ] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "metadata": { 199 | "slideshow": { 200 | "slide_type": "subslide" 201 | } 202 | }, 203 | "source": [ 204 | "The numpy package makes it possible to memory map large contiguous chunks of binary files as shared memory for all the Python processes running on a given host:" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": { 210 | "slideshow": { 211 | "slide_type": "slide" 212 | } 213 | }, 214 | "source": [ 215 | "### Memmap Operations" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 11, 221 | "metadata": { 222 | "collapsed": false, 223 | "jupyter": { 224 | "outputs_hidden": false 225 | }, 226 | "slideshow": { 227 | "slide_type": "subslide" 228 | } 229 | }, 230 | "outputs": [ 231 | { 232 | "name": "stdout", 233 | "output_type": "stream", 234 | "text": [ 235 | "[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n" 236 | ] 237 | } 238 | ], 239 | "source": [ 240 | "mm_w = np.memmap('small_test.mmap', shape=10, dtype=np.float32, mode='w+')\n", 241 | "print(mm_w)" 242 | ] 243 | }, 244 | { 245 | "cell_type": "markdown", 246 | "metadata": { 247 | "slideshow": { 248 | "slide_type": "subslide" 249 | } 250 | }, 251 | "source": [ 252 | "* This binary file can then be mapped as a new numpy array by all the engines having access to the same filesystem. 
\n", 253 | "* The `mode='r+'` opens this shared memory area in read write mode:" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": 12, 259 | "metadata": { 260 | "collapsed": false, 261 | "jupyter": { 262 | "outputs_hidden": false 263 | }, 264 | "slideshow": { 265 | "slide_type": "subslide" 266 | } 267 | }, 268 | "outputs": [ 269 | { 270 | "name": "stdout", 271 | "output_type": "stream", 272 | "text": [ 273 | "[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n" 274 | ] 275 | } 276 | ], 277 | "source": [ 278 | "mm_r = np.memmap('small_test.mmap', dtype=np.float32, mode='r+')\n", 279 | "print(mm_r)" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": 13, 285 | "metadata": { 286 | "collapsed": false, 287 | "jupyter": { 288 | "outputs_hidden": false 289 | }, 290 | "slideshow": { 291 | "slide_type": "fragment" 292 | } 293 | }, 294 | "outputs": [ 295 | { 296 | "name": "stdout", 297 | "output_type": "stream", 298 | "text": [ 299 | "[42. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n" 300 | ] 301 | } 302 | ], 303 | "source": [ 304 | "mm_w[0] = 42\n", 305 | "print(mm_w)" 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": 14, 311 | "metadata": { 312 | "collapsed": false, 313 | "jupyter": { 314 | "outputs_hidden": false 315 | }, 316 | "slideshow": { 317 | "slide_type": "fragment" 318 | } 319 | }, 320 | "outputs": [ 321 | { 322 | "name": "stdout", 323 | "output_type": "stream", 324 | "text": [ 325 | "[42. 0. 0. 0. 0. 0. 0. 0. 0. 
0.]\n" 326 | ] 327 | } 328 | ], 329 | "source": [ 330 | "print(mm_r)" 331 | ] 332 | }, 333 | { 334 | "cell_type": "markdown", 335 | "metadata": { 336 | "slideshow": { 337 | "slide_type": "subslide" 338 | } 339 | }, 340 | "source": [ 341 | "* Memory mapped arrays created with `mode='r+'` can be modified and the modifications are shared \n", 342 | " - in case of multiple processes" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": 15, 348 | "metadata": { 349 | "collapsed": false, 350 | "jupyter": { 351 | "outputs_hidden": false 352 | }, 353 | "slideshow": { 354 | "slide_type": "fragment" 355 | } 356 | }, 357 | "outputs": [], 358 | "source": [ 359 | "mm_r[1] = 43" 360 | ] 361 | }, 362 | { 363 | "cell_type": "code", 364 | "execution_count": 16, 365 | "metadata": { 366 | "collapsed": false, 367 | "jupyter": { 368 | "outputs_hidden": false 369 | }, 370 | "slideshow": { 371 | "slide_type": "fragment" 372 | } 373 | }, 374 | "outputs": [ 375 | { 376 | "name": "stdout", 377 | "output_type": "stream", 378 | "text": [ 379 | "[42. 43. 0. 0. 0. 0. 0. 0. 0. 
0.]\n" 380 | ] 381 | } 382 | ], 383 | "source": [ 384 | "print(mm_r)" 385 | ] 386 | }, 387 | { 388 | "cell_type": "markdown", 389 | "metadata": { 390 | "slideshow": { 391 | "slide_type": "subslide" 392 | } 393 | }, 394 | "source": [ 395 | "Memmap arrays generally behave very much like regular in-memory numpy arrays:" 396 | ] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "execution_count": 17, 401 | "metadata": { 402 | "collapsed": false, 403 | "jupyter": { 404 | "outputs_hidden": false 405 | }, 406 | "slideshow": { 407 | "slide_type": "subslide" 408 | } 409 | }, 410 | "outputs": [ 411 | { 412 | "name": "stdout", 413 | "output_type": "stream", 414 | "text": [ 415 | "85.0\n", 416 | "sum=85.0, mean=8.5, std=17.0014705657959\n" 417 | ] 418 | } 419 | ], 420 | "source": [ 421 | "print(mm_r.sum())\n", 422 | "print(\"sum={0}, mean={1}, std={2}\".format(mm_r.sum(), \n", 423 | " np.mean(mm_r), np.std(mm_r)))" 424 | ] 425 | }, 426 | { 427 | "cell_type": "markdown", 428 | "metadata": { 429 | "slideshow": { 430 | "slide_type": "subslide" 431 | } 432 | }, 433 | "source": [ 434 | "Before allocating more data let us define a couple of utility functions from the previous exercise (and more) to monitor what is used by which engine and what is still free on the cluster as a whole:" 435 | ] 436 | }, 437 | { 438 | "cell_type": "markdown", 439 | "metadata": { 440 | "slideshow": { 441 | "slide_type": "subslide" 442 | } 443 | }, 444 | "source": [ 445 | "* Let's allocate a 80MB memmap array:" 446 | ] 447 | }, 448 | { 449 | "cell_type": "code", 450 | "execution_count": 18, 451 | "metadata": { 452 | "collapsed": false, 453 | "jupyter": { 454 | "outputs_hidden": false 455 | }, 456 | "slideshow": { 457 | "slide_type": "fragment" 458 | } 459 | }, 460 | "outputs": [ 461 | { 462 | "data": { 463 | "text/plain": [ 464 | "memmap([0., 0., 0., ..., 0., 0., 0.])" 465 | ] 466 | }, 467 | "execution_count": 18, 468 | "metadata": {}, 469 | "output_type": "execute_result" 470 | } 471 | ], 472 | 
"source": [ 473 | "np.memmap('bigger_test.mmap', shape=10 * int(1e6), dtype=np.float64, mode='w+')" 474 | ] 475 | }, 476 | { 477 | "cell_type": "markdown", 478 | "metadata": { 479 | "slideshow": { 480 | "slide_type": "subslide" 481 | } 482 | }, 483 | "source": [ 484 | "No significant memory was used in this operation as we just asked the OS to allocate the buffer on the hard drive and just maintain a virtual memory area as a cheap reference to this buffer.\n", 485 | "\n", 486 | "Let's open new references to the same buffer from all the engines at once:" 487 | ] 488 | }, 489 | { 490 | "cell_type": "code", 491 | "execution_count": 19, 492 | "metadata": { 493 | "collapsed": false, 494 | "jupyter": { 495 | "outputs_hidden": false 496 | }, 497 | "slideshow": { 498 | "slide_type": "subslide" 499 | } 500 | }, 501 | "outputs": [ 502 | { 503 | "name": "stdout", 504 | "output_type": "stream", 505 | "text": [ 506 | "CPU times: user 616 µs, sys: 778 µs, total: 1.39 ms\n", 507 | "Wall time: 17.3 ms\n" 508 | ] 509 | } 510 | ], 511 | "source": [ 512 | "%time big_mmap = np.memmap('bigger_test.mmap', dtype=np.float64, mode='r+')" 513 | ] 514 | }, 515 | { 516 | "cell_type": "code", 517 | "execution_count": 20, 518 | "metadata": { 519 | "collapsed": false, 520 | "jupyter": { 521 | "outputs_hidden": false 522 | }, 523 | "slideshow": { 524 | "slide_type": "subslide" 525 | } 526 | }, 527 | "outputs": [ 528 | { 529 | "data": { 530 | "text/plain": [ 531 | "memmap([0., 0., 0., ..., 0., 0., 0.])" 532 | ] 533 | }, 534 | "execution_count": 20, 535 | "metadata": {}, 536 | "output_type": "execute_result" 537 | } 538 | ], 539 | "source": [ 540 | "big_mmap" 541 | ] 542 | }, 543 | { 544 | "cell_type": "markdown", 545 | "metadata": { 546 | "slideshow": { 547 | "slide_type": "subslide" 548 | } 549 | }, 550 | "source": [ 551 | "* Let's trigger an actual load of the data from the drive into the in-memory disk cache of the OS, this can take some time depending on the speed of the hard drive (on the 
order of 100MB/s to 300MB/s hence 3s to 8s for this dataset):" 552 | ] 553 | }, 554 | { 555 | "cell_type": "code", 556 | "execution_count": 21, 557 | "metadata": { 558 | "collapsed": false, 559 | "jupyter": { 560 | "outputs_hidden": false 561 | }, 562 | "slideshow": { 563 | "slide_type": "subslide" 564 | } 565 | }, 566 | "outputs": [ 567 | { 568 | "name": "stdout", 569 | "output_type": "stream", 570 | "text": [ 571 | "CPU times: user 20.5 ms, sys: 32.9 ms, total: 53.5 ms\n", 572 | "Wall time: 54.3 ms\n" 573 | ] 574 | }, 575 | { 576 | "data": { 577 | "text/plain": [ 578 | "0.0" 579 | ] 580 | }, 581 | "execution_count": 21, 582 | "metadata": {}, 583 | "output_type": "execute_result" 584 | } 585 | ], 586 | "source": [ 587 | "%time np.sum(big_mmap)" 588 | ] 589 | }, 590 | { 591 | "cell_type": "markdown", 592 | "metadata": { 593 | "slideshow": { 594 | "slide_type": "subslide" 595 | } 596 | }, 597 | "source": [ 598 | "* Now back into memory" 599 | ] 600 | }, 601 | { 602 | "cell_type": "code", 603 | "execution_count": 22, 604 | "metadata": { 605 | "collapsed": false, 606 | "jupyter": { 607 | "outputs_hidden": false 608 | }, 609 | "slideshow": { 610 | "slide_type": "fragment" 611 | } 612 | }, 613 | "outputs": [ 614 | { 615 | "name": "stdout", 616 | "output_type": "stream", 617 | "text": [ 618 | "CPU times: user 15 ms, sys: 1.36 ms, total: 16.4 ms\n", 619 | "Wall time: 14.7 ms\n" 620 | ] 621 | }, 622 | { 623 | "data": { 624 | "text/plain": [ 625 | "0.0" 626 | ] 627 | }, 628 | "execution_count": 22, 629 | "metadata": {}, 630 | "output_type": "execute_result" 631 | } 632 | ], 633 | "source": [ 634 | "%time np.sum(big_mmap)" 635 | ] 636 | } 637 | ], 638 | "metadata": { 639 | "celltoolbar": "Slideshow", 640 | "kernelspec": { 641 | "display_name": "Python 3.7 (NumPy EuroSciPy)", 642 | "language": "python", 643 | "name": "numpy-euroscipy" 644 | }, 645 | "language_info": { 646 | "codemirror_mode": { 647 | "name": "ipython", 648 | "version": 3 649 | }, 650 | "file_extension": 
".py", 651 | "mimetype": "text/x-python", 652 | "name": "python", 653 | "nbconvert_exporter": "python", 654 | "pygments_lexer": "ipython3", 655 | "version": "3.7.3" 656 | } 657 | }, 658 | "nbformat": 4, 659 | "nbformat_minor": 4 660 | } 661 | -------------------------------------------------------------------------------- /extra_torch_tensor.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Original Notebook\n", 8 | "\n", 9 | "### Introduction to PyTorch Tensor\n", 10 | "\n", 11 | "**Reference**: [\"What is PyTorch?\"](https://pytorch.org/tutorials/beginner/blitz/tensor_tutorial.html#sphx-glr-beginner-blitz-tensor-tutorial-py) by [Soumith Chintala](http://soumith.ch)" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "\n", 19 | "What is PyTorch?\n", 20 | "================\n", 21 | "\n", 22 | "It’s a Python-based scientific computing package targeted at two sets of\n", 23 | "audiences:\n", 24 | "\n", 25 | "- A replacement for NumPy to use the power of GPUs\n", 26 | "- a deep learning research platform that provides maximum flexibility\n", 27 | " and speed\n", 28 | "\n", 29 | "Getting Started\n", 30 | "---------------\n", 31 | "\n", 32 | "Tensors\n", 33 | "^^^^^^^\n", 34 | "\n", 35 | "Tensors are similar to NumPy’s ndarrays, with the addition being that\n", 36 | "Tensors can also be used on a GPU to accelerate computing.\n", 37 | "\n" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 1, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "import torch" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "

Note

An uninitialized matrix is declared,\n", 54 | " but does not contain definite known\n", 55 | " values before it is used. When an\n", 56 | " uninitialized matrix is created,\n", 57 | " whatever values were in the allocated\n", 58 | " memory at the time will appear as the initial values.

\n", 59 | "\n" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "Construct a 5x3 matrix, uninitialized:\n", 67 | "\n" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 2, 73 | "metadata": {}, 74 | "outputs": [ 75 | { 76 | "name": "stdout", 77 | "output_type": "stream", 78 | "text": [ 79 | "tensor([[0., 0., 0.],\n", 80 | " [0., 0., 0.],\n", 81 | " [0., 0., 0.],\n", 82 | " [0., 0., 0.],\n", 83 | " [0., 0., 0.]])\n" 84 | ] 85 | } 86 | ], 87 | "source": [ 88 | "x = torch.empty(5, 3)\n", 89 | "print(x)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 3, 95 | "metadata": {}, 96 | "outputs": [ 97 | { 98 | "data": { 99 | "text/plain": [ 100 | "torch.Tensor" 101 | ] 102 | }, 103 | "execution_count": 3, 104 | "metadata": {}, 105 | "output_type": "execute_result" 106 | } 107 | ], 108 | "source": [ 109 | "type(x)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "Construct a randomly initialized matrix:\n", 117 | "\n" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 4, 123 | "metadata": {}, 124 | "outputs": [ 125 | { 126 | "name": "stdout", 127 | "output_type": "stream", 128 | "text": [ 129 | "tensor([[0.1698, 0.9210, 0.6316],\n", 130 | " [0.3320, 0.5131, 0.0979],\n", 131 | " [0.7772, 0.0237, 0.9043],\n", 132 | " [0.8486, 0.5272, 0.0556],\n", 133 | " [0.6724, 0.9832, 0.3684]])\n" 134 | ] 135 | } 136 | ], 137 | "source": [ 138 | "x = torch.rand(5, 3)\n", 139 | "print(x)" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": {}, 145 | "source": [ 146 | "Construct a matrix filled zeros and of dtype long:\n", 147 | "\n" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 5, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "name": "stdout", 157 | "output_type": "stream", 158 | "text": [ 159 | "tensor([[0, 0, 0],\n", 160 | " [0, 0, 0],\n", 161 | " [0, 0, 0],\n", 
162 | " [0, 0, 0],\n", 163 | " [0, 0, 0]])\n" 164 | ] 165 | } 166 | ], 167 | "source": [ 168 | "x = torch.zeros(5, 3, dtype=torch.long)\n", 169 | "print(x)" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "Construct a tensor directly from data:\n", 177 | "\n" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": 6, 183 | "metadata": {}, 184 | "outputs": [ 185 | { 186 | "name": "stdout", 187 | "output_type": "stream", 188 | "text": [ 189 | "tensor([5.5000, 3.0000])\n" 190 | ] 191 | } 192 | ], 193 | "source": [ 194 | "x = torch.tensor([5.5, 3])\n", 195 | "print(x)" 196 | ] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "metadata": {}, 201 | "source": [ 202 | "or create a tensor based on an existing tensor. These methods\n", 203 | "will reuse properties of the input tensor, e.g. dtype, unless\n", 204 | "new values are provided by user\n", 205 | "\n" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 7, 211 | "metadata": {}, 212 | "outputs": [ 213 | { 214 | "name": "stdout", 215 | "output_type": "stream", 216 | "text": [ 217 | "tensor([[1., 1., 1.],\n", 218 | " [1., 1., 1.],\n", 219 | " [1., 1., 1.],\n", 220 | " [1., 1., 1.],\n", 221 | " [1., 1., 1.]], dtype=torch.float64)\n", 222 | "tensor([[ 0.5752, 1.0416, 1.3851],\n", 223 | " [ 1.3198, 0.5191, 0.9312],\n", 224 | " [ 0.7598, -1.3322, -0.6987],\n", 225 | " [-1.6433, 0.9256, -0.1461],\n", 226 | " [-0.0300, -0.1440, 0.7353]])\n" 227 | ] 228 | } 229 | ], 230 | "source": [ 231 | "x = x.new_ones(5, 3, dtype=torch.double) # new_* methods take in sizes\n", 232 | "print(x)\n", 233 | "\n", 234 | "x = torch.randn_like(x, dtype=torch.float) # override dtype!\n", 235 | "print(x) # result has the same size" 236 | ] 237 | }, 238 | { 239 | "cell_type": "markdown", 240 | "metadata": {}, 241 | "source": [ 242 | "Get its size:\n", 243 | "\n" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": 8, 249 | 
"metadata": {}, 250 | "outputs": [ 251 | { 252 | "name": "stdout", 253 | "output_type": "stream", 254 | "text": [ 255 | "torch.Size([5, 3])\n" 256 | ] 257 | } 258 | ], 259 | "source": [ 260 | "print(x.size())" 261 | ] 262 | }, 263 | { 264 | "cell_type": "markdown", 265 | "metadata": {}, 266 | "source": [ 267 | "

Note

``torch.Size`` is in fact a tuple, so it supports all tuple operations.

\n", 268 | "\n", 269 | "Operations\n", 270 | "^^^^^^^^^^\n", 271 | "There are multiple syntaxes for operations. In the following\n", 272 | "example, we will take a look at the addition operation.\n", 273 | "\n", 274 | "Addition: syntax 1\n", 275 | "\n" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": 9, 281 | "metadata": {}, 282 | "outputs": [ 283 | { 284 | "name": "stdout", 285 | "output_type": "stream", 286 | "text": [ 287 | "tensor([[ 1.1527, 1.2403, 1.4093],\n", 288 | " [ 1.7279, 0.7558, 1.0488],\n", 289 | " [ 0.9757, -0.4742, -0.0138],\n", 290 | " [-1.4919, 1.5009, 0.1834],\n", 291 | " [ 0.2617, 0.2694, 1.3259]])\n" 292 | ] 293 | } 294 | ], 295 | "source": [ 296 | "y = torch.rand(5, 3)\n", 297 | "print(x + y)" 298 | ] 299 | }, 300 | { 301 | "cell_type": "markdown", 302 | "metadata": {}, 303 | "source": [ 304 | "Addition: syntax 2\n", 305 | "\n" 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": 10, 311 | "metadata": {}, 312 | "outputs": [ 313 | { 314 | "name": "stdout", 315 | "output_type": "stream", 316 | "text": [ 317 | "tensor([[ 1.1527, 1.2403, 1.4093],\n", 318 | " [ 1.7279, 0.7558, 1.0488],\n", 319 | " [ 0.9757, -0.4742, -0.0138],\n", 320 | " [-1.4919, 1.5009, 0.1834],\n", 321 | " [ 0.2617, 0.2694, 1.3259]])\n" 322 | ] 323 | } 324 | ], 325 | "source": [ 326 | "print(torch.add(x, y))" 327 | ] 328 | }, 329 | { 330 | "cell_type": "markdown", 331 | "metadata": {}, 332 | "source": [ 333 | "Addition: providing an output tensor as argument\n", 334 | "\n" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": 11, 340 | "metadata": {}, 341 | "outputs": [ 342 | { 343 | "name": "stdout", 344 | "output_type": "stream", 345 | "text": [ 346 | "tensor([[ 1.1527, 1.2403, 1.4093],\n", 347 | " [ 1.7279, 0.7558, 1.0488],\n", 348 | " [ 0.9757, -0.4742, -0.0138],\n", 349 | " [-1.4919, 1.5009, 0.1834],\n", 350 | " [ 0.2617, 0.2694, 1.3259]])\n" 351 | ] 352 | } 353 | ], 354 | "source": [ 355 | "result = 
torch.empty(5, 3)\n", 356 | "torch.add(x, y, out=result)\n", 357 | "print(result)" 358 | ] 359 | }, 360 | { 361 | "cell_type": "markdown", 362 | "metadata": {}, 363 | "source": [ 364 | "Addition: in-place\n", 365 | "\n" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": 12, 371 | "metadata": {}, 372 | "outputs": [ 373 | { 374 | "name": "stdout", 375 | "output_type": "stream", 376 | "text": [ 377 | "tensor([[ 1.1527, 1.2403, 1.4093],\n", 378 | " [ 1.7279, 0.7558, 1.0488],\n", 379 | " [ 0.9757, -0.4742, -0.0138],\n", 380 | " [-1.4919, 1.5009, 0.1834],\n", 381 | " [ 0.2617, 0.2694, 1.3259]])\n" 382 | ] 383 | } 384 | ], 385 | "source": [ 386 | "# adds x to y\n", 387 | "y.add_(x)\n", 388 | "print(y)" 389 | ] 390 | }, 391 | { 392 | "cell_type": "markdown", 393 | "metadata": {}, 394 | "source": [ 395 | "

Note

Any operation that mutates a tensor in-place is post-fixed with an ``_``.\n", 396 | " For example: ``x.copy_(y)``, ``x.t_()``, will change ``x``.

\n", 397 | "\n", 398 | "You can use standard NumPy-like indexing with all bells and whistles!\n", 399 | "\n" 400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": 13, 405 | "metadata": {}, 406 | "outputs": [ 407 | { 408 | "name": "stdout", 409 | "output_type": "stream", 410 | "text": [ 411 | "tensor([ 1.0416, 0.5191, -1.3322, 0.9256, -0.1440])\n" 412 | ] 413 | } 414 | ], 415 | "source": [ 416 | "print(x[:, 1])" 417 | ] 418 | }, 419 | { 420 | "cell_type": "markdown", 421 | "metadata": {}, 422 | "source": [ 423 | "Resizing: If you want to resize/reshape tensor, you can use ``torch.view``:\n", 424 | "\n" 425 | ] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": 14, 430 | "metadata": {}, 431 | "outputs": [ 432 | { 433 | "name": "stdout", 434 | "output_type": "stream", 435 | "text": [ 436 | "torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])\n" 437 | ] 438 | } 439 | ], 440 | "source": [ 441 | "x = torch.randn(4, 4)\n", 442 | "y = x.view(16)\n", 443 | "z = x.view(-1, 8) # the size -1 is inferred from other dimensions\n", 444 | "print(x.size(), y.size(), z.size())" 445 | ] 446 | }, 447 | { 448 | "cell_type": "markdown", 449 | "metadata": {}, 450 | "source": [ 451 | "If you have a one element tensor, use ``.item()`` to get the value as a\n", 452 | "Python number\n", 453 | "\n" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": 15, 459 | "metadata": {}, 460 | "outputs": [ 461 | { 462 | "name": "stdout", 463 | "output_type": "stream", 464 | "text": [ 465 | "tensor([1.0556])\n", 466 | "1.0555715560913086\n" 467 | ] 468 | } 469 | ], 470 | "source": [ 471 | "x = torch.randn(1)\n", 472 | "print(x)\n", 473 | "print(x.item())" 474 | ] 475 | }, 476 | { 477 | "cell_type": "markdown", 478 | "metadata": {}, 479 | "source": [ 480 | "**Read later:**\n", 481 | "\n", 482 | "\n", 483 | " 100+ Tensor operations, including transposing, indexing, slicing,\n", 484 | " mathematical operations, linear algebra, random 
numbers, etc.,\n", 485 | " are described\n", 486 | " `here `_.\n", 487 | "\n", 488 | "NumPy Bridge\n", 489 | "------------\n", 490 | "\n", 491 | "Converting a Torch Tensor to a NumPy array and vice versa is a breeze.\n", 492 | "\n", 493 | "The Torch Tensor and NumPy array will share their underlying memory\n", 494 | "locations (if the Torch Tensor is on CPU), and changing one will change\n", 495 | "the other.\n", 496 | "\n", 497 | "Converting a Torch Tensor to a NumPy Array\n", 498 | "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", 499 | "\n" 500 | ] 501 | }, 502 | { 503 | "cell_type": "code", 504 | "execution_count": 16, 505 | "metadata": {}, 506 | "outputs": [ 507 | { 508 | "name": "stdout", 509 | "output_type": "stream", 510 | "text": [ 511 | "tensor([1., 1., 1., 1., 1.])\n" 512 | ] 513 | } 514 | ], 515 | "source": [ 516 | "a = torch.ones(5)\n", 517 | "print(a)" 518 | ] 519 | }, 520 | { 521 | "cell_type": "code", 522 | "execution_count": 17, 523 | "metadata": {}, 524 | "outputs": [ 525 | { 526 | "name": "stdout", 527 | "output_type": "stream", 528 | "text": [ 529 | "[1. 1. 1. 1. 1.]\n" 530 | ] 531 | } 532 | ], 533 | "source": [ 534 | "b = a.numpy()\n", 535 | "print(b)" 536 | ] 537 | }, 538 | { 539 | "cell_type": "markdown", 540 | "metadata": {}, 541 | "source": [ 542 | "See how the numpy array changed in value.\n", 543 | "\n" 544 | ] 545 | }, 546 | { 547 | "cell_type": "code", 548 | "execution_count": 18, 549 | "metadata": {}, 550 | "outputs": [ 551 | { 552 | "name": "stdout", 553 | "output_type": "stream", 554 | "text": [ 555 | "tensor([2., 2., 2., 2., 2.])\n", 556 | "[2. 2. 2. 2. 
2.]\n" 557 | ] 558 | } 559 | ], 560 | "source": [ 561 | "a.add_(1)\n", 562 | "print(a)\n", 563 | "print(b)" 564 | ] 565 | }, 566 | { 567 | "cell_type": "markdown", 568 | "metadata": {}, 569 | "source": [ 570 | "Converting NumPy Array to Torch Tensor\n", 571 | "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", 572 | "See how changing the np array changed the Torch Tensor automatically\n", 573 | "\n" 574 | ] 575 | }, 576 | { 577 | "cell_type": "code", 578 | "execution_count": 19, 579 | "metadata": {}, 580 | "outputs": [ 581 | { 582 | "name": "stdout", 583 | "output_type": "stream", 584 | "text": [ 585 | "[2. 2. 2. 2. 2.]\n", 586 | "tensor([2., 2., 2., 2., 2.], dtype=torch.float64)\n" 587 | ] 588 | } 589 | ], 590 | "source": [ 591 | "import numpy as np\n", 592 | "a = np.ones(5)\n", 593 | "b = torch.from_numpy(a)\n", 594 | "np.add(a, 1, out=a)\n", 595 | "print(a)\n", 596 | "print(b)" 597 | ] 598 | }, 599 | { 600 | "cell_type": "markdown", 601 | "metadata": {}, 602 | "source": [ 603 | "All the Tensors on the CPU except a CharTensor support converting to\n", 604 | "NumPy and back.\n", 605 | "\n", 606 | "CUDA Tensors\n", 607 | "------------\n", 608 | "\n", 609 | "Tensors can be moved onto any device using the ``.to`` method.\n", 610 | "\n" 611 | ] 612 | }, 613 | { 614 | "cell_type": "code", 615 | "execution_count": 20, 616 | "metadata": {}, 617 | "outputs": [], 618 | "source": [ 619 | "# let us run this cell only if CUDA is available\n", 620 | "# We will use ``torch.device`` objects to move tensors in and out of GPU\n", 621 | "if torch.cuda.is_available():\n", 622 | " device = torch.device(\"cuda\") # a CUDA device object\n", 623 | " y = torch.ones_like(x, device=device) # directly create a tensor on GPU\n", 624 | " x = x.to(device) # or just use strings ``.to(\"cuda\")``\n", 625 | " z = x + y\n", 626 | " print(z)\n", 627 | " print(z.to(\"cpu\", torch.double)) # ``.to`` can also change dtype together!" 
628 | ] 629 | } 630 | ], 631 | "metadata": { 632 | "kernelspec": { 633 | "display_name": "Python 3.7 (NumPy EuroSciPy)", 634 | "language": "python", 635 | "name": "numpy-euroscipy" 636 | }, 637 | "language_info": { 638 | "codemirror_mode": { 639 | "name": "ipython", 640 | "version": 3 641 | }, 642 | "file_extension": ".py", 643 | "mimetype": "text/x-python", 644 | "name": "python", 645 | "nbconvert_exporter": "python", 646 | "pygments_lexer": "ipython3", 647 | "version": "3.7.3" 648 | } 649 | }, 650 | "nbformat": 4, 651 | "nbformat_minor": 1 652 | } 653 | -------------------------------------------------------------------------------- /03_numpy_io_matlab.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# NumPy Serialisation and I/O\n", 8 | "\n", 9 | "In this notebook we will focus on NumPy built-in support for **Serialisation** and **I/O**. In other words, we will learn how to save and load NumPy `ndarray` objects in native (binary) format for easy sharing. Moreover we are going to discover how NumPy can load data from external files." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy as np" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": { 24 | "slideshow": { 25 | "slide_type": "subslide" 26 | } 27 | }, 28 | "source": [ 29 | "## Comma-separated values (CSV)" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": { 35 | "slideshow": { 36 | "slide_type": "subslide" 37 | } 38 | }, 39 | "source": [ 40 | "A very common file format for data files are the comma-separated values (CSV), or related format such as TSV (tab-separated values). \n", 41 | "\n", 42 | "To read data from such file into Numpy arrays we can use the `numpy.genfromtxt` function." 
43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 2, 48 | "metadata": { 49 | "jupyter": { 50 | "outputs_hidden": false 51 | }, 52 | "slideshow": { 53 | "slide_type": "subslide" 54 | } 55 | }, 56 | "outputs": [ 57 | { 58 | "name": "stdout", 59 | "output_type": "stream", 60 | "text": [ 61 | "Year Month Day T_6 T12 T18 Valid \r\n", 62 | "1800 1 1 -6.1 -6.1 -6.1 1\r\n", 63 | "1800 1 2 -15.4 -15.4 -15.4 1\r\n", 64 | "1800 1 3 -15.0 -15.0 -15.0 1\r\n", 65 | "1800 1 4 -19.3 -19.3 -19.3 1\r\n", 66 | "1800 1 5 -16.8 -16.8 -16.8 1\r\n", 67 | "1800 1 6 -11.4 -11.4 -11.4 1\r\n", 68 | "1800 1 7 -7.6 -7.6 -7.6 1\r\n", 69 | "1800 1 8 -7.1 -7.1 -7.1 1\r\n", 70 | "1800 1 9 -10.1 -10.1 -10.1 1\r\n" 71 | ] 72 | } 73 | ], 74 | "source": [ 75 | "# In Jupyter, all commands starting with ! are mapped as SHELL commands\n", 76 | "!head stockholm_td_adj.dat" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 3, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "np.genfromtxt?" 
86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 4, 91 | "metadata": { 92 | "jupyter": { 93 | "outputs_hidden": false 94 | }, 95 | "slideshow": { 96 | "slide_type": "subslide" 97 | } 98 | }, 99 | "outputs": [], 100 | "source": [ 101 | "st_temperatures = np.genfromtxt('stockholm_td_adj.dat', \n", 102 | " skip_header=1)" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 5, 108 | "metadata": { 109 | "jupyter": { 110 | "outputs_hidden": false 111 | }, 112 | "slideshow": { 113 | "slide_type": "fragment" 114 | } 115 | }, 116 | "outputs": [ 117 | { 118 | "data": { 119 | "text/plain": [ 120 | "(77431, 7)" 121 | ] 122 | }, 123 | "execution_count": 5, 124 | "metadata": {}, 125 | "output_type": "execute_result" 126 | } 127 | ], 128 | "source": [ 129 | "st_temperatures.shape" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "### DYI\n", 137 | "\n", 138 | "Let's play a bit with the data loaded `st_temperatures` to combine **fancy indexing** (i.e. 
defining conditions to get subset of data) and very simple statistics.\n", 139 | "\n", 140 | "For example:" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 6, 146 | "metadata": {}, 147 | "outputs": [ 148 | { 149 | "data": { 150 | "text/plain": [ 151 | "array([[ 1.80e+03, 1.00e+00, 1.00e+00, -6.10e+00, -6.10e+00, -6.10e+00,\n", 152 | " 1.00e+00],\n", 153 | " [ 1.80e+03, 1.00e+00, 2.00e+00, -1.54e+01, -1.54e+01, -1.54e+01,\n", 154 | " 1.00e+00],\n", 155 | " [ 1.80e+03, 1.00e+00, 3.00e+00, -1.50e+01, -1.50e+01, -1.50e+01,\n", 156 | " 1.00e+00],\n", 157 | " [ 1.80e+03, 1.00e+00, 4.00e+00, -1.93e+01, -1.93e+01, -1.93e+01,\n", 158 | " 1.00e+00],\n", 159 | " [ 1.80e+03, 1.00e+00, 5.00e+00, -1.68e+01, -1.68e+01, -1.68e+01,\n", 160 | " 1.00e+00],\n", 161 | " [ 1.80e+03, 1.00e+00, 6.00e+00, -1.14e+01, -1.14e+01, -1.14e+01,\n", 162 | " 1.00e+00],\n", 163 | " [ 1.80e+03, 1.00e+00, 7.00e+00, -7.60e+00, -7.60e+00, -7.60e+00,\n", 164 | " 1.00e+00],\n", 165 | " [ 1.80e+03, 1.00e+00, 8.00e+00, -7.10e+00, -7.10e+00, -7.10e+00,\n", 166 | " 1.00e+00],\n", 167 | " [ 1.80e+03, 1.00e+00, 9.00e+00, -1.01e+01, -1.01e+01, -1.01e+01,\n", 168 | " 1.00e+00],\n", 169 | " [ 1.80e+03, 1.00e+00, 1.00e+01, -9.50e+00, -9.50e+00, -9.50e+00,\n", 170 | " 1.00e+00]])" 171 | ] 172 | }, 173 | "execution_count": 6, 174 | "metadata": {}, 175 | "output_type": "execute_result" 176 | } 177 | ], 178 | "source": [ 179 | "st_temperatures[:10, ]" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 7, 185 | "metadata": {}, 186 | "outputs": [ 187 | { 188 | "data": { 189 | "text/plain": [ 190 | "dtype('float64')" 191 | ] 192 | }, 193 | "execution_count": 7, 194 | "metadata": {}, 195 | "output_type": "execute_result" 196 | } 197 | ], 198 | "source": [ 199 | "st_temperatures.dtype" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 8, 205 | "metadata": {}, 206 | "outputs": [ 207 | { 208 | "data": { 209 | "text/plain": [ 210 | "(array([1800, 
1801, 1802, 1803, 1804, 1805, 1806, 1807, 1808, 1809, 1810,\n", 211 | " 1811, 1812, 1813, 1814, 1815, 1816, 1817, 1818, 1819, 1820, 1821,\n", 212 | " 1822, 1823, 1824, 1825, 1826, 1827, 1828, 1829, 1830, 1831, 1832,\n", 213 | " 1833, 1834, 1835, 1836, 1837, 1838, 1839, 1840, 1841, 1842, 1843,\n", 214 | " 1844, 1845, 1846, 1847, 1848, 1849, 1850, 1851, 1852, 1853, 1854,\n", 215 | " 1855, 1856, 1857, 1858, 1859, 1860, 1861, 1862, 1863, 1864, 1865,\n", 216 | " 1866, 1867, 1868, 1869, 1870, 1871, 1872, 1873, 1874, 1875, 1876,\n", 217 | " 1877, 1878, 1879, 1880, 1881, 1882, 1883, 1884, 1885, 1886, 1887,\n", 218 | " 1888, 1889, 1890, 1891, 1892, 1893, 1894, 1895, 1896, 1897, 1898,\n", 219 | " 1899, 1900, 1901, 1902, 1903, 1904, 1905, 1906, 1907, 1908, 1909,\n", 220 | " 1910, 1911, 1912, 1913, 1914, 1915, 1916, 1917, 1918, 1919, 1920,\n", 221 | " 1921, 1922, 1923, 1924, 1925, 1926, 1927, 1928, 1929, 1930, 1931,\n", 222 | " 1932, 1933, 1934, 1935, 1936, 1937, 1938, 1939, 1940, 1941, 1942,\n", 223 | " 1943, 1944, 1945, 1946, 1947, 1948, 1949, 1950, 1951, 1952, 1953,\n", 224 | " 1954, 1955, 1956, 1957, 1958, 1959, 1960, 1961, 1962, 1963, 1964,\n", 225 | " 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975,\n", 226 | " 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986,\n", 227 | " 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997,\n", 228 | " 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,\n", 229 | " 2009, 2010, 2011]), 212)" 230 | ] 231 | }, 232 | "execution_count": 8, 233 | "metadata": {}, 234 | "output_type": "execute_result" 235 | } 236 | ], 237 | "source": [ 238 | "## Calculate which and how many years we have in our data\n", 239 | "years = np.unique(st_temperatures[:, 0]).astype(np.int)\n", 240 | "years, len(years)" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 10, 246 | "metadata": {}, 247 | "outputs": [ 248 | { 249 | "data": { 250 | "text/plain": [ 251 | "(1800, 2011)" 252 | 
] 253 | }, 254 | "execution_count": 10, 255 | "metadata": {}, 256 | "output_type": "execute_result" 257 | } 258 | ], 259 | "source": [ 260 | "years.min(), years.max()" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": 11, 266 | "metadata": {}, 267 | "outputs": [ 268 | { 269 | "name": "stdout", 270 | "output_type": "stream", 271 | "text": [ 272 | "Year Month Day T_6 T12 T18 Valid \r\n", 273 | "1800 1 1 -6.1 -6.1 -6.1 1\r\n", 274 | "1800 1 2 -15.4 -15.4 -15.4 1\r\n", 275 | "1800 1 3 -15.0 -15.0 -15.0 1\r\n", 276 | "1800 1 4 -19.3 -19.3 -19.3 1\r\n", 277 | "1800 1 5 -16.8 -16.8 -16.8 1\r\n", 278 | "1800 1 6 -11.4 -11.4 -11.4 1\r\n", 279 | "1800 1 7 -7.6 -7.6 -7.6 1\r\n", 280 | "1800 1 8 -7.1 -7.1 -7.1 1\r\n", 281 | "1800 1 9 -10.1 -10.1 -10.1 1\r\n" 282 | ] 283 | } 284 | ], 285 | "source": [ 286 | "!head stockholm_td_adj.dat" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": 12, 292 | "metadata": {}, 293 | "outputs": [], 294 | "source": [ 295 | "mask_year = st_temperatures[:, 0] == 1984" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": 24, 301 | "metadata": {}, 302 | "outputs": [], 303 | "source": [ 304 | "mask_feb = st_temperatures[:, 1] == 2" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 25, 310 | "metadata": {}, 311 | "outputs": [ 312 | { 313 | "data": { 314 | "text/plain": [ 315 | "(77431,)" 316 | ] 317 | }, 318 | "execution_count": 25, 319 | "metadata": {}, 320 | "output_type": "execute_result" 321 | } 322 | ], 323 | "source": [ 324 | "mask_feb.shape" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": 26, 330 | "metadata": {}, 331 | "outputs": [ 332 | { 333 | "data": { 334 | "text/plain": [ 335 | "dtype('bool')" 336 | ] 337 | }, 338 | "execution_count": 26, 339 | "metadata": {}, 340 | "output_type": "execute_result" 341 | } 342 | ], 343 | "source": [ 344 | "mask_year.dtype" 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 
349 | "execution_count": 27, 350 | "metadata": {}, 351 | "outputs": [ 352 | { 353 | "data": { 354 | "text/plain": [ 355 | "numpy.ndarray" 356 | ] 357 | }, 358 | "execution_count": 27, 359 | "metadata": {}, 360 | "output_type": "execute_result" 361 | } 362 | ], 363 | "source": [ 364 | "type(mask_year)" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": 28, 370 | "metadata": {}, 371 | "outputs": [], 372 | "source": [ 373 | "## Calculate the mean temperature of mid-days on February in 1984\n", 374 | "feb_noon_temps = st_temperatures[(mask_year & mask_feb), 4]" 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": 29, 380 | "metadata": {}, 381 | "outputs": [ 382 | { 383 | "data": { 384 | "text/plain": [ 385 | "numpy.ndarray" 386 | ] 387 | }, 388 | "execution_count": 29, 389 | "metadata": {}, 390 | "output_type": "execute_result" 391 | } 392 | ], 393 | "source": [ 394 | "type(feb_noon_temps)" 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "execution_count": 30, 400 | "metadata": {}, 401 | "outputs": [ 402 | { 403 | "data": { 404 | "text/plain": [ 405 | "dtype('float64')" 406 | ] 407 | }, 408 | "execution_count": 30, 409 | "metadata": {}, 410 | "output_type": "execute_result" 411 | } 412 | ], 413 | "source": [ 414 | "feb_noon_temps.dtype" 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": 31, 420 | "metadata": {}, 421 | "outputs": [ 422 | { 423 | "data": { 424 | "text/plain": [ 425 | "-1.7344827586206901" 426 | ] 427 | }, 428 | "execution_count": 31, 429 | "metadata": {}, 430 | "output_type": "execute_result" 431 | } 432 | ], 433 | "source": [ 434 | "feb_noon_temps.mean()" 435 | ] 436 | }, 437 | { 438 | "cell_type": "code", 439 | "execution_count": 21, 440 | "metadata": {}, 441 | "outputs": [], 442 | "source": [ 443 | "## ...." 
444 | ] 445 | }, 446 | { 447 | "cell_type": "markdown", 448 | "metadata": { 449 | "slideshow": { 450 | "slide_type": "slide" 451 | } 452 | }, 453 | "source": [ 454 | "## Numpy's native file format" 455 | ] 456 | }, 457 | { 458 | "cell_type": "markdown", 459 | "metadata": { 460 | "slideshow": { 461 | "slide_type": "subslide" 462 | } 463 | }, 464 | "source": [ 465 | "* Useful when storing and reading back numpy array data. \n", 466 | "\n", 467 | "* Use the functions `np.save` and `np.load`:" 468 | ] 469 | }, 470 | { 471 | "cell_type": "markdown", 472 | "metadata": { 473 | "slideshow": { 474 | "slide_type": "subslide" 475 | } 476 | }, 477 | "source": [ 478 | "### `np.save`" 479 | ] 480 | }, 481 | { 482 | "cell_type": "code", 483 | "execution_count": 22, 484 | "metadata": { 485 | "jupyter": { 486 | "outputs_hidden": false 487 | }, 488 | "slideshow": { 489 | "slide_type": "fragment" 490 | } 491 | }, 492 | "outputs": [], 493 | "source": [ 494 | "np.save(\"st_temperatures.npy\", st_temperatures)" 495 | ] 496 | }, 497 | { 498 | "cell_type": "markdown", 499 | "metadata": {}, 500 | "source": [ 501 | "**See also**:\n", 502 | "\n", 503 | "- `np.savez` : save several NumPy arrays into one single file\n", 504 | "- `np.savez_compressed`\n", 505 | "- `np.savetxt`" 506 | ] 507 | }, 508 | { 509 | "cell_type": "markdown", 510 | "metadata": { 511 | "slideshow": { 512 | "slide_type": "subslide" 513 | } 514 | }, 515 | "source": [ 516 | "### `np.load`" 517 | ] 518 | }, 519 | { 520 | "cell_type": "code", 521 | "execution_count": 23, 522 | "metadata": { 523 | "jupyter": { 524 | "outputs_hidden": false 525 | }, 526 | "slideshow": { 527 | "slide_type": "fragment" 528 | } 529 | }, 530 | "outputs": [ 531 | { 532 | "name": "stdout", 533 | "output_type": "stream", 534 | "text": [ 535 | "(77431, 7) float64\n" 536 | ] 537 | } 538 | ], 539 | "source": [ 540 | "T = np.load(\"st_temperatures.npy\")\n", 541 | "print(T.shape, T.dtype)" 542 | ] 543 | }, 544 | { 545 | "cell_type": "markdown", 546 | 
"metadata": {}, 547 | "source": [ 548 | "---" 549 | ] 550 | }, 551 | { 552 | "cell_type": "markdown", 553 | "metadata": { 554 | "slideshow": { 555 | "slide_type": "subslide" 556 | } 557 | }, 558 | "source": [ 559 | "## NumPy for Matlab Users (really?)\n", 560 | "\n", 561 | "\n", 562 | "If you are a MATLAB® user I do recommend to read [Numpy for MATLAB Users](https://docs.scipy.org/doc/numpy-1.15.0/user/numpy-for-matlab-users.html)." 563 | ] 564 | }, 565 | { 566 | "cell_type": "markdown", 567 | "metadata": {}, 568 | "source": [ 569 | "### Numpy can load and save native MATLAB® files:" 570 | ] 571 | }, 572 | { 573 | "cell_type": "markdown", 574 | "metadata": {}, 575 | "source": [ 576 | "---" 577 | ] 578 | }, 579 | { 580 | "cell_type": "markdown", 581 | "metadata": { 582 | "slideshow": { 583 | "slide_type": "slide" 584 | } 585 | }, 586 | "source": [ 587 | "### The `Matrix` Array Type" 588 | ] 589 | }, 590 | { 591 | "cell_type": "markdown", 592 | "metadata": { 593 | "slideshow": { 594 | "slide_type": "subslide" 595 | } 596 | }, 597 | "source": [ 598 | "In addition to the `numpy.ndarray` type, NumPy also support a very specific data type called `Matrix`. \n", 599 | "\n", 600 | "This special type of object has been introduced to allow for API and programming compatibility with\n", 601 | "MATLAB®. \n", 602 | "\n", 603 | "**Note**: The most relevant feature of this new _array type_ is the behavior of the standard arithmetic operators `+, -, *` to use matrix algebra, which work as they would in MATLAB." 
604 | ] 605 | }, 606 | { 607 | "cell_type": "code", 608 | "execution_count": 2, 609 | "metadata": { 610 | "slideshow": { 611 | "slide_type": "subslide" 612 | } 613 | }, 614 | "outputs": [], 615 | "source": [ 616 | "from numpy import matrix" 617 | ] 618 | }, 619 | { 620 | "cell_type": "code", 621 | "execution_count": 3, 622 | "metadata": {}, 623 | "outputs": [], 624 | "source": [ 625 | "a = np.arange(0, 5)\n", 626 | "A = np.array([[n+m*10 for n in range(5)] for m in range(5)])" 627 | ] 628 | }, 629 | { 630 | "cell_type": "code", 631 | "execution_count": 4, 632 | "metadata": {}, 633 | "outputs": [ 634 | { 635 | "data": { 636 | "text/plain": [ 637 | "array([0, 1, 2, 3, 4])" 638 | ] 639 | }, 640 | "execution_count": 4, 641 | "metadata": {}, 642 | "output_type": "execute_result" 643 | } 644 | ], 645 | "source": [ 646 | "a" 647 | ] 648 | }, 649 | { 650 | "cell_type": "code", 651 | "execution_count": 5, 652 | "metadata": {}, 653 | "outputs": [ 654 | { 655 | "data": { 656 | "text/plain": [ 657 | "array([[ 0, 1, 2, 3, 4],\n", 658 | " [10, 11, 12, 13, 14],\n", 659 | " [20, 21, 22, 23, 24],\n", 660 | " [30, 31, 32, 33, 34],\n", 661 | " [40, 41, 42, 43, 44]])" 662 | ] 663 | }, 664 | "execution_count": 5, 665 | "metadata": {}, 666 | "output_type": "execute_result" 667 | } 668 | ], 669 | "source": [ 670 | "A" 671 | ] 672 | }, 673 | { 674 | "cell_type": "code", 675 | "execution_count": 6, 676 | "metadata": { 677 | "jupyter": { 678 | "outputs_hidden": false 679 | }, 680 | "slideshow": { 681 | "slide_type": "fragment" 682 | } 683 | }, 684 | "outputs": [], 685 | "source": [ 686 | "M = matrix(A)\n", 687 | "v = matrix(a).T # make it a column vector" 688 | ] 689 | }, 690 | { 691 | "cell_type": "code", 692 | "execution_count": 7, 693 | "metadata": { 694 | "jupyter": { 695 | "outputs_hidden": false 696 | }, 697 | "slideshow": { 698 | "slide_type": "fragment" 699 | } 700 | }, 701 | "outputs": [ 702 | { 703 | "data": { 704 | "text/plain": [ 705 | "array([0, 1, 2, 3, 4])" 706 | ] 707 | }, 
708 | "execution_count": 7, 709 | "metadata": {}, 710 | "output_type": "execute_result" 711 | } 712 | ], 713 | "source": [ 714 | "a" 715 | ] 716 | }, 717 | { 718 | "cell_type": "code", 719 | "execution_count": 8, 720 | "metadata": { 721 | "jupyter": { 722 | "outputs_hidden": false 723 | }, 724 | "slideshow": { 725 | "slide_type": "subslide" 726 | } 727 | }, 728 | "outputs": [ 729 | { 730 | "data": { 731 | "text/plain": [ 732 | "matrix([[ 300, 310, 320, 330, 340],\n", 733 | " [1300, 1360, 1420, 1480, 1540],\n", 734 | " [2300, 2410, 2520, 2630, 2740],\n", 735 | " [3300, 3460, 3620, 3780, 3940],\n", 736 | " [4300, 4510, 4720, 4930, 5140]])" 737 | ] 738 | }, 739 | "execution_count": 8, 740 | "metadata": {}, 741 | "output_type": "execute_result" 742 | } 743 | ], 744 | "source": [ 745 | "M * M" 746 | ] 747 | }, 748 | { 749 | "cell_type": "code", 750 | "execution_count": 9, 751 | "metadata": {}, 752 | "outputs": [ 753 | { 754 | "data": { 755 | "text/plain": [ 756 | "array([[ 300, 310, 320, 330, 340],\n", 757 | " [1300, 1360, 1420, 1480, 1540],\n", 758 | " [2300, 2410, 2520, 2630, 2740],\n", 759 | " [3300, 3460, 3620, 3780, 3940],\n", 760 | " [4300, 4510, 4720, 4930, 5140]])" 761 | ] 762 | }, 763 | "execution_count": 9, 764 | "metadata": {}, 765 | "output_type": "execute_result" 766 | } 767 | ], 768 | "source": [ 769 | "A @ A # @ operator equivalent to np.dot(A, A)" 770 | ] 771 | }, 772 | { 773 | "cell_type": "code", 774 | "execution_count": 10, 775 | "metadata": {}, 776 | "outputs": [ 777 | { 778 | "data": { 779 | "text/plain": [ 780 | "array([[ 0, 1, 4, 9, 16],\n", 781 | " [ 100, 121, 144, 169, 196],\n", 782 | " [ 400, 441, 484, 529, 576],\n", 783 | " [ 900, 961, 1024, 1089, 1156],\n", 784 | " [1600, 1681, 1764, 1849, 1936]])" 785 | ] 786 | }, 787 | "execution_count": 10, 788 | "metadata": {}, 789 | "output_type": "execute_result" 790 | } 791 | ], 792 | "source": [ 793 | "# Element wise multiplication in NumPy\n", 794 | "A * A" 795 | ] 796 | }, 797 | { 798 | "cell_type": 
"code", 799 | "execution_count": 11, 800 | "metadata": { 801 | "jupyter": { 802 | "outputs_hidden": false 803 | }, 804 | "slideshow": { 805 | "slide_type": "subslide" 806 | } 807 | }, 808 | "outputs": [ 809 | { 810 | "data": { 811 | "text/plain": [ 812 | "matrix([[ 30],\n", 813 | " [130],\n", 814 | " [230],\n", 815 | " [330],\n", 816 | " [430]])" 817 | ] 818 | }, 819 | "execution_count": 11, 820 | "metadata": {}, 821 | "output_type": "execute_result" 822 | } 823 | ], 824 | "source": [ 825 | "M * v" 826 | ] 827 | }, 828 | { 829 | "cell_type": "code", 830 | "execution_count": 12, 831 | "metadata": {}, 832 | "outputs": [ 833 | { 834 | "data": { 835 | "text/plain": [ 836 | "array([[ 0, 1, 4, 9, 16],\n", 837 | " [ 0, 11, 24, 39, 56],\n", 838 | " [ 0, 21, 44, 69, 96],\n", 839 | " [ 0, 31, 64, 99, 136],\n", 840 | " [ 0, 41, 84, 129, 176]])" 841 | ] 842 | }, 843 | "execution_count": 12, 844 | "metadata": {}, 845 | "output_type": "execute_result" 846 | } 847 | ], 848 | "source": [ 849 | "A * a" 850 | ] 851 | }, 852 | { 853 | "cell_type": "code", 854 | "execution_count": 13, 855 | "metadata": { 856 | "jupyter": { 857 | "outputs_hidden": false 858 | }, 859 | "slideshow": { 860 | "slide_type": "subslide" 861 | } 862 | }, 863 | "outputs": [ 864 | { 865 | "data": { 866 | "text/plain": [ 867 | "matrix([[30]])" 868 | ] 869 | }, 870 | "execution_count": 13, 871 | "metadata": {}, 872 | "output_type": "execute_result" 873 | } 874 | ], 875 | "source": [ 876 | "# inner product\n", 877 | "v.T * v" 878 | ] 879 | }, 880 | { 881 | "cell_type": "code", 882 | "execution_count": 14, 883 | "metadata": { 884 | "jupyter": { 885 | "outputs_hidden": false 886 | }, 887 | "slideshow": { 888 | "slide_type": "fragment" 889 | } 890 | }, 891 | "outputs": [ 892 | { 893 | "data": { 894 | "text/plain": [ 895 | "matrix([[ 30],\n", 896 | " [131],\n", 897 | " [232],\n", 898 | " [333],\n", 899 | " [434]])" 900 | ] 901 | }, 902 | "execution_count": 14, 903 | "metadata": {}, 904 | "output_type": "execute_result" 
905 | } 906 | ], 907 | "source": [ 908 | "# with matrix objects, standard matrix algebra applies\n", 909 | "v + M*v" 910 | ] 911 | }, 912 | { 913 | "cell_type": "markdown", 914 | "metadata": { 915 | "slideshow": { 916 | "slide_type": "subslide" 917 | } 918 | }, 919 | "source": [ 920 | "If we try to add, subtract or multiply objects with incomplatible shapes we get an error:" 921 | ] 922 | }, 923 | { 924 | "cell_type": "code", 925 | "execution_count": 15, 926 | "metadata": { 927 | "jupyter": { 928 | "outputs_hidden": false 929 | }, 930 | "slideshow": { 931 | "slide_type": "fragment" 932 | } 933 | }, 934 | "outputs": [], 935 | "source": [ 936 | "v_incompat = matrix(list(range(1, 7))).T" 937 | ] 938 | }, 939 | { 940 | "cell_type": "code", 941 | "execution_count": 16, 942 | "metadata": { 943 | "jupyter": { 944 | "outputs_hidden": false 945 | }, 946 | "slideshow": { 947 | "slide_type": "fragment" 948 | } 949 | }, 950 | "outputs": [ 951 | { 952 | "data": { 953 | "text/plain": [ 954 | "((5, 5), (6, 1))" 955 | ] 956 | }, 957 | "execution_count": 16, 958 | "metadata": {}, 959 | "output_type": "execute_result" 960 | } 961 | ], 962 | "source": [ 963 | "M.shape, v_incompat.shape" 964 | ] 965 | }, 966 | { 967 | "cell_type": "code", 968 | "execution_count": 17, 969 | "metadata": { 970 | "jupyter": { 971 | "outputs_hidden": false 972 | }, 973 | "slideshow": { 974 | "slide_type": "subslide" 975 | } 976 | }, 977 | "outputs": [ 978 | { 979 | "ename": "ValueError", 980 | "evalue": "shapes (5,5) and (6,1) not aligned: 5 (dim 1) != 6 (dim 0)", 981 | "output_type": "error", 982 | "traceback": [ 983 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 984 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", 985 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mM\u001b[0m \u001b[0;34m*\u001b[0m 
\u001b[0mv_incompat\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 986 | "\u001b[0;32m~/anaconda3/envs/numpy-euroscipy/lib/python3.7/site-packages/numpy/matrixlib/defmatrix.py\u001b[0m in \u001b[0;36m__mul__\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m 218\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mother\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mN\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndarray\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtuple\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 219\u001b[0m \u001b[0;31m# This promotes 1-D vectors to row vectors\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 220\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mN\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0masmatrix\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mother\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 221\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misscalar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mother\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mother\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'__rmul__'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 222\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mN\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 987 | "\u001b[0;32m<__array_function__ 
internals>\u001b[0m in \u001b[0;36mdot\u001b[0;34m(*args, **kwargs)\u001b[0m\n", 988 | "\u001b[0;31mValueError\u001b[0m: shapes (5,5) and (6,1) not aligned: 5 (dim 1) != 6 (dim 0)" 989 | ] 990 | } 991 | ], 992 | "source": [ 993 | "M * v_incompat" 994 | ] 995 | }, 996 | { 997 | "cell_type": "markdown", 998 | "metadata": { 999 | "slideshow": { 1000 | "slide_type": "subslide" 1001 | } 1002 | }, 1003 | "source": [ 1004 | "See also the related functions: `inner`, `outer`, `cross`, `kron`, `tensordot`. \n", 1005 | "\n", 1006 | "Try for example `help(inner)`." 1007 | ] 1008 | }, 1009 | { 1010 | "cell_type": "markdown", 1011 | "metadata": {}, 1012 | "source": [ 1013 | "---" 1014 | ] 1015 | }, 1016 | { 1017 | "cell_type": "markdown", 1018 | "metadata": {}, 1019 | "source": [ 1020 | "## Loading and Saving `.mat` file" 1021 | ] 1022 | }, 1023 | { 1024 | "cell_type": "markdown", 1025 | "metadata": {}, 1026 | "source": [ 1027 | "Let's create a `numpy.ndarray` object" 1028 | ] 1029 | }, 1030 | { 1031 | "cell_type": "code", 1032 | "execution_count": 21, 1033 | "metadata": { 1034 | "slideshow": { 1035 | "slide_type": "fragment" 1036 | } 1037 | }, 1038 | "outputs": [], 1039 | "source": [ 1040 | "A = np.random.rand(10000, 300, 50) # note: this may take a while" 1041 | ] 1042 | }, 1043 | { 1044 | "cell_type": "code", 1045 | "execution_count": 22, 1046 | "metadata": { 1047 | "jupyter": { 1048 | "outputs_hidden": false 1049 | }, 1050 | "slideshow": { 1051 | "slide_type": "subslide" 1052 | } 1053 | }, 1054 | "outputs": [ 1055 | { 1056 | "data": { 1057 | "text/plain": [ 1058 | "array([[[0.30788845, 0.60569692, 0.74159203, ..., 0.99513856,\n", 1059 | " 0.86615676, 0.65581839],\n", 1060 | " [0.29972906, 0.1727805 , 0.73877596, ..., 0.57321798,\n", 1061 | " 0.52657155, 0.15148499],\n", 1062 | " [0.91677054, 0.30289045, 0.47086303, ..., 0.91076997,\n", 1063 | " 0.15659756, 0.74502433],\n", 1064 | " ...,\n", 1065 | " [0.16246413, 0.57601666, 0.64519549, ..., 0.04166688,\n", 1066 | " 
0.71115738, 0.75984878],\n", 1067 | " [0.99626814, 0.89529207, 0.89520696, ..., 0.927474 ,\n", 1068 | " 0.46998733, 0.809978 ],\n", 1069 | " [0.52545775, 0.42922203, 0.40999633, ..., 0.7497839 ,\n", 1070 | " 0.26582518, 0.68821719]],\n", 1071 | "\n", 1072 | " [[0.93763072, 0.68660253, 0.03060252, ..., 0.08489496,\n", 1073 | " 0.3368953 , 0.0040575 ],\n", 1074 | " [0.17680589, 0.44922269, 0.32552186, ..., 0.49081397,\n", 1075 | " 0.7718607 , 0.91216332],\n", 1076 | " [0.48935017, 0.28293444, 0.57762148, ..., 0.64988995,\n", 1077 | " 0.96036063, 0.62395338],\n", 1078 | " ...,\n", 1079 | " [0.77554755, 0.23174591, 0.80126054, ..., 0.34982511,\n", 1080 | " 0.13648038, 0.63953428],\n", 1081 | " [0.4502637 , 0.74376194, 0.47531237, ..., 0.94077276,\n", 1082 | " 0.64544446, 0.20241967],\n", 1083 | " [0.65158873, 0.93520847, 0.1153165 , ..., 0.92607143,\n", 1084 | " 0.42194542, 0.49231582]],\n", 1085 | "\n", 1086 | " [[0.60652634, 0.55707594, 0.7861307 , ..., 0.49618863,\n", 1087 | " 0.26073645, 0.57230289],\n", 1088 | " [0.33445447, 0.51254754, 0.89760192, ..., 0.20161607,\n", 1089 | " 0.54935607, 0.97355349],\n", 1090 | " [0.82742407, 0.13811956, 0.77549593, ..., 0.97417726,\n", 1091 | " 0.75828111, 0.20726388],\n", 1092 | " ...,\n", 1093 | " [0.89885131, 0.95168761, 0.04908857, ..., 0.26560786,\n", 1094 | " 0.19828306, 0.34056713],\n", 1095 | " [0.37462286, 0.00294645, 0.46417234, ..., 0.98287275,\n", 1096 | " 0.63560479, 0.37498829],\n", 1097 | " [0.80824186, 0.77414402, 0.27137252, ..., 0.97397635,\n", 1098 | " 0.73792667, 0.47235421]],\n", 1099 | "\n", 1100 | " ...,\n", 1101 | "\n", 1102 | " [[0.79534194, 0.19495982, 0.69419483, ..., 0.98484659,\n", 1103 | " 0.07524489, 0.35898295],\n", 1104 | " [0.75246125, 0.1448565 , 0.31596133, ..., 0.97989236,\n", 1105 | " 0.66466035, 0.09253075],\n", 1106 | " [0.13218267, 0.24674062, 0.93687433, ..., 0.26530807,\n", 1107 | " 0.64653497, 0.25848279],\n", 1108 | " ...,\n", 1109 | " [0.01839164, 0.4127106 , 0.36428583, ..., 
0.97212349,\n", 1110 | " 0.867556 , 0.58971199],\n", 1111 | " [0.49075206, 0.80264193, 0.82420669, ..., 0.13249282,\n", 1112 | " 0.70465219, 0.97575252],\n", 1113 | " [0.2735621 , 0.37780973, 0.19581884, ..., 0.55415141,\n", 1114 | " 0.33630774, 0.62376131]],\n", 1115 | "\n", 1116 | " [[0.95740591, 0.6409855 , 0.29668168, ..., 0.85582114,\n", 1117 | " 0.02653775, 0.07433918],\n", 1118 | " [0.97968508, 0.7192658 , 0.96627464, ..., 0.25708965,\n", 1119 | " 0.60037787, 0.8001345 ],\n", 1120 | " [0.98598865, 0.7660025 , 0.05743886, ..., 0.84864957,\n", 1121 | " 0.5717346 , 0.48107095],\n", 1122 | " ...,\n", 1123 | " [0.04048004, 0.24279597, 0.43556563, ..., 0.74962769,\n", 1124 | " 0.71872639, 0.08429666],\n", 1125 | " [0.09697323, 0.51034331, 0.6199531 , ..., 0.95157892,\n", 1126 | " 0.52082535, 0.36331146],\n", 1127 | " [0.91967882, 0.47842183, 0.55403126, ..., 0.99053768,\n", 1128 | " 0.68606411, 0.4186365 ]],\n", 1129 | "\n", 1130 | " [[0.83101977, 0.7800826 , 0.52552153, ..., 0.45411436,\n", 1131 | " 0.96688267, 0.14787061],\n", 1132 | " [0.76365986, 0.97841123, 0.99583821, ..., 0.96043423,\n", 1133 | " 0.72406206, 0.97100977],\n", 1134 | " [0.92772653, 0.01373546, 0.59448744, ..., 0.64587074,\n", 1135 | " 0.13641851, 0.40625453],\n", 1136 | " ...,\n", 1137 | " [0.24169963, 0.22511255, 0.85599095, ..., 0.75448232,\n", 1138 | " 0.42633244, 0.31373371],\n", 1139 | " [0.28480721, 0.83815003, 0.77828307, ..., 0.52597019,\n", 1140 | " 0.88834579, 0.09847287],\n", 1141 | " [0.32613764, 0.67313394, 0.82862416, ..., 0.87137257,\n", 1142 | " 0.13503096, 0.0888404 ]]])" 1143 | ] 1144 | }, 1145 | "execution_count": 22, 1146 | "metadata": {}, 1147 | "output_type": "execute_result" 1148 | } 1149 | ], 1150 | "source": [ 1151 | "A" 1152 | ] 1153 | }, 1154 | { 1155 | "cell_type": "markdown", 1156 | "metadata": {}, 1157 | "source": [ 1158 | "### Introducing SciPy (ecosystem)" 1159 | ] 1160 | }, 1161 | { 1162 | "cell_type": "markdown", 1163 | "metadata": {}, 1164 | "source": [ 1165 
| "![scipy](images/scipy.png)" 1166 | ] 1167 | }, 1168 | { 1169 | "cell_type": "markdown", 1170 | "metadata": {}, 1171 | "source": [ 1172 | "### `scipy.io`" 1173 | ] 1174 | }, 1175 | { 1176 | "cell_type": "code", 1177 | "execution_count": 20, 1178 | "metadata": { 1179 | "jupyter": { 1180 | "outputs_hidden": false 1181 | }, 1182 | "slideshow": { 1183 | "slide_type": "fragment" 1184 | } 1185 | }, 1186 | "outputs": [], 1187 | "source": [ 1188 | "from scipy import io as spio" 1189 | ] 1190 | }, 1191 | { 1192 | "cell_type": "markdown", 1193 | "metadata": {}, 1194 | "source": [ 1195 | "### NumPy $\\mapsto$ MATLAB : `scipy.io.savemat`" 1196 | ] 1197 | }, 1198 | { 1199 | "cell_type": "code", 1200 | "execution_count": 23, 1201 | "metadata": {}, 1202 | "outputs": [], 1203 | "source": [ 1204 | "spio.savemat('numpy_to.mat', {'A': A}, oned_as='row') # savemat expects a dictionary" 1205 | ] 1206 | }, 1207 | { 1208 | "cell_type": "markdown", 1209 | "metadata": {}, 1210 | "source": [ 1211 | "MATLAB $\\mapsto$ NumPy: `scipy.io.loadmat`" 1212 | ] 1213 | }, 1214 | { 1215 | "cell_type": "code", 1216 | "execution_count": 24, 1217 | "metadata": {}, 1218 | "outputs": [], 1219 | "source": [ 1220 | "data_dictionary = spio.loadmat('numpy_to.mat')\n" 1221 | ] 1222 | }, 1223 | { 1224 | "cell_type": "code", 1225 | "execution_count": 25, 1226 | "metadata": {}, 1227 | "outputs": [ 1228 | { 1229 | "data": { 1230 | "text/plain": [ 1231 | "['__header__', '__version__', '__globals__', 'A']" 1232 | ] 1233 | }, 1234 | "execution_count": 25, 1235 | "metadata": {}, 1236 | "output_type": "execute_result" 1237 | } 1238 | ], 1239 | "source": [ 1240 | "list(data_dictionary.keys())" 1241 | ] 1242 | }, 1243 | { 1244 | "cell_type": "code", 1245 | "execution_count": 26, 1246 | "metadata": {}, 1247 | "outputs": [ 1248 | { 1249 | "data": { 1250 | "text/plain": [ 1251 | "array([[[0.30788845, 0.60569692, 0.74159203, ..., 0.99513856,\n", 1252 | " 0.86615676, 0.65581839],\n", 1253 | " [0.29972906, 0.1727805 , 
0.73877596, ..., 0.57321798,\n", 1254 | " 0.52657155, 0.15148499],\n", 1255 | " [0.91677054, 0.30289045, 0.47086303, ..., 0.91076997,\n", 1256 | " 0.15659756, 0.74502433],\n", 1257 | " ...,\n", 1258 | " [0.16246413, 0.57601666, 0.64519549, ..., 0.04166688,\n", 1259 | " 0.71115738, 0.75984878],\n", 1260 | " [0.99626814, 0.89529207, 0.89520696, ..., 0.927474 ,\n", 1261 | " 0.46998733, 0.809978 ],\n", 1262 | " [0.52545775, 0.42922203, 0.40999633, ..., 0.7497839 ,\n", 1263 | " 0.26582518, 0.68821719]],\n", 1264 | "\n", 1265 | " [[0.93763072, 0.68660253, 0.03060252, ..., 0.08489496,\n", 1266 | " 0.3368953 , 0.0040575 ],\n", 1267 | " [0.17680589, 0.44922269, 0.32552186, ..., 0.49081397,\n", 1268 | " 0.7718607 , 0.91216332],\n", 1269 | " [0.48935017, 0.28293444, 0.57762148, ..., 0.64988995,\n", 1270 | " 0.96036063, 0.62395338],\n", 1271 | " ...,\n", 1272 | " [0.77554755, 0.23174591, 0.80126054, ..., 0.34982511,\n", 1273 | " 0.13648038, 0.63953428],\n", 1274 | " [0.4502637 , 0.74376194, 0.47531237, ..., 0.94077276,\n", 1275 | " 0.64544446, 0.20241967],\n", 1276 | " [0.65158873, 0.93520847, 0.1153165 , ..., 0.92607143,\n", 1277 | " 0.42194542, 0.49231582]],\n", 1278 | "\n", 1279 | " [[0.60652634, 0.55707594, 0.7861307 , ..., 0.49618863,\n", 1280 | " 0.26073645, 0.57230289],\n", 1281 | " [0.33445447, 0.51254754, 0.89760192, ..., 0.20161607,\n", 1282 | " 0.54935607, 0.97355349],\n", 1283 | " [0.82742407, 0.13811956, 0.77549593, ..., 0.97417726,\n", 1284 | " 0.75828111, 0.20726388],\n", 1285 | " ...,\n", 1286 | " [0.89885131, 0.95168761, 0.04908857, ..., 0.26560786,\n", 1287 | " 0.19828306, 0.34056713],\n", 1288 | " [0.37462286, 0.00294645, 0.46417234, ..., 0.98287275,\n", 1289 | " 0.63560479, 0.37498829],\n", 1290 | " [0.80824186, 0.77414402, 0.27137252, ..., 0.97397635,\n", 1291 | " 0.73792667, 0.47235421]],\n", 1292 | "\n", 1293 | " ...,\n", 1294 | "\n", 1295 | " [[0.79534194, 0.19495982, 0.69419483, ..., 0.98484659,\n", 1296 | " 0.07524489, 0.35898295],\n", 1297 | " 
[0.75246125, 0.1448565 , 0.31596133, ..., 0.97989236,\n", 1298 | " 0.66466035, 0.09253075],\n", 1299 | " [0.13218267, 0.24674062, 0.93687433, ..., 0.26530807,\n", 1300 | " 0.64653497, 0.25848279],\n", 1301 | " ...,\n", 1302 | " [0.01839164, 0.4127106 , 0.36428583, ..., 0.97212349,\n", 1303 | " 0.867556 , 0.58971199],\n", 1304 | " [0.49075206, 0.80264193, 0.82420669, ..., 0.13249282,\n", 1305 | " 0.70465219, 0.97575252],\n", 1306 | " [0.2735621 , 0.37780973, 0.19581884, ..., 0.55415141,\n", 1307 | " 0.33630774, 0.62376131]],\n", 1308 | "\n", 1309 | " [[0.95740591, 0.6409855 , 0.29668168, ..., 0.85582114,\n", 1310 | " 0.02653775, 0.07433918],\n", 1311 | " [0.97968508, 0.7192658 , 0.96627464, ..., 0.25708965,\n", 1312 | " 0.60037787, 0.8001345 ],\n", 1313 | " [0.98598865, 0.7660025 , 0.05743886, ..., 0.84864957,\n", 1314 | " 0.5717346 , 0.48107095],\n", 1315 | " ...,\n", 1316 | " [0.04048004, 0.24279597, 0.43556563, ..., 0.74962769,\n", 1317 | " 0.71872639, 0.08429666],\n", 1318 | " [0.09697323, 0.51034331, 0.6199531 , ..., 0.95157892,\n", 1319 | " 0.52082535, 0.36331146],\n", 1320 | " [0.91967882, 0.47842183, 0.55403126, ..., 0.99053768,\n", 1321 | " 0.68606411, 0.4186365 ]],\n", 1322 | "\n", 1323 | " [[0.83101977, 0.7800826 , 0.52552153, ..., 0.45411436,\n", 1324 | " 0.96688267, 0.14787061],\n", 1325 | " [0.76365986, 0.97841123, 0.99583821, ..., 0.96043423,\n", 1326 | " 0.72406206, 0.97100977],\n", 1327 | " [0.92772653, 0.01373546, 0.59448744, ..., 0.64587074,\n", 1328 | " 0.13641851, 0.40625453],\n", 1329 | " ...,\n", 1330 | " [0.24169963, 0.22511255, 0.85599095, ..., 0.75448232,\n", 1331 | " 0.42633244, 0.31373371],\n", 1332 | " [0.28480721, 0.83815003, 0.77828307, ..., 0.52597019,\n", 1333 | " 0.88834579, 0.09847287],\n", 1334 | " [0.32613764, 0.67313394, 0.82862416, ..., 0.87137257,\n", 1335 | " 0.13503096, 0.0888404 ]]])" 1336 | ] 1337 | }, 1338 | "execution_count": 26, 1339 | "metadata": {}, 1340 | "output_type": "execute_result" 1341 | } 1342 | ], 1343 | 
"source": [ 1344 | "data_dictionary['A']" 1345 | ] 1346 | }, 1347 | { 1348 | "cell_type": "code", 1349 | "execution_count": 27, 1350 | "metadata": {}, 1351 | "outputs": [], 1352 | "source": [ 1353 | "A_load = data_dictionary['A']" 1354 | ] 1355 | }, 1356 | { 1357 | "cell_type": "code", 1358 | "execution_count": 28, 1359 | "metadata": {}, 1360 | "outputs": [ 1361 | { 1362 | "data": { 1363 | "text/plain": [ 1364 | "True" 1365 | ] 1366 | }, 1367 | "execution_count": 28, 1368 | "metadata": {}, 1369 | "output_type": "execute_result" 1370 | } 1371 | ], 1372 | "source": [ 1373 | "np.all(A == A_load)" 1374 | ] 1375 | }, 1376 | { 1377 | "cell_type": "code", 1378 | "execution_count": 30, 1379 | "metadata": {}, 1380 | "outputs": [ 1381 | { 1382 | "data": { 1383 | "text/plain": [ 1384 | "numpy.ndarray" 1385 | ] 1386 | }, 1387 | "execution_count": 30, 1388 | "metadata": {}, 1389 | "output_type": "execute_result" 1390 | } 1391 | ], 1392 | "source": [ 1393 | "type(A_load)" 1394 | ] 1395 | } 1396 | ], 1397 | "metadata": { 1398 | "kernelspec": { 1399 | "display_name": "Python 3.7 (NumPy EuroSciPy)", 1400 | "language": "python", 1401 | "name": "numpy-euroscipy" 1402 | }, 1403 | "language_info": { 1404 | "codemirror_mode": { 1405 | "name": "ipython", 1406 | "version": 3 1407 | }, 1408 | "file_extension": ".py", 1409 | "mimetype": "text/x-python", 1410 | "name": "python", 1411 | "nbconvert_exporter": "python", 1412 | "pygments_lexer": "ipython3", 1413 | "version": "3.7.3" 1414 | } 1415 | }, 1416 | "nbformat": 4, 1417 | "nbformat_minor": 4 1418 | } 1419 | -------------------------------------------------------------------------------- /01_numpy_basics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "# What is Numpy" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": { 17 | 
"slideshow": { 18 | "slide_type": "slide" 19 | } 20 | }, 21 | "source": [ 22 | "NumPy is the fundamental package for scientific computing with Python. \n", 23 | "It is a package that provide high-performance vector, matrix and higher-dimensional data structures for Python. \n", 24 | "It is implemented in C and Fortran so when calculations are **vectorized**, performance is very good.\n", 25 | "\n", 26 | "So, in a nutshell:\n", 27 | "\n", 28 | "* a powerful Python extension for N-dimensional array\n", 29 | "* a tool for integrating C/C++ and Fortran code\n", 30 | "* designed for scientific computation: linear algebra and Signal Analysis\n", 31 | "\n", 32 | "If you are a MATLAB® user I do recommend to read [Numpy for MATLAB Users](https://docs.scipy.org/doc/numpy-1.15.0/user/numpy-for-matlab-users.html). \n", 33 | "\n", 34 | "I'm a supporter of the **Open Science Movement**, thus I humbly suggest you to take a look at the [Science Code Manifesto](http://sciencecodemanifesto.org/)" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": { 40 | "slideshow": { 41 | "slide_type": "slide" 42 | } 43 | }, 44 | "source": [ 45 | "# Getting Started with Numpy Arrays" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": { 51 | "slideshow": { 52 | "slide_type": "slide" 53 | } 54 | }, 55 | "source": [ 56 | "NumPy's main object is the **homogeneous** ***multidimensional array***. It is a table of elements (usually numbers), all of the same type. \n", 57 | "\n", 58 | "In Numpy dimensions are called **axes**. \n", 59 | "\n", 60 | "The number of axes is called **rank**. \n", 61 | "\n", 62 | "The most important attributes of an ndarray object are:\n", 63 | "\n", 64 | "* **ndarray.ndim** - the number of axes (dimensions) of the array. \n", 65 | "* **ndarray.shape** - the dimensions of the array. For a matrix with n rows and m columns, shape will be (n,m). \n", 66 | "* **ndarray.size** - the total number of elements of the array. 
\n", 67 | "* **ndarray.dtype** - numpy.int32, numpy.int16, and numpy.float64 are some examples. \n", 68 | "* **ndarray.itemsize** - the size in bytes of elements of the array. For example, elements of type float64 have itemsize 8 (=64/8) " 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": { 74 | "slideshow": { 75 | "slide_type": "slide" 76 | } 77 | }, 78 | "source": [ 79 | "To use `numpy` we need to import the module, for example:" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 2, 85 | "metadata": { 86 | "slideshow": { 87 | "slide_type": "fragment" 88 | } 89 | }, 90 | "outputs": [], 91 | "source": [ 92 | "import numpy as np # naming import convention" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": { 98 | "slideshow": { 99 | "slide_type": "slide" 100 | } 101 | }, 102 | "source": [ 103 | "### Terminology Assumption" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": { 109 | "slideshow": { 110 | "slide_type": "-" 111 | } 112 | }, 113 | "source": [ 114 | "In the `numpy` package the terminology used for vectors, matrices and higher-dimensional data sets is *array*. " 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": { 120 | "slideshow": { 121 | "slide_type": "slide" 122 | } 123 | }, 124 | "source": [ 125 | "### Reference Documentation" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": { 131 | "slideshow": { 132 | "slide_type": "subslide" 133 | } 134 | }, 135 | "source": [ 136 | "* On the web: [http://docs.scipy.org](http://docs.scipy.org)/\n", 137 | "\n", 138 | "* Interactive help:" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": { 145 | "slideshow": { 146 | "slide_type": "fragment" 147 | } 148 | }, 149 | "outputs": [], 150 | "source": [ 151 | "np.array?"
152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": { 157 | "slideshow": { 158 | "slide_type": "subslide" 159 | } 160 | }, 161 | "source": [ 162 | "If you're looking for something specific, use `np.lookfor` to search the NumPy docstrings." 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": { 168 | "slideshow": { 169 | "slide_type": "slide" 170 | } 171 | }, 172 | "source": [ 173 | "# Creating `numpy` arrays\n", 174 | "\n", 175 | "### Get acquainted with NumPy" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": { 181 | "slideshow": { 182 | "slide_type": "subslide" 183 | } 184 | }, 185 | "source": [ 186 | "Let's start by creating some `numpy.array` objects in order to get our hands into the very details of **numpy basic data structure**.\n", 187 | "\n", 188 | "NumPy is a very flexible library, and provides many ways to create (and initialize) new numpy arrays. \n", 189 | "\n", 190 | "One way is **using specific functions dedicated to generate numpy arrays** \n", 191 | "(usually, *array of numbers*)\\[+\\]\n", 192 | "\n", 193 | "\n", 194 | "\n", 195 | "\\[+\\] More on data types, later on !-)\n", 196 | "\n" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": { 202 | "slideshow": { 203 | "slide_type": "slide" 204 | } 205 | }, 206 | "source": [ 207 | "# First `numpy array` example: array of numbers" 208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "metadata": { 213 | "slideshow": { 214 | "slide_type": "subslide" 215 | } 216 | }, 217 | "source": [ 218 | "NumPy provides many functions to generate arrays with specific properties (e.g. `size` or `shape`).\n", 219 | "\n", 220 | "We will see later examples in which we will generate `ndarray` using explicit Python lists. \n", 221 | "\n", 222 | "However, for larger arrays, using Python lists is simply impractical. 
" 223 | ] 224 | }, 225 | { 226 | "cell_type": "markdown", 227 | "metadata": { 228 | "slideshow": { 229 | "slide_type": "subslide" 230 | } 231 | }, 232 | "source": [ 233 | "### `np.arange`" 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": {}, 239 | "source": [ 240 | "In standard Python, we use the `range` function to generate an **iterable** object of **integers** within a specific range (at a specified `step`, default: `1`)" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 3, 246 | "metadata": {}, 247 | "outputs": [ 248 | { 249 | "name": "stdout", 250 | "output_type": "stream", 251 | "text": [ 252 | "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n", 253 | "\n" 254 | ] 255 | } 256 | ], 257 | "source": [ 258 | "r = range(10)\n", 259 | "print(list(r))\n", 260 | "\n", 261 | "print(type(r)) # NOTE: if this print will return a it means you're using Py2.7" 262 | ] 263 | }, 264 | { 265 | "cell_type": "markdown", 266 | "metadata": {}, 267 | "source": [ 268 | "Similarly, in numpy there is the `arange` function which instead generates a `numpy.ndarray`" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": 4, 274 | "metadata": { 275 | "slideshow": { 276 | "slide_type": "subslide" 277 | } 278 | }, 279 | "outputs": [ 280 | { 281 | "name": "stdout", 282 | "output_type": "stream", 283 | "text": [ 284 | "[0 1 2 3 4 5 6 7 8 9]\n", 285 | "\n" 286 | ] 287 | } 288 | ], 289 | "source": [ 290 | "ra = np.arange(10) \n", 291 | "print(ra)\n", 292 | "\n", 293 | "print(type(ra))" 294 | ] 295 | }, 296 | { 297 | "cell_type": "markdown", 298 | "metadata": {}, 299 | "source": [ 300 | "However, we are working with the **Numerical Python** library, so we should expect more when it comes to numbers.\n", 301 | "\n", 302 | "In fact, we can create an array within a _floating point step-wise range_:" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": 5, 308 | "metadata": { 309 | "slideshow": { 310 | "slide_type": 
"fragment" 311 | } 312 | }, 313 | "outputs": [ 314 | { 315 | "name": "stdout", 316 | "output_type": "stream", 317 | "text": [ 318 | "[-1.00000000e+00 -9.00000000e-01 -8.00000000e-01 -7.00000000e-01\n", 319 | " -6.00000000e-01 -5.00000000e-01 -4.00000000e-01 -3.00000000e-01\n", 320 | " -2.00000000e-01 -1.00000000e-01 -2.22044605e-16 1.00000000e-01\n", 321 | " 2.00000000e-01 3.00000000e-01 4.00000000e-01 5.00000000e-01\n", 322 | " 6.00000000e-01 7.00000000e-01 8.00000000e-01 9.00000000e-01]\n" 323 | ] 324 | } 325 | ], 326 | "source": [ 327 | "# floating point step-wise range generatation\n", 328 | "raf = np.arange(-1, 1, 0.1) \n", 329 | "print(raf)" 330 | ] 331 | }, 332 | { 333 | "cell_type": "markdown", 334 | "metadata": {}, 335 | "source": [ 336 | "### Properties of `numpy array`" 337 | ] 338 | }, 339 | { 340 | "cell_type": "markdown", 341 | "metadata": {}, 342 | "source": [ 343 | "Apart from the actual content, which is of course different because specified ranges are different, the `ra` and `raf` arrays differ by their **`dtype`**:" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": 6, 349 | "metadata": {}, 350 | "outputs": [ 351 | { 352 | "name": "stdout", 353 | "output_type": "stream", 354 | "text": [ 355 | "dtype of 'ra': int64, dtype of 'raf': float64\n" 356 | ] 357 | } 358 | ], 359 | "source": [ 360 | "print(f\"dtype of 'ra': {ra.dtype}, dtype of 'raf': {raf.dtype}\")" 361 | ] 362 | }, 363 | { 364 | "cell_type": "markdown", 365 | "metadata": { 366 | "slideshow": { 367 | "slide_type": "subslide" 368 | } 369 | }, 370 | "source": [ 371 | "#### More properties of the `numpy array`" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": 7, 377 | "metadata": { 378 | "slideshow": { 379 | "slide_type": "fragment" 380 | } 381 | }, 382 | "outputs": [ 383 | { 384 | "data": { 385 | "text/plain": [ 386 | "8" 387 | ] 388 | }, 389 | "execution_count": 7, 390 | "metadata": {}, 391 | "output_type": "execute_result" 392 | } 393 
| ], 394 | "source": [ 395 | "ra.itemsize # bytes per element" 396 | ] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "execution_count": 8, 401 | "metadata": { 402 | "slideshow": { 403 | "slide_type": "fragment" 404 | } 405 | }, 406 | "outputs": [ 407 | { 408 | "data": { 409 | "text/plain": [ 410 | "80" 411 | ] 412 | }, 413 | "execution_count": 8, 414 | "metadata": {}, 415 | "output_type": "execute_result" 416 | } 417 | ], 418 | "source": [ 419 | "ra.nbytes # number of bytes" 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": 9, 425 | "metadata": { 426 | "slideshow": { 427 | "slide_type": "fragment" 428 | } 429 | }, 430 | "outputs": [ 431 | { 432 | "data": { 433 | "text/plain": [ 434 | "1" 435 | ] 436 | }, 437 | "execution_count": 9, 438 | "metadata": {}, 439 | "output_type": "execute_result" 440 | } 441 | ], 442 | "source": [ 443 | "ra.ndim # number of dimensions" 444 | ] 445 | }, 446 | { 447 | "cell_type": "code", 448 | "execution_count": 10, 449 | "metadata": {}, 450 | "outputs": [ 451 | { 452 | "data": { 453 | "text/plain": [ 454 | "(10,)" 455 | ] 456 | }, 457 | "execution_count": 10, 458 | "metadata": {}, 459 | "output_type": "execute_result" 460 | } 461 | ], 462 | "source": [ 463 | "ra.shape # shape, i.e. number of elements per-dimension/axis" 464 | ] 465 | }, 466 | { 467 | "cell_type": "code", 468 | "execution_count": null, 469 | "metadata": {}, 470 | "outputs": [], 471 | "source": [ 472 | "## please replicate the same set of operations here for `raf`\n" 473 | ] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": null, 478 | "metadata": {}, 479 | "outputs": [], 480 | "source": [ 481 | "# your code here" 482 | ] 483 | }, 484 | { 485 | "cell_type": "markdown", 486 | "metadata": {}, 487 | "source": [ 488 | "**Q**: Do you notice any relevant difference?" 
489 | ] 490 | }, 491 | { 492 | "cell_type": "markdown", 493 | "metadata": { 494 | "slideshow": { 495 | "slide_type": "subslide" 496 | } 497 | }, 498 | "source": [ 499 | "### `np.linspace` and `np.logspace`" 500 | ] 501 | }, 502 | { 503 | "cell_type": "markdown", 504 | "metadata": {}, 505 | "source": [ 506 | "Like `np.arange`, in numpy there are other two \"similar\" functions: \n", 507 | "\n", 508 | "- np.linspace\n", 509 | "- np.logspace\n", 510 | "\n", 511 | "Looking at the examples below, can you spot the difference?" 512 | ] 513 | }, 514 | { 515 | "cell_type": "code", 516 | "execution_count": 11, 517 | "metadata": { 518 | "slideshow": { 519 | "slide_type": "subslide" 520 | } 521 | }, 522 | "outputs": [ 523 | { 524 | "data": { 525 | "text/plain": [ 526 | "array([ 0. , 0.52631579, 1.05263158, 1.57894737, 2.10526316,\n", 527 | " 2.63157895, 3.15789474, 3.68421053, 4.21052632, 4.73684211,\n", 528 | " 5.26315789, 5.78947368, 6.31578947, 6.84210526, 7.36842105,\n", 529 | " 7.89473684, 8.42105263, 8.94736842, 9.47368421, 10. 
])" 530 | ] 531 | }, 532 | "execution_count": 11, 533 | "metadata": {}, 534 | "output_type": "execute_result" 535 | } 536 | ], 537 | "source": [ 538 | "np.linspace(0, 10, 20)" 539 | ] 540 | }, 541 | { 542 | "cell_type": "code", 543 | "execution_count": 12, 544 | "metadata": { 545 | "slideshow": { 546 | "slide_type": "fragment" 547 | } 548 | }, 549 | "outputs": [ 550 | { 551 | "data": { 552 | "text/plain": [ 553 | "array([1.00000000e+00, 2.27278564e+00, 5.16555456e+00, 1.17401982e+01,\n", 554 | " 2.66829540e+01, 6.06446346e+01, 1.37832255e+02, 3.13263169e+02,\n", 555 | " 7.11980032e+02, 1.61817799e+03])" 556 | ] 557 | }, 558 | "execution_count": 12, 559 | "metadata": {}, 560 | "output_type": "execute_result" 561 | } 562 | ], 563 | "source": [ 564 | "np.logspace(0, np.e**2, 10, base=np.e)" 565 | ] 566 | }, 567 | { 568 | "cell_type": "markdown", 569 | "metadata": { 570 | "slideshow": { 571 | "slide_type": "subslide" 572 | } 573 | }, 574 | "source": [ 575 | "## Random Number Generation\n", 576 | "\n", 577 | "### `np.random.rand` & `np.random.randn`" 578 | ] 579 | }, 580 | { 581 | "cell_type": "code", 582 | "execution_count": 13, 583 | "metadata": { 584 | "slideshow": { 585 | "slide_type": "subslide" 586 | } 587 | }, 588 | "outputs": [], 589 | "source": [ 590 | "# uniform random numbers in [0,1]\n", 591 | "ru = np.random.rand(10)" 592 | ] 593 | }, 594 | { 595 | "cell_type": "code", 596 | "execution_count": 14, 597 | "metadata": {}, 598 | "outputs": [ 599 | { 600 | "data": { 601 | "text/plain": [ 602 | "array([0.06629061, 0.56102955, 0.81081042, 0.80936217, 0.19182628,\n", 603 | " 0.78609316, 0.88379009, 0.45329187, 0.84304588, 0.56232631])" 604 | ] 605 | }, 606 | "execution_count": 14, 607 | "metadata": {}, 608 | "output_type": "execute_result" 609 | } 610 | ], 611 | "source": [ 612 | "ru" 613 | ] 614 | }, 615 | { 616 | "cell_type": "markdown", 617 | "metadata": {}, 618 | "source": [ 619 | "_Note: numbers and the content of the array may vary_" 620 | ] 621 | }, 622 | { 
623 | "cell_type": "code", 624 | "execution_count": 15, 625 | "metadata": { 626 | "slideshow": { 627 | "slide_type": "subslide" 628 | } 629 | }, 630 | "outputs": [], 631 | "source": [ 632 | "# standard normal distributed random numbers\n", 633 | "rs = np.random.randn(10)" 634 | ] 635 | }, 636 | { 637 | "cell_type": "code", 638 | "execution_count": 16, 639 | "metadata": {}, 640 | "outputs": [ 641 | { 642 | "data": { 643 | "text/plain": [ 644 | "array([ 0.45052791, -0.80566857, -0.10401981, 0.91948746, -0.0329787 ,\n", 645 | " -0.71872119, 1.42738938, -0.63292836, 0.5397375 , 0.89186053])" 646 | ] 647 | }, 648 | "execution_count": 16, 649 | "metadata": {}, 650 | "output_type": "execute_result" 651 | } 652 | ], 653 | "source": [ 654 | "rs" 655 | ] 656 | }, 657 | { 658 | "cell_type": "markdown", 659 | "metadata": {}, 660 | "source": [ 661 | "_Note: numbers and the content of the array may vary_" 662 | ] 663 | }, 664 | { 665 | "cell_type": "markdown", 666 | "metadata": {}, 667 | "source": [ 668 | "**Q**: What if I ask you to generate random numbers in a way that we both obtain the __very same__ numbers? (_Provided we share the same CPU architecture_)" 669 | ] 670 | }, 671 | { 672 | "cell_type": "markdown", 673 | "metadata": { 674 | "slideshow": { 675 | "slide_type": "subslide" 676 | } 677 | }, 678 | "source": [ 679 | "## Zeros and Ones (or Empty)\n", 680 | "\n", 681 | "### `np.zeros`, `np.ones`, `np.empty`\n", 682 | "\n", 683 | "Sometimes it may be required to initialise arrays of `zeros`, or of all `ones` or finally just `rubbish` (i.e. `empty`) of a specific shape:" 684 | ] 685 | }, 686 | { 687 | "cell_type": "code", 688 | "execution_count": 17, 689 | "metadata": { 690 | "slideshow": { 691 | "slide_type": "fragment" 692 | } 693 | }, 694 | "outputs": [ 695 | { 696 | "name": "stdout", 697 | "output_type": "stream", 698 | "text": [ 699 | "[[0. 0. 0.]\n", 700 | " [0. 0. 0.]\n", 701 | " [0. 0. 
0.]]\n" 702 | ] 703 | } 704 | ], 705 | "source": [ 706 | "Z = np.zeros((3,3))\n", 707 | "\n", 708 | "print(Z)" 709 | ] 710 | }, 711 | { 712 | "cell_type": "code", 713 | "execution_count": 18, 714 | "metadata": { 715 | "slideshow": { 716 | "slide_type": "subslide" 717 | } 718 | }, 719 | "outputs": [ 720 | { 721 | "name": "stdout", 722 | "output_type": "stream", 723 | "text": [ 724 | "[[1. 1. 1.]\n", 725 | " [1. 1. 1.]\n", 726 | " [1. 1. 1.]]\n" 727 | ] 728 | } 729 | ], 730 | "source": [ 731 | "O = np.ones((3, 3))\n", 732 | "print(O)" 733 | ] 734 | }, 735 | { 736 | "cell_type": "code", 737 | "execution_count": 19, 738 | "metadata": {}, 739 | "outputs": [ 740 | { 741 | "name": "stdout", 742 | "output_type": "stream", 743 | "text": [ 744 | "[0.45052791 0.80566857 0.10401981 0.91948746 0.0329787 0.71872119\n", 745 | " 1.42738938 0.63292836 0.5397375 0.89186053]\n" 746 | ] 747 | } 748 | ], 749 | "source": [ 750 | "E = np.empty(10)\n", 751 | "\n", 752 | "print(E)" 753 | ] 754 | }, 755 | { 756 | "cell_type": "code", 757 | "execution_count": null, 758 | "metadata": {}, 759 | "outputs": [], 760 | "source": [ 761 | "# TRY THIS!\n", 762 | "\n", 763 | "np.empty(9)" 764 | ] 765 | }, 766 | { 767 | "cell_type": "markdown", 768 | "metadata": { 769 | "slideshow": { 770 | "slide_type": "subslide" 771 | } 772 | }, 773 | "source": [ 774 | "# Other specialised Functions\n", 775 | "\n", 776 | "## Diagonal Matrices\n", 777 | "\n", 778 | "### 1. 
`np.diag`" 779 | ] 780 | }, 781 | { 782 | "cell_type": "code", 783 | "execution_count": 20, 784 | "metadata": { 785 | "slideshow": { 786 | "slide_type": "fragment" 787 | } 788 | }, 789 | "outputs": [ 790 | { 791 | "data": { 792 | "text/plain": [ 793 | "array([[1, 0, 0],\n", 794 | " [0, 2, 0],\n", 795 | " [0, 0, 3]])" 796 | ] 797 | }, 798 | "execution_count": 20, 799 | "metadata": {}, 800 | "output_type": "execute_result" 801 | } 802 | ], 803 | "source": [ 804 | "# a diagonal matrix\n", 805 | "np.diag([1,2,3])" 806 | ] 807 | }, 808 | { 809 | "cell_type": "code", 810 | "execution_count": 21, 811 | "metadata": { 812 | "slideshow": { 813 | "slide_type": "fragment" 814 | } 815 | }, 816 | "outputs": [ 817 | { 818 | "data": { 819 | "text/plain": [ 820 | "array([[0, 0, 3, 0],\n", 821 | " [0, 2, 0, 0],\n", 822 | " [1, 0, 0, 0],\n", 823 | " [0, 0, 0, 0]])" 824 | ] 825 | }, 826 | "execution_count": 21, 827 | "metadata": {}, 828 | "output_type": "execute_result" 829 | } 830 | ], 831 | "source": [ 832 | "# diagonal with offset from the main diagonal\n", 833 | "np.diag([1,2,3], k=1)" 834 | ] 835 | }, 836 | { 837 | "cell_type": "markdown", 838 | "metadata": { 839 | "slideshow": { 840 | "slide_type": "subslide" 841 | } 842 | }, 843 | "source": [ 844 | "### Identity Matrix $\\mathrm{I} \\mapsto$ `np.eye`" 845 | ] 846 | }, 847 | { 848 | "cell_type": "code", 849 | "execution_count": 22, 850 | "metadata": { 851 | "slideshow": { 852 | "slide_type": "fragment" 853 | } 854 | }, 855 | "outputs": [ 856 | { 857 | "data": { 858 | "text/plain": [ 859 | "array([[1, 0, 0],\n", 860 | " [0, 1, 0],\n", 861 | " [0, 0, 1]])" 862 | ] 863 | }, 864 | "execution_count": 22, 865 | "metadata": {}, 866 | "output_type": "execute_result" 867 | } 868 | ], 869 | "source": [ 870 | "# a diagonal matrix with ones on the main diagonal\n", 871 | "np.eye(3, dtype='int') # 3 is the " 872 | ] 873 | }, 874 | { 875 | "cell_type": "markdown", 876 | "metadata": {}, 877 | "source": [ 878 | "---" 879 | ] 880 | }, 881 | { 
882 | "cell_type": "markdown", 883 | "metadata": { 884 | "slideshow": { 885 | "slide_type": "subslide" 886 | } 887 | }, 888 | "source": [ 889 | "# Create `numpy.ndarray` from `list`" 890 | ] 891 | }, 892 | { 893 | "cell_type": "markdown", 894 | "metadata": { 895 | "slideshow": { 896 | "slide_type": "fragment" 897 | } 898 | }, 899 | "source": [ 900 | "To create new vector or matrix arrays from Python lists we can use the \n", 901 | "`numpy.array` constructor function:" 902 | ] 903 | }, 904 | { 905 | "cell_type": "code", 906 | "execution_count": 23, 907 | "metadata": { 908 | "slideshow": { 909 | "slide_type": "fragment" 910 | } 911 | }, 912 | "outputs": [ 913 | { 914 | "data": { 915 | "text/plain": [ 916 | "array([1, 2, 3, 4])" 917 | ] 918 | }, 919 | "execution_count": 23, 920 | "metadata": {}, 921 | "output_type": "execute_result" 922 | } 923 | ], 924 | "source": [ 925 | "v = np.array([1,2,3,4])\n", 926 | "v" 927 | ] 928 | }, 929 | { 930 | "cell_type": "code", 931 | "execution_count": 24, 932 | "metadata": {}, 933 | "outputs": [ 934 | { 935 | "name": "stdout", 936 | "output_type": "stream", 937 | "text": [ 938 | "\n" 939 | ] 940 | } 941 | ], 942 | "source": [ 943 | "print(type(v))" 944 | ] 945 | }, 946 | { 947 | "cell_type": "markdown", 948 | "metadata": {}, 949 | "source": [ 950 | "**Alternatively** there is also the `np.asarray` function which easily convert a Python list into a numpy array:\n", 951 | "\n" 952 | ] 953 | }, 954 | { 955 | "cell_type": "code", 956 | "execution_count": 25, 957 | "metadata": {}, 958 | "outputs": [ 959 | { 960 | "data": { 961 | "text/plain": [ 962 | "array([1, 2, 3, 4])" 963 | ] 964 | }, 965 | "execution_count": 25, 966 | "metadata": {}, 967 | "output_type": "execute_result" 968 | } 969 | ], 970 | "source": [ 971 | "v = np.asarray([1, 2, 3, 4])\n", 972 | "v" 973 | ] 974 | }, 975 | { 976 | "cell_type": "code", 977 | "execution_count": 26, 978 | "metadata": {}, 979 | "outputs": [ 980 | { 981 | "name": "stdout", 982 | "output_type": 
"stream", 983 | "text": [ 984 | "<class 'numpy.ndarray'>\n" 985 | ] 986 | } 987 | ], 988 | "source": [ 989 | "print(type(v))" 990 | ] 991 | }, 992 | { 993 | "cell_type": "markdown", 994 | "metadata": {}, 995 | "source": [ 996 | "We can use the very same strategy for higher-dimensional arrays.\n", 997 | "\n", 998 | "E.g. Let's create a matrix from a list of lists:" 999 | ] 1000 | }, 1001 | { 1002 | "cell_type": "code", 1003 | "execution_count": 27, 1004 | "metadata": { 1005 | "slideshow": { 1006 | "slide_type": "fragment" 1007 | } 1008 | }, 1009 | "outputs": [ 1010 | { 1011 | "data": { 1012 | "text/plain": [ 1013 | "array([[1, 2],\n", 1014 | "       [3, 4]])" 1015 | ] 1016 | }, 1017 | "execution_count": 27, 1018 | "metadata": {}, 1019 | "output_type": "execute_result" 1020 | } 1021 | ], 1022 | "source": [ 1023 | "M = np.array([[1, 2], [3, 4]])\n", 1024 | "M" 1025 | ] 1026 | }, 1027 | { 1028 | "cell_type": "code", 1029 | "execution_count": 28, 1030 | "metadata": { 1031 | "slideshow": { 1032 | "slide_type": "fragment" 1033 | } 1034 | }, 1035 | "outputs": [ 1036 | { 1037 | "data": { 1038 | "text/plain": [ 1039 | "((4,), (2, 2))" 1040 | ] 1041 | }, 1042 | "execution_count": 28, 1043 | "metadata": {}, 1044 | "output_type": "execute_result" 1045 | } 1046 | ], 1047 | "source": [ 1048 | "v.shape, M.shape" 1049 | ] 1050 | }, 1051 | { 1052 | "cell_type": "markdown", 1053 | "metadata": { 1054 | "slideshow": { 1055 | "slide_type": "slide" 1056 | } 1057 | }, 1058 | "source": [ 1059 | "## So, why is it useful then?" 1060 | ] 1061 | }, 1062 | { 1063 | "cell_type": "markdown", 1064 | "metadata": { 1065 | "slideshow": { 1066 | "slide_type": "subslide" 1067 | } 1068 | }, 1069 | "source": [ 1070 | "So far the `numpy.ndarray` looks awfully much like a Python **list** (or **nested list**). 
\n", 1071 | "\n", 1072 | "*Why not simply use Python lists for computations instead of creating a new array type?*" 1073 | ] 1074 | }, 1075 | { 1076 | "cell_type": "markdown", 1077 | "metadata": { 1078 | "slideshow": { 1079 | "slide_type": "subslide" 1080 | } 1081 | }, 1082 | "source": [ 1083 | "There are several reasons:\n", 1084 | "\n", 1085 | "* Python lists are very general. \n", 1086 | " - They can contain any kind of object. \n", 1087 | " - They are dynamically typed. \n", 1088 | " - They do not support mathematical functions such as matrix and dot multiplications, etc. \n", 1089 | " - Implementing such functions for Python lists would not be very efficient because of the dynamic typing.\n", 1090 | " \n", 1091 | " \n", 1092 | "* Numpy arrays are **statically typed** and **homogeneous**. \n", 1093 | " - The type of the elements is determined when array is created.\n", 1094 | " \n", 1095 | " \n", 1096 | "* Numpy arrays are memory efficient.\n", 1097 | " - Because of the static typing, fast implementation of mathematical functions such as multiplication and addition of `numpy` arrays can be implemented in a compiled language (C and Fortran is used)." 1098 | ] 1099 | }, 1100 | { 1101 | "cell_type": "code", 1102 | "execution_count": 29, 1103 | "metadata": { 1104 | "slideshow": { 1105 | "slide_type": "subslide" 1106 | } 1107 | }, 1108 | "outputs": [], 1109 | "source": [ 1110 | "L = range(100000)" 1111 | ] 1112 | }, 1113 | { 1114 | "cell_type": "code", 1115 | "execution_count": 30, 1116 | "metadata": { 1117 | "slideshow": { 1118 | "slide_type": "fragment" 1119 | } 1120 | }, 1121 | "outputs": [ 1122 | { 1123 | "name": "stdout", 1124 | "output_type": "stream", 1125 | "text": [ 1126 | "41.7 ms ± 14.3 ms per loop (mean ± std. dev. 
of 7 runs, 10 loops each)\n" 1127 | ] 1128 | } 1129 | ], 1130 | "source": [ 1131 | "%timeit [i**2 for i in L]" 1132 | ] 1133 | }, 1134 | { 1135 | "cell_type": "code", 1136 | "execution_count": 31, 1137 | "metadata": { 1138 | "slideshow": { 1139 | "slide_type": "fragment" 1140 | } 1141 | }, 1142 | "outputs": [], 1143 | "source": [ 1144 | "a = np.arange(100000)" 1145 | ] 1146 | }, 1147 | { 1148 | "cell_type": "code", 1149 | "execution_count": 32, 1150 | "metadata": { 1151 | "slideshow": { 1152 | "slide_type": "fragment" 1153 | } 1154 | }, 1155 | "outputs": [ 1156 | { 1157 | "name": "stdout", 1158 | "output_type": "stream", 1159 | "text": [ 1160 | "92.9 µs ± 10.1 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n" 1161 | ] 1162 | } 1163 | ], 1164 | "source": [ 1165 | "%timeit a**2 # This operation is called Broadcasting - more on this later!" 1166 | ] 1167 | }, 1168 | { 1169 | "cell_type": "code", 1170 | "execution_count": 33, 1171 | "metadata": {}, 1172 | "outputs": [ 1173 | { 1174 | "name": "stdout", 1175 | "output_type": "stream", 1176 | "text": [ 1177 | "48.4 ms ± 18.7 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" 1178 | ] 1179 | } 1180 | ], 1181 | "source": [ 1182 | "%timeit [element**2 for element in a]" 1183 | ] 1184 | }, 1185 | { 1186 | "cell_type": "markdown", 1187 | "metadata": {}, 1188 | "source": [ 1189 | "---" 1190 | ] 1191 | }, 1192 | { 1193 | "cell_type": "markdown", 1194 | "metadata": { 1195 | "slideshow": { 1196 | "slide_type": "slide" 1197 | } 1198 | }, 1199 | "source": [ 1200 | "## Exercises: DIY" 1201 | ] 1202 | }, 1203 | { 1204 | "cell_type": "markdown", 1205 | "metadata": { 1206 | "slideshow": { 1207 | "slide_type": "-" 1208 | } 1209 | }, 1210 | "source": [ 1211 | "### Simple arrays" 1212 | ] 1213 | }, 1214 | { 1215 | "cell_type": "markdown", 1216 | "metadata": { 1217 | "slideshow": { 1218 | "slide_type": "-" 1219 | } 1220 | }, 1221 | "source": [ 1222 | "* Create simple one and two dimensional arrays. 
First, redo the examples\n", 1223 | "from above. And then create your own.\n", 1224 | "\n", 1225 | "* Use the functions `len`, `shape` and `ndim` on some of those arrays and\n", 1226 | "observe their output." 1227 | ] 1228 | }, 1229 | { 1230 | "cell_type": "code", 1231 | "execution_count": null, 1232 | "metadata": { 1233 | "slideshow": { 1234 | "slide_type": "skip" 1235 | } 1236 | }, 1237 | "outputs": [], 1238 | "source": [] 1239 | }, 1240 | { 1241 | "cell_type": "markdown", 1242 | "metadata": { 1243 | "slideshow": { 1244 | "slide_type": "subslide" 1245 | } 1246 | }, 1247 | "source": [ 1248 | "### Creating arrays using functions" 1249 | ] 1250 | }, 1251 | { 1252 | "cell_type": "markdown", 1253 | "metadata": { 1254 | "slideshow": { 1255 | "slide_type": "fragment" 1256 | } 1257 | }, 1258 | "source": [ 1259 | "* Experiment with `arange`, `linspace`, `ones`, `zeros`, `eye` and `diag`.\n", 1260 | "\n", 1261 | "* Create different kinds of arrays with random numbers.\n", 1262 | "\n", 1263 | "* Try setting the seed before creating an array with random values \n", 1264 | " - *hint*: use `np.random.seed`\n" 1265 | ] 1266 | }, 1267 | { 1268 | "cell_type": "code", 1269 | "execution_count": null, 1270 | "metadata": { 1271 | "collapsed": true, 1272 | "jupyter": { 1273 | "outputs_hidden": true 1274 | }, 1275 | "slideshow": { 1276 | "slide_type": "skip" 1277 | } 1278 | }, 1279 | "outputs": [], 1280 | "source": [] 1281 | }, 1282 | { 1283 | "cell_type": "markdown", 1284 | "metadata": {}, 1285 | "source": [ 1286 | "---" 1287 | ] 1288 | }, 1289 | { 1290 | "cell_type": "markdown", 1291 | "metadata": { 1292 | "slideshow": { 1293 | "slide_type": "slide" 1294 | } 1295 | }, 1296 | "source": [ 1297 | "## Numpy Array Object" 1298 | ] 1299 | }, 1300 | { 1301 | "cell_type": "markdown", 1302 | "metadata": { 1303 | "slideshow": { 1304 | "slide_type": "subslide" 1305 | } 1306 | }, 1307 | "source": [ 1308 | "`NumPy` has a multidimensional array object called ndarray. 
It consists of two parts as follows:\n", 1309 | " \n", 1310 | " * The actual data\n", 1311 | " * Some metadata describing the data\n", 1312 | " \n", 1313 | " \n", 1314 | "The majority of array operations leave the raw data untouched. The only aspect that changes is the metadata." 1315 | ] 1316 | }, 1317 | { 1318 | "cell_type": "markdown", 1319 | "metadata": { 1320 | "slideshow": { 1321 | "slide_type": "subslide" 1322 | } 1323 | }, 1324 | "source": [ 1325 | "" 1326 | ] 1327 | }, 1328 | { 1329 | "cell_type": "markdown", 1330 | "metadata": {}, 1331 | "source": [ 1332 | "## Data vs Metadata (Attributes)" 1333 | ] 1334 | }, 1335 | { 1336 | "cell_type": "markdown", 1337 | "metadata": { 1338 | "slideshow": { 1339 | "slide_type": "subslide" 1340 | } 1341 | }, 1342 | "source": [ 1343 | "This internal separation between actual data (i.e. the content of the array --> the `memory`) and metadata (i.e. properties and attributes of the data), allows for example for an efficient memory management.\n", 1344 | "\n", 1345 | "For example, the shape of an Numpy array **can be modified without copying and/or affecting** the actual data, which makes it a fast operation even for large arrays." 
1346 | ] 1347 | }, 1348 | { 1349 | "cell_type": "code", 1350 | "execution_count": 34, 1351 | "metadata": { 1352 | "collapsed": false, 1353 | "jupyter": { 1354 | "outputs_hidden": false 1355 | }, 1356 | "slideshow": { 1357 | "slide_type": "fragment" 1358 | } 1359 | }, 1360 | "outputs": [ 1361 | { 1362 | "data": { 1363 | "text/plain": [ 1364 | "array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", 1365 | " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,\n", 1366 | " 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44])" 1367 | ] 1368 | }, 1369 | "execution_count": 34, 1370 | "metadata": {}, 1371 | "output_type": "execute_result" 1372 | } 1373 | ], 1374 | "source": [ 1375 | "a = np.arange(45)\n", 1376 | "\n", 1377 | "a" 1378 | ] 1379 | }, 1380 | { 1381 | "cell_type": "code", 1382 | "execution_count": 35, 1383 | "metadata": {}, 1384 | "outputs": [ 1385 | { 1386 | "data": { 1387 | "text/plain": [ 1388 | "(45,)" 1389 | ] 1390 | }, 1391 | "execution_count": 35, 1392 | "metadata": {}, 1393 | "output_type": "execute_result" 1394 | } 1395 | ], 1396 | "source": [ 1397 | "a.shape" 1398 | ] 1399 | }, 1400 | { 1401 | "cell_type": "code", 1402 | "execution_count": 36, 1403 | "metadata": {}, 1404 | "outputs": [ 1405 | { 1406 | "data": { 1407 | "text/plain": [ 1408 | "array([[ 0, 1, 2, 3, 4],\n", 1409 | " [ 5, 6, 7, 8, 9],\n", 1410 | " [10, 11, 12, 13, 14],\n", 1411 | " [15, 16, 17, 18, 19],\n", 1412 | " [20, 21, 22, 23, 24],\n", 1413 | " [25, 26, 27, 28, 29],\n", 1414 | " [30, 31, 32, 33, 34],\n", 1415 | " [35, 36, 37, 38, 39],\n", 1416 | " [40, 41, 42, 43, 44]])" 1417 | ] 1418 | }, 1419 | "execution_count": 36, 1420 | "metadata": {}, 1421 | "output_type": "execute_result" 1422 | } 1423 | ], 1424 | "source": [ 1425 | "A = a.reshape(9, 5)\n", 1426 | "\n", 1427 | "A" 1428 | ] 1429 | }, 1430 | { 1431 | "cell_type": "code", 1432 | "execution_count": 37, 1433 | "metadata": { 1434 | "collapsed": false, 1435 | "jupyter": { 1436 | "outputs_hidden": false 1437 | 
}, 1438 | "slideshow": { 1439 | "slide_type": "subslide" 1440 | } 1441 | }, 1442 | "outputs": [], 1443 | "source": [ 1444 | "n, m = A.shape" 1445 | ] 1446 | }, 1447 | { 1448 | "cell_type": "code", 1449 | "execution_count": 38, 1450 | "metadata": { 1451 | "collapsed": false, 1452 | "jupyter": { 1453 | "outputs_hidden": false 1454 | }, 1455 | "slideshow": { 1456 | "slide_type": "fragment" 1457 | } 1458 | }, 1459 | "outputs": [ 1460 | { 1461 | "data": { 1462 | "text/plain": [ 1463 | "array([[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,\n", 1464 | " 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,\n", 1465 | " 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44]])" 1466 | ] 1467 | }, 1468 | "execution_count": 38, 1469 | "metadata": {}, 1470 | "output_type": "execute_result" 1471 | } 1472 | ], 1473 | "source": [ 1474 | "B = A.reshape((1,n*m))\n", 1475 | "B" 1476 | ] 1477 | }, 1478 | { 1479 | "cell_type": "markdown", 1480 | "metadata": {}, 1481 | "source": [ 1482 | "**Q**: What is the difference (in terms of shape) between `B` and the original `a`?" 1483 | ] 1484 | }, 1485 | { 1486 | "cell_type": "markdown", 1487 | "metadata": { 1488 | "slideshow": { 1489 | "slide_type": "slide" 1490 | } 1491 | }, 1492 | "source": [ 1493 | "### Flattening\n", 1494 | "\n", 1495 | "Another (quite common) reshaping operation you will end up performing on n-dimensional arrays is **flattening**.\n", 1496 | "\n", 1497 | "Flattening means _collapsing all the axis into a unique one_" 1498 | ] 1499 | }, 1500 | { 1501 | "cell_type": "markdown", 1502 | "metadata": { 1503 | "slideshow": { 1504 | "slide_type": "subslide" 1505 | } 1506 | }, 1507 | "source": [ 1508 | "### `np.ravel`\n", 1509 | "\n", 1510 | "`numpy.ndarray` objects have a `ravel` method that generates a new version of the array as a `1D` vector. \n", 1511 | "\n", 1512 | "Also this time, the original memory is unaffected, and a pointer with different metadata is returned." 
1513 | ] 1514 | }, 1515 | { 1516 | "cell_type": "code", 1517 | "execution_count": 39, 1518 | "metadata": { 1519 | "collapsed": false, 1520 | "jupyter": { 1521 | "outputs_hidden": false 1522 | }, 1523 | "slideshow": { 1524 | "slide_type": "fragment" 1525 | } 1526 | }, 1527 | "outputs": [ 1528 | { 1529 | "data": { 1530 | "text/plain": [ 1531 | "array([1, 2, 3, 4, 5, 6])" 1532 | ] 1533 | }, 1534 | "execution_count": 39, 1535 | "metadata": {}, 1536 | "output_type": "execute_result" 1537 | } 1538 | ], 1539 | "source": [ 1540 | "A = np.array([[1, 2, 3], [4, 5, 6]])\n", 1541 | "A.ravel()" 1542 | ] 1543 | }, 1544 | { 1545 | "cell_type": "markdown", 1546 | "metadata": {}, 1547 | "source": [ 1548 | "By default, `np.ravel` performs the operation _row-wise_, à la C. NumPy also supports a Fortran-style order of indices (i.e. _column-major_ indexing)" 1549 | ] 1550 | }, 1551 | { 1552 | "cell_type": "code", 1553 | "execution_count": 40, 1554 | "metadata": {}, 1555 | "outputs": [ 1556 | { 1557 | "data": { 1558 | "text/plain": [ 1559 | "array([1, 4, 2, 5, 3, 6])" 1560 | ] 1561 | }, 1562 | "execution_count": 40, 1563 | "metadata": {}, 1564 | "output_type": "execute_result" 1565 | } 1566 | ], 1567 | "source": [ 1568 | "A.ravel('F') # order F (Fortran) is column-major, C (default) row-major" 1569 | ] 1570 | }, 1571 | { 1572 | "cell_type": "markdown", 1573 | "metadata": { 1574 | "slideshow": { 1575 | "slide_type": "subslide" 1576 | } 1577 | }, 1578 | "source": [ 1579 | "**Alternatively**, we can also use the `flatten` method of `ndarray` to turn a higher-dimensional array into a vector. Unlike `ravel`, this method always creates a copy of the data." 
1580 | ] 1581 | }, 1582 | { 1583 | "cell_type": "markdown", 1584 | "metadata": {}, 1585 | "source": [ 1586 | "### Transpose\n", 1587 | "\n", 1588 | "Similarly, we can transpose a matrix" 1589 | ] 1590 | }, 1591 | { 1592 | "cell_type": "code", 1593 | "execution_count": 41, 1594 | "metadata": { 1595 | "collapsed": false, 1596 | "jupyter": { 1597 | "outputs_hidden": false 1598 | }, 1599 | "slideshow": { 1600 | "slide_type": "subslide" 1601 | } 1602 | }, 1603 | "outputs": [ 1604 | { 1605 | "data": { 1606 | "text/plain": [ 1607 | "array([[1, 4],\n", 1608 | " [2, 5],\n", 1609 | " [3, 6]])" 1610 | ] 1611 | }, 1612 | "execution_count": 41, 1613 | "metadata": {}, 1614 | "output_type": "execute_result" 1615 | } 1616 | ], 1617 | "source": [ 1618 | "A.T" 1619 | ] 1620 | }, 1621 | { 1622 | "cell_type": "code", 1623 | "execution_count": 42, 1624 | "metadata": { 1625 | "collapsed": false, 1626 | "jupyter": { 1627 | "outputs_hidden": false 1628 | }, 1629 | "slideshow": { 1630 | "slide_type": "subslide" 1631 | } 1632 | }, 1633 | "outputs": [ 1634 | { 1635 | "data": { 1636 | "text/plain": [ 1637 | "array([1, 4, 2, 5, 3, 6])" 1638 | ] 1639 | }, 1640 | "execution_count": 42, 1641 | "metadata": {}, 1642 | "output_type": "execute_result" 1643 | } 1644 | ], 1645 | "source": [ 1646 | "A.T.ravel()" 1647 | ] 1648 | }, 1649 | { 1650 | "cell_type": "markdown", 1651 | "metadata": {}, 1652 | "source": [ 1653 | "## Introducing `np.newaxis`\n", 1654 | "\n", 1655 | "In addition to shape, we can also manipulate the axis of an array." 
1656 | ] 1657 | }, 1658 | { 1659 | "cell_type": "markdown", 1660 | "metadata": {}, 1661 | "source": [ 1662 | "**(1)** We can always add as many axes as we want:" 1663 | ] 1664 | }, 1665 | { 1666 | "cell_type": "code", 1667 | "execution_count": 43, 1668 | "metadata": {}, 1669 | "outputs": [ 1670 | { 1671 | "name": "stdout", 1672 | "output_type": "stream", 1673 | "text": [ 1674 | "(1, 10, 2)\n" 1675 | ] 1676 | } 1677 | ], 1678 | "source": [ 1679 | "A = np.arange(20).reshape(10, 2)\n", 1680 | "A = A[np.newaxis, ...] # this is called ellipsis\n", 1681 | "\n", 1682 | "print(A.shape)" 1683 | ] 1684 | }, 1685 | { 1686 | "cell_type": "markdown", 1687 | "metadata": {}, 1688 | "source": [ 1689 | "**(2)** We can also _permute_ axes:" 1690 | ] 1691 | }, 1692 | { 1693 | "cell_type": "code", 1694 | "execution_count": 44, 1695 | "metadata": {}, 1696 | "outputs": [ 1697 | { 1698 | "name": "stdout", 1699 | "output_type": "stream", 1700 | "text": [ 1701 | "(2, 10, 1)\n" 1702 | ] 1703 | } 1704 | ], 1705 | "source": [ 1706 | "A = A.swapaxes(0, 2) # swap axis 0 with axis 2 --> new shape: (2, 10, 1)\n", 1707 | "\n", 1708 | "print(A.shape)" 1709 | ] 1710 | }, 1711 | { 1712 | "cell_type": "markdown", 1713 | "metadata": {}, 1714 | "source": [ 1715 | "Again, changing and manipulating the `axes` will not touch the memory; it will just change the parameters (i.e. `strides` and `offset`) used to navigate the data." 1716 | ] 1717 | }, 1718 | { 1719 | "cell_type": "markdown", 1720 | "metadata": {}, 1721 | "source": [ 1722 | "---" 1723 | ] 1724 | }, 1725 | { 1726 | "cell_type": "markdown", 1727 | "metadata": { 1728 | "slideshow": { 1729 | "slide_type": "slide" 1730 | } 1731 | }, 1732 | "source": [ 1733 | "## Numerical Types and Precision\n", 1734 | "\n", 1735 | "In NumPy, talking about `int` or `float` does not make \"real sense\". 
This is mainly for two reasons:\n", 1736 | "\n", 1737 | "(a) `int` or `float` are assumed at the maximum precision available on your machine (presumably `int64` and \n", 1738 | "`float64`, respectively.\n", 1739 | "\n", 1740 | "(b) Different precision imply different numerical ranges, and so different memory size (i.e. _number of bytes_ required to represent all the numbers in the corresponding numerical range).\n", 1741 | "\n", 1742 | "Numpy support the following numerical types:" 1743 | ] 1744 | }, 1745 | { 1746 | "cell_type": "markdown", 1747 | "metadata": { 1748 | "slideshow": { 1749 | "slide_type": "subslide" 1750 | } 1751 | }, 1752 | "source": [ 1753 | " bool | This stores boolean (True or False) as a bit\n", 1754 | "\n", 1755 | " int0 | This is a platform integer (normally either int32 or int64)\n", 1756 | " int8 | This is an integer ranging from -128 to 127\n", 1757 | " int16 | This is an integer ranging from -32768 to 32767\n", 1758 | " int32 | This is an integer ranging from -2 ** 31 to 2 ** 31 -1\n", 1759 | " int64 | This is an integer ranging from -2 ** 63 to 2 ** 63 -1\n", 1760 | " \n", 1761 | " uint8 | This is an unsigned integer ranging from 0 to 255\n", 1762 | " uint16 | This is an unsigned integer ranging from 0 to 65535\n", 1763 | " uint32 | This is an unsigned integer ranging from 0 to 2 ** 32 - 1\n", 1764 | " uint64 | This is an unsigned integer ranging from 0 to 2 ** 64 - 1\n", 1765 | "\n", 1766 | " float16 | This is a half precision float with sign bit, 5 bits exponent, and 10 bits mantissa\n", 1767 | " float32 | This is a single precision float with sign bit, 8 bits exponent, and 23 bits mantissa\n", 1768 | " float64 or float | This is a double precision float with sign bit, 11 bits exponent, and 52 bits mantissa\n", 1769 | " complex64 | This is a complex number represented by two 32-bit floats (real and imaginary components)\n", 1770 | " complex128 | This is a complex number represented by two 64-bit floats (real and imaginary 
components)\n", 1771 | " (or complex)\n" 1772 | ] 1773 | }, 1774 | { 1775 | "cell_type": "markdown", 1776 | "metadata": { 1777 | "slideshow": { 1778 | "slide_type": "slide" 1779 | } 1780 | }, 1781 | "source": [ 1782 | "### Numerical Types and Representation" 1783 | ] 1784 | }, 1785 | { 1786 | "cell_type": "markdown", 1787 | "metadata": { 1788 | "slideshow": { 1789 | "slide_type": "subslide" 1790 | } 1791 | }, 1792 | "source": [ 1793 | "The **numerical dtype** of an array should be selected very carefully, as it directly affects the numerical representation of elements, that is: \n", 1794 | "\n", 1795 | " * the number of **bytes** used; \n", 1796 | " * the *numerical range*" 1797 | ] 1798 | }, 1799 | { 1800 | "cell_type": "markdown", 1801 | "metadata": {}, 1802 | "source": [ 1803 | "We can **always specify** the `dtype` of an array when we create one. If we do not, the `dtype` of the array will be inferred, namely `np.int_` or `np.float_` depending on the case." 1804 | ] 1805 | }, 1806 | { 1807 | "cell_type": "code", 1808 | "execution_count": 45, 1809 | "metadata": {}, 1810 | "outputs": [ 1811 | { 1812 | "name": "stdout", 1813 | "output_type": "stream", 1814 | "text": [ 1815 | "[0 1 2 3 4 5 6 7 8 9]\n", 1816 | "int64\n" 1817 | ] 1818 | } 1819 | ], 1820 | "source": [ 1821 | "a = np.arange(10)\n", 1822 | "print(a)\n", 1823 | "\n", 1824 | "print(a.dtype)" 1825 | ] 1826 | }, 1827 | { 1828 | "cell_type": "code", 1829 | "execution_count": 46, 1830 | "metadata": {}, 1831 | "outputs": [ 1832 | { 1833 | "name": "stdout", 1834 | "output_type": "stream", 1835 | "text": [ 1836 | "[0 1 2 3 4 5 6 7 8 9]\n", 1837 | "uint8\n" 1838 | ] 1839 | } 1840 | ], 1841 | "source": [ 1842 | "au = np.arange(10, dtype=np.uint8)\n", 1843 | "print(au)\n", 1844 | "\n", 1845 | "print(au.dtype)" 1846 | ] 1847 | }, 1848 | { 1849 | "cell_type": "markdown", 1850 | "metadata": { 1851 | "slideshow": { 1852 | "slide_type": "subslide" 1853 | } 1854 | }, 1855 | "source": [ 1856 | "So, then: **What happens if 
I try to represent a number that is Out of range?**\n", 1857 | "\n", 1858 | "Let's have a go with **integers**, i.e., `int8` and `uint8`" 1859 | ] 1860 | }, 1861 | { 1862 | "cell_type": "code", 1863 | "execution_count": 47, 1864 | "metadata": { 1865 | "slideshow": { 1866 | "slide_type": "fragment" 1867 | } 1868 | }, 1869 | "outputs": [ 1870 | { 1871 | "data": { 1872 | "text/plain": [ 1873 | "array([0, 0, 0, 0], dtype=int8)" 1874 | ] 1875 | }, 1876 | "execution_count": 47, 1877 | "metadata": {}, 1878 | "output_type": "execute_result" 1879 | } 1880 | ], 1881 | "source": [ 1882 | "x = np.zeros(4, 'int8') # Integer ranging from -128 to 127\n", 1883 | "x" 1884 | ] 1885 | }, 1886 | { 1887 | "cell_type": "markdown", 1888 | "metadata": {}, 1889 | "source": [ 1890 | ">__Spoiler Alert__: _very simple example of indexing in NumPy_\n", 1891 | ">\n", 1892 | "> _Well...it works as expected, doesn't it?_" 1893 | ] 1894 | }, 1895 | { 1896 | "cell_type": "code", 1897 | "execution_count": 48, 1898 | "metadata": { 1899 | "slideshow": { 1900 | "slide_type": "subslide" 1901 | } 1902 | }, 1903 | "outputs": [ 1904 | { 1905 | "data": { 1906 | "text/plain": [ 1907 | "array([127, 0, 0, 0], dtype=int8)" 1908 | ] 1909 | }, 1910 | "execution_count": 48, 1911 | "metadata": {}, 1912 | "output_type": "execute_result" 1913 | } 1914 | ], 1915 | "source": [ 1916 | "x[0] = 127\n", 1917 | "x" 1918 | ] 1919 | }, 1920 | { 1921 | "cell_type": "code", 1922 | "execution_count": 49, 1923 | "metadata": { 1924 | "slideshow": { 1925 | "slide_type": "fragment" 1926 | } 1927 | }, 1928 | "outputs": [ 1929 | { 1930 | "data": { 1931 | "text/plain": [ 1932 | "array([-128, 0, 0, 0], dtype=int8)" 1933 | ] 1934 | }, 1935 | "execution_count": 49, 1936 | "metadata": {}, 1937 | "output_type": "execute_result" 1938 | } 1939 | ], 1940 | "source": [ 1941 | "x[0] = 128\n", 1942 | "x" 1943 | ] 1944 | }, 1945 | { 1946 | "cell_type": "code", 1947 | "execution_count": 50, 1948 | "metadata": { 1949 | "slideshow": { 1950 | 
"slide_type": "fragment" 1951 | } 1952 | }, 1953 | "outputs": [ 1954 | { 1955 | "data": { 1956 | "text/plain": [ 1957 | "array([-128, -127, 0, 0], dtype=int8)" 1958 | ] 1959 | }, 1960 | "execution_count": 50, 1961 | "metadata": {}, 1962 | "output_type": "execute_result" 1963 | } 1964 | ], 1965 | "source": [ 1966 | "x[1] = 129\n", 1967 | "x" 1968 | ] 1969 | }, 1970 | { 1971 | "cell_type": "code", 1972 | "execution_count": 51, 1973 | "metadata": { 1974 | "slideshow": { 1975 | "slide_type": "fragment" 1976 | } 1977 | }, 1978 | "outputs": [ 1979 | { 1980 | "data": { 1981 | "text/plain": [ 1982 | "array([-128, -127, 1, 0], dtype=int8)" 1983 | ] 1984 | }, 1985 | "execution_count": 51, 1986 | "metadata": {}, 1987 | "output_type": "execute_result" 1988 | } 1989 | ], 1990 | "source": [ 1991 | "x[2] = 257 # i.e. (128 x 2) + 1\n", 1992 | "x" 1993 | ] 1994 | }, 1995 | { 1996 | "cell_type": "code", 1997 | "execution_count": 52, 1998 | "metadata": { 1999 | "slideshow": { 2000 | "slide_type": "subslide" 2001 | } 2002 | }, 2003 | "outputs": [ 2004 | { 2005 | "data": { 2006 | "text/plain": [ 2007 | "array([0, 0, 0, 0], dtype=uint8)" 2008 | ] 2009 | }, 2010 | "execution_count": 52, 2011 | "metadata": {}, 2012 | "output_type": "execute_result" 2013 | } 2014 | ], 2015 | "source": [ 2016 | "ux = np.zeros(4, 'uint8') # Integer ranging from 0 to 255, dtype also as string!\n", 2017 | "ux" 2018 | ] 2019 | }, 2020 | { 2021 | "cell_type": "code", 2022 | "execution_count": 53, 2023 | "metadata": { 2024 | "slideshow": { 2025 | "slide_type": "subslide" 2026 | } 2027 | }, 2028 | "outputs": [ 2029 | { 2030 | "data": { 2031 | "text/plain": [ 2032 | "array([255, 0, 1, 1], dtype=uint8)" 2033 | ] 2034 | }, 2035 | "execution_count": 53, 2036 | "metadata": {}, 2037 | "output_type": "execute_result" 2038 | } 2039 | ], 2040 | "source": [ 2041 | "ux[0] = 255\n", 2042 | "ux[1] = 256\n", 2043 | "ux[2] = 257\n", 2044 | "ux[3] = 513 # (256 x 2) + 1\n", 2045 | "ux" 2046 | ] 2047 | }, 2048 | { 2049 | 
"cell_type": "markdown", 2050 | "metadata": {}, 2051 | "source": [ 2052 | "### Machine Info and Supported Numerical Representation" 2053 | ] 2054 | }, 2055 | { 2056 | "cell_type": "markdown", 2057 | "metadata": {}, 2058 | "source": [ 2059 | "Numpy provides two functions to inspect the information of supported integer and floating-point types, namely `np.iinfo` and `np.finfo`:" 2060 | ] 2061 | }, 2062 | { 2063 | "cell_type": "code", 2064 | "execution_count": 54, 2065 | "metadata": {}, 2066 | "outputs": [ 2067 | { 2068 | "data": { 2069 | "text/plain": [ 2070 | "iinfo(min=-2147483648, max=2147483647, dtype=int32)" 2071 | ] 2072 | }, 2073 | "execution_count": 54, 2074 | "metadata": {}, 2075 | "output_type": "execute_result" 2076 | } 2077 | ], 2078 | "source": [ 2079 | "np.iinfo(np.int32)" 2080 | ] 2081 | }, 2082 | { 2083 | "cell_type": "code", 2084 | "execution_count": 55, 2085 | "metadata": {}, 2086 | "outputs": [ 2087 | { 2088 | "data": { 2089 | "text/plain": [ 2090 | "finfo(resolution=0.001, min=-6.55040e+04, max=6.55040e+04, dtype=float16)" 2091 | ] 2092 | }, 2093 | "execution_count": 55, 2094 | "metadata": {}, 2095 | "output_type": "execute_result" 2096 | } 2097 | ], 2098 | "source": [ 2099 | "np.finfo(np.float16)" 2100 | ] 2101 | }, 2102 | { 2103 | "cell_type": "markdown", 2104 | "metadata": {}, 2105 | "source": [ 2106 | "In addition, the `MachAr` class will provide information on the current machine : " 2107 | ] 2108 | }, 2109 | { 2110 | "cell_type": "code", 2111 | "execution_count": 56, 2112 | "metadata": {}, 2113 | "outputs": [], 2114 | "source": [ 2115 | "machine_info = np.MachAr()" 2116 | ] 2117 | }, 2118 | { 2119 | "cell_type": "code", 2120 | "execution_count": 57, 2121 | "metadata": {}, 2122 | "outputs": [ 2123 | { 2124 | "data": { 2125 | "text/plain": [ 2126 | "2.220446049250313e-16" 2127 | ] 2128 | }, 2129 | "execution_count": 57, 2130 | "metadata": {}, 2131 | "output_type": "execute_result" 2132 | } 2133 | ], 2134 | "source": [ 2135 | 
"machine_info.epsilon" 2136 | ] 2137 | }, 2138 | { 2139 | "cell_type": "code", 2140 | "execution_count": 58, 2141 | "metadata": {}, 2142 | "outputs": [ 2143 | { 2144 | "data": { 2145 | "text/plain": [ 2146 | "1.7976931348623157e+308" 2147 | ] 2148 | }, 2149 | "execution_count": 58, 2150 | "metadata": {}, 2151 | "output_type": "execute_result" 2152 | } 2153 | ], 2154 | "source": [ 2155 | "machine_info.huge" 2156 | ] 2157 | }, 2158 | { 2159 | "cell_type": "code", 2160 | "execution_count": 59, 2161 | "metadata": {}, 2162 | "outputs": [ 2163 | { 2164 | "data": { 2165 | "text/plain": [ 2166 | "True" 2167 | ] 2168 | }, 2169 | "execution_count": 59, 2170 | "metadata": {}, 2171 | "output_type": "execute_result" 2172 | } 2173 | ], 2174 | "source": [ 2175 | "np.finfo(np.float64).max == machine_info.huge" 2176 | ] 2177 | }, 2178 | { 2179 | "cell_type": "code", 2180 | "execution_count": null, 2181 | "metadata": {}, 2182 | "outputs": [], 2183 | "source": [ 2184 | "# TRY THIS!\n", 2185 | "\n", 2186 | "help(machine_info)" 2187 | ] 2188 | }, 2189 | { 2190 | "cell_type": "markdown", 2191 | "metadata": { 2192 | "slideshow": { 2193 | "slide_type": "slide" 2194 | } 2195 | }, 2196 | "source": [ 2197 | "# Data Type Object" 2198 | ] 2199 | }, 2200 | { 2201 | "cell_type": "markdown", 2202 | "metadata": { 2203 | "slideshow": { 2204 | "slide_type": "subslide" 2205 | } 2206 | }, 2207 | "source": [ 2208 | "**Data type objects** are instances of the `numpy.dtype` class. \n", 2209 | "\n", 2210 | "Once again, arrays have a data type. \n", 2211 | "
\n", 2212 | "To be precise, *every element* in a NumPy array has the same data type. \n", 2213 | "\n", 2214 | "The data type object can tell you the `size` of the data in bytes.\n", 2215 | "
\n", 2216 | "(**Recall**: The size in bytes is given by the `itemsize` attribute of the dtype class)" 2217 | ] 2218 | }, 2219 | { 2220 | "cell_type": "code", 2221 | "execution_count": 60, 2222 | "metadata": { 2223 | "collapsed": false, 2224 | "jupyter": { 2225 | "outputs_hidden": false 2226 | }, 2227 | "slideshow": { 2228 | "slide_type": "subslide" 2229 | } 2230 | }, 2231 | "outputs": [ 2232 | { 2233 | "name": "stdout", 2234 | "output_type": "stream", 2235 | "text": [ 2236 | "a itemsize: 2\n", 2237 | "a.dtype.itemsize: 2\n" 2238 | ] 2239 | } 2240 | ], 2241 | "source": [ 2242 | "a = np.arange(7, dtype=np.uint16)\n", 2243 | "print('a itemsize: ', a.itemsize)\n", 2244 | "print('a.dtype.itemsize: ', a.dtype.itemsize)" 2245 | ] 2246 | }, 2247 | { 2248 | "cell_type": "markdown", 2249 | "metadata": { 2250 | "slideshow": { 2251 | "slide_type": "slide" 2252 | } 2253 | }, 2254 | "source": [ 2255 | "### Character Codes\n", 2256 | "\n", 2257 | "Character codes are included for backward compatibility with **Numeric**. \n", 2258 | "
\n", 2259 | "Numeric is the predecessor of NumPy. Their use is not recommended, but these codes pop up in several places. \n", 2260 | "\n", 2261 | "Btw, You should instead use the **dtype** objects. \n", 2262 | "\n", 2263 | " integer i\n", 2264 | " Unsigned integer u\n", 2265 | " Single precision float f\n", 2266 | " Double precision float d\n", 2267 | " bool b\n", 2268 | " complex D\n", 2269 | " string S\n", 2270 | " unicode U" 2271 | ] 2272 | }, 2273 | { 2274 | "cell_type": "markdown", 2275 | "metadata": { 2276 | "slideshow": { 2277 | "slide_type": "subslide" 2278 | } 2279 | }, 2280 | "source": [ 2281 | "### `dtype` contructors" 2282 | ] 2283 | }, 2284 | { 2285 | "cell_type": "code", 2286 | "execution_count": 61, 2287 | "metadata": { 2288 | "collapsed": false, 2289 | "jupyter": { 2290 | "outputs_hidden": false 2291 | }, 2292 | "slideshow": { 2293 | "slide_type": "fragment" 2294 | } 2295 | }, 2296 | "outputs": [ 2297 | { 2298 | "data": { 2299 | "text/plain": [ 2300 | "dtype('float64')" 2301 | ] 2302 | }, 2303 | "execution_count": 61, 2304 | "metadata": {}, 2305 | "output_type": "execute_result" 2306 | } 2307 | ], 2308 | "source": [ 2309 | "np.dtype(float)" 2310 | ] 2311 | }, 2312 | { 2313 | "cell_type": "code", 2314 | "execution_count": 62, 2315 | "metadata": { 2316 | "collapsed": false, 2317 | "jupyter": { 2318 | "outputs_hidden": false 2319 | }, 2320 | "slideshow": { 2321 | "slide_type": "fragment" 2322 | } 2323 | }, 2324 | "outputs": [ 2325 | { 2326 | "data": { 2327 | "text/plain": [ 2328 | "dtype('float32')" 2329 | ] 2330 | }, 2331 | "execution_count": 62, 2332 | "metadata": {}, 2333 | "output_type": "execute_result" 2334 | } 2335 | ], 2336 | "source": [ 2337 | "np.dtype('f')" 2338 | ] 2339 | }, 2340 | { 2341 | "cell_type": "code", 2342 | "execution_count": 63, 2343 | "metadata": { 2344 | "collapsed": false, 2345 | "jupyter": { 2346 | "outputs_hidden": false 2347 | }, 2348 | "slideshow": { 2349 | "slide_type": "fragment" 2350 | } 2351 | }, 2352 | "outputs": [ 
2353 | { 2354 | "data": { 2355 | "text/plain": [ 2356 | "dtype('float64')" 2357 | ] 2358 | }, 2359 | "execution_count": 63, 2360 | "metadata": {}, 2361 | "output_type": "execute_result" 2362 | } 2363 | ], 2364 | "source": [ 2365 | "np.dtype('d')" 2366 | ] 2367 | }, 2368 | { 2369 | "cell_type": "code", 2370 | "execution_count": 64, 2371 | "metadata": { 2372 | "collapsed": false, 2373 | "jupyter": { 2374 | "outputs_hidden": false 2375 | }, 2376 | "slideshow": { 2377 | "slide_type": "fragment" 2378 | } 2379 | }, 2380 | "outputs": [ 2381 | { 2382 | "data": { 2383 | "text/plain": [ 2384 | "dtype('float64')" 2385 | ] 2386 | }, 2387 | "execution_count": 64, 2388 | "metadata": {}, 2389 | "output_type": "execute_result" 2390 | } 2391 | ], 2392 | "source": [ 2393 | "np.dtype('f8')" 2394 | ] 2395 | }, 2396 | { 2397 | "cell_type": "code", 2398 | "execution_count": 65, 2399 | "metadata": { 2400 | "collapsed": false, 2401 | "jupyter": { 2402 | "outputs_hidden": false 2403 | }, 2404 | "slideshow": { 2405 | "slide_type": "fragment" 2406 | } 2407 | }, 2408 | "outputs": [ 2409 | { 2410 | "data": { 2411 | "text/plain": [ 2412 | "dtype('