├── 1-Overview ├── 1_PythonAndConda.ipynb ├── README.md └── img │ ├── PyData_Stack.png │ └── python-version.png ├── 2-NumPy_SciPy ├── NumPy │ ├── 1_IntroAndCreation.ipynb │ ├── 2_IndexingAndSlicing.ipynb │ ├── 3_VectorizedOperations.ipynb │ ├── 4_Broadcasting.ipynb │ ├── 5_NumpyRecords.ipynb │ ├── EX01_ArrayCreation.ipynb │ ├── EX01_ArrayCreation_soln.ipynb │ ├── EX02_IndexingSlicing.ipynb │ ├── EX02_IndexingSlicing_soln.ipynb │ ├── EX03_VectorizedOperations.ipynb │ ├── EX03_VectorizedOperations_soln.ipynb │ ├── img │ │ ├── broadcasting2D.lightbg.scaled-noalpha.png │ │ ├── ecosystem.lightbg.scaled-noalpha.png │ │ ├── mef_numpy_selection-noalpha.png │ │ ├── mef_numpy_slice_01-noalpha.png │ │ ├── mef_numpy_slice_02-noalpha.png │ │ ├── numpyzerosdims-noalpha.png │ │ └── ufunc.lightbg.scaled-noalpha.png │ └── tmp │ │ └── .keep ├── README.md └── SciPy │ ├── 1_Introduction.ipynb │ ├── 2_Integration.ipynb │ ├── 3_FFT.ipynb │ └── 4_LinearAlgebra.ipynb ├── 3-Analytics ├── README.md ├── pandas │ ├── 1_Intro.ipynb │ ├── 2_DataStructures.ipynb │ ├── 3_ExamineData.ipynb │ ├── 4_ReadWriteData.ipynb │ ├── 5_Groupby.ipynb │ ├── 6_TimeSeries.ipynb │ ├── EX01_ExcelFiles.ipynb │ ├── EX02_weather.ipynb │ ├── data │ │ ├── 201509-citibike-tripdata.csv.gz │ │ ├── beer2.csv.gz │ │ ├── exoplanets.csv │ │ ├── goog.csv │ │ ├── nyc_harbor_wq_2006-2014.xlsx │ │ └── pittsburgh2013.csv │ └── img │ │ └── pydata_stack_model.png └── sklearn │ ├── 1_Intro.ipynb │ ├── 2_KNN_and_Validation.ipynb │ ├── 3_ModelComparison.ipynb │ ├── 4_RegressionModels.ipynb │ └── EX01_CrossValidation.ipynb ├── 4-AcceleratedPython ├── Accel Python Offloading to Intel Xeon Phi (Co)processors.pdf ├── README.md └── numba │ ├── 1_Numba_Basics.ipynb │ ├── 2_How_Numba_Works.ipynb │ ├── 3_Making_Ufuncs-Solution.ipynb │ ├── 3_Making_Ufuncs.ipynb │ ├── EX01_Intro_to_JIT-Solution.ipynb │ ├── EX01_Intro_to_JIT.ipynb │ ├── EX02_Direct_Summation-Solution.ipynb │ ├── EX02_Direct_Summation.ipynb │ ├── LICENSE.md │ ├── Numba 
Tutorial.pdf │ ├── README.md │ └── nopython_failure.py ├── 5-AdvancedScaling ├── README.md ├── basics │ └── basic_features.py ├── pi │ ├── builtins_mpi_pi.py │ ├── builtins_pi.py │ ├── builtins_pyobj_mpi_pi.py │ ├── numba_mpi_pi.py │ ├── numba_pi.py │ ├── numpy_mpi_pi.py │ ├── numpy_pi.py │ ├── threads_pi.py │ ├── util.py │ └── util.pyc ├── scaling_python_with_mpi.pdf └── scaling_python_with_mpi.pptx ├── 6-Profiling ├── Profiling.ipynb ├── README.md └── vtune │ ├── VTune_Python_Tutorial.pdf │ ├── demo.py │ ├── run.py │ ├── run_th.py │ ├── t_0.py │ ├── t_1.py │ ├── test_class_sample.py │ └── webinar │ ├── main.py │ ├── pythonset.txt │ └── slowpoke │ ├── __init__.py │ ├── __init__.pyc │ ├── compile.bat1 │ ├── core.c │ ├── core.pyd │ ├── core.pyx │ └── setup.py └── README.md /1-Overview/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/1-Overview/README.md -------------------------------------------------------------------------------- /1-Overview/img/PyData_Stack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/1-Overview/img/PyData_Stack.png -------------------------------------------------------------------------------- /1-Overview/img/python-version.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/1-Overview/img/python-version.png -------------------------------------------------------------------------------- /2-NumPy_SciPy/NumPy/3_VectorizedOperations.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | 
"source": [ 7 | "# Table of Contents\n", 8 | "* [Learning Objectives:](#Learning-Objectives:)\n", 9 | "\t* [Some Simple Setup](#Some-Simple-Setup)\n", 10 | "* [Working with Arrays](#Working-with-Arrays)\n", 11 | "\t* [Elementwise vs. matrix multiplications](#Elementwise-vs.-matrix-multiplications)\n", 12 | "\t* [Functions and methods](#Functions-and-methods)\n", 13 | "* [Array Operations as Methods](#Array-Operations-as-Methods)\n", 14 | "\t* [Additional methods:](#Additional-methods:)\n" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "# Learning Objectives:" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "After completion of this module, learners should be able to:\n", 29 | "\n", 30 | "* explain & use *vectorization* to speed up array-based computation\n", 31 | "* apply (`numpy`) *universal functions* to vectorize array computations\n", 32 | "* construct simple timed experiments to compare array-based computations" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "## Some Simple Setup" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": { 46 | "collapsed": true 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "%matplotlib inline\n", 51 | "\n", 52 | "import numpy as np\n", 53 | "import matplotlib.pyplot as plt\n", 54 | "import os.path as osp\n", 55 | "import numpy.random as npr\n", 56 | "vsep = \"\\n-------------------\\n\"\n", 57 | "\n", 58 | "def dump_array(arr):\n", 59 | " print(\"%s array of %s:\" % (arr.shape, arr.dtype))\n", 60 | " print(arr)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "# Working with Arrays" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "Math is quite simple—and this is part of the reason that using NumPy arrays can significantly simplify numerical code. 
The generic pattern `array OP scalar` (or `scalar OP array`), applies `OP` (with the `scalar` value) across elements of `array`." 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "NumPy *ufuncs* (universal functions) are functions that operate elementwise on one or more arrays.\n", 82 | "\n", 83 | "![](img/ufunc.lightbg.scaled-noalpha.png)\n", 84 | "\n", 85 | "When called, *ufuncs* dispatch to optimized C inner-loops based on the array *dtype*." 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "Builtin numpy ufuncs\n", 93 | "\n", 94 | "- comparison: <, <=, ==, !=, >=, >\n", 95 | "- arithmetic: +, -, *, /, reciprocal, square\n", 96 | "- exponential: exp, expm1, exp2, log, log10, log1p, log2, power, sqrt\n", 97 | "- trig: sin, cos, tan, acsin, arccos, atctan, sinh, cosh, tanh, acsinh, arccosh, atctanh\n", 98 | "- bitwise: &, |, ~, ^, left_shift, right_shift\n", 99 | "- logical operations: and, logical_xor, logical_not, or\n", 100 | "- predicates: isfinite, isinf, isnan, signbit\n", 101 | "- other: abs, ceil, floor, mod, modf, round, sinc, sign, trunc" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": { 108 | "collapsed": false 109 | }, 110 | "outputs": [], 111 | "source": [ 112 | "# array OP scalar applies across all elements and creates a new array\n", 113 | "arr = np.arange(10)\n", 114 | "print(\" arr:\", arr)\n", 115 | "print(\" arr + 1:\", arr + 1)\n", 116 | "print(\" arr * 2:\", arr * 2)\n", 117 | "print(\"arr ** 2:\", arr ** 2)\n", 118 | "print(\"2 ** arr:\", 2 ** arr)\n", 119 | "\n", 120 | "# bit-wise ops (cf. np.logical_and, etc.)\n", 121 | "print(\" arr | 1:\", arr | 1)\n", 122 | "print(\" arr & 1:\", arr & 1)\n", 123 | "\n", 124 | "# NOTE: arr += 1, etc. 
for in-place" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "metadata": { 131 | "collapsed": false 132 | }, 133 | "outputs": [], 134 | "source": [ 135 | "# array OP array works element-by-element and creates a new array\n", 136 | "arr1 = np.arange(5)\n", 137 | "arr2 = 2 ** arr1 # makes a new array\n", 138 | "\n", 139 | "print(arr1, \"+\", arr2, \"=\", arr1 + arr2, end=vsep)\n", 140 | "print(arr1, \"*\", arr2, \"=\", arr1 * arr2)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "## Elementwise vs. matrix multiplications" 148 | ] 149 | }, 150 | { 151 | "cell_type": "markdown", 152 | "metadata": {}, 153 | "source": [ 154 | "NumPy arrays and matrices are related, but slightly different types." 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": { 161 | "collapsed": false 162 | }, 163 | "outputs": [], 164 | "source": [ 165 | "a, b = np.arange(8).reshape(2,4), np.arange(10,18).reshape(2,4)\n", 166 | "print(\"a\")\n", 167 | "print(a)\n", 168 | "print(\"b\")\n", 169 | "print(b, end=vsep)\n", 170 | "print(\"Elementwise multiplication: a * b\")\n", 171 | "print(a * b, end=vsep)\n", 172 | "print(\"Dot product: np.dot(a.T, b)\")\n", 173 | "print(np.dot(a.T, b), end=vsep)\n", 174 | "print(\"Dot product as an array method: a.T.dot(b)\")\n", 175 | "print(a.T.dot(b), end=vsep)\n", 176 | "\n", 177 | "amat, bmat = np.matrix(a), np.matrix(b)\n", 178 | "print(\"amat, bmat = np.matrix(a), np.matrix(b)\")\n", 179 | "print('amat')\n", 180 | "print(amat)\n", 181 | "print('bmat')\n", 182 | "print(bmat, end=vsep)\n", 183 | "print(\"Dot product of matrices: amat.T * bmat\")\n", 184 | "print(amat.T * bmat, end=vsep)\n", 185 | "print(\"Dot product in Python 3.5+: a.T @ b\")\n", 186 | "print(\"... 
PEP 465: time to upgrade ...\")" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": {}, 192 | "source": [ 193 | "In the wondrous future, we will write:\n", 194 | " \n", 195 | "```python\n", 196 | "S = (H @ β - r).T @ inv(H @ V @ H.T) @ (H @ β - r)\n", 197 | "```" 198 | ] 199 | }, 200 | { 201 | "cell_type": "markdown", 202 | "metadata": {}, 203 | "source": [ 204 | "## Functions and methods" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": {}, 210 | "source": [ 211 | "A number of important mathematical operations on arrays are defined as functions in the NumPy module (not as methods on NumPy arrays). Some operations are even available both ways. Some of the more important mathematical routines include: `sin, cos, tan, exp, log`. We can use these as `np.sin`, for example. For a complete list, see http://docs.scipy.org/doc/numpy/reference/routines.math.html" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": null, 217 | "metadata": { 218 | "collapsed": false 219 | }, 220 | "outputs": [], 221 | "source": [ 222 | "arr = np.arange(-np.pi, np.pi, np.pi/4)\n", 223 | "print(\"some multiples of pi:\")\n", 224 | "print(arr, end=vsep)\n", 225 | "\n", 226 | "print(\"... and their cosines:\")\n", 227 | "print(np.cos(arr))" 228 | ] 229 | }, 230 | { 231 | "cell_type": "markdown", 232 | "metadata": {}, 233 | "source": [ 234 | "# Array Operations as Methods" 235 | ] 236 | }, 237 | { 238 | "cell_type": "markdown", 239 | "metadata": {}, 240 | "source": [ 241 | "Several useful operations are definied as methods on NumPy arrays. 
For a full list, see the NumPy docs: \n", 242 | "\n", 243 | "http://docs.scipy.org/doc/numpy/reference/arrays.ndarray.html#array-methods" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": { 250 | "collapsed": false 251 | }, 252 | "outputs": [], 253 | "source": [ 254 | "arr = np.random.randint(0,10, size=(10,))# arange(1,10)\n", 255 | "print(\"arr: \", arr, end=vsep)\n", 256 | "\n", 257 | "print(\"%18s : %s\" % (\"mean\", arr.mean()))\n", 258 | "print(\"%18s : %s\" % (\"variance\", arr.var()))\n", 259 | "print(\"%18s : %s\" % (\"std. deviation\", arr.std()))\n", 260 | "print(\"%18s : %s\" % (\"cumulative sum\", arr.cumsum()))\n", 261 | "print(\"%18s : %s\" % (\"cumulative product\", arr.cumprod()))" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "metadata": { 268 | "collapsed": false 269 | }, 270 | "outputs": [], 271 | "source": [ 272 | "# two other useful methods for defining predicates \n", 273 | "# based on an array are .any() and .all()\n", 274 | "arr = np.array([True, False, False])\n", 275 | "print(\"arr:\", arr)\n", 276 | "print(\"any true?: \", arr.any())\n", 277 | "print(\"Python any:\", any(arr))\n", 278 | "print(\"all true?: \", arr.all())\n", 279 | "print(\"Python all:\", all(arr))" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": null, 285 | "metadata": { 286 | "collapsed": false 287 | }, 288 | "outputs": [], 289 | "source": [ 290 | "# With numpy arrays that have more than 1 dimension, we need to use np.all\n", 291 | "arr = np.arange(15).reshape(3, 5)\n", 292 | "np.all(arr)\n", 293 | "# Why? 
all() iterates the argument and checks if each element is truthy.\n", 294 | "# With a 2-d array, each iteration is a row not a single element, \n", 295 | "# and as we saw above, we cannot evaluate the truthiness of an \n", 296 | "# array (bool(some_array) fails).\n" 297 | ] 298 | }, 299 | { 300 | "cell_type": "markdown", 301 | "metadata": {}, 302 | "source": [ 303 | "## Additional methods:" 304 | ] 305 | }, 306 | { 307 | "cell_type": "markdown", 308 | "metadata": {}, 309 | "source": [ 310 | "* Predicates\n", 311 | " * `a.any(), a.all()`\n", 312 | "* Reductions\n", 313 | " * `a.mean(), a.argmin(), a.argmax(), a.trace(), a.cumsum(), a.cumprod()`\n", 314 | "* Manipulation\n", 315 | " * `a.argsort(), a.transpose(), a.reshape(...), a.ravel(), a.fill(...), a.clip(...)`\n", 316 | "* Complex Numbers\n", 317 | " * `a.real, a.imag, a.conj()`" 318 | ] 319 | } 320 | ], 321 | "metadata": { 322 | "anaconda-cloud": {}, 323 | "continuum": { 324 | "depends": [ 325 | "np_intro", 326 | "ip_essentials", 327 | "ip_datatypes" 328 | ], 329 | "requires": [ 330 | "img/ufunc.lightbg.scaled-noalpha.png" 331 | ], 332 | "tag": "np_vectorization" 333 | }, 334 | "kernelspec": { 335 | "display_name": "Python [conda env:python3]", 336 | "language": "python", 337 | "name": "conda-env-python3-py" 338 | }, 339 | "language_info": { 340 | "codemirror_mode": { 341 | "name": "ipython", 342 | "version": 3 343 | }, 344 | "file_extension": ".py", 345 | "mimetype": "text/x-python", 346 | "name": "python", 347 | "nbconvert_exporter": "python", 348 | "pygments_lexer": "ipython3", 349 | "version": "3.5.2" 350 | } 351 | }, 352 | "nbformat": 4, 353 | "nbformat_minor": 0 354 | } 355 | -------------------------------------------------------------------------------- /2-NumPy_SciPy/NumPy/4_Broadcasting.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Table of Contents\n", 8 | "* 
[Learning Objectives:](#Learning-Objectives:)\n", 9 | "\t* [Some Simple Setup](#Some-Simple-Setup)\n", 10 | "* [Broadcasting](#Broadcasting)\n", 11 | "\t* [What are the rules for broadcasting?](#What-are-the-rules-for-broadcasting?)\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "# Learning Objectives:" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "After completion of this module, learners should be able to:\n", 26 | "\n", 27 | "* use and explain *broadcasting* in numpy" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "## Some Simple Setup" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": { 41 | "collapsed": true 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "%matplotlib inline\n", 46 | "\n", 47 | "import numpy as np\n", 48 | "import matplotlib.pyplot as plt\n", 49 | "import os.path as osp\n", 50 | "import numpy.random as npr\n", 51 | "vsep = \"\\n-------------------\\n\"\n", 52 | "\n", 53 | "def dump_array(arr):\n", 54 | " print(\"%s array of %s:\" % (arr.shape, arr.dtype))\n", 55 | " print(arr)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "# Broadcasting" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "Broadcasting lets arrays with *different but compatible* shapes be arguments to *ufuncs*." 
70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": { 76 | "collapsed": false 77 | }, 78 | "outputs": [], 79 | "source": [ 80 | "arr1 = np.arange(5)\n", 81 | "print(\"arr1:\\n\", arr1, end=vsep)\n", 82 | "\n", 83 | "print(\"arr1 + scalar:\\n\", arr1+10, end=vsep)\n", 84 | "\n", 85 | "print(\"arr1 + arr1 (same shape):\\n\", arr1+arr1, end=vsep)\n", 86 | "\n", 87 | "arr2 = np.arange(5).reshape(5,1) * 10\n", 88 | "arr3 = np.arange(5).reshape(1,5) * 100\n", 89 | "print(\"arr2:\\n\", arr2)\n", 90 | "print(\"arr3:\\n\", arr3, end=vsep)\n", 91 | "\n", 92 | "print(\"arr1 + arr2 [ %s + %s --> %s ]:\" % \n", 93 | " (arr1.shape, arr2.shape, (arr1 + arr2).shape))\n", 94 | "print(arr1+arr2, end=vsep)\n", 95 | "print(\"arr1 + arr3 [ %s + %s --> %s ]:\" % \n", 96 | " (arr1.shape, arr3.shape, (arr1 + arr3).shape))\n", 97 | "print(arr1+arr3)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": { 104 | "collapsed": false 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "arr1 = np.arange(6).reshape(3,2)\n", 109 | "arr2 = np.arange(10, 40, 10).reshape(3,1)\n", 110 | "\n", 111 | "print(\"arr1:\")\n", 112 | "dump_array(arr1)\n", 113 | "print(\"\\narr2:\")\n", 114 | "dump_array(arr2)\n", 115 | "print(\"\\narr1 + arr2:\")\n", 116 | "print(arr1+arr2)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "Here, an array of shape `(3, 1)` is broadcast to an array with shape `(3, 2)`\n", 124 | "\n", 125 | "![](files/img/broadcasting2D.lightbg.scaled-noalpha.png)" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "## What are the rules for broadcasting? " 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": {}, 138 | "source": [ 139 | "In order for an operation to broadcast, the size of all the trailing dimensions for both arrays must either be *equal* or be *one*. 
Dimensions that are one and dimensions that are missing from the \"head\" are duplicated to match the larger number. So, we have:\n", 140 | "\n", 141 | "|Array |Shape |\n", 142 | "|:------------------|---------------:|\n", 143 | "|A (1d array)| 3|\n", 144 | "|B (2d array)| 2 x 3|\n", 145 | "|Result (2d array)| 2 x 3|\n", 146 | "\n", 147 | "|Array |Shape |\n", 148 | "|:------------------|-------------:|\n", 149 | "|A (2d array)| 6 x 1|\n", 150 | "|B (3d array)| 1 x 6 x 4|\n", 151 | "|Result (3d array)| 1 x 6 x 4|\n", 152 | "\n", 153 | "|Array |Shape |\n", 154 | "|:-----------------|---------------:|\n", 155 | "|A (4d array)| 3 x 1 x 6 x 1|\n", 156 | "|B (3d array)| 2 x 1 x 4|\n", 157 | "|Result (4d array)| 3 x 2 x 6 x 4|" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": [ 164 | "Some other interpretations of compatibility:\n", 165 | " \n", 166 | " * Tails must be the same, ones are wild.\n", 167 | " \n", 168 | "\n", 169 | " * If one shape is shorter than the other, pad the shorter shape on the LHS with `1`s.\n", 170 | " * Now, from the right, the shapes must be identical with ones acting as wild cards." 
171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": { 177 | "collapsed": false 178 | }, 179 | "outputs": [], 180 | "source": [ 181 | "a1 = np.array([1,2,3]) # 3 -> 1x3\n", 182 | "b1 = np.array([[10, 20, 30], # 2x3\n", 183 | " [40, 50, 60]]) \n", 184 | "print(a1+b1)" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": { 191 | "collapsed": false 192 | }, 193 | "outputs": [], 194 | "source": [ 195 | "result = (np.ones(( 6,1)) + # 3rd dimension replicated\n", 196 | " np.ones((1,6,4)))\n", 197 | "print(result.shape)\n", 198 | "\n", 199 | "result = (np.ones((3,6,1)) + \n", 200 | " np.ones((1,6,4))) # 1st and 3rd dimension replicated\n", 201 | "print(result.shape)" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "Sometimes, it is useful to explicitly insert a new dimension in the shape. We can do this with a fancy slice that takes the value `np.newaxis`." 
209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": { 215 | "collapsed": false, 216 | "raises": "ValueError" 217 | }, 218 | "outputs": [], 219 | "source": [ 220 | "arr1 = np.arange(6).reshape((2,3)) # 2x3\n", 221 | "arr2 = np.array([10, 100]) # 2\n", 222 | "arr1 + arr2 # This will fail" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": { 229 | "collapsed": false 230 | }, 231 | "outputs": [], 232 | "source": [ 233 | "# let's massage the shape\n", 234 | "arr3 = arr2[:, np.newaxis] # arr2 -> 2x1\n", 235 | "print(\"arr3 shape:\", arr3.shape)\n", 236 | "print(\"arr1 + arr3\")\n", 237 | "print(arr1+arr3)" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "metadata": { 244 | "collapsed": false 245 | }, 246 | "outputs": [], 247 | "source": [ 248 | "arr = np.array([10, 100])\n", 249 | "print(\"original shape:\", arr.shape)\n", 250 | "\n", 251 | "arrNew = arr2[np.newaxis, :]\n", 252 | "print(\"arrNew shape:\", arrNew.shape)" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": null, 258 | "metadata": { 259 | "collapsed": false 260 | }, 261 | "outputs": [], 262 | "source": [ 263 | "arr1 = np.arange(0,6).reshape(2,3)\n", 264 | "arr2 = np.arange(10,22).reshape(4,3)\n", 265 | "np.tile(arr1, (2,1)) * arr2" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": null, 271 | "metadata": { 272 | "collapsed": true 273 | }, 274 | "outputs": [], 275 | "source": [] 276 | } 277 | ], 278 | "metadata": { 279 | "anaconda-cloud": {}, 280 | "continuum": { 281 | "depends": [ 282 | "np_slicing", 283 | "np_intro", 284 | "np_vectorization", 285 | "ip_essentials", 286 | "ip_datatypes" 287 | ], 288 | "requires": [ 289 | "img/broadcasting2D.lightbg.scaled-noalpha.png" 290 | ], 291 | "tag": "np_broadcast" 292 | }, 293 | "kernelspec": { 294 | "display_name": "Python [conda env:python3]", 295 | "language": "python", 296 | 
"name": "conda-env-python3-py" 297 | }, 298 | "language_info": { 299 | "codemirror_mode": { 300 | "name": "ipython", 301 | "version": 3 302 | }, 303 | "file_extension": ".py", 304 | "mimetype": "text/x-python", 305 | "name": "python", 306 | "nbconvert_exporter": "python", 307 | "pygments_lexer": "ipython3", 308 | "version": "3.5.2" 309 | } 310 | }, 311 | "nbformat": 4, 312 | "nbformat_minor": 0 313 | } 314 | -------------------------------------------------------------------------------- /2-NumPy_SciPy/NumPy/5_NumpyRecords.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Table of Contents\n", 8 | "* [Learning Objectives](#Learning-Objectives)\n", 9 | "\t* [Some Simple Setup](#Some-Simple-Setup)\n", 10 | "\t* [Compound Data: Structured Arrays / Record Arrays: `np.record`](#Compound-Data:--Structured-Arrays-/-Record-Arrays:--np.record)\n", 11 | " * [IO on arrays](#IO-on-arrays)\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "# Learning Objectives" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "After completion of this module, learners should be able to:\n", 26 | "\n", 27 | "* Usage of `np.record` data type." 
28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "## Some Simple Setup" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": { 41 | "collapsed": true 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "%matplotlib inline\n", 46 | "\n", 47 | "import numpy as np\n", 48 | "import matplotlib.pyplot as plt\n", 49 | "import os.path as osp\n", 50 | "import numpy.random as npr\n", 51 | "vsep = \"\\n-------------------\\n\"\n", 52 | "\n", 53 | "def dump_array(arr):\n", 54 | " print(\"%s array of %s:\" % (arr.shape, arr.dtype))\n", 55 | " print(arr)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "## Compound Data: Structured Arrays / Record Arrays: `np.record`" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "NumPy arrays have elements with a single type. But, that type can be a compound type (i.e., a record or a struct).\n", 70 | "\n", 71 | "Two main recommended ways of specifying type codes:\n", 72 | " \n", 73 | " * b1, i1, i2, i4, i8, u1, u2, u4, u8, f2, f4, f8, c8, c16, a<n>\n", 74 | " (bytes, ints, unsigned ints, floats, complex and fixed length strings of a given *byte* lengths)\n", 75 | " * int8,...,uint8,...,float16, float32, float64, complex64, complex128\n", 76 | " (similar but with *bit* sizes)" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": { 83 | "collapsed": false 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "# a record with a 4 byte int, a 4 byte float, \n", 88 | "# and 10 bytes of characters (ascii values)\n", 89 | "x = np.zeros((2,), dtype=('i4,f4,a10'))\n", 90 | "print(x)\n", 91 | "print(repr(x), end=vsep)\n", 92 | "\n", 93 | "x[:] = [(1, 5., 'Hello'), (2, 6., 'World')]\n", 94 | "print(x)\n", 95 | "print(repr(x), end=vsep)\n", 96 | "\n", 97 | "print(\"a field:\")\n", 98 | "print(x['f1'])\n", 99 | "print(repr(x['f1']))" 100 | 
] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": { 106 | "collapsed": false 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "%%file tmp/patient-records.csv\n", 111 | "name,date,weight(kg),height(cm)\n", 112 | "Mark,2011-01-01,86.1,180\n", 113 | "Barb,2012-02-03,65.7,167\n", 114 | "Ethan,2013-04-06,29.45,127" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": { 121 | "collapsed": false 122 | }, 123 | "outputs": [], 124 | "source": [ 125 | "patient_dtype = [(\"name\", \"a10\"),\n", 126 | " (\"visit_date\", 'datetime64[D]'),\n", 127 | " (\"weight\", np.float),\n", 128 | " (\"height\", np.int)]\n", 129 | "data = np.loadtxt(\"tmp/patient-records.csv\", \n", 130 | " skiprows=1, \n", 131 | " delimiter=\",\", \n", 132 | " dtype=patient_dtype,\n", 133 | " converters = {1: np.datetime64})\n", 134 | "\n", 135 | "print(\"first row: \", data[0])\n", 136 | "print(\"all weights: \", data['weight'])\n", 137 | "\n", 138 | "# BMI = kg / m**2\n", 139 | "print(\"BMIs:\", data['weight'] / (data['height']/100.0)**2)" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": {}, 145 | "source": [ 146 | "# IO on arrays" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "We can also save and load arrays" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": { 160 | "collapsed": false 161 | }, 162 | "outputs": [], 163 | "source": [ 164 | "#saving / load data\n", 165 | "np.savez('tmp/data.npz',data=data) # list of arrays to store\n", 166 | "dataz = np.load('tmp/data.npz')\n", 167 | "\n", 168 | "print(dataz.files) # list of arrays stored in this archive\n", 169 | "print(dataz['data'])" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "metadata": { 176 | "collapsed": false 177 | }, 178 | "outputs": [], 179 | "source": [ 180 | "# cleanup\n", 
181 | "!rm tmp/data.npz" 182 | ] 183 | } 184 | ], 185 | "metadata": { 186 | "continuum": { 187 | "depends": [ 188 | "np_intro", 189 | "np_slicing", 190 | "np_vectorization", 191 | "ip_essentials", 192 | "ip_datatypes", 193 | "ip_containers" 194 | ], 195 | "requires": [], 196 | "tag": "np_records" 197 | }, 198 | "kernelspec": { 199 | "display_name": "Python [conda env:python3]", 200 | "language": "python", 201 | "name": "conda-env-python3-py" 202 | }, 203 | "language_info": { 204 | "codemirror_mode": { 205 | "name": "ipython", 206 | "version": 3 207 | }, 208 | "file_extension": ".py", 209 | "mimetype": "text/x-python", 210 | "name": "python", 211 | "nbconvert_exporter": "python", 212 | "pygments_lexer": "ipython3", 213 | "version": "3.5.2" 214 | } 215 | }, 216 | "nbformat": 4, 217 | "nbformat_minor": 0 218 | } 219 | -------------------------------------------------------------------------------- /2-NumPy_SciPy/NumPy/EX01_ArrayCreation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Table of Contents\n", 8 | "* [Exercise: Numpy Array Creation](#Exercise:-Numpy-Array-Creation)\n" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "# Exercise: Numpy Array Creation" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": { 22 | "collapsed": true 23 | }, 24 | "outputs": [], 25 | "source": [ 26 | "import numpy as np" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "IPython notebooks have two special functions to measure the time it takes to perform a single line" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": { 40 | "collapsed": false 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "%timeit np.sqrt(np.pi)" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 
| "metadata": {}, 50 | "source": [ 51 | "and the time it takes to perform an entire cell" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": { 58 | "collapsed": false 59 | }, 60 | "outputs": [], 61 | "source": [ 62 | "%%timeit\n", 63 | "import numpy as np\n", 64 | "for i in range(0,1000):\n", 65 | " np.sqrt(np.pi)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "A numpy array behave like any other container `for item in array:`." 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "Unless otherwise specified, try to solve these problems using NumPy but not using raw Python.\n", 80 | "\n", 81 | " 1. Create a Python list with the ints from 1 to 10. Create a NumPy array from that list. \n", 82 | " 1. For both, add one to each element.\n", 83 | " 1. For both, multiply each element by two.\n", 84 | " 1. Create an int array of all zeros.\n", 85 | " 1. Create a float array of all zeros.\n", 86 | " 1. Create an evenly spaced grid of 100 floating point values on [-10, 10].\n", 87 | " 1. Create an int array with the powers of two from 1 to 1024.\n", 88 | " 1. Bonus: Can you figure out a second \"NumPy only\" way to do it? (Hint: help(function) is your friend)\n", 89 | " 1. Explain what NumPy dtype would be well-suited for (and why):\n", 90 | " * Temperatures\n", 91 | " * Counts of occurances of an event\n", 92 | " * Differences in counts\n", 93 | " * Probabilities\n", 94 | " 1. Images can be stored as (R,G,B) value triples. Frequently, the color values (red, green, or blue) range from [0, 255]. What would be an ideal NumPy data type for one color value?\n", 95 | " 1. Come up with two ways to create a (2,5,3) shaped int array filled the value 42.\n", 96 | " 1. Generate a (5,5) array with values from a Normal distribution of mean 10 and standard deviation 1.\n", 97 | " 1. Now, try to do it another way.\n", 98 | " 1. 
Define a function of *N*, that returns an array with *N* values all equal to $1/N$.\n", 99 | " 1. Create a Python list with the floating-point values `[1.0, 2.0, 3.0, ..., 1E6]`. See the following note on timing.\n", 100 | " 1. Do the same with a NumPy array.\n", 101 | " 1. Time how long it takes to multiply each sequence by `np.pi`." 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": { 108 | "collapsed": true 109 | }, 110 | "outputs": [], 111 | "source": [] 112 | } 113 | ], 114 | "metadata": { 115 | "continuum": { 116 | "depends": [ 117 | "np_intro", 118 | "ip_essentials", 119 | "ip_datatypes", 120 | "ip_containers" 121 | ], 122 | "requires": [], 123 | "tag": "np_ex_intro" 124 | }, 125 | "kernelspec": { 126 | "display_name": "Python 3", 127 | "language": "python", 128 | "name": "python3" 129 | }, 130 | "language_info": { 131 | "codemirror_mode": { 132 | "name": "ipython", 133 | "version": 3 134 | }, 135 | "file_extension": ".py", 136 | "mimetype": "text/x-python", 137 | "name": "python", 138 | "nbconvert_exporter": "python", 139 | "pygments_lexer": "ipython3", 140 | "version": "3.4.4" 141 | } 142 | }, 143 | "nbformat": 4, 144 | "nbformat_minor": 0 145 | } 146 | -------------------------------------------------------------------------------- /2-NumPy_SciPy/NumPy/EX01_ArrayCreation_soln.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Table of Contents\n", 8 | "* [Exercise: Numpy Array Creation](#Exercise:-Numpy-Array-Creation)\n" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "# Exercise: Numpy Array Creation" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": { 22 | "collapsed": true 23 | }, 24 | "outputs": [], 25 | "source": [ 26 | "import numpy as np" 27 | ] 28 | }, 29 | { 30 | 
"cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "IPython notebooks have two special functions to measure the time it takes to perform a single line" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": { 40 | "collapsed": false 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "%timeit np.sqrt(np.pi)" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "and the time it takes to perform an entire cell" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": { 58 | "collapsed": false 59 | }, 60 | "outputs": [], 61 | "source": [ 62 | "%%timeit\n", 63 | "import numpy as np\n", 64 | "for i in range(0,1000):\n", 65 | " np.sqrt(np.pi)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "A numpy array behave like any other container `for item in array:`." 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "Unless otherwise specified, try to solve these problems using NumPy but not using raw Python.\n", 80 | "\n", 81 | " 1. Create a Python list with the ints from 1 to 10. Create a NumPy array from that list. \n", 82 | " 1. For both, add one to each element.\n", 83 | " 1. For both, multiply each element by two.\n", 84 | " 1. Create an int array of all zeros.\n", 85 | " 1. Create a float array of all zeros.\n", 86 | " 1. Create an evenly spaced grid of 100 floating point values on [-10, 10].\n", 87 | " 1. Create an int array with the powers of two from 1 to 1024.\n", 88 | " 1. Bonus: Can you figure out a second \"NumPy only\" way to do it? (Hint: help(function) is your friend)\n", 89 | " 1. Explain what NumPy dtype would be well-suited for (and why):\n", 90 | " * Temperatures\n", 91 | " * Counts of occurances of an event\n", 92 | " * Differences in counts\n", 93 | " * Probabilities\n", 94 | " 1. Images can be stored as (R,G,B) value triples. 
Frequently, the color values (red, green, or blue) range from [0, 255]. What would be an ideal NumPy data type for one color value?\n", 95 | " 1. Come up with two ways to create a (2,5,3) shaped int array filled the value 42.\n", 96 | " 1. Generate a (5,5) array with values from a Normal distribution of mean 10 and standard deviation 1.\n", 97 | " 1. Now, try to do it another way.\n", 98 | " 1. Define a function of *N*, that returns an array with *N* values all equal to $1/N$.\n", 99 | " 1. Create a Python list with the floating-point values `[1.0, 2.0, 3.0, ..., 1E6]`. See the following note on timing.\n", 100 | " 1. Do the same with a NumPy array.\n", 101 | " 1. Time how long it takes to multiply each sequence by `np.pi`." 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": { 108 | "collapsed": false 109 | }, 110 | "outputs": [], 111 | "source": [ 112 | "# Solution 1: \n", 113 | "# Create a Python list with the ints from 1 to 10. \n", 114 | "# Create a NumPy array from that list.\n", 115 | "list1 = list(range(1,11))\n", 116 | "array1 = np.array(list1)\n", 117 | "\n", 118 | "print(list1, array1, sep=\"\\n\")" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": { 125 | "collapsed": false 126 | }, 127 | "outputs": [], 128 | "source": [ 129 | "# Solution 1A: \n", 130 | "# For both, add one to each element.\n", 131 | "list2 = [x+1 for x in list1]\n", 132 | "array2 = array1 + 1\n", 133 | "\n", 134 | "print(list2, array2, sep=\"\\n\")" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": { 141 | "collapsed": false 142 | }, 143 | "outputs": [], 144 | "source": [ 145 | "# Solution 1B: \n", 146 | "# For both, multiply each element by two.\n", 147 | "list3 = [2*x for x in list1]\n", 148 | "array3 = array1 * 2\n", 149 | "\n", 150 | "print(list3, array3, sep=\"\\n\")" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | 
"execution_count": null, 156 | "metadata": { 157 | "collapsed": false 158 | }, 159 | "outputs": [], 160 | "source": [ 161 | "# Solution 2:\n", 162 | "# Create an int array of all zeros.\n", 163 | "\n", 164 | "array = np.zeros(10, dtype=int)\n", 165 | "\n", 166 | "print(array, len(array), array.dtype, sep=\"\\n\")" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": { 173 | "collapsed": false 174 | }, 175 | "outputs": [], 176 | "source": [ 177 | "# Solution 3:\n", 178 | "# Create a float array of all zeros\n", 179 | "\n", 180 | "array = np.zeros(10, dtype=float)\n", 181 | "\n", 182 | "print(array, len(array), array.dtype, sep=\"\\n\")" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": { 189 | "collapsed": false 190 | }, 191 | "outputs": [], 192 | "source": [ 193 | "# Solution 4:\n", 194 | "# Create an evenly spaced grid of 100 floating point values on [-10, 10].\n", 195 | "\n", 196 | "array = np.linspace(-10,+10,100)\n", 197 | "\n", 198 | "print(array, len(array), array.dtype, sep=\"\\n\")" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": { 205 | "collapsed": false 206 | }, 207 | "outputs": [], 208 | "source": [ 209 | "# Solution 5:\n", 210 | "# Create an int array with the powers of two from 1 to 1024.\n", 211 | "\n", 212 | "array = 2**np.linspace(0,10,11, dtype=int)\n", 213 | "\n", 214 | "print(array, len(array), array.dtype, sep=\"\\n\")" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": null, 220 | "metadata": { 221 | "collapsed": false 222 | }, 223 | "outputs": [], 224 | "source": [ 225 | "# Solution 5 Bonus: \n", 226 | "# Can you figure out a second \"NumPy only\" way to do it? 
\n", 227 | "# (Hint: help(function) is your friend)\n", 228 | "\n", 229 | "array = np.logspace(0,10,num=11,base=2, dtype=int)\n", 230 | "\n", 231 | "print(array, len(array), array.dtype, sep=\"\\n\")" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": null, 237 | "metadata": { 238 | "collapsed": false 239 | }, 240 | "outputs": [], 241 | "source": [ 242 | "# Solution 6:\n", 243 | "# Explain what NumPy dtype would be well-suited for (and why):\n", 244 | "# * Temperatures\n", 245 | "# * Counts of occurances of an event\n", 246 | "# * Differences in counts\n", 247 | "# * Probabilities\n", 248 | "\n", 249 | "temps = np.array(98.6, dtype=float)\n", 250 | "counts = np.array(range(7), dtype=int)\n", 251 | "deltas = counts - 1\n", 252 | "probs = np.array( np.random.random_sample() )\n", 253 | "\n", 254 | "print( temps, temps.dtype, sep=' ' )\n", 255 | "print( counts, counts.dtype, sep=' ' )\n", 256 | "print( deltas, deltas.dtype, sep=' ' )\n", 257 | "print( probs, probs.dtype, sep=' ' )" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": null, 263 | "metadata": { 264 | "collapsed": false 265 | }, 266 | "outputs": [], 267 | "source": [ 268 | "# Solution 7:\n", 269 | "# Images can be stored as (R,G,B) value triples. \n", 270 | "# Frequently, the color values (red, green, or blue) range from [0, 255]. 
\n", 271 | "# What would be an ideal NumPy data type for one color value?\n", 272 | "\n", 273 | "array = 256*np.ones(2500).reshape(50, 50)\n", 274 | "f = np.vectorize(lambda x: np.int(x*np.random.random()))\n", 275 | "image = f(array)\n", 276 | "print(image[0], image.dtype, sep='\\n\\n')" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": null, 282 | "metadata": { 283 | "collapsed": false 284 | }, 285 | "outputs": [], 286 | "source": [ 287 | "# Solution 7 Bonus: Plot the image\n", 288 | "import matplotlib.pyplot as plt\n", 289 | "%matplotlib inline\n", 290 | "plt.imshow(image, cmap=plt.cm.viridis)\n", 291 | "plt.colorbar()" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": null, 297 | "metadata": { 298 | "collapsed": false 299 | }, 300 | "outputs": [], 301 | "source": [ 302 | "# Solution 8A: not the best way (see next cell)\n", 303 | "# Come up with two ways to create a (2,5,3) shaped\n", 304 | "# int array filled the value 42.\n", 305 | "\n", 306 | "array1 = np.ones((2,5,3))*42\n", 307 | "print(array1, array1.shape, sep='\\n\\n')" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": null, 313 | "metadata": { 314 | "collapsed": false 315 | }, 316 | "outputs": [], 317 | "source": [ 318 | "# Solution 8B: better way\n", 319 | "# Come up with two ways to create a (2,5,3) shaped\n", 320 | "# int array filled the value 42.\n", 321 | "\n", 322 | "array2 = np.tile(42,(2,5,3))\n", 323 | "print(array2, array2.shape, sep='\\n\\n')" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": null, 329 | "metadata": { 330 | "collapsed": false 331 | }, 332 | "outputs": [], 333 | "source": [ 334 | "# Solution 9A:\n", 335 | "# Generate a (5,5) array with \n", 336 | "# values from a Normal distribution \n", 337 | "# of mean 10 and standard deviation 1.\n", 338 | "\n", 339 | "array9A = np.random.normal(loc=10.0, scale=1.0, size=(5,5))\n", 340 | "print( array9A )" 341 | ] 342 | }, 343 | { 
344 | "cell_type": "code", 345 | "execution_count": null, 346 | "metadata": { 347 | "collapsed": false 348 | }, 349 | "outputs": [], 350 | "source": [ 351 | "# Solution 9B:\n", 352 | "# Now, try to do it another way.\n", 353 | "\n", 354 | "array = np.ones(25).reshape(5, 5)\n", 355 | "f = np.vectorize(lambda x: x*np.random.normal(loc=10.0, scale=1.0))\n", 356 | "array9B = f(array)\n", 357 | "print( array9B )" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": null, 363 | "metadata": { 364 | "collapsed": false 365 | }, 366 | "outputs": [], 367 | "source": [ 368 | "# Solution 10:\n", 369 | "# Define a function of *N*, that returns an array with *N* values all equal to $1/N$.\n", 370 | "def one_over(N):\n", 371 | " return 1./np.linspace(1,N,N)\n", 372 | "\n", 373 | "one_over(5)" 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": null, 379 | "metadata": { 380 | "collapsed": false 381 | }, 382 | "outputs": [], 383 | "source": [ 384 | "%%timeit\n", 385 | "# Solution 11A: timeit\n", 386 | "# Create a Python list with the floating-point \n", 387 | "# values `[1.0, 2.0, 3.0, ..., 1E6]`.\n", 388 | "list11A = [x for x in range(1000000)]" 389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "execution_count": null, 394 | "metadata": { 395 | "collapsed": false 396 | }, 397 | "outputs": [], 398 | "source": [ 399 | "%%timeit\n", 400 | "# Solution 11B: timeit\n", 401 | "# Create a NumPy array with the floating-point \n", 402 | "# values `[1.0, 2.0, 3.0, ..., 1E6]`.\n", 403 | "array11B = np.arange(1000000)" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": null, 409 | "metadata": { 410 | "collapsed": true 411 | }, 412 | "outputs": [], 413 | "source": [ 414 | "# recreate because %%timeit weirdness\n", 415 | "list11A = [x for x in range(1000000)]\n", 416 | "array11B = np.arange(1000000)" 417 | ] 418 | }, 419 | { 420 | "cell_type": "code", 421 | "execution_count": null, 422 | "metadata": { 423 | 
"collapsed": false 424 | }, 425 | "outputs": [], 426 | "source": [ 427 | "%%timeit\n", 428 | "# Solution 11C: timeit\n", 429 | "# Time how long it takes to multiply each sequence by `np.pi`.\n", 430 | "list11C = [x*np.pi for x in list11A]" 431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "execution_count": null, 436 | "metadata": { 437 | "collapsed": false 438 | }, 439 | "outputs": [], 440 | "source": [ 441 | "%%timeit\n", 442 | "# Solution 11C: timeit\n", 443 | "# Time how long it takes to multiply each sequence by `np.pi`.\n", 444 | "array11C = np.pi*array11B" 445 | ] 446 | } 447 | ], 448 | "metadata": { 449 | "continuum": { 450 | "depends": [ 451 | "np_intro", 452 | "ip_essentials", 453 | "ip_datatypes", 454 | "ip_containers" 455 | ], 456 | "requires": [], 457 | "tag": "np_ex_intro_soln" 458 | }, 459 | "kernelspec": { 460 | "display_name": "Python 3", 461 | "language": "python", 462 | "name": "python3" 463 | }, 464 | "language_info": { 465 | "codemirror_mode": { 466 | "name": "ipython", 467 | "version": 3 468 | }, 469 | "file_extension": ".py", 470 | "mimetype": "text/x-python", 471 | "name": "python", 472 | "nbconvert_exporter": "python", 473 | "pygments_lexer": "ipython3", 474 | "version": "3.4.4" 475 | } 476 | }, 477 | "nbformat": 4, 478 | "nbformat_minor": 0 479 | } 480 | -------------------------------------------------------------------------------- /2-NumPy_SciPy/NumPy/EX02_IndexingSlicing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Table of Contents\n", 8 | "* [Exercise: Numpy Indexing and Slicing](#Exercise:-Numpy-Indexing-and-Slicing)\n", 9 | "\t* [Part 1: Slicing](#Part-1:-Slicing)\n", 10 | "\t* [Part 2: Indexing and Assignment](#Part-2:-Indexing-and-Assignment)\n", 11 | "\t* [Part 3: Shift an Array](#Part-3:-Shift-an-Array)\n", 12 | "\t* [Part 4: Element Replacement](#Part-4:-Element-Replacement)\n", 
13 | "* [Exercise: Optional Parts](#Exercise:-Optional-Parts)\n", 14 | "\t* [Optional Part 5: Replacing Rows](#Optional-Part-5:-Replacing-Rows)\n", 15 | "\t* [Optional Part 6: Replacing Columns](#Optional-Part-6:-Replacing-Columns)\n" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "# Exercise: Numpy Indexing and Slicing" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 29 | "collapsed": true 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "import numpy as np" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "## Part 1: Slicing" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "Let a = np.arange(200)\n", 48 | " 1. access the last element of the array\n", 49 | " 1. slice all but the last element of the array\n", 50 | " 1. slice the last 5 elements of the array\n", 51 | " 1. slice the first 5 elements of the array" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": { 58 | "collapsed": true 59 | }, 60 | "outputs": [], 61 | "source": [ 62 | "# Solution 1:\n" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "## Part 2: Indexing and Assignment" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "Create a sample array with shape (3,4).\n", 77 | " 1. Using single item assignments, place your favorite number in the four corners.\n", 78 | " 1. Make the first column equal to -1.\n", 79 | " 1. Make the last row equal to 99.\n", 80 | " 1. Make a 2x2 block in the bottom-center contain the values .25, .5, .75, and 1.0\n", 81 | " 1. Replace a row with the values: 2, 4, 8, and 16." 
82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": { 88 | "collapsed": true 89 | }, 90 | "outputs": [], 91 | "source": [ 92 | "# Solution 2:\n" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "## Part 3: Shift an Array" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "We used slicing to do a right shift on a 1-D array. Do a left shift on a 1-D array." 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": { 113 | "collapsed": true 114 | }, 115 | "outputs": [], 116 | "source": [ 117 | "# Solution 3:\n" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "## Part 4: Element Replacement" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": {}, 130 | "source": [ 131 | "Can you replace every element of an array with a particular value (say, 42.0)?" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": { 138 | "collapsed": true 139 | }, 140 | "outputs": [], 141 | "source": [ 142 | "# Solution 4:\n" 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": {}, 148 | "source": [ 149 | "# Exercise: Optional Parts" 150 | ] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "metadata": {}, 155 | "source": [ 156 | "These two are more difficult. We won't answer them now (we will revisit them in a bit), but see if you can figure them out." 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": {}, 162 | "source": [ 163 | "## Optional Part 5: Replacing Rows" 164 | ] 165 | }, 166 | { 167 | "cell_type": "markdown", 168 | "metadata": {}, 169 | "source": [ 170 | "Can you replace every row with a particular row (for example, 2, 4, 8, 16)?" 
171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": { 177 | "collapsed": true 178 | }, 179 | "outputs": [], 180 | "source": [ 181 | "# Solution 5:\n" 182 | ] 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "metadata": {}, 187 | "source": [ 188 | "## Optional Part 6: Replacing Columns" 189 | ] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "metadata": {}, 194 | "source": [ 195 | "[Don't strain yourself] Can you replace every column with a particular column?" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": null, 201 | "metadata": { 202 | "collapsed": true 203 | }, 204 | "outputs": [], 205 | "source": [ 206 | "# Solution 6:\n" 207 | ] 208 | } 209 | ], 210 | "metadata": { 211 | "continuum": { 212 | "depends": [ 213 | "np_slicing", 214 | "np_intro", 215 | "ip_essentials", 216 | "ip_datatypes" 217 | ], 218 | "requires": [], 219 | "tag": "np_ex_slicing" 220 | }, 221 | "kernelspec": { 222 | "display_name": "Python 3", 223 | "language": "python", 224 | "name": "python3" 225 | }, 226 | "language_info": { 227 | "codemirror_mode": { 228 | "name": "ipython", 229 | "version": 3 230 | }, 231 | "file_extension": ".py", 232 | "mimetype": "text/x-python", 233 | "name": "python", 234 | "nbconvert_exporter": "python", 235 | "pygments_lexer": "ipython3", 236 | "version": "3.4.4" 237 | } 238 | }, 239 | "nbformat": 4, 240 | "nbformat_minor": 0 241 | } 242 | -------------------------------------------------------------------------------- /2-NumPy_SciPy/NumPy/EX02_IndexingSlicing_soln.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Table of Contents\n", 8 | "* [Exercise: Numpy Indexing and Slicing](#Exercise:-Numpy-Indexing-and-Slicing)\n", 9 | "\t* [Part 1: Slicing](#Part-1:-Slicing)\n", 10 | "\t* [Part 2: Indexing and 
Assignment](#Part-2:-Indexing-and-Assignment)\n", 11 | "\t* [Part 3: Shift an Array](#Part-3:-Shift-an-Array)\n", 12 | "\t* [Part 4: Element Replacement](#Part-4:-Element-Replacement)\n", 13 | "* [Exercise: Optional Parts](#Exercise:-Optional-Parts)\n", 14 | "\t* [Optional Part 5: Replacing Rows](#Optional-Part-5:-Replacing-Rows)\n", 15 | "\t* [Optional Part 6: Replacing Columns](#Optional-Part-6:-Replacing-Columns)\n" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "# Exercise: Numpy Indexing and Slicing" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 29 | "collapsed": true 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "import numpy as np" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "## Part 1: Slicing" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "Let a = np.arange(200)\n", 48 | " 1. access the last element of the array\n", 49 | " 1. slice all but the last element of the array\n", 50 | " 1. slice the last 5 elements of the array\n", 51 | " 1. 
slice the first 5 elements of the array" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": { 58 | "collapsed": false 59 | }, 60 | "outputs": [], 61 | "source": [ 62 | "# Solution 1:\n", 63 | "\n", 64 | "a = np.arange(200)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": { 71 | "collapsed": false 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "# Solution 1.1: access the last element\n", 76 | "a[-1]" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": { 83 | "collapsed": false 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "# Solution 1.2: slice all but last element\n", 88 | "a[:-1]" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": { 95 | "collapsed": false 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "# Solution 1.3: slice the last 5 elements\n", 100 | "a[-5:]" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": { 107 | "collapsed": false 108 | }, 109 | "outputs": [], 110 | "source": [ 111 | "# Solution 1.4: slice the first 5 elements\n", 112 | "a[:5]" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "## Part 2: Indexing and Assignment" 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "Create a sample array with shape (3,4).\n", 127 | " 1. Using single item assignments, place your favorite number in the four corners.\n", 128 | " 1. Make the first column equal to -1.\n", 129 | " 1. Make the last row equal to 99.\n", 130 | " 1. Make a 2x2 block in the bottom-center contain the values .25, .5, .75, and 1.0\n", 131 | " 1. Replace a row with the values: 2, 4, 8, and 16." 
132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": { 138 | "collapsed": false 139 | }, 140 | "outputs": [], 141 | "source": [ 142 | "# Solution 2:\n", 143 | "\n", 144 | "a = np.zeros(12).reshape(3,4)\n", 145 | "a" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": { 152 | "collapsed": false 153 | }, 154 | "outputs": [], 155 | "source": [ 156 | "# Solution 2.1: favorite number in four corners\n", 157 | "a[0,0] = 42\n", 158 | "a[0,-1] = 42\n", 159 | "a[-1,0] = 42\n", 160 | "a[-1,-1] = 42\n", 161 | "a" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "metadata": { 168 | "collapsed": false 169 | }, 170 | "outputs": [], 171 | "source": [ 172 | "# Solution 2.2: first column equal to -1\n", 173 | "a[:,0] = -1\n", 174 | "a" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": { 181 | "collapsed": false 182 | }, 183 | "outputs": [], 184 | "source": [ 185 | "# Solution 2.3: last row equal to 99\n", 186 | "a[-1,:] = 99\n", 187 | "a" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": { 194 | "collapsed": false 195 | }, 196 | "outputs": [], 197 | "source": [ 198 | "# Solution 2.4: make a 2x2 block in bottom-center with values .25, .5, .75, and 1.0\n", 199 | "a[1:,1:3] = [[0.25,0.5],[0.75,1.0]]\n", 200 | "a" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "metadata": { 207 | "collapsed": false 208 | }, 209 | "outputs": [], 210 | "source": [ 211 | "# Solution 2.5: Replace a row with the values: 2, 4, 8, and 16.\n", 212 | "a[0,:] = [2,4,8,16]\n", 213 | "a" 214 | ] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "metadata": {}, 219 | "source": [ 220 | "## Part 3: Shift an Array" 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": {}, 226 | "source": [ 227 | "We used slicing to do 
a right shift on a 1-D array. Do a left shift on a 1-D array." 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": null, 233 | "metadata": { 234 | "collapsed": false 235 | }, 236 | "outputs": [], 237 | "source": [ 238 | "# Solution 3: don't use slicing for everything\n", 239 | "a = np.array([1,2,3,4])\n", 240 | "print(a)\n", 241 | "\n", 242 | "b = np.roll(a,-1)\n", 243 | "print(b)" 244 | ] 245 | }, 246 | { 247 | "cell_type": "markdown", 248 | "metadata": {}, 249 | "source": [ 250 | "## Part 4: Element Replacement" 251 | ] 252 | }, 253 | { 254 | "cell_type": "markdown", 255 | "metadata": {}, 256 | "source": [ 257 | "Can you replace every element of an array with a particular value (say, 42.0)." 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": null, 263 | "metadata": { 264 | "collapsed": false 265 | }, 266 | "outputs": [], 267 | "source": [ 268 | "# Solution 4:\n", 269 | "b[:] = 42\n", 270 | "b" 271 | ] 272 | }, 273 | { 274 | "cell_type": "markdown", 275 | "metadata": {}, 276 | "source": [ 277 | "# Exercise: Optional Parts" 278 | ] 279 | }, 280 | { 281 | "cell_type": "markdown", 282 | "metadata": {}, 283 | "source": [ 284 | "These two are more difficult. We won't answer them now (we will revisit them in a bit), but see if you can figure them out." 285 | ] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": {}, 290 | "source": [ 291 | "## Optional Part 5: Replacing Rows" 292 | ] 293 | }, 294 | { 295 | "cell_type": "markdown", 296 | "metadata": {}, 297 | "source": [ 298 | "Can you replace every row with a particular row (for example, 2, 4, 8, 16)?" 
299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": null, 304 | "metadata": { 305 | "collapsed": false 306 | }, 307 | "outputs": [], 308 | "source": [ 309 | "# Solution 5:\n", 310 | "a = np.zeros(12).reshape(3,4)\n", 311 | "a" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": null, 317 | "metadata": { 318 | "collapsed": false 319 | }, 320 | "outputs": [], 321 | "source": [ 322 | "a[:,:] = [2,4,8,16]\n", 323 | "a" 324 | ] 325 | }, 326 | { 327 | "cell_type": "markdown", 328 | "metadata": {}, 329 | "source": [ 330 | "## Optional Part 6: Replacing Columns" 331 | ] 332 | }, 333 | { 334 | "cell_type": "markdown", 335 | "metadata": {}, 336 | "source": [ 337 | "[Don't strain yourself] Can you replace every column with a particular column?" 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "execution_count": null, 343 | "metadata": { 344 | "collapsed": false 345 | }, 346 | "outputs": [], 347 | "source": [ 348 | "# Solution 6: prep\n", 349 | "a = np.arange(12).reshape(3,4)\n", 350 | "a" 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": null, 356 | "metadata": { 357 | "collapsed": false 358 | }, 359 | "outputs": [], 360 | "source": [ 361 | "# Solution 6: soln\n", 362 | "for col in range(a.shape[1]):\n", 363 | " a[:, col] = a[:,1]\n", 364 | "\n", 365 | "print(a)" 366 | ] 367 | } 368 | ], 369 | "metadata": { 370 | "continuum": { 371 | "depends": [ 372 | "np_slicing", 373 | "np_intro", 374 | "np_ex_slicing", 375 | "ip_essentials", 376 | "ip_datatypes" 377 | ], 378 | "requires": [], 379 | "tag": "np_ex_slicing_soln" 380 | }, 381 | "kernelspec": { 382 | "display_name": "Python 3", 383 | "language": "python", 384 | "name": "python3" 385 | }, 386 | "language_info": { 387 | "codemirror_mode": { 388 | "name": "ipython", 389 | "version": 3 390 | }, 391 | "file_extension": ".py", 392 | "mimetype": "text/x-python", 393 | "name": "python", 394 | "nbconvert_exporter": "python", 395 | "pygments_lexer": 
"ipython3", 396 | "version": "3.4.4" 397 | } 398 | }, 399 | "nbformat": 4, 400 | "nbformat_minor": 0 401 | } 402 | -------------------------------------------------------------------------------- /2-NumPy_SciPy/NumPy/EX03_VectorizedOperations.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Table of Contents\n", 8 | "* [Exercise: Numpy Vectorized Operations](#Exercise:-Numpy-Vectorized-Operations)\n", 9 | "\t* [Part 1: Timeit](#Part-1:-Timeit)\n", 10 | "\t* [Part 2: Ufuncs and Plotting](#Part-2:-Ufuncs-and-Plotting)\n", 11 | "\t\t* [Part 2.1](#Part-2.1)\n", 12 | "\t\t* [Part 2.2](#Part-2.2)\n", 13 | "\t* [Part 3: All Disappear](#Part-3:-All-Disappear)\n", 14 | "\t* [Part 4: Wallis Formula](#Part-4:-Wallis-Formula)\n", 15 | "\t\t* [Part 4.1](#Part-4.1)\n", 16 | "\t\t* [Part 4.2](#Part-4.2)\n", 17 | "\t\t* [Part 4.3](#Part-4.3)\n" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "# Exercise: Numpy Vectorized Operations" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": { 31 | "collapsed": false 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "import numpy as np" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "## Part 1: Timeit" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "Create a Python list with the floating-point values `[1.0, 2.0, 3.0, ..., 1E6]` and with Numpy. \n", 50 | "\n", 51 | "Time how long it takes to multiply each sequence by `np.pi`." 
52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": { 58 | "collapsed": false 59 | }, 60 | "outputs": [], 61 | "source": [ 62 | "# Solution 1:\n" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": { 69 | "collapsed": false 70 | }, 71 | "outputs": [], 72 | "source": [ 73 | "%%timeit" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "## Part 2: Ufuncs and Plotting" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": { 87 | "collapsed": true 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "import matplotlib.pyplot as plt\n", 92 | "%matplotlib inline" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "Use numpy and matplotlib for the following:\n", 100 | "* **numpy** allows us to easily compute expressions like\n", 101 | "> $y=x^2$ using vectorized expression `y = x**2` where x is a numpy array\n", 102 | "\n", 103 | "* **matplotlib** lets us graph xy-values very quickly using: \n", 104 | "> `plt.plot(x, y)` where `x = [x1, x2, x3, ...]`, and `y = [y1, y2, y3, ...]` \n", 105 | "\n", 106 | "* Repeated `plt.plot` commands will go to the same graph. 
" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "### Part 2.1" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "Graph the following functions on the interval [-2.0, 2.0):\n", 121 | "\n", 122 | " * $y=x + 1$\n", 123 | " * $y=e^x$\n", 124 | " * $y=cos(x^2) + sin(x^2)$\n", 125 | " * $y=cos(x)^2 + sin(x)^2$" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": { 132 | "collapsed": false 133 | }, 134 | "outputs": [], 135 | "source": [ 136 | "# Solution 2.1:\n" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "### Part 2.2" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "Graph a parametric equation over $t$ on $[0,2\\pi]$ defined by:\n", 151 | " \n", 152 | " * $y(t) = sin(t)$\n", 153 | " * $x(t) = cos(t)$\n", 154 | "\n", 155 | "You may want to issue a matplotlib statement: \n", 156 | "> `plot.axis(\"equal\")` \n", 157 | "\n", 158 | "to ensure you don't get a skewed perspective on your result." 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "metadata": { 165 | "collapsed": false 166 | }, 167 | "outputs": [], 168 | "source": [ 169 | "# Solution 2.2\n" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "## Part 3: All Disappear" 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "A. Suppose, *poof*, `arr.all()` (and `np.all()`) just disappeared. Write a function `myAll` that replaces them.\n", 184 | "\n", 185 | "B. Define a function `noneTrue` that returns `True` when no element of an array is `True` and `False` otherwise." 
186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": { 192 | "collapsed": false 193 | }, 194 | "outputs": [], 195 | "source": [ 196 | "# Reminder\n", 197 | "bool(1)" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": null, 203 | "metadata": { 204 | "collapsed": false 205 | }, 206 | "outputs": [], 207 | "source": [ 208 | "# Solution 3A:\n" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": { 215 | "collapsed": false 216 | }, 217 | "outputs": [], 218 | "source": [ 219 | "# Solution 3B:\n" 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "metadata": {}, 225 | "source": [ 226 | "## Part 4: Wallis Formula" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "The value of $\\pi$ can be computed with the Wallis formula, developed in 1655.\n", 234 | "\n", 235 | "$$\\pi=2\\prod_{i=1}^{\\infty}\\frac{4i^2}{4i^2-1}$$" 236 | ] 237 | }, 238 | { 239 | "cell_type": "markdown", 240 | "metadata": {}, 241 | "source": [ 242 | "### Part 4.1" 243 | ] 244 | }, 245 | { 246 | "cell_type": "markdown", 247 | "metadata": {}, 248 | "source": [ 249 | "Implement this method using native Python" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": null, 255 | "metadata": { 256 | "collapsed": false, 257 | "raises": "IndentationError" 258 | }, 259 | "outputs": [], 260 | "source": [ 261 | "# Solution 4.1\n", 262 | "def py_wallis(n):\n", 263 | " # your solution goes here\n", 264 | "\n", 265 | "print(py_wallis(100000))" 266 | ] 267 | }, 268 | { 269 | "cell_type": "markdown", 270 | "metadata": {}, 271 | "source": [ 272 | "### Part 4.2" 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "Implement this method using Numpy vectorization." 
280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": null, 285 | "metadata": { 286 | "collapsed": false, 287 | "raises": "IndentationError" 288 | }, 289 | "outputs": [], 290 | "source": [ 291 | "# Solution 4.2\n", 292 | "def np_wallis(n):\n", 293 | " # your solution goes here\n", 294 | "\n", 295 | "print(np_wallis(100000))" 296 | ] 297 | }, 298 | { 299 | "cell_type": "markdown", 300 | "metadata": {}, 301 | "source": [ 302 | "### Part 4.3" 303 | ] 304 | }, 305 | { 306 | "cell_type": "markdown", 307 | "metadata": {}, 308 | "source": [ 309 | "How much faster is the Numpy implementation?" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": null, 315 | "metadata": { 316 | "collapsed": true 317 | }, 318 | "outputs": [], 319 | "source": [ 320 | "# Solution 4.3\n" 321 | ] 322 | } 323 | ], 324 | "metadata": { 325 | "celltoolbar": "Edit Metadata", 326 | "continuum": { 327 | "depends": [ 328 | "np_vectorization", 329 | "np_intro", 330 | "ip_essentials", 331 | "ip_datatypes", 332 | "ip_containers", 333 | "ip_functions" 334 | ], 335 | "requires": [], 336 | "tag": "np_ex_vectorization" 337 | }, 338 | "kernelspec": { 339 | "display_name": "Python 2", 340 | "language": "python", 341 | "name": "python2" 342 | }, 343 | "language_info": { 344 | "codemirror_mode": { 345 | "name": "ipython", 346 | "version": 2 347 | }, 348 | "file_extension": ".py", 349 | "mimetype": "text/x-python", 350 | "name": "python", 351 | "nbconvert_exporter": "python", 352 | "pygments_lexer": "ipython2", 353 | "version": "2.7.11" 354 | } 355 | }, 356 | "nbformat": 4, 357 | "nbformat_minor": 0 358 | } 359 | -------------------------------------------------------------------------------- /2-NumPy_SciPy/NumPy/EX03_VectorizedOperations_soln.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Table of Contents\n", 8 | "* [Exercise: 
Numpy Vectorized Operations](#Exercise:-Numpy-Vectorized-Operations)\n", 9 | "\t* [Part 1: Timeit](#Part-1:-Timeit)\n", 10 | "\t* [Part 2: Ufuncs and Plotting](#Part-2:-Ufuncs-and-Plotting)\n", 11 | "\t\t* [Part 2.1](#Part-2.1)\n", 12 | "\t\t* [Part 2.2](#Part-2.2)\n", 13 | "\t* [Part 3: All Disappear](#Part-3:-All-Disappear)\n", 14 | "\t* [Part 4: Wallis Formula](#Part-4:-Wallis-Formula)\n", 15 | "\t\t* [Part 4.1](#Part-4.1)\n", 16 | "\t\t* [Part 4.2](#Part-4.2)\n", 17 | "\t\t* [Part 4.3](#Part-4.3)\n" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "# Exercise: Numpy Vectorized Operations" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": { 31 | "collapsed": false 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "import numpy as np" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "## Part 1: Timeit" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "Create a Python list with the floating-point values `[1.0, 2.0, 3.0, ..., 1E6]` and with Numpy. \n", 50 | "\n", 51 | "Time how long it takes to multiply each sequence by `np.pi`." 
52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": { 58 | "collapsed": false 59 | }, 60 | "outputs": [], 61 | "source": [ 62 | "# Solution 1:\n", 63 | "\n", 64 | "list1 = [x for x in range(1000000)]\n", 65 | "array1 = np.arange(1000000)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": { 72 | "collapsed": false 73 | }, 74 | "outputs": [], 75 | "source": [ 76 | "%%timeit\n", 77 | "list2 = [x*np.pi for x in list1]" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": { 84 | "collapsed": false 85 | }, 86 | "outputs": [], 87 | "source": [ 88 | "%%timeit\n", 89 | "array2 = np.pi*array1" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "## Part 2: Ufuncs and Plotting" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": { 103 | "collapsed": true 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "import matplotlib.pyplot as plt\n", 108 | "%matplotlib inline" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "Use numpy and matplotlib for the following:\n", 116 | "* **numpy** allows us to easily compute expressions like\n", 117 | "> $y=x^2$ using vectorized expression `y = x**2` where x is a numpy array\n", 118 | "\n", 119 | "* **matplotlib** lets us graph xy-values very quickly using: \n", 120 | "> `plt.plot(x, y)` where `x = [x1, x2, x3, ...]`, and `y = [y1, y2, y3, ...]` \n", 121 | "\n", 122 | "* Repeated `plt.plot` commands will go to the same graph. 
" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "### Part 2.1" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "Graph the following functions on the interval [-2.0, 2.0):\n", 137 | "\n", 138 | " * $y=x + 1$\n", 139 | " * $y=e^x$\n", 140 | " * $y=cos(x^2) + sin(x^2)$\n", 141 | " * $y=cos(x)^2 + sin(x)^2$" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": { 148 | "collapsed": false 149 | }, 150 | "outputs": [], 151 | "source": [ 152 | "# Solution 2.1:\n", 153 | "x = np.arange(-2,2, 0.01)\n", 154 | "\n", 155 | "y1 = x + 1\n", 156 | "y2 = np.exp(x)\n", 157 | "y3 = np.cos(x**2) + np.sin(x**2)\n", 158 | "y4 = np.cos(x)**2 + np.sin(x)**2\n", 159 | "\n", 160 | "plt.plot(x,y1,\n", 161 | " x,y2,\n", 162 | " x,y3,\n", 163 | " x,y4)" 164 | ] 165 | }, 166 | { 167 | "cell_type": "markdown", 168 | "metadata": {}, 169 | "source": [ 170 | "### Part 2.2" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "metadata": {}, 176 | "source": [ 177 | "Graph a parametric equation over $t$ on $[0,2\\pi]$ defined by:\n", 178 | " \n", 179 | " * $y(t) = sin(t)$\n", 180 | " * $x(t) = cos(t)$\n", 181 | "\n", 182 | "You may want to issue a matplotlib statement: \n", 183 | "> `plot.axis(\"equal\")` \n", 184 | "\n", 185 | "to ensure you don't get a skewed perspective on your result." 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": { 192 | "collapsed": false 193 | }, 194 | "outputs": [], 195 | "source": [ 196 | "# Solution 2.2\n", 197 | "t = np.linspace(-np.pi, +np.pi, 1000)\n", 198 | "y = np.sin(t)\n", 199 | "x = np.cos(t)\n", 200 | "plt.plot(x,y)" 201 | ] 202 | }, 203 | { 204 | "cell_type": "markdown", 205 | "metadata": {}, 206 | "source": [ 207 | "## Part 3: All Disappear" 208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "metadata": {}, 213 | "source": [ 214 | "A. 
Suppose, *poof*, `arr.all()` (and `np.all()`) just disappeared. Write a function `myAll` that replaces them.\n", 215 | "\n", 216 | "B. Define a function `noneTrue` that returns `True` when no element of an array is `True` and `False` otherwise." 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": { 223 | "collapsed": false 224 | }, 225 | "outputs": [], 226 | "source": [ 227 | "# Reminder\n", 228 | "bool(1)" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": { 235 | "collapsed": false 236 | }, 237 | "outputs": [], 238 | "source": [ 239 | "# Solution 3A: all_true == not_any_false\n", 240 | "\n", 241 | "def not_any_false(x):\n", 242 | " return not any(x==False)\n", 243 | "\n", 244 | "x1 = np.array([1,1])\n", 245 | "x2 = np.array([0,1])\n", 246 | "x3 = np.array([0,0])\n", 247 | "\n", 248 | "print( not_any_false(x1) )\n", 249 | "print( not_any_false(x2) )\n", 250 | "print( not_any_false(x3) )" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": null, 256 | "metadata": { 257 | "collapsed": false 258 | }, 259 | "outputs": [], 260 | "source": [ 261 | "# Solution 3B: not_any_true\n", 262 | "\n", 263 | "def not_any_true(x):\n", 264 | " return not any(x==True)\n", 265 | "\n", 266 | "x1 = np.array([1,1])\n", 267 | "x2 = np.array([0,1])\n", 268 | "x3 = np.array([0,0])\n", 269 | "\n", 270 | "print( not_any_true(x1) )\n", 271 | "print( not_any_true(x2) )\n", 272 | "print( not_any_true(x3) ) " 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "## Part 4: Wallis Formula" 280 | ] 281 | }, 282 | { 283 | "cell_type": "markdown", 284 | "metadata": {}, 285 | "source": [ 286 | "The value of $\\pi$ can be computed with the Wallis formula, developed in 1655.\n", 287 | "\n", 288 | "$$\\pi=2\\prod_{i=1}^{\\infty}\\frac{4i^2}{4i^2-1}$$" 289 | ] 290 | }, 291 | { 292 | "cell_type": "markdown", 293 | "metadata": {}, 294 | 
"source": [ 295 | "### Part 4.1" 296 | ] 297 | }, 298 | { 299 | "cell_type": "markdown", 300 | "metadata": {}, 301 | "source": [ 302 | "Implement this method using native Python" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": null, 308 | "metadata": { 309 | "collapsed": false 310 | }, 311 | "outputs": [], 312 | "source": [ 313 | "# Solution 4.1\n", 314 | "def py_wallis(n):\n", 315 | " prod = 1.0\n", 316 | " for i in range(1,n):\n", 317 | " term1 = 4*(i**2)\n", 318 | " prod = prod * term1/(term1-1)\n", 319 | " return 2*prod\n", 320 | "\n", 321 | "print(py_wallis(100000))" 322 | ] 323 | }, 324 | { 325 | "cell_type": "markdown", 326 | "metadata": {}, 327 | "source": [ 328 | "### Part 4.2" 329 | ] 330 | }, 331 | { 332 | "cell_type": "markdown", 333 | "metadata": {}, 334 | "source": [ 335 | "Implement this method using Numpy vectorization." 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": null, 341 | "metadata": { 342 | "collapsed": false 343 | }, 344 | "outputs": [], 345 | "source": [ 346 | "# Solution 4.2\n", 347 | "def np_wallis(n):\n", 348 | " x = np.arange(1,n)\n", 349 | " y = (4*x**2)/(4*x**2 - 1)\n", 350 | " z = 2.0*y.prod()\n", 351 | " return z\n", 352 | "\n", 353 | "print(np_wallis(100000))" 354 | ] 355 | }, 356 | { 357 | "cell_type": "markdown", 358 | "metadata": {}, 359 | "source": [ 360 | "### Part 4.3" 361 | ] 362 | }, 363 | { 364 | "cell_type": "markdown", 365 | "metadata": {}, 366 | "source": [ 367 | "How much faster is the Numpy implementation?" 
368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": null, 373 | "metadata": { 374 | "collapsed": true 375 | }, 376 | "outputs": [], 377 | "source": [ 378 | "# Solution 4.3" 379 | ] 380 | }, 381 | { 382 | "cell_type": "code", 383 | "execution_count": null, 384 | "metadata": { 385 | "collapsed": false 386 | }, 387 | "outputs": [], 388 | "source": [ 389 | "n = 1000\n", 390 | "%timeit pi = py_wallis(n)" 391 | ] 392 | }, 393 | { 394 | "cell_type": "code", 395 | "execution_count": null, 396 | "metadata": { 397 | "collapsed": false 398 | }, 399 | "outputs": [], 400 | "source": [ 401 | "n = 1000\n", 402 | "%timeit pi = np_wallis(n)" 403 | ] 404 | } 405 | ], 406 | "metadata": { 407 | "continuum": { 408 | "depends": [ 409 | "np_vectorization", 410 | "np_intro", 411 | "np_ex_vectorization", 412 | "ip_essentials", 413 | "ip_datatypes", 414 | "ip_containers", 415 | "ip_functions" 416 | ], 417 | "requires": [], 418 | "tag": "np_ex_vectorization_soln" 419 | }, 420 | "kernelspec": { 421 | "display_name": "Python 2", 422 | "language": "python", 423 | "name": "python2" 424 | }, 425 | "language_info": { 426 | "codemirror_mode": { 427 | "name": "ipython", 428 | "version": 2 429 | }, 430 | "file_extension": ".py", 431 | "mimetype": "text/x-python", 432 | "name": "python", 433 | "nbconvert_exporter": "python", 434 | "pygments_lexer": "ipython2", 435 | "version": "2.7.11" 436 | } 437 | }, 438 | "nbformat": 4, 439 | "nbformat_minor": 0 440 | } 441 | -------------------------------------------------------------------------------- /2-NumPy_SciPy/NumPy/img/broadcasting2D.lightbg.scaled-noalpha.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/2-NumPy_SciPy/NumPy/img/broadcasting2D.lightbg.scaled-noalpha.png -------------------------------------------------------------------------------- 
/2-NumPy_SciPy/NumPy/img/ecosystem.lightbg.scaled-noalpha.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/2-NumPy_SciPy/NumPy/img/ecosystem.lightbg.scaled-noalpha.png -------------------------------------------------------------------------------- /2-NumPy_SciPy/NumPy/img/mef_numpy_selection-noalpha.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/2-NumPy_SciPy/NumPy/img/mef_numpy_selection-noalpha.png -------------------------------------------------------------------------------- /2-NumPy_SciPy/NumPy/img/mef_numpy_slice_01-noalpha.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/2-NumPy_SciPy/NumPy/img/mef_numpy_slice_01-noalpha.png -------------------------------------------------------------------------------- /2-NumPy_SciPy/NumPy/img/mef_numpy_slice_02-noalpha.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/2-NumPy_SciPy/NumPy/img/mef_numpy_slice_02-noalpha.png -------------------------------------------------------------------------------- /2-NumPy_SciPy/NumPy/img/numpyzerosdims-noalpha.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/2-NumPy_SciPy/NumPy/img/numpyzerosdims-noalpha.png -------------------------------------------------------------------------------- /2-NumPy_SciPy/NumPy/img/ufunc.lightbg.scaled-noalpha.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/2-NumPy_SciPy/NumPy/img/ufunc.lightbg.scaled-noalpha.png -------------------------------------------------------------------------------- /2-NumPy_SciPy/NumPy/tmp/.keep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/2-NumPy_SciPy/NumPy/tmp/.keep -------------------------------------------------------------------------------- /2-NumPy_SciPy/README.md: -------------------------------------------------------------------------------- 1 | NumPy and SciPy tutorial materials -------------------------------------------------------------------------------- /2-NumPy_SciPy/SciPy/1_Introduction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Table of Contents\n", 8 | "* [Solving mathematical problems with Scientific Python](#Solving-mathematical-problems-with-Scientific-Python)\n", 9 | "* [SciPy - Library of scientific algorithms for Python](#SciPy---Library-of-scientific-algorithms-for-Python)\n", 10 | "\t* [Introduction](#Introduction)\n", 11 | "\t* [Further Reading](#Further-Reading)\n", 12 | "\t* [Special functions](#Special-functions)\n" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "# Solving mathematical problems with Scientific Python" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "# SciPy - Library of scientific algorithms for Python" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "This curriculum builds on material by J. 
Robert Johansson from his \"Introduction to scientific computing with Python,\" generously made available under a [Creative Commons Attribution 3.0 Unported License](http://creativecommons.org/licenses/by/3.0/) at https://github.com/jrjohansson/scientific-python-lectures. The Continuum Analytics enhancements use the [Creative Commons Attribution-NonCommercial 4.0 International License](https://creativecommons.org/licenses/by-nc/4.0/).\n", 34 | "\n", 35 | "****\n" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "## Introduction" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "The SciPy framework builds on top of the low-level NumPy framework for multidimensional arrays, and provides a large number of higher-level scientific algorithms. Some of the topics that SciPy covers are:\n", 50 | "\n", 51 | "* Special functions ([scipy.special](http://docs.scipy.org/doc/scipy/reference/special.html))\n", 52 | "* Integration ([scipy.integrate](http://docs.scipy.org/doc/scipy/reference/integrate.html))\n", 53 | "* Optimization ([scipy.optimize](http://docs.scipy.org/doc/scipy/reference/optimize.html))\n", 54 | "* Interpolation ([scipy.interpolate](http://docs.scipy.org/doc/scipy/reference/interpolate.html))\n", 55 | "* Fourier Transforms ([scipy.fftpack](http://docs.scipy.org/doc/scipy/reference/fftpack.html))\n", 56 | "* Signal Processing ([scipy.signal](http://docs.scipy.org/doc/scipy/reference/signal.html))\n", 57 | "* Linear Algebra ([scipy.linalg](http://docs.scipy.org/doc/scipy/reference/linalg.html))\n", 58 | "* Sparse Eigenvalue Problems ([scipy.sparse](http://docs.scipy.org/doc/scipy/reference/sparse.html))\n", 59 | "* Statistics ([scipy.stats](http://docs.scipy.org/doc/scipy/reference/stats.html))\n", 60 | "* Multi-dimensional image processing ([scipy.ndimage](http://docs.scipy.org/doc/scipy/reference/ndimage.html))\n", 61 | "* File IO 
([scipy.io](http://docs.scipy.org/doc/scipy/reference/io.html))\n", 62 | "\n", 63 | "Each of these submodules provides a number of functions and classes that can be used to solve problems in their respective topics.\n", 64 | "\n", 65 | "In this tutorial, we will look at how to use some of these subpackages." 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "## Further Reading" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "* http://www.scipy.org - The official web page for the SciPy project.\n", 80 | "* http://docs.scipy.org/doc/scipy/reference/tutorial/index.html - A tutorial on how to get started using SciPy. \n", 81 | "* https://github.com/scipy/scipy/ - The SciPy source code." 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "If we only need to use part of the SciPy framework, we can selectively include only those modules we are interested in. For example, to include the linear algebra package under the name `la`, we can do:\n", 89 | "\n", 90 | "```python\n", 91 | "import scipy.linalg as la\n", 92 | "```" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "While running `from scipy import *` may be convenient it is not recommended particularly because this will also import all of Numpy into the global namespace." 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "## Special functions" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "A large number of mathematical special functions are important for many computional physics problems. SciPy provides implementations of a very extensive set of special functions. For details, see the list of functions in the reference documentation at http://docs.scipy.org/doc/scipy/reference/special.html#module-scipy.special. 
\n", 114 | "\n", 115 | "To demonstrate the typical usage of special functions, we will look in more detail at the Bessel functions:" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": { 122 | "collapsed": false 123 | }, 124 | "outputs": [], 125 | "source": [ 126 | "#\n", 127 | "# The scipy.special module includes a large number of Bessel functions\n", 128 | "# Here we will use the functions jn and yn, which are the Bessel functions \n", 129 | "# of the first and second kind and real-valued order. We also include the \n", 130 | "# function jn_zeros and yn_zeros that gives the zeroes of the functions jn\n", 131 | "# and yn. Bessel functions are useful in partial differential equations,\n", 132 | "# like the wave equation.\n", 133 | "from scipy.special import jn, yn, jn_zeros, yn_zeros\n", 134 | "import numpy as np\n", 135 | "import matplotlib.pyplot as plt\n", 136 | "%matplotlib inline" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": { 143 | "collapsed": false 144 | }, 145 | "outputs": [], 146 | "source": [ 147 | "n = 0 # order\n", 148 | "x = 0.0\n", 149 | "\n", 150 | "# Bessel function of first kind\n", 151 | "print(\"J_%d(%f) = %f\" % (n, x, jn(n, x)))\n", 152 | "\n", 153 | "x = 1.0\n", 154 | "# Bessel function of second kind\n", 155 | "print(\"Y_%d(%f) = %f\" % (n, x, yn(n, x)))" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": { 162 | "collapsed": false 163 | }, 164 | "outputs": [], 165 | "source": [ 166 | "x = np.linspace(0, 10, 100)\n", 167 | "\n", 168 | "fig, ax = plt.subplots()\n", 169 | "for n in range(4):\n", 170 | " ax.plot(x, jn(n, x), label=r\"$J_%d(x)$\" % n)\n", 171 | "ax.legend();" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "metadata": { 178 | "collapsed": false 179 | }, 180 | "outputs": [], 181 | "source": [ 182 | "# zeros of Bessel functions\n", 183 | 
"n = 0 # order\n", 184 | "m = 4 # number of roots to compute\n", 185 | "jn_zeros(n, m)" 186 | ] 187 | } 188 | ], 189 | "metadata": { 190 | "anaconda-cloud": {}, 191 | "continuum": { 192 | "depends": [ 193 | "np_join" 194 | ], 195 | "tag": "math_intro" 196 | }, 197 | "kernelspec": { 198 | "display_name": "Python [conda env:python3]", 199 | "language": "python", 200 | "name": "conda-env-python3-py" 201 | }, 202 | "language_info": { 203 | "codemirror_mode": { 204 | "name": "ipython", 205 | "version": 3 206 | }, 207 | "file_extension": ".py", 208 | "mimetype": "text/x-python", 209 | "name": "python", 210 | "nbconvert_exporter": "python", 211 | "pygments_lexer": "ipython3", 212 | "version": "3.5.2" 213 | } 214 | }, 215 | "nbformat": 4, 216 | "nbformat_minor": 0 217 | } 218 | -------------------------------------------------------------------------------- /2-NumPy_SciPy/SciPy/2_Integration.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Table of Contents\n", 8 | "* [Learning Objectives:](#Learning-Objectives:)\n", 9 | "* [Integration](#Integration)\n", 10 | "\t* [Numerical integration: quadrature](#Numerical-integration:-quadrature)\n" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# Learning Objectives:" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "After completion of this module, learners should be able to:\n", 25 | "\n", 26 | "* compute numerical integrals (quadrature) and solutions of initial-value ordinary differential equations" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "# Integration" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "## Numerical integration: quadrature" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | 
"metadata": {}, 46 | "source": [ 47 | "Numerical evaluation of a function of the type\n", 48 | "\n", 49 | "$\\displaystyle \\int_a^b f(x) dx$\n", 50 | "\n", 51 | "is called *numerical quadrature*, or simply *quadrature*. SciPy provides a series of functions for different kind of quadrature, for example the `quad`, `dblquad` and `tplquad` for single, double and triple integrals, respectively.\n", 52 | "\n" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": { 59 | "collapsed": false 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "from scipy import Inf\n", 64 | "from scipy.special import jn, yn, jn_zeros, yn_zeros\n", 65 | "from scipy.integrate import quad, dblquad, tplquad\n", 66 | "import numpy as np" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "The `quad` function takes a large number of optional arguments which can be used to fine-tune the behavior of the function (try `help(quad)` for details).\n", 74 | "\n", 75 | "The basic usage is as follows:" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": { 82 | "collapsed": false 83 | }, 84 | "outputs": [], 85 | "source": [ 86 | "# define a simple function for the integrand\n", 87 | "def f(x):\n", 88 | " return x" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": { 95 | "collapsed": false 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "x_lower = 0 # the lower limit of x\n", 100 | "x_upper = 1 # the upper limit of x\n", 101 | "\n", 102 | "val, abserr = quad(f, x_lower, x_upper)\n", 103 | "\n", 104 | "print(\"integral value =\", val, \", absolute error =\", abserr )" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "If we need to pass extra arguments to the integrand function, we can use the `args` keyword argument:" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | 
"execution_count": null, 117 | "metadata": { 118 | "collapsed": false 119 | }, 120 | "outputs": [], 121 | "source": [ 122 | "def integrand(x, n):\n", 123 | " \"\"\"\n", 124 | " Bessel function of first kind and order n. \n", 125 | " \"\"\"\n", 126 | " return jn(n, x)\n", 127 | "\n", 128 | "\n", 129 | "x_lower = 0 # the lower limit of x\n", 130 | "x_upper = 10 # the upper limit of x\n", 131 | "\n", 132 | "val, abserr = quad(integrand, x_lower, x_upper, args=(3,))\n", 133 | "\n", 134 | "print(val, abserr)" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "For simple functions, we can use a lambda function (nameless function) instead of explicitly defining a function for the integrand:" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": { 148 | "collapsed": false 149 | }, 150 | "outputs": [], 151 | "source": [ 152 | "val, abserr = quad(lambda x: np.exp(-x ** 2), -Inf, Inf)\n", 153 | "\n", 154 | "print(\"numerical =\", val, abserr)\n", 155 | "\n", 156 | "analytical = np.sqrt(np.pi)\n", 157 | "print(\"analytical =\", analytical)" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": [ 164 | "As shown in the example above, we can also use 'Inf' or '-Inf' as integral limits.\n", 165 | "\n", 166 | "Higher-dimensional integration works in the same way:" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": { 173 | "collapsed": false 174 | }, 175 | "outputs": [], 176 | "source": [ 177 | "def integrand(x, y):\n", 178 | " return np.exp(-x**2-y**2)\n", 179 | "\n", 180 | "x_lower = 0 \n", 181 | "x_upper = 10\n", 182 | "y_lower = 0\n", 183 | "y_upper = 10\n", 184 | "\n", 185 | "val, abserr = dblquad(integrand, x_lower, x_upper, lambda x : y_lower, lambda x: y_upper)\n", 186 | "\n", 187 | "print(val, abserr)" 188 | ] 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "metadata": {}, 193 | 
"source": [ 194 | "Note how we had to pass lambda functions for the limits for the y integration, since these in general can be functions of x." 195 | ] 196 | } 197 | ], 198 | "metadata": { 199 | "continuum": { 200 | "depends": [ 201 | "math_intro" 202 | ], 203 | "tag": "math_integration" 204 | }, 205 | "kernelspec": { 206 | "display_name": "Python [conda env:python3]", 207 | "language": "python", 208 | "name": "conda-env-python3-py" 209 | }, 210 | "language_info": { 211 | "codemirror_mode": { 212 | "name": "ipython", 213 | "version": 3 214 | }, 215 | "file_extension": ".py", 216 | "mimetype": "text/x-python", 217 | "name": "python", 218 | "nbconvert_exporter": "python", 219 | "pygments_lexer": "ipython3", 220 | "version": "3.5.2" 221 | } 222 | }, 223 | "nbformat": 4, 224 | "nbformat_minor": 0 225 | } 226 | -------------------------------------------------------------------------------- /2-NumPy_SciPy/SciPy/3_FFT.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Table of Contents\n", 8 | "* [Learning Objectives:](#Learning-Objectives:)\n", 9 | "* [Fourier transform](#Fourier-transform)\n" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "# Learning Objectives:" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "After completion of this module, learners should be able to:\n", 24 | "\n", 25 | "* figure out how to apply Python library functions for statistical tests, for special functions, and for integral transforms (e.g., FFTs)" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "# Fourier transform" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "Fourier transforms are one of the universal tools in computational physics; they appear over and over 
again in different contexts. SciPy provides functions for accessing the classic [FFTPACK](http://www.netlib.org/fftpack/) library from NetLib, an efficient and well tested FFT library written in FORTRAN. The SciPy API has a few additional convenience functions, but overall the API is closely related to the original FORTRAN library.\n", 40 | "\n", 41 | "To use the `fftpack` module in a python program, include it using:" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": { 48 | "collapsed": false 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "import numpy as np\n", 53 | "import scipy.fftpack as fft\n", 54 | "import matplotlib.pyplot as plt\n", 55 | "%matplotlib inline" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "To demonstrate how to do a fast Fourier transform with SciPy, let's look at the FFT of the solution to the damped oscillator from the previous section:" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": { 69 | "collapsed": true 70 | }, 71 | "outputs": [], 72 | "source": [ 73 | "from scipy.integrate import odeint\n", 74 | "def dy(y, t, zeta, w0):\n", 75 | " \"\"\"\n", 76 | " The right-hand side of the damped oscillator ODE\n", 77 | " \"\"\"\n", 78 | " x, p = y[0], y[1]\n", 79 | " \n", 80 | " dx = p\n", 81 | " dp = -2 * zeta * w0 * p - w0**2 * x\n", 82 | "\n", 83 | " return [dx, dp]\n", 84 | "y0 = [1.0, 0.0]\n", 85 | "t = np.linspace(0, 10, 1000)\n", 86 | "w0 = 2*np.pi*1.0\n", 87 | "y1 = odeint(dy, y0, t, args=(0.0, w0)) # undamped\n", 88 | "y2 = odeint(dy, y0, t, args=(0.2, w0)) # under damped\n", 89 | "y3 = odeint(dy, y0, t, args=(1.0, w0)) # critial damping\n", 90 | "y4 = odeint(dy, y0, t, args=(5.0, w0)) # over damped" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": { 97 | "collapsed": false 98 | }, 99 | "outputs": [], 100 | "source": [ 101 | "fig, ax = 
plt.subplots()\n", 102 | "ax.plot(t, y1[:,0], 'k', label=\"undamped\", linewidth=0.25)\n", 103 | "ax.plot(t, y2[:,0], 'r', label=\"under damped\")\n", 104 | "ax.plot(t, y3[:,0], 'b', label=r\"critical damping\")\n", 105 | "ax.plot(t, y4[:,0], 'g', label=\"over damped\")\n", 106 | "ax.legend();" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": { 113 | "collapsed": false 114 | }, 115 | "outputs": [], 116 | "source": [ 117 | "N = len(t)\n", 118 | "dt = t[1]-t[0]\n", 119 | "\n", 120 | "# calculate the fast fourier transform\n", 121 | "# y2 is the solution to the under-damped oscillator from the previous section\n", 122 | "F = fft.fft(y2[:,0]) \n", 123 | "\n", 124 | "# calculate the frequencies for the components in F\n", 125 | "w = fft.fftfreq(N, dt)" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": { 132 | "collapsed": false 133 | }, 134 | "outputs": [], 135 | "source": [ 136 | "fig, ax = plt.subplots(figsize=(9,3))\n", 137 | "ax.plot(w, abs(F));" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "Since the signal is real, the spectrum is symmetric. We therefore only need to plot the part that corresponds to the positive frequencies. 
To extract that part of the `w` and `F`, we can use some of the indexing tricks for NumPy arrays we saw in Lecture 2:" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": { 151 | "collapsed": false 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "# select only indices for elements that corresponds to positive frequencies\n", 156 | "indices = np.where(w > 0) \n", 157 | "w_pos = w[indices]\n", 158 | "F_pos = F[indices]" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "metadata": { 165 | "collapsed": false 166 | }, 167 | "outputs": [], 168 | "source": [ 169 | "fig, ax = plt.subplots(figsize=(9,3))\n", 170 | "ax.plot(w_pos, abs(F_pos))\n", 171 | "ax.set_xlim(0, 5);" 172 | ] 173 | }, 174 | { 175 | "cell_type": "markdown", 176 | "metadata": {}, 177 | "source": [ 178 | "As expected, we now see a peak in the spectrum that is centered around 1, which is the frequency we used in the damped oscillator example." 
179 | ] 180 | } 181 | ], 182 | "metadata": { 183 | "continuum": { 184 | "depends": [ 185 | "math_intro" 186 | ], 187 | "tag": "math_fft" 188 | }, 189 | "kernelspec": { 190 | "display_name": "Python [conda env:python3]", 191 | "language": "python", 192 | "name": "conda-env-python3-py" 193 | }, 194 | "language_info": { 195 | "codemirror_mode": { 196 | "name": "ipython", 197 | "version": 3 198 | }, 199 | "file_extension": ".py", 200 | "mimetype": "text/x-python", 201 | "name": "python", 202 | "nbconvert_exporter": "python", 203 | "pygments_lexer": "ipython3", 204 | "version": "3.5.2" 205 | } 206 | }, 207 | "nbformat": 4, 208 | "nbformat_minor": 0 209 | } 210 | -------------------------------------------------------------------------------- /3-Analytics/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/3-Analytics/README.md -------------------------------------------------------------------------------- /3-Analytics/pandas/EX01_ExcelFiles.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Table of Contents\n", 8 | "* [Learning Objectives](#Learning-Objectives)\n", 9 | "* [Pandas Exercise 3: Relational Normalization](#Pandas-Exercise-3:-Relational-Normalization)\n", 10 | "\t* [Background on Reading Excel](#Background-on-Reading-Excel)\n", 11 | "\t* [Background on Relational Normalization](#Background-on-Relational-Normalization)\n", 12 | "\t* [Background on Categorical Data](#Background-on-Categorical-Data)\n", 13 | "\t* [Set-up](#Set-up)\n", 14 | "\t* [Part 1: Read the data](#Part-1:-Read-the-data)\n", 15 | "\t* [Part 2: Normalize](#Part-2:-Normalize)\n", 16 | "\t* [Part 3: Create a Sqlite3 database](#Part-3:-Create-a-Sqlite3-database)\n", 17 | "\t* [Part 4: Compare file 
sizes](#Part-4:-Compare-file-sizes)\n", 18 | "\t* [Part 5: Optional](#Part-5:-Optional)\n", 19 | "\t* [Part 6: Optional](#Part-6:-Optional)\n" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "# Learning Objectives" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "After completion of this module, learners should be able to:\n", 34 | "* list various python modules used for reading Excel files\n", 35 | "* read an Excel data file into a pandas DataFrame\n", 36 | "* use categoricals and other techniques to reduce data size\n", 37 | "* use pandas to convert an Excel file into an Sqlite database file" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "# Pandas Exercise 3: Relational Normalization" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "## Background on Reading Excel" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "There are several 3rd party Python modules for working with Microsoft Excel spreadsheets. A list of them is collected at:\n", 59 | "\n", 60 | "* [Working with Excel Files in Python](http://www.python-excel.org/)\n", 61 | "\n", 62 | "I've used [openpyxl](https://openpyxl.readthedocs.org/en/latest/) successfully in some projects.\n", 63 | "\n", 64 | "However, within the Scientific Python toolstack, the most common way of accessing the Excel format is the [Pandas](http://pandas.pydata.org/) framework. This is heavier weight than other options if all you wanted to do was read Excel, but in a scientific context, you already need most of the requirements (NumPy, etc), and you probably want to be using Pandas for numerous other purposes anyway.\n", 65 | "\n", 66 | "Pandas internally uses `xlrd` to read Excel files, but provides a higher-level wrapper. 
You probably need to run:\n", 67 | "\n", 68 | "```bash\n", 69 | "conda install xlrd\n", 70 | "```" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "## Background on Relational Normalization" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "Description from [Wikipedia](https://en.wikipedia.org/wiki/Database_normalization):\n", 85 | "> *Database normalization ... is the process of organizing the columns (attributes) and tables (relations) of a relational database to minimize data redundancy. Normalization involves decomposing a table into less redundant tables without losing information*" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "## Background on Categorical Data" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "Description from the [documentation](https://pandas-docs.github.io/pandas-docs-travis/categorical.html):\n", 100 | "\n", 101 | "> *Categoricals are a pandas data type, which correspond to categorical variables in statistics: **a variable, which can take on only a limited, and usually fixed, number of possible values** (categories; levels in R). 
Examples are gender, social class, blood types, country affiliations, observation time or ratings via Likert scales.*" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": { 108 | "collapsed": false 109 | }, 110 | "outputs": [], 111 | "source": [ 112 | "# Categorical example: notice the counts for each category\n", 113 | "import pandas as pd\n", 114 | "s = pd.Series(pd.Categorical([\"a\",\"b\",\"c\",\"c\",\"e\"], categories=[\"c\",\"a\",\"b\",\"d\"]))\n", 115 | "\n", 116 | "s.value_counts()" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": { 123 | "collapsed": false 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "# Categorical example: notice the NaN for a value that did not match any category\n", 128 | "\n", 129 | "s" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "## Set-up" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": { 143 | "collapsed": false 144 | }, 145 | "outputs": [], 146 | "source": [ 147 | "## Optional: Uncomment to install the python module `xlrd` for reading Excel files\n", 148 | "## Recommendation: use the built-in pandas methods instead.\n", 149 | "\n", 150 | "# !conda install -y xlrd" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": { 157 | "collapsed": true 158 | }, 159 | "outputs": [], 160 | "source": [ 161 | "# Required: imports needed in this exercise\n", 162 | "%matplotlib inline\n", 163 | "import pandas as pd" 164 | ] 165 | }, 166 | { 167 | "cell_type": "markdown", 168 | "metadata": {}, 169 | "source": [ 170 | "## Part 1: Read the data" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "metadata": {}, 176 | "source": [ 177 | "Read the NYC Harbor data from the excel data file ``data/nyc_harbor_wq_2006-2014.xlsx`` into DataFrame.\n", 178 | "\n", 179 | "*Note: This 
Excel file is roughly 24 MB in size, containing 300k rows of largely categorical data. It may take some time to load...*" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": { 186 | "collapsed": false, 187 | "scrolled": true 188 | }, 189 | "outputs": [], 190 | "source": [ 191 | "# Solution:\n" 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": {}, 197 | "source": [ 198 | "## Part 2: Normalize" 199 | ] 200 | }, 201 | { 202 | "cell_type": "markdown", 203 | "metadata": {}, 204 | "source": [ 205 | "A large fraction of all values in a given column are duplicates.\n", 206 | "* Use the unique `STATION` values as categories to reduce data duplication stored in memory" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "metadata": { 213 | "collapsed": false 214 | }, 215 | "outputs": [], 216 | "source": [ 217 | "# Solution:\n" 218 | ] 219 | }, 220 | { 221 | "cell_type": "markdown", 222 | "metadata": {}, 223 | "source": [ 224 | "## Part 3: Create a Sqlite3 database" 225 | ] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": {}, 230 | "source": [ 231 | "Using the NYC Harbor data set, create an Sqlite3 single-file database containing all of the data inside the spreadsheet.\n", 232 | "\n", 233 | "* Store the data in its native types per column/cell (Pandas does a good job of inferring data types)" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": { 240 | "collapsed": true 241 | }, 242 | "outputs": [], 243 | "source": [ 244 | "#Solution\n" 245 | ] 246 | }, 247 | { 248 | "cell_type": "markdown", 249 | "metadata": {}, 250 | "source": [ 251 | "## Part 4: Compare file sizes" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "metadata": {}, 257 | "source": [ 258 | "Write code that compares the file size of the resulting sqlite3 file to that of the original Excel file."
259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": null, 264 | "metadata": { 265 | "collapsed": true 266 | }, 267 | "outputs": [], 268 | "source": [ 269 | "#Solution\n" 270 | ] 271 | }, 272 | { 273 | "cell_type": "markdown", 274 | "metadata": {}, 275 | "source": [ 276 | "## Part 5: Optional" 277 | ] 278 | }, 279 | { 280 | "cell_type": "markdown", 281 | "metadata": {}, 282 | "source": [ 283 | "Compose some interesting queries of the database to extract patterns or features of the data." 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": null, 289 | "metadata": { 290 | "collapsed": true 291 | }, 292 | "outputs": [], 293 | "source": [ 294 | "#Solution\n" 295 | ] 296 | }, 297 | { 298 | "cell_type": "markdown", 299 | "metadata": {}, 300 | "source": [ 301 | "## Part 6: Optional" 302 | ] 303 | }, 304 | { 305 | "cell_type": "markdown", 306 | "metadata": {}, 307 | "source": [ 308 | "If you have access configured, try the exercise using a general purpose RDBMS, such as MySQL, PostgreSQL, SQL Server, etc." 309 | ] 310 | }, 311 | { 312 | "cell_type": "markdown", 313 | "metadata": {}, 314 | "source": [ 315 | "Related to the normalization, we might notice that our Pandas `DataFrame` itself is inefficient for the same reasons that normalization is desirable. A large number of copies of the same strings are stored within the same column `Series` objects. Moreover, in many cases what is stored are strings which need to be stored as Python objects, and processed much more slowly and indirectly than with basic numeric types that leverage their underlying `numpy` arrays. We can improve this quite a bit." 
316 | ] 317 | } 318 | ], 319 | "metadata": { 320 | "anaconda-cloud": {}, 321 | "continuum": { 322 | "depends": [ 323 | "pd_intro", 324 | "pd_data_io" 325 | ], 326 | "requires": [ 327 | "data/nyc_harbor_wq_2006-2014.xlsx" 328 | ], 329 | "tag": "pd_ex_excel" 330 | }, 331 | "kernelspec": { 332 | "display_name": "Python [conda env:python3]", 333 | "language": "python", 334 | "name": "conda-env-python3-py" 335 | }, 336 | "language_info": { 337 | "codemirror_mode": { 338 | "name": "ipython", 339 | "version": 3 340 | }, 341 | "file_extension": ".py", 342 | "mimetype": "text/x-python", 343 | "name": "python", 344 | "nbconvert_exporter": "python", 345 | "pygments_lexer": "ipython3", 346 | "version": "3.5.2" 347 | } 348 | }, 349 | "nbformat": 4, 350 | "nbformat_minor": 0 351 | } 352 | -------------------------------------------------------------------------------- /3-Analytics/pandas/EX02_weather.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Table of Contents\n", 8 | "* [Timeseries with weather data](#Timeseries-with-weather-data)\n", 9 | "\t* [EX1](#EX1)\n", 10 | "\t* [EX2](#EX2)\n", 11 | "\t* [EX3](#EX3)\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "# Timeseries with weather data" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "Weather data has been downloaded from [Wunderground](http://www.wunderground.com/history) and stored in `data/pittsburgh2013.csv`." 
26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": { 32 | "collapsed": true 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "import pandas as pd\n", 37 | "import numpy as np\n", 38 | "%matplotlib inline" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": { 45 | "collapsed": false 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "df = pd.read_csv('data/pittsburgh2013.csv', parse_dates=['Date'], index_col='Date')\n", 50 | "df.head()" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "## EX1" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "Compute the average precipitation for each month." 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": { 71 | "collapsed": false 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "# your solution here" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "## EX2" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "Which month got the most precipitation?" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": { 96 | "collapsed": false 97 | }, 98 | "outputs": [], 99 | "source": [ 100 | "# your solution here" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "## EX3" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "Use the *Events* column to determine how many days a **Snow** event occurred in each month.\n", 115 | " * Notice that events may be hyphenated, e.g. **Snow-Rain**." 
116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": { 122 | "collapsed": false 123 | }, 124 | "outputs": [], 125 | "source": [ 126 | "# your solution here" 127 | ] 128 | } 129 | ], 130 | "metadata": { 131 | "anaconda-cloud": {}, 132 | "continuum": { 133 | "depends": [ 134 | "pd_intro", 135 | "pd_series" 136 | ], 137 | "requires": [ 138 | "data/pittsburgh2013.csv" 139 | ], 140 | "tag": "pd_ex_weather" 141 | }, 142 | "kernelspec": { 143 | "display_name": "Python [conda env:python3]", 144 | "language": "python", 145 | "name": "conda-env-python3-py" 146 | }, 147 | "language_info": { 148 | "codemirror_mode": { 149 | "name": "ipython", 150 | "version": 3 151 | }, 152 | "file_extension": ".py", 153 | "mimetype": "text/x-python", 154 | "name": "python", 155 | "nbconvert_exporter": "python", 156 | "pygments_lexer": "ipython3", 157 | "version": "3.5.2" 158 | } 159 | }, 160 | "nbformat": 4, 161 | "nbformat_minor": 0 162 | } 163 | -------------------------------------------------------------------------------- /3-Analytics/pandas/data/201509-citibike-tripdata.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/3-Analytics/pandas/data/201509-citibike-tripdata.csv.gz -------------------------------------------------------------------------------- /3-Analytics/pandas/data/beer2.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/3-Analytics/pandas/data/beer2.csv.gz -------------------------------------------------------------------------------- /3-Analytics/pandas/data/nyc_harbor_wq_2006-2014.xlsx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/3-Analytics/pandas/data/nyc_harbor_wq_2006-2014.xlsx -------------------------------------------------------------------------------- /3-Analytics/pandas/img/pydata_stack_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/3-Analytics/pandas/img/pydata_stack_model.png -------------------------------------------------------------------------------- /3-Analytics/sklearn/1_Intro.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Table of Contents\n", 8 | "* [Learning Objectives:](#Learning-Objectives:)\n", 9 | "* [Machine Learning with Scikit Learn](#Machine-Learning-with-Scikit-Learn)\n", 10 | "\t* [API and Terminology](#API-and-Terminology)\n", 11 | "\t\t* [Scikit Learn modules](#Scikit-Learn-modules)\n", 12 | "\t* [Chosing an Estimator](#Chosing-an-Estimator)\n" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "# Learning Objectives:" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "After completion of this module, learners should be able to:\n", 27 | "\n", 28 | "* Understand and explain estimators, models and scoring metrics\n", 29 | "* Import scikit-learn modules" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "# Machine Learning with Scikit Learn" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "`scikit-learn` is an open source Machine Learning Toolkit built on Numpy and Scipy. Methods available in `scikit-learn` can be used for supervised and unsupervised learning. 
Among the many features of `scikit-learn` are\n", 44 | "\n", 45 | "* classification\n", 46 | "* regression\n", 47 | "* clustering\n", 48 | "* support vector machines\n", 49 | "* random forests\n", 50 | "* gradient boosting\n", 51 | "* k-means\n", 52 | "* DBSCAN\n", 53 | "\n", 54 | "The [User Guide](http://scikit-learn.org/stable/user_guide.html) and [Documentation](http://scikit-learn.org/stable/documentation.html) are the best place to learn how to use the methods available in `scikit-learn` and there are several [tutorials available online](http://scikit-learn.org/stable/tutorial/index.html)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "This course will provide an introduction to `sklearn` with a focus on highlighting how the methods work together to understand the performance of a given model." 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "## API and Terminology" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "While the following definitions may be the most widely accepted in the fields of Machine Learning and Statistics, they are useful to help understand the `sklearn` modules and API\n", 76 | "\n", 77 | "* **estimator**: A method used to make a prediction for supervised and unsupervised learning\n", 78 | " * **classifier**: An estimator with a discrete response to input data. *Assign a label to each data point.* Classifiers implement a `fit` member function.\n", 79 | " * **regressor**: An estimator with a continuous response to input data. *Predict output value of each data point.*\n", 80 | " * **cluster**: Performs clustering of input data. *Discover grouping within the data set.*\n", 81 | " * **transformer**: Transforms input data according to a set of requirements. *Preprocess data to have zero mean and unit variance*\n", 82 | "* **model**: Nearly synonymous with **estimator**. 
A **model** may be a more concrete instance of an **estimator**.\n", 83 | "* **metric**: A set of scores given to a **model** or **estimator** to indicate its accuracy. *Estimators for supervised learning implement a `score` member function.*" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "### Scikit Learn modules" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "Each of the following modules must be individually imported. The modules listed here include **estimators** and higher-level methods to perform operations such as cross validation, grid search and pipelining." 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": { 104 | "collapsed": false 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "import sklearn\n", 109 | "sklearn.__all__" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": { 116 | "collapsed": false, 117 | "scrolled": true 118 | }, 119 | "outputs": [], 120 | "source": [ 121 | "import sklearn.cluster\n", 122 | "help(sklearn.cluster)" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "## Chosing an Estimator" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "See the [Scikit Learn Flowchart](http://scikit-learn.org/stable/tutorial/machine_learning_map/)" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": { 142 | "collapsed": true 143 | }, 144 | "source": [ 145 | "As shown in the flowchart, the algorithms in scikit-learn mainly fall into:\n", 146 | " \n", 147 | "* Classification - Predicting the label or class membership of observation\n", 148 | "* Dimensionality reduction (Principal component analysis, independent component analysis)\n", 149 | "* Regression - Predicting a continuous response variable rather than class membership\n", 
150 | "* Clustering - Unsupervised algorithms grouping similar observations" 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "metadata": {}, 156 | "source": [ 157 | "In the scikit-learn notebooks we work algorithms from each of these groups." 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "metadata": { 164 | "collapsed": true 165 | }, 166 | "outputs": [], 167 | "source": [] 168 | } 169 | ], 170 | "metadata": { 171 | "anaconda-cloud": {}, 172 | "continuum": { 173 | "depends": [], 174 | "tag": "ml_intro" 175 | }, 176 | "kernelspec": { 177 | "display_name": "Python [conda env:python3]", 178 | "language": "python", 179 | "name": "conda-env-python3-py" 180 | }, 181 | "language_info": { 182 | "codemirror_mode": { 183 | "name": "ipython", 184 | "version": 3 185 | }, 186 | "file_extension": ".py", 187 | "mimetype": "text/x-python", 188 | "name": "python", 189 | "nbconvert_exporter": "python", 190 | "pygments_lexer": "ipython3", 191 | "version": "3.5.2" 192 | } 193 | }, 194 | "nbformat": 4, 195 | "nbformat_minor": 0 196 | } 197 | -------------------------------------------------------------------------------- /3-Analytics/sklearn/2_KNN_and_Validation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Table of Contents\n", 8 | "* [Learning Objectives](#Learning-Objectives)\n", 9 | "* [Importing our libraries](#Importing-our-libraries)\n", 10 | "\t* [Some Simple Data](#Some-Simple-Data)\n", 11 | "\t* [A Simple kNN Classifier](#A-Simple-kNN-Classifier)\n", 12 | "\t* [Simple Evaluation](#Simple-Evaluation)\n", 13 | "\t* [Visualization using two features](#Visualization-using-two-features)\n", 14 | "\t* [Exercise (exploring grid_step and number of neighbors)](#Exercise-%28exploring-grid_step-and-number-of-neighbors%29)\n", 15 | "* [Simple Comparison](#Simple-Comparison)\n", 16 | "* 
[Synthetic Datasets](#Synthetic-Datasets)\n", 17 | "\t* [make_blobs](#make_blobs)\n", 18 | "\t* [make_classification](#make_classification)\n", 19 | "* [Downloading Common Datasets](#Downloading-Common-Datasets)\n", 20 | "\t* [Exercise](#Exercise)\n" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "# Learning Objectives:" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "After completion of this module, learners should be able to:\n", 35 | "\n", 36 | "* Explain what KNN classification and logistic regression are\n", 37 | "* Apply the KNN classifier\n", 38 | "* Develop training/testing sets and perform model validation.\n", 39 | "\n", 40 | "\n", 41 | "* Work with principal component analysis and support vector machines.\n", 42 | "* Compare optimization and curve fitting techniques.\n", 43 | "\n", 44 | "K-Nearest neighbor algorithms fall into regression and classification. In classification, a K-nearest neighbor method uses local vote counts for class membership based on K nearest neighbors considered. A K==1 model considers only the nearest neighbor.\n", 45 | "\n", 46 | "Logistic regression is fitting a logistic distribution to continuous data to model a binomial or multinomial response. 
An example, described [here](https://en.wikipedia.org/wiki/Logistic_regression), is a logistic regression that predicts probability of success/failure on an exam given observations of passing/failing and the hours studied in advance.\n", 47 | " " 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "# Importing our libraries" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": { 61 | "collapsed": false 62 | }, 63 | "outputs": [], 64 | "source": [ 65 | "import numpy as np\n", 66 | "import pandas as pd\n", 67 | "import matplotlib.pyplot as plt\n", 68 | "from sklearn import (cross_validation, datasets,\n", 69 | " decomposition,\n", 70 | " grid_search, linear_model, \n", 71 | " neighbors, metrics)\n", 72 | "%matplotlib inline" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "## Some Simple Data" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": { 86 | "collapsed": false, 87 | "scrolled": true 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "iris = datasets.load_iris()\n", 92 | "examples = iris.data\n", 93 | "classes = iris.target\n", 94 | "print(iris.DESCR)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": { 101 | "collapsed": false, 102 | "scrolled": true 103 | }, 104 | "outputs": [], 105 | "source": [ 106 | "# Let's take a look at the \"shape\" of the data\n", 107 | "df_iris = pd.DataFrame(iris.data, columns=iris.feature_names)\n", 108 | "df_iris['species'] = iris.target\n", 109 | "df_iris['species_name'] = [iris.target_names[i] for i in iris.target]\n", 110 | "df_iris" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": { 117 | "collapsed": false 118 | }, 119 | "outputs": [], 120 | "source": [ 121 | "# Create a training and a testing set from this data by choosing indices\n", 122 | "# (wait a few 
cells for a better API)\n", 123 | "\n", 124 | "# Random order of indices\n", 125 | "n_examples = len(examples)\n", 126 | "shuffled_indices = np.random.permutation(n_examples)\n", 127 | "\n", 128 | "# Pick a trainig/testing split\n", 129 | "train_pct = 0.8\n", 130 | "train_ct = int(n_examples * train_pct)\n", 131 | "\n", 132 | "# Select indices for training and testing\n", 133 | "train_idx, test_idx = shuffled_indices[:train_ct], shuffled_indices[train_ct:]\n", 134 | "train_idx, test_idx" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "## A Simple kNN Classifier" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": { 148 | "collapsed": false 149 | }, 150 | "outputs": [], 151 | "source": [ 152 | "knn5 = neighbors.KNeighborsClassifier(n_neighbors=5)" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "## Simple Evaluation" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": { 166 | "collapsed": false 167 | }, 168 | "outputs": [], 169 | "source": [ 170 | "knn5.fit(examples[train_idx], classes[train_idx])\n", 171 | "predictions = knn5.predict(examples[test_idx])\n", 172 | "print(metrics.accuracy_score(predictions, classes[test_idx]))" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "## Visualization using two features" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": { 186 | "collapsed": true 187 | }, 188 | "outputs": [], 189 | "source": [ 190 | "datasets.make_classification?" 
191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": { 197 | "collapsed": false 198 | }, 199 | "outputs": [], 200 | "source": [ 201 | "# the punch line is to predict for a large grid of data points\n", 202 | "# http://scikit-learn.org/stable/auto_examples/neighbors/plot_classification.html\n", 203 | "def KNN_2D_map(twodim):\n", 204 | " grid_step = 0.1\n", 205 | " knn5 = neighbors.KNeighborsClassifier(n_neighbors=5)\n", 206 | " knn5.fit(twodim, classes)\n", 207 | "\n", 208 | " # create testing data points on the standard \n", 209 | " # Cartesian grid (over our data range)\n", 210 | " # to color the background\n", 211 | " maxes = np.max(twodim, axis=0) + 2*grid_step\n", 212 | " mins = np.min(twodim, axis=0) - grid_step\n", 213 | "\n", 214 | " xs,ys = np.mgrid[mins[0]:maxes[0]:grid_step, \n", 215 | " mins[1]:maxes[1]:grid_step]\n", 216 | " grid_points = np.c_[xs.ravel(), ys.ravel()]\n", 217 | "\n", 218 | " p = knn5.predict(grid_points)\n", 219 | "\n", 220 | " # plot the predictions at the grid points\n", 221 | " fig = plt.figure(figsize=(10,5))\n", 222 | " ax = fig.gca()\n", 223 | " ax.pcolormesh(xs,ys,p.reshape(xs.shape))\n", 224 | "\n", 225 | " ax.set_xlim(mins[0], maxes[0]-grid_step)\n", 226 | " ax.set_ylim(mins[1], maxes[1]-grid_step)\n", 227 | " \n", 228 | "twodim = examples[:,:2] # select first two features\n", 229 | "KNN_2D_map(twodim)" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": null, 235 | "metadata": { 236 | "collapsed": false 237 | }, 238 | "outputs": [], 239 | "source": [ 240 | "twodim2 = examples[:,2:] # choose different features\n", 241 | "KNN_2D_map(twodim2)" 242 | ] 243 | }, 244 | { 245 | "cell_type": "markdown", 246 | "metadata": {}, 247 | "source": [ 248 | "## Exercise (exploring grid_step and number of neighbors)" 249 | ] 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "metadata": {}, 254 | "source": [ 255 | "Quick question: why did we add an extra `grid_step` 
value to the maxes, above?\n", 256 | "\n", 257 | "Investigate what happens to the decision boundary as we raise or lower the number of neighbors? You could start answering this trying a range of neighbor values: $k=3,5,10,15$. Could the `grid_step` parameter mislead us, if we aren't paying close attention?" 258 | ] 259 | }, 260 | { 261 | "cell_type": "markdown", 262 | "metadata": {}, 263 | "source": [ 264 | "# Simple Comparison" 265 | ] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "metadata": {}, 270 | "source": [ 271 | "We'll learn about a more efficient comparison method in the next section" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": null, 277 | "metadata": { 278 | "collapsed": false 279 | }, 280 | "outputs": [], 281 | "source": [ 282 | "knn5 = neighbors.KNeighborsClassifier(n_neighbors=5)\n", 283 | "logreg = linear_model.LogisticRegression()" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": null, 289 | "metadata": { 290 | "collapsed": false 291 | }, 292 | "outputs": [], 293 | "source": [ 294 | "knn5.fit(examples[train_idx], classes[train_idx])\n", 295 | "logreg.fit(examples[train_idx], classes[train_idx])\n", 296 | "\n", 297 | "lr_preds = logreg.predict(examples[test_idx])\n", 298 | "knn5_preds = knn5.predict(examples[test_idx])\n", 299 | "\n", 300 | "for preds in [lr_preds, knn5_preds]:\n", 301 | " print(metrics.accuracy_score(preds, classes[test_idx]))" 302 | ] 303 | }, 304 | { 305 | "cell_type": "markdown", 306 | "metadata": {}, 307 | "source": [ 308 | "# Synthetic Datasets" 309 | ] 310 | }, 311 | { 312 | "cell_type": "markdown", 313 | "metadata": {}, 314 | "source": [ 315 | "## make_blobs" 316 | ] 317 | }, 318 | { 319 | "cell_type": "markdown", 320 | "metadata": {}, 321 | "source": [ 322 | "`sklearn.datasets.make_blobs(n_samples=100, \n", 323 | " n_features=2,\n", 324 | " centers=3, # number of classes\n", 325 | " cluster_std=1.0) # shared -or- class-by-class`" 326 | ] 327 | }, 328 | { 
329 | "cell_type": "code", 330 | "execution_count": null, 331 | "metadata": { 332 | "collapsed": false 333 | }, 334 | "outputs": [], 335 | "source": [ 336 | "x, y = datasets.make_blobs(n_samples=50)\n", 337 | "plt.scatter(x[:,0], x[:,1], c=y, s=50)" 338 | ] 339 | }, 340 | { 341 | "cell_type": "markdown", 342 | "metadata": {}, 343 | "source": [ 344 | "## make_classification" 345 | ] 346 | }, 347 | { 348 | "cell_type": "markdown", 349 | "metadata": {}, 350 | "source": [ 351 | "`sklearn.datasets.make_classification()`\n", 352 | "\n", 353 | "Many, many arguments. See: http://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_classification.html\n", 354 | "\n", 355 | "For examples, see: http://scikit-learn.org/stable/auto_examples/datasets/plot_random_dataset.html" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": null, 361 | "metadata": { 362 | "collapsed": false 363 | }, 364 | "outputs": [], 365 | "source": [ 366 | "x,y = datasets.make_classification(n_features=2, n_redundant=0, n_informative=2,\n", 367 | " n_clusters_per_class=1, n_classes=3)\n", 368 | "plt.scatter(x[:, 0], x[:, 1], c=y, s=50);" 369 | ] 370 | }, 371 | { 372 | "cell_type": "markdown", 373 | "metadata": {}, 374 | "source": [ 375 | "# Downloading Common Datasets" 376 | ] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "execution_count": null, 381 | "metadata": { 382 | "collapsed": false 383 | }, 384 | "outputs": [], 385 | "source": [ 386 | "iris_dwn_1 = datasets.fetch_mldata('iris', data_home=\"./data\")\n", 387 | "print(iris_dwn_1.data.shape)\n", 388 | "print(iris_dwn_1.target.shape)" 389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "execution_count": null, 394 | "metadata": { 395 | "collapsed": false 396 | }, 397 | "outputs": [], 398 | "source": [ 399 | "iris_dwn_2 = datasets.fetch_mldata('datasets-UCI Iris',\n", 400 | " target_name='class', \n", 401 | " data_name='double0',\n", 402 | " data_home=\"./data\")\n", 403 | "print(iris_dwn_2.data.shape)\n", 
404 | "print(iris_dwn_2.target.shape)" 405 | ] 406 | } 407 | ], 408 | "metadata": { 409 | "anaconda-cloud": {}, 410 | "continuum": { 411 | "depends": [], 412 | "requires": [ 413 | "data/wine.csv" 414 | ], 415 | "tag": "ml_knn" 416 | }, 417 | "kernelspec": { 418 | "display_name": "Python [conda env:python3]", 419 | "language": "python", 420 | "name": "conda-env-python3-py" 421 | }, 422 | "language_info": { 423 | "codemirror_mode": { 424 | "name": "ipython", 425 | "version": 3 426 | }, 427 | "file_extension": ".py", 428 | "mimetype": "text/x-python", 429 | "name": "python", 430 | "nbconvert_exporter": "python", 431 | "pygments_lexer": "ipython3", 432 | "version": "3.5.2" 433 | } 434 | }, 435 | "nbformat": 4, 436 | "nbformat_minor": 0 437 | } 438 | -------------------------------------------------------------------------------- /3-Analytics/sklearn/3_ModelComparison.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Table of Contents\n", 8 | "* [Learning Objectives:](#Learning-Objectives:)\n", 9 | "* [Model Comparison](#Model-Comparison)\n", 10 | "\t* [Exercise](#Exercise)\n" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# Learning Objectives:" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "After completion of this module, learners should be able to:\n", 25 | "\n", 26 | "* Perform model comparisons using CV scores\n", 27 | "\n", 28 | "We compare K-nearest neighbors classifiers, logistic regression, and a decision tree classifier. 
Background material on these classifiers can be found at:\n", 29 | "* [nearest neighbors](http://scikit-learn.org/stable/modules/neighbors.html)\n", 30 | "* [logistic regression](http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html)\n", 31 | "* [decision trees](http://scikit-learn.org/stable/modules/tree.html)" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "# Model Comparison" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": { 45 | "collapsed": true 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "# We saw some model comparison earlier. Let's add a few more models\n", 50 | "# (http://scikit-learn.org/stable/auto_examples/plot_classifier_comparison.html)\n", 51 | "%matplotlib inline\n", 52 | "from sklearn.tree import DecisionTreeClassifier\n", 53 | "from sklearn import neighbors, linear_model,cross_validation, datasets\n", 54 | "import matplotlib.pyplot as plt" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": { 61 | "collapsed": true 62 | }, 63 | "outputs": [], 64 | "source": [ 65 | "iris = datasets.load_iris()\n", 66 | "examples = iris.data\n", 67 | "classes = iris.target\n", 68 | "n_examples = len(examples)" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": { 75 | "collapsed": false 76 | }, 77 | "outputs": [], 78 | "source": [ 79 | "knn20 = neighbors.KNeighborsClassifier(n_neighbors=20)\n", 80 | "knn5 = neighbors.KNeighborsClassifier(n_neighbors=5)\n", 81 | "logreg = linear_model.LogisticRegression()\n", 82 | "dectree = DecisionTreeClassifier()\n", 83 | "\n", 84 | "modelsAndMarker = [(knn20, 'x', 'knn20'), (knn5, 'o', 'knn5'), \n", 85 | " (logreg, '^', 'logreg'), (dectree, '.', 'dectree')]\n", 86 | "\n", 87 | "for mod, marker, label in modelsAndMarker:\n", 88 | " k_fold = cross_validation.KFold(n_examples, n_folds=10)\n", 89 | " cv_scores 
= cross_validation.cross_val_score(mod, examples, classes, \n", 90 | " cv=k_fold, \n", 91 | " scoring='accuracy', \n", 92 | " n_jobs=-1) # all CPUs \n", 93 | " plt.plot(cv_scores, marker=marker, label=label)\n", 94 | " \n", 95 | "plt.ylim(0.5, 1.05)\n", 96 | "plt.legend(bbox_to_anchor=(1.05, 1), loc=2)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "## Exercise" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "In the \"Model Comparison\" notebook, the k-folds are created inside the for-loop (see line 7). What are the effects of moving that line outside (above) the loop? Is this strictly a benefit or are there any drawbacks? What factors might influence your answer?" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": { 117 | "collapsed": true 118 | }, 119 | "outputs": [], 120 | "source": [] 121 | } 122 | ], 123 | "metadata": { 124 | "anaconda-cloud": {}, 125 | "continuum": { 126 | "depends": [ 127 | "ml_kfold" 128 | ], 129 | "requires": [], 130 | "tag": "ml_compare" 131 | }, 132 | "kernelspec": { 133 | "display_name": "Python [conda env:python3]", 134 | "language": "python", 135 | "name": "conda-env-python3-py" 136 | }, 137 | "language_info": { 138 | "codemirror_mode": { 139 | "name": "ipython", 140 | "version": 3 141 | }, 142 | "file_extension": ".py", 143 | "mimetype": "text/x-python", 144 | "name": "python", 145 | "nbconvert_exporter": "python", 146 | "pygments_lexer": "ipython3", 147 | "version": "3.5.2" 148 | } 149 | }, 150 | "nbformat": 4, 151 | "nbformat_minor": 0 152 | } 153 | -------------------------------------------------------------------------------- /3-Analytics/sklearn/4_RegressionModels.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Table of Contents\n", 8 | 
"* [Learning Objectives:](#Learning-Objectives:)\n", 9 | "* [Regression Models](#Regression-Models)\n" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "# Learning Objectives:" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "After completion of this module, learners should be able to:\n", 24 | "\n", 25 | "* Apply the Linear Regression Model\n", 26 | "* Cross validate the model" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "# Regression Models" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": { 40 | "collapsed": false 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "import numpy as np\n", 45 | "from sklearn import datasets, linear_model, cross_validation, metrics\n", 46 | "diabetes_dataset = datasets.load_diabetes()\n", 47 | "dd_examples, dd_targets = diabetes_dataset.data, diabetes_dataset.target\n", 48 | "\n", 49 | "linreg = linear_model.LinearRegression()\n", 50 | "kfold = cross_validation.KFold(len(dd_examples), n_folds=3, shuffle=True)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": { 57 | "collapsed": false 58 | }, 59 | "outputs": [], 60 | "source": [ 61 | "# manually extract a k-fold train/test split\n", 62 | "train, test = next(iter(kfold))\n", 63 | "linreg.fit(dd_examples[train], dd_targets[train])\n", 64 | "\n", 65 | "preds = linreg.predict(dd_examples[test])\n", 66 | "errors = preds - dd_targets[test]\n", 67 | "\n", 68 | "print(np.mean(errors**2))" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": { 75 | "collapsed": false 76 | }, 77 | "outputs": [], 78 | "source": [ 79 | "for train, test in kfold:\n", 80 | " preds = linreg.fit(dd_examples[train], dd_targets[train]).predict(dd_examples[test])\n", 81 | " print(metrics.mean_squared_error(preds, dd_targets[test]))" 82 | ] 83 | 
}, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": { 88 | "collapsed": false 89 | }, 90 | "outputs": [], 91 | "source": [ 92 | "cv_scores = cross_validation.cross_val_score(linreg, dd_examples, dd_targets, \n", 93 | " cv=kfold, \n", 94 | " scoring='mean_squared_error', \n", 95 | " n_jobs=-1) # all CPUs\n", 96 | "print(cv_scores)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "Additional regression metrics are described here:\n", 104 | "\n", 105 | "http://scikit-learn.org/stable/modules/model_evaluation.html#regression-metrics\n", 106 | "\n", 107 | "Their names are all importable from `sklearn.metrics`:\n", 108 | "\n", 109 | "`mean_absolute_error \n", 110 | "mean_squared_error \n", 111 | "median_absolute_error \n", 112 | "r2` \n" 113 | ] 114 | } 115 | ], 116 | "metadata": { 117 | "continuum": { 118 | "depends": [ 119 | "ml_eval" 120 | ], 121 | "tag": "ml_regression" 122 | }, 123 | "kernelspec": { 124 | "display_name": "Python [conda env:python3]", 125 | "language": "python", 126 | "name": "conda-env-python3-py" 127 | }, 128 | "language_info": { 129 | "codemirror_mode": { 130 | "name": "ipython", 131 | "version": 3 132 | }, 133 | "file_extension": ".py", 134 | "mimetype": "text/x-python", 135 | "name": "python", 136 | "nbconvert_exporter": "python", 137 | "pygments_lexer": "ipython3", 138 | "version": "3.5.2" 139 | } 140 | }, 141 | "nbformat": 4, 142 | "nbformat_minor": 0 143 | } 144 | -------------------------------------------------------------------------------- /3-Analytics/sklearn/EX01_CrossValidation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Table of Contents\n", 8 | "* [Classification Exercise](#Classification-Exercise)\n" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "After completion 
of this module, learners should be able to:\n", 16 | "\n", 17 | "* Be aware of the range of facilities in scikit-learn.\n", 18 | "* Apply classifiers, such as K-nearest neighbor, logistic regression, decision tree, and linear discriminant analysis." 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "# Classification Exercise" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "1. Experiment with `make_gaussian_quantiles` to generate some data:\n", 33 | "\n", 34 | "http://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_gaussian_quantiles.html\n", 35 | "\n", 36 | " 2. It turns out that the wine dataset at mldata is broken. Here's is how you can grab it directly from UCI (it will be downloaded to wine.csv in your local directory):\n", 37 | "\n", 38 | "```python\n", 39 | "import urllib\n", 40 | "url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data'\n", 41 | "urllib.request.urlretrieve(url, 'data/wine.csv')\n", 42 | "tbl = np.genfromtxt('data/wine.csv', delimiter=\",\")\n", 43 | "classes = tbl[:,0]\n", 44 | "examples = tbl[:,1:]\n", 45 | "```" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "Pick two classifiers from `sklearn` (a partial list is given below) and compare their performance on your random dataset and the wine dataset. 
Use manual train/test splitting and KFold cross validation methods.\n", 53 | "\n", 54 | "\n", 55 | "```python\n", 56 | "from sklearn.neighbors import KNeighborsClassifier\n", 57 | "from sklearn.svm import SVC\n", 58 | "from sklearn.tree import DecisionTreeClassifier\n", 59 | "from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier\n", 60 | "from sklearn.naive_bayes import GaussianNB\n", 61 | "from sklearn.qda import QDA\n", 62 | "from sklearn.lda import LDA\n", 63 | "classifiers = [\n", 64 | " KNeighborsClassifier(3),\n", 65 | " SVC(kernel=\"linear\", C=0.025),\n", 66 | " SVC(gamma=2, C=1),\n", 67 | " DecisionTreeClassifier(max_depth=5),\n", 68 | " RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1), # ftrs @ split\n", 69 | " AdaBoostClassifier(),\n", 70 | " GaussianNB(),\n", 71 | " LDA(),\n", 72 | " QDA()]\n", 73 | "```" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": { 80 | "collapsed": true 81 | }, 82 | "outputs": [], 83 | "source": [ 84 | "import numpy as np\n", 85 | "import pandas as pd\n", 86 | "import matplotlib.pyplot as plt\n", 87 | "from sklearn import (cross_validation, datasets,\n", 88 | " decomposition,\n", 89 | " grid_search, linear_model, \n", 90 | " neighbors, metrics)\n", 91 | "%matplotlib inline" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": { 98 | "collapsed": true 99 | }, 100 | "outputs": [], 101 | "source": [] 102 | } 103 | ], 104 | "metadata": { 105 | "anaconda-cloud": {}, 106 | "continuum": { 107 | "depends": [ 108 | "ml_kfold", 109 | "ml_knn" 110 | ], 111 | "requires": [ 112 | "data/wine.csv" 113 | ], 114 | "tag": "ml_ex_cv" 115 | }, 116 | "kernelspec": { 117 | "display_name": "Python [conda env:python3]", 118 | "language": "python", 119 | "name": "conda-env-python3-py" 120 | }, 121 | "language_info": { 122 | "codemirror_mode": { 123 | "name": "ipython", 124 | "version": 3 125 | }, 126 | "file_extension": ".py", 
127 | "mimetype": "text/x-python", 128 | "name": "python", 129 | "nbconvert_exporter": "python", 130 | "pygments_lexer": "ipython3", 131 | "version": "3.5.2" 132 | } 133 | }, 134 | "nbformat": 4, 135 | "nbformat_minor": 0 136 | } 137 | -------------------------------------------------------------------------------- /4-AcceleratedPython/Accel Python Offloading to Intel Xeon Phi (Co)processors.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/4-AcceleratedPython/Accel Python Offloading to Intel Xeon Phi (Co)processors.pdf -------------------------------------------------------------------------------- /4-AcceleratedPython/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/4-AcceleratedPython/README.md -------------------------------------------------------------------------------- /4-AcceleratedPython/numba/1_Numba_Basics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Using `jit`\n", 8 | "\n", 9 | "We'll start with a trivial example but get to some more realistic applications shortly." 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "### Array sum\n", 17 | "\n", 18 | "The function below is a naive `sum` function that sums all the elements of a given array." 
19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": { 25 | "collapsed": true 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "def sum_array(inp):\n", 30 | " J, I = inp.shape\n", 31 | " \n", 32 | " #this is a bad idea\n", 33 | " mysum = 0\n", 34 | " for j in range(J):\n", 35 | " for i in range(I):\n", 36 | " mysum += inp[j, i]\n", 37 | " \n", 38 | " return mysum" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 2, 44 | "metadata": { 45 | "collapsed": true 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "import numpy" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 3, 55 | "metadata": { 56 | "collapsed": false 57 | }, 58 | "outputs": [], 59 | "source": [ 60 | "arr = numpy.random.random((300, 300))" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 4, 66 | "metadata": { 67 | "collapsed": false 68 | }, 69 | "outputs": [ 70 | { 71 | "data": { 72 | "text/plain": [ 73 | "45041.071854295071" 74 | ] 75 | }, 76 | "execution_count": 4, 77 | "metadata": {}, 78 | "output_type": "execute_result" 79 | } 80 | ], 81 | "source": [ 82 | "sum_array(arr)" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 5, 88 | "metadata": { 89 | "collapsed": false 90 | }, 91 | "outputs": [ 92 | { 93 | "name": "stdout", 94 | "output_type": "stream", 95 | "text": [ 96 | "10 loops, best of 3: 20.5 ms per loop\n" 97 | ] 98 | } 99 | ], 100 | "source": [ 101 | "plain = %timeit -o sum_array(arr)" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "# Let's get started" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 6, 114 | "metadata": { 115 | "collapsed": true 116 | }, 117 | "outputs": [], 118 | "source": [ 119 | "from numba import jit" 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "## As a function call" 127 | ] 128 | }, 129 | { 130 | 
"cell_type": "code", 131 | "execution_count": 7, 132 | "metadata": { 133 | "collapsed": true 134 | }, 135 | "outputs": [], 136 | "source": [ 137 | "sum_array_numba = jit()(sum_array)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "What's up with the weird double `()`s? We'll cover that in a little bit." 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 8, 150 | "metadata": { 151 | "collapsed": false 152 | }, 153 | "outputs": [ 154 | { 155 | "data": { 156 | "text/plain": [ 157 | "45041.07185429507" 158 | ] 159 | }, 160 | "execution_count": 8, 161 | "metadata": {}, 162 | "output_type": "execute_result" 163 | } 164 | ], 165 | "source": [ 166 | "sum_array_numba(arr)" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 9, 172 | "metadata": { 173 | "collapsed": false 174 | }, 175 | "outputs": [ 176 | { 177 | "name": "stdout", 178 | "output_type": "stream", 179 | "text": [ 180 | "10000 loops, best of 3: 86.2 µs per loop\n" 181 | ] 182 | } 183 | ], 184 | "source": [ 185 | "jitted = %timeit -o sum_array_numba(arr)" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 10, 191 | "metadata": { 192 | "collapsed": false 193 | }, 194 | "outputs": [ 195 | { 196 | "data": { 197 | "text/plain": [ 198 | "238.1571011913437" 199 | ] 200 | }, 201 | "execution_count": 10, 202 | "metadata": {}, 203 | "output_type": "execute_result" 204 | } 205 | ], 206 | "source": [ 207 | "plain.best / jitted.best" 208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "metadata": {}, 213 | "source": [ 214 | "## (more commonly) As a decorator" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": 11, 220 | "metadata": { 221 | "collapsed": true 222 | }, 223 | "outputs": [], 224 | "source": [ 225 | "@jit\n", 226 | "def sum_array(inp):\n", 227 | " I, J = inp.shape\n", 228 | " \n", 229 | " mysum = 0\n", 230 | " for i in range(I):\n", 231 | " for j in 
range(J):\n", 232 | " mysum += inp[i, j]\n", 233 | " \n", 234 | " return mysum" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 12, 240 | "metadata": { 241 | "collapsed": false 242 | }, 243 | "outputs": [ 244 | { 245 | "data": { 246 | "text/plain": [ 247 | "45041.07185429507" 248 | ] 249 | }, 250 | "execution_count": 12, 251 | "metadata": {}, 252 | "output_type": "execute_result" 253 | } 254 | ], 255 | "source": [ 256 | "sum_array(arr)" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": 13, 262 | "metadata": { 263 | "collapsed": false 264 | }, 265 | "outputs": [ 266 | { 267 | "name": "stdout", 268 | "output_type": "stream", 269 | "text": [ 270 | "10000 loops, best of 3: 89.1 µs per loop\n" 271 | ] 272 | } 273 | ], 274 | "source": [ 275 | "%timeit sum_array(arr)" 276 | ] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "metadata": {}, 281 | "source": [ 282 | "## How does this compare to NumPy?" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": 14, 288 | "metadata": { 289 | "collapsed": false 290 | }, 291 | "outputs": [ 292 | { 293 | "name": "stdout", 294 | "output_type": "stream", 295 | "text": [ 296 | "The slowest run took 5.33 times longer than the fastest. 
This could mean that an intermediate result is being cached.\n", 297 | "10000 loops, best of 3: 40.7 µs per loop\n" 298 | ] 299 | } 300 | ], 301 | "source": [ 302 | "%timeit arr.sum()" 303 | ] 304 | }, 305 | { 306 | "cell_type": "markdown", 307 | "metadata": {}, 308 | "source": [ 309 | "## When is Numba faster than NumPy?\n", 310 | "\n", 311 | "When doing more complex things, or when using less common integer types, like int16:" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": 15, 317 | "metadata": { 318 | "collapsed": false 319 | }, 320 | "outputs": [], 321 | "source": [ 322 | "arr_int16 = (arr * 4096).astype(numpy.int16)" 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": 17, 328 | "metadata": { 329 | "collapsed": false 330 | }, 331 | "outputs": [ 332 | { 333 | "name": "stdout", 334 | "output_type": "stream", 335 | "text": [ 336 | "10000 loops, best of 3: 20 µs per loop\n" 337 | ] 338 | } 339 | ], 340 | "source": [ 341 | "jitted_int16 = %timeit -o sum_array_numba(arr_int16)" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": 18, 347 | "metadata": { 348 | "collapsed": false 349 | }, 350 | "outputs": [ 351 | { 352 | "name": "stdout", 353 | "output_type": "stream", 354 | "text": [ 355 | "The slowest run took 7.39 times longer than the fastest. 
This could mean that an intermediate result is being cached.\n", 356 | "10000 loops, best of 3: 108 µs per loop\n" 357 | ] 358 | } 359 | ], 360 | "source": [ 361 | "numpy_int16 = %timeit -o arr_int16.sum()" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": 19, 367 | "metadata": { 368 | "collapsed": false 369 | }, 370 | "outputs": [ 371 | { 372 | "data": { 373 | "text/plain": [ 374 | "5.420978311244756" 375 | ] 376 | }, 377 | "execution_count": 19, 378 | "metadata": {}, 379 | "output_type": "execute_result" 380 | } 381 | ], 382 | "source": [ 383 | "numpy_int16.best / jitted_int16.best" 384 | ] 385 | }, 386 | { 387 | "cell_type": "markdown", 388 | "metadata": {}, 389 | "source": [ 390 | "NumPy doesn't have a specialized version of `sum()` for 16-bit integers, but Numba just generated one that was many times faster! Numba can take advantage of things like AVX support for packed integers while NumPy has to cast to a larger datatype to use one of the precompiled implementations." 391 | ] 392 | }, 393 | { 394 | "cell_type": "markdown", 395 | "metadata": {}, 396 | "source": [ 397 | "## When does `numba` compile things?" 398 | ] 399 | }, 400 | { 401 | "cell_type": "markdown", 402 | "metadata": {}, 403 | "source": [ 404 | "The first time you call the function. 
" 405 | ] 406 | } 407 | ], 408 | "metadata": { 409 | "anaconda-cloud": {}, 410 | "kernelspec": { 411 | "display_name": "Python [conda env:python3]", 412 | "language": "python", 413 | "name": "conda-env-python3-py" 414 | }, 415 | "language_info": { 416 | "codemirror_mode": { 417 | "name": "ipython", 418 | "version": 3 419 | }, 420 | "file_extension": ".py", 421 | "mimetype": "text/x-python", 422 | "name": "python", 423 | "nbconvert_exporter": "python", 424 | "pygments_lexer": "ipython3", 425 | "version": "3.5.2" 426 | } 427 | }, 428 | "nbformat": 4, 429 | "nbformat_minor": 0 430 | } 431 | -------------------------------------------------------------------------------- /4-AcceleratedPython/numba/EX02_Direct_Summation-Solution.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy\n", 12 | "from numba import njit" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "particle_dtype = numpy.dtype({'names':['x','y','z','m','phi'], \n", 24 | " 'formats':[numpy.double, \n", 25 | " numpy.double, \n", 26 | " numpy.double, \n", 27 | " numpy.double, \n", 28 | " numpy.double]})" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "# Exercise 1\n", 36 | "\n", 37 | "Write a function `create_n_random_particles` that takes the arguments `n` (number of particles), `m` (mass of every particle) and a domain within to generate a random number (as in the class above).\n", 38 | "It should create an array with `n` elements and `dtype=particle_dtype` and then return that array.\n", 39 | "\n", 40 | "For each particle, the mass should be initialized to the value of `m` and the potential `phi` initialized to zero.\n", 41 | "\n", 
42 | "For the `x` component of a given particle `p`, you might do something like\n", 43 | "\n", 44 | "```python\n", 45 | "p['x'] = domain * numpy.random.random()\n", 46 | "```" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 8, 52 | "metadata": { 53 | "collapsed": true 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "def create_n_random_particles(n, m, domain=1):\n", 58 | " '''\n", 59 | " Creates `n` particles with mass `m` with random coordinates\n", 60 | " between 0 and `domain`\n", 61 | " '''\n", 62 | " parts = numpy.zeros((n), dtype=particle_dtype)\n", 63 | " \n", 64 | " parts['x'] = numpy.random.random(size=n) * domain\n", 65 | " parts['y'] = numpy.random.random(size=n) * domain\n", 66 | " parts['z'] = numpy.random.random(size=n) * domain\n", 67 | " parts['m'] = m\n", 68 | " parts['phi'] = 0.0\n", 69 | "\n", 70 | " return parts" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "Test it out!" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 10, 83 | "metadata": { 84 | "collapsed": false 85 | }, 86 | "outputs": [ 87 | { 88 | "data": { 89 | "text/plain": [ 90 | "array([ (0.07865253058714916, 0.17845767290893022, 0.2782564508743751, 0.001, 0.0),\n", 91 | " (0.6098656647837719, 0.465900008549502, 0.7708386758735862, 0.001, 0.0),\n", 92 | " (0.5407396799472325, 0.43441139551555785, 0.5205542751741511, 0.001, 0.0),\n", 93 | " (0.6289394790346508, 0.5203392254721185, 0.510620859464995, 0.001, 0.0),\n", 94 | " (0.08541443823778716, 0.12960520559911615, 0.5964363323868767, 0.001, 0.0)], \n", 95 | " dtype=[('x', '= 4 and int(sys.argv[-1]) <= 64: 71 | thread_count=int(sys.argv[-1]) 72 | 73 | start_time = time.time() 74 | 75 | pi = calcpi_threads(samples, thread_count) 76 | 77 | end_time = time.time() 78 | 79 | util.output(samples, pi, start_time, end_time) 80 | -------------------------------------------------------------------------------- /5-AdvancedScaling/pi/util.py: 
-------------------------------------------------------------------------------- 1 | # File: util.py 2 | # Author: William Scullin 3 | # Date: 2015-11-28 4 | # 5 | # Utility functions used by all demo programs 6 | # 7 | 8 | """This module contains utility functions used by pi calculating 9 | demo programs. 10 | """ 11 | 12 | 13 | from sys import argv 14 | from math import pi as const_pi 15 | from decimal import Decimal, InvalidOperation 16 | 17 | 18 | def output(samples=0, pi=0, start_time=0, end_time=0): 19 | """Print the program output""" 20 | perr = (abs(const_pi-pi)/const_pi)*100 21 | print "Pi value is %f, with error %02f%%" % (pi, perr) 22 | print "Run time for %s samples was %s" % (samples, end_time-start_time) 23 | 24 | 25 | def get_sample_count(samples=1.2e7): 26 | """get input from argv or set default""" 27 | if len(argv) > 1: 28 | try: 29 | samples = int(Decimal(argv[1])) 30 | except (ValueError, InvalidOperation): 31 | return samples 32 | return samples 33 | -------------------------------------------------------------------------------- /5-AdvancedScaling/pi/util.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/5-AdvancedScaling/pi/util.pyc -------------------------------------------------------------------------------- /5-AdvancedScaling/scaling_python_with_mpi.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/5-AdvancedScaling/scaling_python_with_mpi.pdf -------------------------------------------------------------------------------- /5-AdvancedScaling/scaling_python_with_mpi.pptx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/5-AdvancedScaling/scaling_python_with_mpi.pptx -------------------------------------------------------------------------------- /6-Profiling/Profiling.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Profiling Tricks in Jupyter" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "collapsed": false 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy as np" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": { 25 | "collapsed": true 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "def filter_bad_values_try1(values, lower, upper):\n", 30 | " good = []\n", 31 | " for v in values:\n", 32 | " if lower < v < upper:\n", 33 | " good.append(v)\n", 34 | " return np.array(good)" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 3, 40 | "metadata": { 41 | "collapsed": true 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "def filter_bad_values_try4(values, lower, upper):\n", 46 | " selector_lower = lower < values\n", 47 | " selector_upper = values < upper\n", 48 | " selector = selector_lower & selector_upper\n", 49 | " return values[selector]" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 4, 55 | "metadata": { 56 | "collapsed": false 57 | }, 58 | "outputs": [ 59 | { 60 | "name": "stdout", 61 | "output_type": "stream", 62 | "text": [ 63 | "500065\n", 64 | "500065\n" 65 | ] 66 | } 67 | ], 68 | "source": [ 69 | "a = np.random.uniform(-2000, 2000, 1000000)\n", 70 | "print(len(filter_bad_values_try1(a, -1000, 1000)))\n", 71 | "print(len(filter_bad_values_try4(a, -1000, 1000)))" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "## %time" 79 | ] 
80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 5, 84 | "metadata": { 85 | "collapsed": false 86 | }, 87 | "outputs": [ 88 | { 89 | "name": "stdout", 90 | "output_type": "stream", 91 | "text": [ 92 | "CPU times: user 620 ms, sys: 10.3 ms, total: 630 ms\n", 93 | "Wall time: 629 ms\n", 94 | "CPU times: user 8.75 ms, sys: 1.17 ms, total: 9.91 ms\n", 95 | "Wall time: 9.79 ms\n" 96 | ] 97 | }, 98 | { 99 | "data": { 100 | "text/plain": [ 101 | "array([ 450.2143654 , -948.85901315, -293.47172022, ..., -775.49455528,\n", 102 | " -986.98275299, 601.84069558])" 103 | ] 104 | }, 105 | "execution_count": 5, 106 | "metadata": {}, 107 | "output_type": "execute_result" 108 | } 109 | ], 110 | "source": [ 111 | "%time filter_bad_values_try1(a, -1000, 1000)\n", 112 | "%time filter_bad_values_try4(a, -1000, 1000)" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "## %timeit" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 6, 125 | "metadata": { 126 | "collapsed": false 127 | }, 128 | "outputs": [ 129 | { 130 | "name": "stdout", 131 | "output_type": "stream", 132 | "text": [ 133 | "100 loops, best of 3: 8.82 ms per loop\n" 134 | ] 135 | } 136 | ], 137 | "source": [ 138 | "%timeit filter_bad_values_try4(a, -1000, 1000)" 139 | ] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": {}, 144 | "source": [ 145 | "## %%prun - Profile a cell with cProfile" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 7, 151 | "metadata": { 152 | "collapsed": false 153 | }, 154 | "outputs": [ 155 | { 156 | "name": "stdout", 157 | "output_type": "stream", 158 | "text": [ 159 | " " 160 | ] 161 | } 162 | ], 163 | "source": [ 164 | "%%prun \n", 165 | "b = np.random.uniform(-2000, 2000, 1000000)\n", 166 | "filter_bad_values_try1(b, -1000, 1000)\n", 167 | "filter_bad_values_try4(b, -1000, 1000)" 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "metadata": {}, 
173 | "source": [ 174 | "## %lprun - Profile line execution with line_profiler" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 8, 180 | "metadata": { 181 | "collapsed": false 182 | }, 183 | "outputs": [ 184 | { 185 | "name": "stdout", 186 | "output_type": "stream", 187 | "text": [ 188 | "Requirement already up-to-date: line_profiler in /Users/stan/anaconda/envs/sc2016/lib/python3.5/site-packages\n", 189 | "Requirement already up-to-date: IPython>=0.13 in /Users/stan/anaconda/envs/sc2016/lib/python3.5/site-packages (from line_profiler)\n" 190 | ] 191 | } 192 | ], 193 | "source": [ 194 | "!pip install --upgrade line_profiler" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 9, 200 | "metadata": { 201 | "collapsed": false 202 | }, 203 | "outputs": [], 204 | "source": [ 205 | "%load_ext line_profiler" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 10, 211 | "metadata": { 212 | "collapsed": false 213 | }, 214 | "outputs": [], 215 | "source": [ 216 | "%lprun -f filter_bad_values_try1 filter_bad_values_try1(a, -1000, 1000)" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": { 223 | "collapsed": true 224 | }, 225 | "outputs": [], 226 | "source": [] 227 | } 228 | ], 229 | "metadata": { 230 | "anaconda-cloud": {}, 231 | "kernelspec": { 232 | "display_name": "Python [conda env:sc2016]", 233 | "language": "python", 234 | "name": "conda-env-sc2016-py" 235 | }, 236 | "language_info": { 237 | "codemirror_mode": { 238 | "name": "ipython", 239 | "version": 3 240 | }, 241 | "file_extension": ".py", 242 | "mimetype": "text/x-python", 243 | "name": "python", 244 | "nbconvert_exporter": "python", 245 | "pygments_lexer": "ipython3", 246 | "version": "3.5.2" 247 | } 248 | }, 249 | "nbformat": 4, 250 | "nbformat_minor": 1 251 | } 252 | -------------------------------------------------------------------------------- /6-Profiling/README.md: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/6-Profiling/README.md -------------------------------------------------------------------------------- /6-Profiling/vtune/VTune_Python_Tutorial.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/6-Profiling/vtune/VTune_Python_Tutorial.pdf -------------------------------------------------------------------------------- /6-Profiling/vtune/demo.py: -------------------------------------------------------------------------------- 1 | class Encoder: 2 | CHAR_MAP = {'a': 'b', 'b': 'c'} 3 | def __init__(self, input): 4 | self.input = input 5 | 6 | def process_slow(self): 7 | result = '' 8 | for ch in self.input: 9 | result += self.CHAR_MAP.get(ch, ch) 10 | return result 11 | 12 | def process_fast(self): 13 | result = [] 14 | for ch in self.input: 15 | result.append(self.CHAR_MAP.get(ch, ch)) 16 | return ''.join(result) 17 | 18 | -------------------------------------------------------------------------------- /6-Profiling/vtune/run.py: -------------------------------------------------------------------------------- 1 | import demo 2 | import time 3 | 4 | def slow_encode(input): 5 | return demo.Encoder(input).process_slow() 6 | 7 | def fast_encode(input): 8 | return demo.Encoder(input).process_fast() 9 | 10 | if __name__ == '__main__': 11 | input = 'a' * 10000000 # 10 millions of 'a' 12 | start = time.time() 13 | s1 = slow_encode(input) 14 | slow_stop = time.time() 15 | print('slow: %.2f sec' % (slow_stop - start)) 16 | s2 = fast_encode(input) 17 | print('fast: %.2f sec' % (time.time() - slow_stop)) 18 | -------------------------------------------------------------------------------- /6-Profiling/vtune/run_th.py: 
-------------------------------------------------------------------------------- 1 | import demo 2 | import time 3 | import threading 4 | 5 | def slow_encode(input): 6 | return demo.Encoder(input).process_slow() 7 | 8 | def fast_encode(input): 9 | return demo.Encoder(input).process_fast() 10 | 11 | if __name__ == '__main__': 12 | input = 'a' * 10000000 # 10 millions of 'a' 13 | th1 = threading.Thread(target=slow_encode, args=(input,)) 14 | th2 = threading.Thread(target=fast_encode, args=(input,)) 15 | th1.start() 16 | th2.start() 17 | th1.join() 18 | th2.join() 19 | -------------------------------------------------------------------------------- /6-Profiling/vtune/t_0.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import time 3 | 4 | try: 5 | xrange 6 | except NameError: 7 | # python3 8 | xrange = range 9 | 10 | class BigObject: 11 | STR_VALUE = ''.join(str(x) for x in xrange(10000)) 12 | def __str__(self): 13 | return self.STR_VALUE 14 | 15 | def makeParams(): 16 | objects = tuple(BigObject() for _ in xrange(50)) 17 | template = ''.join('{%d}' % i for i in xrange(len(objects))) 18 | return template, objects 19 | 20 | def doLog(): 21 | template, objects = makeParams() 22 | for _ in xrange(1000): 23 | logging.info(template.format(*objects)) 24 | 25 | def main(): 26 | logging.basicConfig() 27 | 28 | start = time.time() 29 | doLog() 30 | stop = time.time() 31 | print('run took: %.3f' % (stop - start)) 32 | 33 | if __name__ == '__main__': 34 | main() 35 | -------------------------------------------------------------------------------- /6-Profiling/vtune/t_1.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import time 3 | 4 | try: 5 | xrange 6 | except NameError: 7 | # python3 8 | xrange = range 9 | 10 | class BigObject: 11 | STR_VALUE = ''.join(str(x) for x in xrange(10000)) 12 | def __str__(self): 13 | return self.STR_VALUE 14 | 15 | def 
makeParams(): 16 | objects = tuple(BigObject() for _ in xrange(50)) 17 | template = ''.join('{%d}' % i for i in xrange(len(objects))) 18 | return template, objects 19 | 20 | def doLog(): 21 | template, objects = makeParams() 22 | for _ in xrange(1000): 23 | logging.info(template, *objects) 24 | 25 | def main(): 26 | logging.basicConfig() 27 | 28 | start = time.time() 29 | doLog() 30 | stop = time.time() 31 | print('run took: %.3f' % (stop - start)) 32 | 33 | if __name__ == '__main__': 34 | main() 35 | -------------------------------------------------------------------------------- /6-Profiling/vtune/test_class_sample.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import sys 4 | if sys.version_info < (3,0,0): 5 | from thread import get_ident 6 | else: 7 | from threading import get_ident 8 | import threading 9 | import time 10 | 11 | class WaitClass: 12 | def __init__(self, time): 13 | self.time = time 14 | 15 | def __call__(self): 16 | ident = get_ident() 17 | print('START task_noop_waiter: %s\n' % ident, end='') 18 | time.sleep(self.time) 19 | print('STOP task_noop_waiter: %s\n' % ident, end='') 20 | 21 | 22 | def do_work(self): 23 | ident = get_ident() 24 | print('START1 task_cpu_eater: %s\n' % ident, end='') 25 | stop = time.time() + self.time 26 | while time.time() < stop: 27 | pass 28 | print('STOP1 task_cpu_eater: %s\n' % ident, end='') 29 | 30 | class SpinClass: 31 | def __init__(self, time): 32 | self.time = time 33 | 34 | def __call__(self): 35 | do_work(self); 36 | 37 | def main(): 38 | ident = get_ident() 39 | print('START main: %s\n' % ident, end='') 40 | wc = WaitClass(9) 41 | sc = SpinClass(7) 42 | t1 = threading.Thread(target=wc) 43 | t2 = threading.Thread(target=sc) 44 | t1.start() 45 | t2.start() 46 | t1.join() 47 | t2.join() 48 | print('STOP main: %s\n' % ident, end='') 49 | 50 | if __name__ == '__main__': 51 | main() 52 | 
-------------------------------------------------------------------------------- /6-Profiling/vtune/webinar/main.py: -------------------------------------------------------------------------------- 1 | from slowpoke import SlowpokeCore 2 | import logging 3 | import time 4 | 5 | def makeParams(): 6 | objects = tuple(SlowpokeCore(50000) for _ in xrange(50)) 7 | template = ''.join('{%d}' % i for i in xrange(len(objects))) 8 | return template, objects 9 | 10 | def doLog(): 11 | template, objects = makeParams() 12 | for _ in xrange(1000): 13 | logging.info(template.format(*objects)) 14 | 15 | def main(): 16 | logging.basicConfig() 17 | start = time.time() 18 | doLog() 19 | stop = time.time() 20 | print('run took: %.3f' % (stop - start)) 21 | 22 | if __name__ == '__main__': 23 | main() 24 | -------------------------------------------------------------------------------- /6-Profiling/vtune/webinar/pythonset.txt: -------------------------------------------------------------------------------- 1 | set VS90COMNTOOLS=C:\Users\kpoleary\AppData\Local\Programs\Common\Microsoft\Visual C++ for Python\9.0\VC\bin 2 | -------------------------------------------------------------------------------- /6-Profiling/vtune/webinar/slowpoke/__init__.py: -------------------------------------------------------------------------------- 1 | from core import SlowpokeCore 2 | 3 | class Slowpoke(SlowpokeCore): 4 | pass 5 | 6 | __all__ = ['Slowpoke', 'SlowpokeCore'] 7 | -------------------------------------------------------------------------------- /6-Profiling/vtune/webinar/slowpoke/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/6-Profiling/vtune/webinar/slowpoke/__init__.pyc -------------------------------------------------------------------------------- /6-Profiling/vtune/webinar/slowpoke/compile.bat1: 
-------------------------------------------------------------------------------- 1 | python -d setup.py build_ext --inplace 2 | -------------------------------------------------------------------------------- /6-Profiling/vtune/webinar/slowpoke/core.pyd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/6-Profiling/vtune/webinar/slowpoke/core.pyd -------------------------------------------------------------------------------- /6-Profiling/vtune/webinar/slowpoke/core.pyx: -------------------------------------------------------------------------------- 1 | import math 2 | cdef class SlowpokeCore: 3 | cdef public object N 4 | cdef public int divisor 5 | def __init__(self, N): 6 | self.N = N 7 | self.divisor = 1 8 | 9 | cdef double doWork(self, int N) except *: 10 | cdef int i, j, k 11 | cdef double res 12 | res = 1 13 | for j in range(N / self.divisor): 14 | k = 1 15 | for i in range(N): 16 | k += 1 17 | res += k 18 | return math.log(res) 19 | 20 | def __str__(self): 21 | return 'SlowpokeCore: %f' % self.doWork(self.N) 22 | -------------------------------------------------------------------------------- /6-Profiling/vtune/webinar/slowpoke/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from distutils.extension import Extension 3 | from Cython.Distutils import build_ext 4 | 5 | setup( 6 | cmdclass = {'build_ext': build_ext}, 7 | ext_modules = [Extension('core', sources=["core.pyx"], 8 | extra_compile_args=['/Z7'], 9 | extra_link_args=['/DEBUG']), 10 | ], 11 | ) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # supercomputing2016-python 2 | Materials for the Supercomputing 2016 tutorial on high performance Python 3 | 4 
| # Environment 5 | 6 | ``` 7 | conda create -n sc2016 python=3.5 jupyter pandas scipy bokeh matplotlib numba line_profiler pandas-datareader xlwt xlrd sqlalchemy scikit-learn pytables 8 | ``` 9 | --------------------------------------------------------------------------------