├── 1-Overview
├── 1_PythonAndConda.ipynb
├── README.md
└── img
│ ├── PyData_Stack.png
│ └── python-version.png
├── 2-NumPy_SciPy
├── NumPy
│ ├── 1_IntroAndCreation.ipynb
│ ├── 2_IndexingAndSlicing.ipynb
│ ├── 3_VectorizedOperations.ipynb
│ ├── 4_Broadcasting.ipynb
│ ├── 5_NumpyRecords.ipynb
│ ├── EX01_ArrayCreation.ipynb
│ ├── EX01_ArrayCreation_soln.ipynb
│ ├── EX02_IndexingSlicing.ipynb
│ ├── EX02_IndexingSlicing_soln.ipynb
│ ├── EX03_VectorizedOperations.ipynb
│ ├── EX03_VectorizedOperations_soln.ipynb
│ ├── img
│ │ ├── broadcasting2D.lightbg.scaled-noalpha.png
│ │ ├── ecosystem.lightbg.scaled-noalpha.png
│ │ ├── mef_numpy_selection-noalpha.png
│ │ ├── mef_numpy_slice_01-noalpha.png
│ │ ├── mef_numpy_slice_02-noalpha.png
│ │ ├── numpyzerosdims-noalpha.png
│ │ └── ufunc.lightbg.scaled-noalpha.png
│ └── tmp
│ │ └── .keep
├── README.md
└── SciPy
│ ├── 1_Introduction.ipynb
│ ├── 2_Integration.ipynb
│ ├── 3_FFT.ipynb
│ └── 4_LinearAlgebra.ipynb
├── 3-Analytics
├── README.md
├── pandas
│ ├── 1_Intro.ipynb
│ ├── 2_DataStructures.ipynb
│ ├── 3_ExamineData.ipynb
│ ├── 4_ReadWriteData.ipynb
│ ├── 5_Groupby.ipynb
│ ├── 6_TimeSeries.ipynb
│ ├── EX01_ExcelFiles.ipynb
│ ├── EX02_weather.ipynb
│ ├── data
│ │ ├── 201509-citibike-tripdata.csv.gz
│ │ ├── beer2.csv.gz
│ │ ├── exoplanets.csv
│ │ ├── goog.csv
│ │ ├── nyc_harbor_wq_2006-2014.xlsx
│ │ └── pittsburgh2013.csv
│ └── img
│ │ └── pydata_stack_model.png
└── sklearn
│ ├── 1_Intro.ipynb
│ ├── 2_KNN_and_Validation.ipynb
│ ├── 3_ModelComparison.ipynb
│ ├── 4_RegressionModels.ipynb
│ └── EX01_CrossValidation.ipynb
├── 4-AcceleratedPython
├── Accel Python Offloading to Intel Xeon Phi (Co)processors.pdf
├── README.md
└── numba
│ ├── 1_Numba_Basics.ipynb
│ ├── 2_How_Numba_Works.ipynb
│ ├── 3_Making_Ufuncs-Solution.ipynb
│ ├── 3_Making_Ufuncs.ipynb
│ ├── EX01_Intro_to_JIT-Solution.ipynb
│ ├── EX01_Intro_to_JIT.ipynb
│ ├── EX02_Direct_Summation-Solution.ipynb
│ ├── EX02_Direct_Summation.ipynb
│ ├── LICENSE.md
│ ├── Numba Tutorial.pdf
│ ├── README.md
│ └── nopython_failure.py
├── 5-AdvancedScaling
├── README.md
├── basics
│ └── basic_features.py
├── pi
│ ├── builtins_mpi_pi.py
│ ├── builtins_pi.py
│ ├── builtins_pyobj_mpi_pi.py
│ ├── numba_mpi_pi.py
│ ├── numba_pi.py
│ ├── numpy_mpi_pi.py
│ ├── numpy_pi.py
│ ├── threads_pi.py
│ ├── util.py
│ └── util.pyc
├── scaling_python_with_mpi.pdf
└── scaling_python_with_mpi.pptx
├── 6-Profiling
├── Profiling.ipynb
├── README.md
└── vtune
│ ├── VTune_Python_Tutorial.pdf
│ ├── demo.py
│ ├── run.py
│ ├── run_th.py
│ ├── t_0.py
│ ├── t_1.py
│ ├── test_class_sample.py
│ └── webinar
│ ├── main.py
│ ├── pythonset.txt
│ └── slowpoke
│ ├── __init__.py
│ ├── __init__.pyc
│ ├── compile.bat1
│ ├── core.c
│ ├── core.pyd
│ ├── core.pyx
│ └── setup.py
└── README.md
/1-Overview/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/1-Overview/README.md
--------------------------------------------------------------------------------
/1-Overview/img/PyData_Stack.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/1-Overview/img/PyData_Stack.png
--------------------------------------------------------------------------------
/1-Overview/img/python-version.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/1-Overview/img/python-version.png
--------------------------------------------------------------------------------
/2-NumPy_SciPy/NumPy/3_VectorizedOperations.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Table of Contents\n",
8 | "* [Learning Objectives:](#Learning-Objectives:)\n",
9 | "\t* [Some Simple Setup](#Some-Simple-Setup)\n",
10 | "* [Working with Arrays](#Working-with-Arrays)\n",
11 | "\t* [Elementwise vs. matrix multiplications](#Elementwise-vs.-matrix-multiplications)\n",
12 | "\t* [Functions and methods](#Functions-and-methods)\n",
13 | "* [Array Operations as Methods](#Array-Operations-as-Methods)\n",
14 | "\t* [Additional methods:](#Additional-methods:)\n"
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "# Learning Objectives:"
22 | ]
23 | },
24 | {
25 | "cell_type": "markdown",
26 | "metadata": {},
27 | "source": [
28 | "After completion of this module, learners should be able to:\n",
29 | "\n",
30 | "* explain & use *vectorization* to speed up array-based computation\n",
31 | "* apply (`numpy`) *universal functions* to vectorize array computations\n",
32 | "* construct simple timed experiments to compare array-based computations"
33 | ]
34 | },
35 | {
36 | "cell_type": "markdown",
37 | "metadata": {},
38 | "source": [
39 | "## Some Simple Setup"
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": null,
45 | "metadata": {
46 | "collapsed": true
47 | },
48 | "outputs": [],
49 | "source": [
50 | "%matplotlib inline\n",
51 | "\n",
52 | "import numpy as np\n",
53 | "import matplotlib.pyplot as plt\n",
54 | "import os.path as osp\n",
55 | "import numpy.random as npr\n",
56 | "vsep = \"\\n-------------------\\n\"\n",
57 | "\n",
58 | "def dump_array(arr):\n",
59 | " print(\"%s array of %s:\" % (arr.shape, arr.dtype))\n",
60 | " print(arr)"
61 | ]
62 | },
63 | {
64 | "cell_type": "markdown",
65 | "metadata": {},
66 | "source": [
67 | "# Working with Arrays"
68 | ]
69 | },
70 | {
71 | "cell_type": "markdown",
72 | "metadata": {},
73 | "source": [
74 | "Math is quite simple—and this is part of the reason that using NumPy arrays can significantly simplify numerical code. The generic pattern `array OP scalar` (or `scalar OP array`), applies `OP` (with the `scalar` value) across elements of `array`."
75 | ]
76 | },
77 | {
78 | "cell_type": "markdown",
79 | "metadata": {},
80 | "source": [
81 | "NumPy *ufuncs* (universal functions) are functions that operate elementwise on one or more arrays.\n",
82 | "\n",
83 | "\n",
84 | "\n",
85 | "When called, *ufuncs* dispatch to optimized C inner-loops based on the array *dtype*."
86 | ]
87 | },
88 | {
89 | "cell_type": "markdown",
90 | "metadata": {},
91 | "source": [
92 | "Builtin numpy ufuncs\n",
93 | "\n",
94 | "- comparison: <, <=, ==, !=, >=, >
\n",
95 | "- arithmetic: +, -, *, /, reciprocal, square
\n",
96 | "- exponential: exp, expm1, exp2, log, log10, log1p, log2, power, sqrt
\n",
 97 | "- trig: sin, cos, tan, arcsin, arccos, arctan, sinh, cosh, tanh, arcsinh, arccosh, arctanh
\n",
98 | "- bitwise: &, |, ~, ^, left_shift, right_shift
\n",
 99 | "- logical operations: logical_and, logical_or, logical_xor, logical_not
\n",
100 | "- predicates: isfinite, isinf, isnan, signbit
\n",
101 | "- other: abs, ceil, floor, mod, modf, round, sinc, sign, trunc
"
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "execution_count": null,
107 | "metadata": {
108 | "collapsed": false
109 | },
110 | "outputs": [],
111 | "source": [
112 | "# array OP scalar applies across all elements and creates a new array\n",
113 | "arr = np.arange(10)\n",
114 | "print(\" arr:\", arr)\n",
115 | "print(\" arr + 1:\", arr + 1)\n",
116 | "print(\" arr * 2:\", arr * 2)\n",
117 | "print(\"arr ** 2:\", arr ** 2)\n",
118 | "print(\"2 ** arr:\", 2 ** arr)\n",
119 | "\n",
120 | "# bit-wise ops (cf. np.logical_and, etc.)\n",
121 | "print(\" arr | 1:\", arr | 1)\n",
122 | "print(\" arr & 1:\", arr & 1)\n",
123 | "\n",
124 | "# NOTE: arr += 1, etc. for in-place"
125 | ]
126 | },
127 | {
128 | "cell_type": "code",
129 | "execution_count": null,
130 | "metadata": {
131 | "collapsed": false
132 | },
133 | "outputs": [],
134 | "source": [
135 | "# array OP array works element-by-element and creates a new array\n",
136 | "arr1 = np.arange(5)\n",
137 | "arr2 = 2 ** arr1 # makes a new array\n",
138 | "\n",
139 | "print(arr1, \"+\", arr2, \"=\", arr1 + arr2, end=vsep)\n",
140 | "print(arr1, \"*\", arr2, \"=\", arr1 * arr2)"
141 | ]
142 | },
143 | {
144 | "cell_type": "markdown",
145 | "metadata": {},
146 | "source": [
147 | "## Elementwise vs. matrix multiplications"
148 | ]
149 | },
150 | {
151 | "cell_type": "markdown",
152 | "metadata": {},
153 | "source": [
154 | "NumPy arrays and matrices are related, but slightly different types."
155 | ]
156 | },
157 | {
158 | "cell_type": "code",
159 | "execution_count": null,
160 | "metadata": {
161 | "collapsed": false
162 | },
163 | "outputs": [],
164 | "source": [
165 | "a, b = np.arange(8).reshape(2,4), np.arange(10,18).reshape(2,4)\n",
166 | "print(\"a\")\n",
167 | "print(a)\n",
168 | "print(\"b\")\n",
169 | "print(b, end=vsep)\n",
170 | "print(\"Elementwise multiplication: a * b\")\n",
171 | "print(a * b, end=vsep)\n",
172 | "print(\"Dot product: np.dot(a.T, b)\")\n",
173 | "print(np.dot(a.T, b), end=vsep)\n",
174 | "print(\"Dot product as an array method: a.T.dot(b)\")\n",
175 | "print(a.T.dot(b), end=vsep)\n",
176 | "\n",
177 | "amat, bmat = np.matrix(a), np.matrix(b)\n",
178 | "print(\"amat, bmat = np.matrix(a), np.matrix(b)\")\n",
179 | "print('amat')\n",
180 | "print(amat)\n",
181 | "print('bmat')\n",
182 | "print(bmat, end=vsep)\n",
183 | "print(\"Dot product of matrices: amat.T * bmat\")\n",
184 | "print(amat.T * bmat, end=vsep)\n",
185 | "print(\"Dot product in Python 3.5+: a.T @ b\")\n",
186 | "print(\"... PEP 465: time to upgrade ...\")"
187 | ]
188 | },
189 | {
190 | "cell_type": "markdown",
191 | "metadata": {},
192 | "source": [
193 | "In the wondrous future, we will write:\n",
194 | " \n",
195 | "```python\n",
196 | "S = (H @ β - r).T @ inv(H @ V @ H.T) @ (H @ β - r)\n",
197 | "```"
198 | ]
199 | },
200 | {
201 | "cell_type": "markdown",
202 | "metadata": {},
203 | "source": [
204 | "## Functions and methods"
205 | ]
206 | },
207 | {
208 | "cell_type": "markdown",
209 | "metadata": {},
210 | "source": [
211 | "A number of important mathematical operations on arrays are defined as functions in the NumPy module (not as methods on NumPy arrays). Some operations are even available both ways. Some of the more important mathematical routines include: `sin, cos, tan, exp, log`. We can use these as `np.sin`, for example. For a complete list, see http://docs.scipy.org/doc/numpy/reference/routines.math.html"
212 | ]
213 | },
214 | {
215 | "cell_type": "code",
216 | "execution_count": null,
217 | "metadata": {
218 | "collapsed": false
219 | },
220 | "outputs": [],
221 | "source": [
222 | "arr = np.arange(-np.pi, np.pi, np.pi/4)\n",
223 | "print(\"some multiples of pi:\")\n",
224 | "print(arr, end=vsep)\n",
225 | "\n",
226 | "print(\"... and their cosines:\")\n",
227 | "print(np.cos(arr))"
228 | ]
229 | },
230 | {
231 | "cell_type": "markdown",
232 | "metadata": {},
233 | "source": [
234 | "# Array Operations as Methods"
235 | ]
236 | },
237 | {
238 | "cell_type": "markdown",
239 | "metadata": {},
240 | "source": [
241 | "Several useful operations are defined as methods on NumPy arrays. For a full list, see the NumPy docs: \n",
242 | "\n",
243 | "http://docs.scipy.org/doc/numpy/reference/arrays.ndarray.html#array-methods"
244 | ]
245 | },
246 | {
247 | "cell_type": "code",
248 | "execution_count": null,
249 | "metadata": {
250 | "collapsed": false
251 | },
252 | "outputs": [],
253 | "source": [
254 | "arr = np.random.randint(0,10, size=(10,))# arange(1,10)\n",
255 | "print(\"arr: \", arr, end=vsep)\n",
256 | "\n",
257 | "print(\"%18s : %s\" % (\"mean\", arr.mean()))\n",
258 | "print(\"%18s : %s\" % (\"variance\", arr.var()))\n",
259 | "print(\"%18s : %s\" % (\"std. deviation\", arr.std()))\n",
260 | "print(\"%18s : %s\" % (\"cumulative sum\", arr.cumsum()))\n",
261 | "print(\"%18s : %s\" % (\"cumulative product\", arr.cumprod()))"
262 | ]
263 | },
264 | {
265 | "cell_type": "code",
266 | "execution_count": null,
267 | "metadata": {
268 | "collapsed": false
269 | },
270 | "outputs": [],
271 | "source": [
272 | "# two other useful methods for defining predicates \n",
273 | "# based on an array are .any() and .all()\n",
274 | "arr = np.array([True, False, False])\n",
275 | "print(\"arr:\", arr)\n",
276 | "print(\"any true?: \", arr.any())\n",
277 | "print(\"Python any:\", any(arr))\n",
278 | "print(\"all true?: \", arr.all())\n",
279 | "print(\"Python all:\", all(arr))"
280 | ]
281 | },
282 | {
283 | "cell_type": "code",
284 | "execution_count": null,
285 | "metadata": {
286 | "collapsed": false
287 | },
288 | "outputs": [],
289 | "source": [
290 | "# With numpy arrays that have more than 1 dimension, we need to use np.all\n",
291 | "arr = np.arange(15).reshape(3, 5)\n",
292 | "np.all(arr)\n",
293 | "# Why? all() iterates the argument and checks if each element is truthy.\n",
294 | "# With a 2-d array, each iteration is a row not a single element, \n",
295 | "# and as we saw above, we cannot evaluate the truthiness of an \n",
296 | "# array (bool(some_array) fails).\n"
297 | ]
298 | },
299 | {
300 | "cell_type": "markdown",
301 | "metadata": {},
302 | "source": [
303 | "## Additional methods:"
304 | ]
305 | },
306 | {
307 | "cell_type": "markdown",
308 | "metadata": {},
309 | "source": [
310 | "* Predicates\n",
311 | " * `a.any(), a.all()`\n",
312 | "* Reductions\n",
313 | " * `a.mean(), a.argmin(), a.argmax(), a.trace(), a.cumsum(), a.cumprod()`\n",
314 | "* Manipulation\n",
315 | " * `a.argsort(), a.transpose(), a.reshape(...), a.ravel(), a.fill(...), a.clip(...)`\n",
316 | "* Complex Numbers\n",
317 | " * `a.real, a.imag, a.conj()`"
318 | ]
319 | }
320 | ],
321 | "metadata": {
322 | "anaconda-cloud": {},
323 | "continuum": {
324 | "depends": [
325 | "np_intro",
326 | "ip_essentials",
327 | "ip_datatypes"
328 | ],
329 | "requires": [
330 | "img/ufunc.lightbg.scaled-noalpha.png"
331 | ],
332 | "tag": "np_vectorization"
333 | },
334 | "kernelspec": {
335 | "display_name": "Python [conda env:python3]",
336 | "language": "python",
337 | "name": "conda-env-python3-py"
338 | },
339 | "language_info": {
340 | "codemirror_mode": {
341 | "name": "ipython",
342 | "version": 3
343 | },
344 | "file_extension": ".py",
345 | "mimetype": "text/x-python",
346 | "name": "python",
347 | "nbconvert_exporter": "python",
348 | "pygments_lexer": "ipython3",
349 | "version": "3.5.2"
350 | }
351 | },
352 | "nbformat": 4,
353 | "nbformat_minor": 0
354 | }
355 |
--------------------------------------------------------------------------------
/2-NumPy_SciPy/NumPy/4_Broadcasting.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Table of Contents\n",
8 | "* [Learning Objectives:](#Learning-Objectives:)\n",
9 | "\t* [Some Simple Setup](#Some-Simple-Setup)\n",
10 | "* [Broadcasting](#Broadcasting)\n",
11 | "\t* [What are the rules for broadcasting?](#What-are-the-rules-for-broadcasting?)\n"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {},
17 | "source": [
18 | "# Learning Objectives:"
19 | ]
20 | },
21 | {
22 | "cell_type": "markdown",
23 | "metadata": {},
24 | "source": [
25 | "After completion of this module, learners should be able to:\n",
26 | "\n",
27 | "* use and explain *broadcasting* in numpy"
28 | ]
29 | },
30 | {
31 | "cell_type": "markdown",
32 | "metadata": {},
33 | "source": [
34 | "## Some Simple Setup"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": null,
40 | "metadata": {
41 | "collapsed": true
42 | },
43 | "outputs": [],
44 | "source": [
45 | "%matplotlib inline\n",
46 | "\n",
47 | "import numpy as np\n",
48 | "import matplotlib.pyplot as plt\n",
49 | "import os.path as osp\n",
50 | "import numpy.random as npr\n",
51 | "vsep = \"\\n-------------------\\n\"\n",
52 | "\n",
53 | "def dump_array(arr):\n",
54 | " print(\"%s array of %s:\" % (arr.shape, arr.dtype))\n",
55 | " print(arr)"
56 | ]
57 | },
58 | {
59 | "cell_type": "markdown",
60 | "metadata": {},
61 | "source": [
62 | "# Broadcasting"
63 | ]
64 | },
65 | {
66 | "cell_type": "markdown",
67 | "metadata": {},
68 | "source": [
69 | "Broadcasting lets arrays with *different but compatible* shapes be arguments to *ufuncs*."
70 | ]
71 | },
72 | {
73 | "cell_type": "code",
74 | "execution_count": null,
75 | "metadata": {
76 | "collapsed": false
77 | },
78 | "outputs": [],
79 | "source": [
80 | "arr1 = np.arange(5)\n",
81 | "print(\"arr1:\\n\", arr1, end=vsep)\n",
82 | "\n",
83 | "print(\"arr1 + scalar:\\n\", arr1+10, end=vsep)\n",
84 | "\n",
85 | "print(\"arr1 + arr1 (same shape):\\n\", arr1+arr1, end=vsep)\n",
86 | "\n",
87 | "arr2 = np.arange(5).reshape(5,1) * 10\n",
88 | "arr3 = np.arange(5).reshape(1,5) * 100\n",
89 | "print(\"arr2:\\n\", arr2)\n",
90 | "print(\"arr3:\\n\", arr3, end=vsep)\n",
91 | "\n",
92 | "print(\"arr1 + arr2 [ %s + %s --> %s ]:\" % \n",
93 | " (arr1.shape, arr2.shape, (arr1 + arr2).shape))\n",
94 | "print(arr1+arr2, end=vsep)\n",
95 | "print(\"arr1 + arr3 [ %s + %s --> %s ]:\" % \n",
96 | " (arr1.shape, arr3.shape, (arr1 + arr3).shape))\n",
97 | "print(arr1+arr3)"
98 | ]
99 | },
100 | {
101 | "cell_type": "code",
102 | "execution_count": null,
103 | "metadata": {
104 | "collapsed": false
105 | },
106 | "outputs": [],
107 | "source": [
108 | "arr1 = np.arange(6).reshape(3,2)\n",
109 | "arr2 = np.arange(10, 40, 10).reshape(3,1)\n",
110 | "\n",
111 | "print(\"arr1:\")\n",
112 | "dump_array(arr1)\n",
113 | "print(\"\\narr2:\")\n",
114 | "dump_array(arr2)\n",
115 | "print(\"\\narr1 + arr2:\")\n",
116 | "print(arr1+arr2)"
117 | ]
118 | },
119 | {
120 | "cell_type": "markdown",
121 | "metadata": {},
122 | "source": [
123 | "Here, an array of shape `(3, 1)` is broadcast to an array with shape `(3, 2)`\n",
124 | "\n",
125 | ""
126 | ]
127 | },
128 | {
129 | "cell_type": "markdown",
130 | "metadata": {},
131 | "source": [
132 | "## What are the rules for broadcasting? "
133 | ]
134 | },
135 | {
136 | "cell_type": "markdown",
137 | "metadata": {},
138 | "source": [
139 | "In order for an operation to broadcast, the size of all the trailing dimensions for both arrays must either be *equal* or be *one*. Dimensions that are one and dimensions that are missing from the \"head\" are duplicated to match the larger number. So, we have:\n",
140 | "\n",
141 | "|Array |Shape |\n",
142 | "|:------------------|---------------:|\n",
143 | "|A (1d array)| 3|\n",
144 | "|B (2d array)| 2 x 3|\n",
145 | "|Result (2d array)| 2 x 3|\n",
146 | "\n",
147 | "|Array |Shape |\n",
148 | "|:------------------|-------------:|\n",
149 | "|A (2d array)| 6 x 1|\n",
150 | "|B (3d array)| 1 x 6 x 4|\n",
151 | "|Result (3d array)| 1 x 6 x 4|\n",
152 | "\n",
153 | "|Array |Shape |\n",
154 | "|:-----------------|---------------:|\n",
155 | "|A (4d array)| 3 x 1 x 6 x 1|\n",
156 | "|B (3d array)| 2 x 1 x 4|\n",
157 | "|Result (4d array)| 3 x 2 x 6 x 4|"
158 | ]
159 | },
160 | {
161 | "cell_type": "markdown",
162 | "metadata": {},
163 | "source": [
164 | "Some other interpretations of compatibility:\n",
165 | " \n",
166 | " * Tails must be the same, ones are wild.\n",
167 | " \n",
168 | "\n",
169 | " * If one shape is shorter than the other, pad the shorter shape on the LHS with `1`s.\n",
170 | " * Now, from the right, the shapes must be identical with ones acting as wild cards."
171 | ]
172 | },
173 | {
174 | "cell_type": "code",
175 | "execution_count": null,
176 | "metadata": {
177 | "collapsed": false
178 | },
179 | "outputs": [],
180 | "source": [
181 | "a1 = np.array([1,2,3]) # 3 -> 1x3\n",
182 | "b1 = np.array([[10, 20, 30], # 2x3\n",
183 | " [40, 50, 60]]) \n",
184 | "print(a1+b1)"
185 | ]
186 | },
187 | {
188 | "cell_type": "code",
189 | "execution_count": null,
190 | "metadata": {
191 | "collapsed": false
192 | },
193 | "outputs": [],
194 | "source": [
195 | "result = (np.ones(( 6,1)) + # 3rd dimension replicated\n",
196 | " np.ones((1,6,4)))\n",
197 | "print(result.shape)\n",
198 | "\n",
199 | "result = (np.ones((3,6,1)) + \n",
200 | " np.ones((1,6,4))) # 1st and 3rd dimension replicated\n",
201 | "print(result.shape)"
202 | ]
203 | },
204 | {
205 | "cell_type": "markdown",
206 | "metadata": {},
207 | "source": [
208 | "Sometimes, it is useful to explicitly insert a new dimension in the shape. We can do this with a fancy slice that takes the value `np.newaxis`."
209 | ]
210 | },
211 | {
212 | "cell_type": "code",
213 | "execution_count": null,
214 | "metadata": {
215 | "collapsed": false,
216 | "raises": "ValueError"
217 | },
218 | "outputs": [],
219 | "source": [
220 | "arr1 = np.arange(6).reshape((2,3)) # 2x3\n",
221 | "arr2 = np.array([10, 100]) # 2\n",
222 | "arr1 + arr2 # This will fail"
223 | ]
224 | },
225 | {
226 | "cell_type": "code",
227 | "execution_count": null,
228 | "metadata": {
229 | "collapsed": false
230 | },
231 | "outputs": [],
232 | "source": [
233 | "# let's massage the shape\n",
234 | "arr3 = arr2[:, np.newaxis] # arr2 -> 2x1\n",
235 | "print(\"arr3 shape:\", arr3.shape)\n",
236 | "print(\"arr1 + arr3\")\n",
237 | "print(arr1+arr3)"
238 | ]
239 | },
240 | {
241 | "cell_type": "code",
242 | "execution_count": null,
243 | "metadata": {
244 | "collapsed": false
245 | },
246 | "outputs": [],
247 | "source": [
248 | "arr = np.array([10, 100])\n",
249 | "print(\"original shape:\", arr.shape)\n",
250 | "\n",
251 | "arrNew = arr2[np.newaxis, :]\n",
252 | "print(\"arrNew shape:\", arrNew.shape)"
253 | ]
254 | },
255 | {
256 | "cell_type": "code",
257 | "execution_count": null,
258 | "metadata": {
259 | "collapsed": false
260 | },
261 | "outputs": [],
262 | "source": [
263 | "arr1 = np.arange(0,6).reshape(2,3)\n",
264 | "arr2 = np.arange(10,22).reshape(4,3)\n",
265 | "np.tile(arr1, (2,1)) * arr2"
266 | ]
267 | },
268 | {
269 | "cell_type": "code",
270 | "execution_count": null,
271 | "metadata": {
272 | "collapsed": true
273 | },
274 | "outputs": [],
275 | "source": []
276 | }
277 | ],
278 | "metadata": {
279 | "anaconda-cloud": {},
280 | "continuum": {
281 | "depends": [
282 | "np_slicing",
283 | "np_intro",
284 | "np_vectorization",
285 | "ip_essentials",
286 | "ip_datatypes"
287 | ],
288 | "requires": [
289 | "img/broadcasting2D.lightbg.scaled-noalpha.png"
290 | ],
291 | "tag": "np_broadcast"
292 | },
293 | "kernelspec": {
294 | "display_name": "Python [conda env:python3]",
295 | "language": "python",
296 | "name": "conda-env-python3-py"
297 | },
298 | "language_info": {
299 | "codemirror_mode": {
300 | "name": "ipython",
301 | "version": 3
302 | },
303 | "file_extension": ".py",
304 | "mimetype": "text/x-python",
305 | "name": "python",
306 | "nbconvert_exporter": "python",
307 | "pygments_lexer": "ipython3",
308 | "version": "3.5.2"
309 | }
310 | },
311 | "nbformat": 4,
312 | "nbformat_minor": 0
313 | }
314 |
--------------------------------------------------------------------------------
/2-NumPy_SciPy/NumPy/5_NumpyRecords.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Table of Contents\n",
8 | "* [Learning Objectives](#Learning-Objectives)\n",
9 | "\t* [Some Simple Setup](#Some-Simple-Setup)\n",
10 | "\t* [Compound Data: Structured Arrays / Record Arrays: `np.record`](#Compound-Data:--Structured-Arrays-/-Record-Arrays:--np.record)\n",
11 | " * [IO on arrays](#IO-on-arrays)\n"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {},
17 | "source": [
18 | "# Learning Objectives"
19 | ]
20 | },
21 | {
22 | "cell_type": "markdown",
23 | "metadata": {},
24 | "source": [
25 | "After completion of this module, learners should be able to:\n",
26 | "\n",
27 | "* Usage of `np.record` data type."
28 | ]
29 | },
30 | {
31 | "cell_type": "markdown",
32 | "metadata": {},
33 | "source": [
34 | "## Some Simple Setup"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": null,
40 | "metadata": {
41 | "collapsed": true
42 | },
43 | "outputs": [],
44 | "source": [
45 | "%matplotlib inline\n",
46 | "\n",
47 | "import numpy as np\n",
48 | "import matplotlib.pyplot as plt\n",
49 | "import os.path as osp\n",
50 | "import numpy.random as npr\n",
51 | "vsep = \"\\n-------------------\\n\"\n",
52 | "\n",
53 | "def dump_array(arr):\n",
54 | " print(\"%s array of %s:\" % (arr.shape, arr.dtype))\n",
55 | " print(arr)"
56 | ]
57 | },
58 | {
59 | "cell_type": "markdown",
60 | "metadata": {},
61 | "source": [
62 | "## Compound Data: Structured Arrays / Record Arrays: `np.record`"
63 | ]
64 | },
65 | {
66 | "cell_type": "markdown",
67 | "metadata": {},
68 | "source": [
69 | "NumPy arrays have elements with a single type. But, that type can be a compound type (i.e., a record or a struct).\n",
70 | "\n",
71 | "Two main recommended ways of specifying type codes:\n",
72 | " \n",
73 | " * b1, i1, i2, i4, i8, u1, u2, u4, u8, f2, f4, f8, c8, c16, a<n>\n",
74 | " (bytes, ints, unsigned ints, floats, complex and fixed length strings of a given *byte* lengths)\n",
75 | " * int8,...,uint8,...,float16, float32, float64, complex64, complex128\n",
76 | " (similar but with *bit* sizes)"
77 | ]
78 | },
79 | {
80 | "cell_type": "code",
81 | "execution_count": null,
82 | "metadata": {
83 | "collapsed": false
84 | },
85 | "outputs": [],
86 | "source": [
87 | "# a record with a 4 byte int, a 4 byte float, \n",
88 | "# and 10 bytes of characters (ascii values)\n",
89 | "x = np.zeros((2,), dtype=('i4,f4,a10'))\n",
90 | "print(x)\n",
91 | "print(repr(x), end=vsep)\n",
92 | "\n",
93 | "x[:] = [(1, 5., 'Hello'), (2, 6., 'World')]\n",
94 | "print(x)\n",
95 | "print(repr(x), end=vsep)\n",
96 | "\n",
97 | "print(\"a field:\")\n",
98 | "print(x['f1'])\n",
99 | "print(repr(x['f1']))"
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "execution_count": null,
105 | "metadata": {
106 | "collapsed": false
107 | },
108 | "outputs": [],
109 | "source": [
110 | "%%file tmp/patient-records.csv\n",
111 | "name,date,weight(kg),height(cm)\n",
112 | "Mark,2011-01-01,86.1,180\n",
113 | "Barb,2012-02-03,65.7,167\n",
114 | "Ethan,2013-04-06,29.45,127"
115 | ]
116 | },
117 | {
118 | "cell_type": "code",
119 | "execution_count": null,
120 | "metadata": {
121 | "collapsed": false
122 | },
123 | "outputs": [],
124 | "source": [
125 | "patient_dtype = [(\"name\", \"a10\"),\n",
126 | " (\"visit_date\", 'datetime64[D]'),\n",
127 | " (\"weight\", np.float),\n",
128 | " (\"height\", np.int)]\n",
129 | "data = np.loadtxt(\"tmp/patient-records.csv\", \n",
130 | " skiprows=1, \n",
131 | " delimiter=\",\", \n",
132 | " dtype=patient_dtype,\n",
133 | " converters = {1: np.datetime64})\n",
134 | "\n",
135 | "print(\"first row: \", data[0])\n",
136 | "print(\"all weights: \", data['weight'])\n",
137 | "\n",
138 | "# BMI = kg / m**2\n",
139 | "print(\"BMIs:\", data['weight'] / (data['height']/100.0)**2)"
140 | ]
141 | },
142 | {
143 | "cell_type": "markdown",
144 | "metadata": {},
145 | "source": [
146 | "# IO on arrays"
147 | ]
148 | },
149 | {
150 | "cell_type": "markdown",
151 | "metadata": {},
152 | "source": [
153 | "We can also save and load arrays"
154 | ]
155 | },
156 | {
157 | "cell_type": "code",
158 | "execution_count": null,
159 | "metadata": {
160 | "collapsed": false
161 | },
162 | "outputs": [],
163 | "source": [
164 | "#saving / load data\n",
165 | "np.savez('tmp/data.npz',data=data) # list of arrays to store\n",
166 | "dataz = np.load('tmp/data.npz')\n",
167 | "\n",
168 | "print(dataz.files) # list of arrays stored in this archive\n",
169 | "print(dataz['data'])"
170 | ]
171 | },
172 | {
173 | "cell_type": "code",
174 | "execution_count": null,
175 | "metadata": {
176 | "collapsed": false
177 | },
178 | "outputs": [],
179 | "source": [
180 | "# cleanup\n",
181 | "!rm tmp/data.npz"
182 | ]
183 | }
184 | ],
185 | "metadata": {
186 | "continuum": {
187 | "depends": [
188 | "np_intro",
189 | "np_slicing",
190 | "np_vectorization",
191 | "ip_essentials",
192 | "ip_datatypes",
193 | "ip_containers"
194 | ],
195 | "requires": [],
196 | "tag": "np_records"
197 | },
198 | "kernelspec": {
199 | "display_name": "Python [conda env:python3]",
200 | "language": "python",
201 | "name": "conda-env-python3-py"
202 | },
203 | "language_info": {
204 | "codemirror_mode": {
205 | "name": "ipython",
206 | "version": 3
207 | },
208 | "file_extension": ".py",
209 | "mimetype": "text/x-python",
210 | "name": "python",
211 | "nbconvert_exporter": "python",
212 | "pygments_lexer": "ipython3",
213 | "version": "3.5.2"
214 | }
215 | },
216 | "nbformat": 4,
217 | "nbformat_minor": 0
218 | }
219 |
--------------------------------------------------------------------------------
/2-NumPy_SciPy/NumPy/EX01_ArrayCreation.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Table of Contents\n",
8 | "* [Exercise: Numpy Array Creation](#Exercise:-Numpy-Array-Creation)\n"
9 | ]
10 | },
11 | {
12 | "cell_type": "markdown",
13 | "metadata": {},
14 | "source": [
15 | "# Exercise: Numpy Array Creation"
16 | ]
17 | },
18 | {
19 | "cell_type": "code",
20 | "execution_count": null,
21 | "metadata": {
22 | "collapsed": true
23 | },
24 | "outputs": [],
25 | "source": [
26 | "import numpy as np"
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {},
32 | "source": [
33 | "IPython notebooks have two special functions to measure the time it takes to perform a single line"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": null,
39 | "metadata": {
40 | "collapsed": false
41 | },
42 | "outputs": [],
43 | "source": [
44 | "%timeit np.sqrt(np.pi)"
45 | ]
46 | },
47 | {
48 | "cell_type": "markdown",
49 | "metadata": {},
50 | "source": [
51 | "and the time it takes to perform an entire cell"
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": null,
57 | "metadata": {
58 | "collapsed": false
59 | },
60 | "outputs": [],
61 | "source": [
62 | "%%timeit\n",
63 | "import numpy as np\n",
64 | "for i in range(0,1000):\n",
65 | " np.sqrt(np.pi)"
66 | ]
67 | },
68 | {
69 | "cell_type": "markdown",
70 | "metadata": {},
71 | "source": [
 72 | "A numpy array behaves like any other container, e.g. `for item in array:`."
73 | ]
74 | },
75 | {
76 | "cell_type": "markdown",
77 | "metadata": {},
78 | "source": [
79 | "Unless otherwise specified, try to solve these problems using NumPy but not using raw Python.\n",
80 | "\n",
81 | " 1. Create a Python list with the ints from 1 to 10. Create a NumPy array from that list. \n",
82 | " 1. For both, add one to each element.\n",
83 | " 1. For both, multiply each element by two.\n",
84 | " 1. Create an int array of all zeros.\n",
85 | " 1. Create a float array of all zeros.\n",
86 | " 1. Create an evenly spaced grid of 100 floating point values on [-10, 10].\n",
87 | " 1. Create an int array with the powers of two from 1 to 1024.\n",
88 | " 1. Bonus: Can you figure out a second \"NumPy only\" way to do it? (Hint: help(function) is your friend)\n",
89 | " 1. Explain what NumPy dtype would be well-suited for (and why):\n",
90 | " * Temperatures\n",
 91 | "      * Counts of occurrences of an event\n",
92 | " * Differences in counts\n",
93 | " * Probabilities\n",
94 | " 1. Images can be stored as (R,G,B) value triples. Frequently, the color values (red, green, or blue) range from [0, 255]. What would be an ideal NumPy data type for one color value?\n",
 95 | "  1. Come up with two ways to create a (2,5,3) shaped int array filled with the value 42.\n",
96 | " 1. Generate a (5,5) array with values from a Normal distribution of mean 10 and standard deviation 1.\n",
97 | " 1. Now, try to do it another way.\n",
98 | " 1. Define a function of *N*, that returns an array with *N* values all equal to $1/N$.\n",
99 | " 1. Create a Python list with the floating-point values `[1.0, 2.0, 3.0, ..., 1E6]`. See the following note on timing.\n",
100 | " 1. Do the same with a NumPy array.\n",
101 | " 1. Time how long it takes to multiply each sequence by `np.pi`."
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "execution_count": null,
107 | "metadata": {
108 | "collapsed": true
109 | },
110 | "outputs": [],
111 | "source": []
112 | }
113 | ],
114 | "metadata": {
115 | "continuum": {
116 | "depends": [
117 | "np_intro",
118 | "ip_essentials",
119 | "ip_datatypes",
120 | "ip_containers"
121 | ],
122 | "requires": [],
123 | "tag": "np_ex_intro"
124 | },
125 | "kernelspec": {
126 | "display_name": "Python 3",
127 | "language": "python",
128 | "name": "python3"
129 | },
130 | "language_info": {
131 | "codemirror_mode": {
132 | "name": "ipython",
133 | "version": 3
134 | },
135 | "file_extension": ".py",
136 | "mimetype": "text/x-python",
137 | "name": "python",
138 | "nbconvert_exporter": "python",
139 | "pygments_lexer": "ipython3",
140 | "version": "3.4.4"
141 | }
142 | },
143 | "nbformat": 4,
144 | "nbformat_minor": 0
145 | }
146 |
--------------------------------------------------------------------------------
/2-NumPy_SciPy/NumPy/EX01_ArrayCreation_soln.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Table of Contents\n",
8 | "* [Exercise: Numpy Array Creation](#Exercise:-Numpy-Array-Creation)\n"
9 | ]
10 | },
11 | {
12 | "cell_type": "markdown",
13 | "metadata": {},
14 | "source": [
15 | "# Exercise: Numpy Array Creation"
16 | ]
17 | },
18 | {
19 | "cell_type": "code",
20 | "execution_count": null,
21 | "metadata": {
22 | "collapsed": true
23 | },
24 | "outputs": [],
25 | "source": [
26 | "import numpy as np"
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {},
32 | "source": [
33 | "IPython notebooks have two special functions to measure the time it takes to perform a single line"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": null,
39 | "metadata": {
40 | "collapsed": false
41 | },
42 | "outputs": [],
43 | "source": [
44 | "%timeit np.sqrt(np.pi)"
45 | ]
46 | },
47 | {
48 | "cell_type": "markdown",
49 | "metadata": {},
50 | "source": [
51 | "and the time it takes to perform an entire cell"
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": null,
57 | "metadata": {
58 | "collapsed": false
59 | },
60 | "outputs": [],
61 | "source": [
62 | "%%timeit\n",
63 | "import numpy as np\n",
64 | "for i in range(0,1000):\n",
65 | " np.sqrt(np.pi)"
66 | ]
67 | },
68 | {
69 | "cell_type": "markdown",
70 | "metadata": {},
71 | "source": [
72 | "A numpy array behaves like any other container `for item in array:`."
73 | ]
74 | },
75 | {
76 | "cell_type": "markdown",
77 | "metadata": {},
78 | "source": [
79 | "Unless otherwise specified, try to solve these problems using NumPy but not using raw Python.\n",
80 | "\n",
81 | " 1. Create a Python list with the ints from 1 to 10. Create a NumPy array from that list. \n",
82 | " 1. For both, add one to each element.\n",
83 | " 1. For both, multiply each element by two.\n",
84 | " 1. Create an int array of all zeros.\n",
85 | " 1. Create a float array of all zeros.\n",
86 | " 1. Create an evenly spaced grid of 100 floating point values on [-10, 10].\n",
87 | " 1. Create an int array with the powers of two from 1 to 1024.\n",
88 | " 1. Bonus: Can you figure out a second \"NumPy only\" way to do it? (Hint: help(function) is your friend)\n",
89 | " 1. Explain what NumPy dtype would be well-suited for (and why):\n",
90 | " * Temperatures\n",
91 | " * Counts of occurrences of an event\n",
92 | " * Differences in counts\n",
93 | " * Probabilities\n",
94 | " 1. Images can be stored as (R,G,B) value triples. Frequently, the color values (red, green, or blue) range from [0, 255]. What would be an ideal NumPy data type for one color value?\n",
95 | " 1. Come up with two ways to create a (2,5,3) shaped int array filled with the value 42.\n",
96 | " 1. Generate a (5,5) array with values from a Normal distribution of mean 10 and standard deviation 1.\n",
97 | " 1. Now, try to do it another way.\n",
98 | " 1. Define a function of *N*, that returns an array with *N* values all equal to $1/N$.\n",
99 | " 1. Create a Python list with the floating-point values `[1.0, 2.0, 3.0, ..., 1E6]`. See the following note on timing.\n",
100 | " 1. Do the same with a NumPy array.\n",
101 | " 1. Time how long it takes to multiply each sequence by `np.pi`."
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "execution_count": null,
107 | "metadata": {
108 | "collapsed": false
109 | },
110 | "outputs": [],
111 | "source": [
112 | "# Solution 1: \n",
113 | "# Create a Python list with the ints from 1 to 10. \n",
114 | "# Create a NumPy array from that list.\n",
115 | "list1 = list(range(1,11))\n",
116 | "array1 = np.array(list1)\n",
117 | "\n",
118 | "print(list1, array1, sep=\"\\n\")"
119 | ]
120 | },
121 | {
122 | "cell_type": "code",
123 | "execution_count": null,
124 | "metadata": {
125 | "collapsed": false
126 | },
127 | "outputs": [],
128 | "source": [
129 | "# Solution 1A: \n",
130 | "# For both, add one to each element.\n",
131 | "list2 = [x+1 for x in list1]\n",
132 | "array2 = array1 + 1\n",
133 | "\n",
134 | "print(list2, array2, sep=\"\\n\")"
135 | ]
136 | },
137 | {
138 | "cell_type": "code",
139 | "execution_count": null,
140 | "metadata": {
141 | "collapsed": false
142 | },
143 | "outputs": [],
144 | "source": [
145 | "# Solution 1B: \n",
146 | "# For both, multiply each element by two.\n",
147 | "list3 = [2*x for x in list1]\n",
148 | "array3 = array1 * 2\n",
149 | "\n",
150 | "print(list3, array3, sep=\"\\n\")"
151 | ]
152 | },
153 | {
154 | "cell_type": "code",
155 | "execution_count": null,
156 | "metadata": {
157 | "collapsed": false
158 | },
159 | "outputs": [],
160 | "source": [
161 | "# Solution 2:\n",
162 | "# Create an int array of all zeros.\n",
163 | "\n",
164 | "array = np.zeros(10, dtype=int)\n",
165 | "\n",
166 | "print(array, len(array), array.dtype, sep=\"\\n\")"
167 | ]
168 | },
169 | {
170 | "cell_type": "code",
171 | "execution_count": null,
172 | "metadata": {
173 | "collapsed": false
174 | },
175 | "outputs": [],
176 | "source": [
177 | "# Solution 3:\n",
178 | "# Create a float array of all zeros\n",
179 | "\n",
180 | "array = np.zeros(10, dtype=float)\n",
181 | "\n",
182 | "print(array, len(array), array.dtype, sep=\"\\n\")"
183 | ]
184 | },
185 | {
186 | "cell_type": "code",
187 | "execution_count": null,
188 | "metadata": {
189 | "collapsed": false
190 | },
191 | "outputs": [],
192 | "source": [
193 | "# Solution 4:\n",
194 | "# Create an evenly spaced grid of 100 floating point values on [-10, 10].\n",
195 | "\n",
196 | "array = np.linspace(-10,+10,100)\n",
197 | "\n",
198 | "print(array, len(array), array.dtype, sep=\"\\n\")"
199 | ]
200 | },
201 | {
202 | "cell_type": "code",
203 | "execution_count": null,
204 | "metadata": {
205 | "collapsed": false
206 | },
207 | "outputs": [],
208 | "source": [
209 | "# Solution 5:\n",
210 | "# Create an int array with the powers of two from 1 to 1024.\n",
211 | "\n",
212 | "array = 2**np.linspace(0,10,11, dtype=int)\n",
213 | "\n",
214 | "print(array, len(array), array.dtype, sep=\"\\n\")"
215 | ]
216 | },
217 | {
218 | "cell_type": "code",
219 | "execution_count": null,
220 | "metadata": {
221 | "collapsed": false
222 | },
223 | "outputs": [],
224 | "source": [
225 | "# Solution 5 Bonus: \n",
226 | "# Can you figure out a second \"NumPy only\" way to do it? \n",
227 | "# (Hint: help(function) is your friend)\n",
228 | "\n",
229 | "array = np.logspace(0,10,num=11,base=2, dtype=int)\n",
230 | "\n",
231 | "print(array, len(array), array.dtype, sep=\"\\n\")"
232 | ]
233 | },
234 | {
235 | "cell_type": "code",
236 | "execution_count": null,
237 | "metadata": {
238 | "collapsed": false
239 | },
240 | "outputs": [],
241 | "source": [
242 | "# Solution 6:\n",
243 | "# Explain what NumPy dtype would be well-suited for (and why):\n",
244 | "# * Temperatures\n",
245 | "# * Counts of occurrences of an event\n",
246 | "# * Differences in counts\n",
247 | "# * Probabilities\n",
248 | "\n",
249 | "temps = np.array(98.6, dtype=float)\n",
250 | "counts = np.array(range(7), dtype=int)\n",
251 | "deltas = counts - 1\n",
252 | "probs = np.array( np.random.random_sample() )\n",
253 | "\n",
254 | "print( temps, temps.dtype, sep=' ' )\n",
255 | "print( counts, counts.dtype, sep=' ' )\n",
256 | "print( deltas, deltas.dtype, sep=' ' )\n",
257 | "print( probs, probs.dtype, sep=' ' )"
258 | ]
259 | },
260 | {
261 | "cell_type": "code",
262 | "execution_count": null,
263 | "metadata": {
264 | "collapsed": false
265 | },
266 | "outputs": [],
267 | "source": [
268 | "# Solution 7:\n",
269 | "# Images can be stored as (R,G,B) value triples. \n",
270 | "# Frequently, the color values (red, green, or blue) range from [0, 255]. \n",
271 | "# What would be an ideal NumPy data type for one color value?\n",
272 | "\n",
273 | "array = 256*np.ones(2500).reshape(50, 50)\n",
274 | "f = np.vectorize(lambda x: np.int(x*np.random.random()))\n",
275 | "image = f(array)\n",
276 | "print(image[0], image.dtype, sep='\\n\\n')"
277 | ]
278 | },
279 | {
280 | "cell_type": "code",
281 | "execution_count": null,
282 | "metadata": {
283 | "collapsed": false
284 | },
285 | "outputs": [],
286 | "source": [
287 | "# Solution 7 Bonus: Plot the image\n",
288 | "import matplotlib.pyplot as plt\n",
289 | "%matplotlib inline\n",
290 | "plt.imshow(image, cmap=plt.cm.viridis)\n",
291 | "plt.colorbar()"
292 | ]
293 | },
294 | {
295 | "cell_type": "code",
296 | "execution_count": null,
297 | "metadata": {
298 | "collapsed": false
299 | },
300 | "outputs": [],
301 | "source": [
302 | "# Solution 8A: not the best way (see next cell)\n",
303 | "# Come up with two ways to create a (2,5,3) shaped\n",
304 | "# int array filled with the value 42.\n",
305 | "\n",
306 | "array1 = np.ones((2,5,3))*42\n",
307 | "print(array1, array1.shape, sep='\\n\\n')"
308 | ]
309 | },
310 | {
311 | "cell_type": "code",
312 | "execution_count": null,
313 | "metadata": {
314 | "collapsed": false
315 | },
316 | "outputs": [],
317 | "source": [
318 | "# Solution 8B: better way\n",
319 | "# Come up with two ways to create a (2,5,3) shaped\n",
320 | "# int array filled with the value 42.\n",
321 | "\n",
322 | "array2 = np.tile(42,(2,5,3))\n",
323 | "print(array2, array2.shape, sep='\\n\\n')"
324 | ]
325 | },
326 | {
327 | "cell_type": "code",
328 | "execution_count": null,
329 | "metadata": {
330 | "collapsed": false
331 | },
332 | "outputs": [],
333 | "source": [
334 | "# Solution 9A:\n",
335 | "# Generate a (5,5) array with \n",
336 | "# values from a Normal distribution \n",
337 | "# of mean 10 and standard deviation 1.\n",
338 | "\n",
339 | "array9A = np.random.normal(loc=10.0, scale=1.0, size=(5,5))\n",
340 | "print( array9A )"
341 | ]
342 | },
343 | {
344 | "cell_type": "code",
345 | "execution_count": null,
346 | "metadata": {
347 | "collapsed": false
348 | },
349 | "outputs": [],
350 | "source": [
351 | "# Solution 9B:\n",
352 | "# Now, try to do it another way.\n",
353 | "\n",
354 | "array = np.ones(25).reshape(5, 5)\n",
355 | "f = np.vectorize(lambda x: x*np.random.normal(loc=10.0, scale=1.0))\n",
356 | "array9B = f(array)\n",
357 | "print( array9B )"
358 | ]
359 | },
360 | {
361 | "cell_type": "code",
362 | "execution_count": null,
363 | "metadata": {
364 | "collapsed": false
365 | },
366 | "outputs": [],
367 | "source": [
368 | "# Solution 10:\n",
369 | "# Define a function of *N*, that returns an array with *N* values all equal to $1/N$.\n",
370 | "def one_over(N):\n",
371 | " return 1./np.linspace(1,N,N)\n",
372 | "\n",
373 | "one_over(5)"
374 | ]
375 | },
376 | {
377 | "cell_type": "code",
378 | "execution_count": null,
379 | "metadata": {
380 | "collapsed": false
381 | },
382 | "outputs": [],
383 | "source": [
384 | "%%timeit\n",
385 | "# Solution 11A: timeit\n",
386 | "# Create a Python list with the floating-point \n",
387 | "# values `[1.0, 2.0, 3.0, ..., 1E6]`.\n",
388 | "list11A = [x for x in range(1000000)]"
389 | ]
390 | },
391 | {
392 | "cell_type": "code",
393 | "execution_count": null,
394 | "metadata": {
395 | "collapsed": false
396 | },
397 | "outputs": [],
398 | "source": [
399 | "%%timeit\n",
400 | "# Solution 11B: timeit\n",
401 | "# Create a NumPy array with the floating-point \n",
402 | "# values `[1.0, 2.0, 3.0, ..., 1E6]`.\n",
403 | "array11B = np.arange(1000000)"
404 | ]
405 | },
406 | {
407 | "cell_type": "code",
408 | "execution_count": null,
409 | "metadata": {
410 | "collapsed": true
411 | },
412 | "outputs": [],
413 | "source": [
414 | "# recreate because %%timeit weirdness\n",
415 | "list11A = [x for x in range(1000000)]\n",
416 | "array11B = np.arange(1000000)"
417 | ]
418 | },
419 | {
420 | "cell_type": "code",
421 | "execution_count": null,
422 | "metadata": {
423 | "collapsed": false
424 | },
425 | "outputs": [],
426 | "source": [
427 | "%%timeit\n",
428 | "# Solution 11C: timeit\n",
429 | "# Time how long it takes to multiply each sequence by `np.pi`.\n",
430 | "list11C = [x*np.pi for x in list11A]"
431 | ]
432 | },
433 | {
434 | "cell_type": "code",
435 | "execution_count": null,
436 | "metadata": {
437 | "collapsed": false
438 | },
439 | "outputs": [],
440 | "source": [
441 | "%%timeit\n",
442 | "# Solution 11C: timeit\n",
443 | "# Time how long it takes to multiply each sequence by `np.pi`.\n",
444 | "array11C = np.pi*array11B"
445 | ]
446 | }
447 | ],
448 | "metadata": {
449 | "continuum": {
450 | "depends": [
451 | "np_intro",
452 | "ip_essentials",
453 | "ip_datatypes",
454 | "ip_containers"
455 | ],
456 | "requires": [],
457 | "tag": "np_ex_intro_soln"
458 | },
459 | "kernelspec": {
460 | "display_name": "Python 3",
461 | "language": "python",
462 | "name": "python3"
463 | },
464 | "language_info": {
465 | "codemirror_mode": {
466 | "name": "ipython",
467 | "version": 3
468 | },
469 | "file_extension": ".py",
470 | "mimetype": "text/x-python",
471 | "name": "python",
472 | "nbconvert_exporter": "python",
473 | "pygments_lexer": "ipython3",
474 | "version": "3.4.4"
475 | }
476 | },
477 | "nbformat": 4,
478 | "nbformat_minor": 0
479 | }
480 |
--------------------------------------------------------------------------------
/2-NumPy_SciPy/NumPy/EX02_IndexingSlicing.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Table of Contents\n",
8 | "* [Exercise: Numpy Indexing and Slicing](#Exercise:-Numpy-Indexing-and-Slicing)\n",
9 | "\t* [Part 1: Slicing](#Part-1:-Slicing)\n",
10 | "\t* [Part 2: Indexing and Assignment](#Part-2:-Indexing-and-Assignment)\n",
11 | "\t* [Part 3: Shift an Array](#Part-3:-Shift-an-Array)\n",
12 | "\t* [Part 4: Element Replacement](#Part-4:-Element-Replacement)\n",
13 | "* [Exercise: Optional Parts](#Exercise:-Optional-Parts)\n",
14 | "\t* [Optional Part 5: Replacing Rows](#Optional-Part-5:-Replacing-Rows)\n",
15 | "\t* [Optional Part 6: Replacing Columns](#Optional-Part-6:-Replacing-Columns)\n"
16 | ]
17 | },
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {},
21 | "source": [
22 | "# Exercise: Numpy Indexing and Slicing"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": null,
28 | "metadata": {
29 | "collapsed": true
30 | },
31 | "outputs": [],
32 | "source": [
33 | "import numpy as np"
34 | ]
35 | },
36 | {
37 | "cell_type": "markdown",
38 | "metadata": {},
39 | "source": [
40 | "## Part 1: Slicing"
41 | ]
42 | },
43 | {
44 | "cell_type": "markdown",
45 | "metadata": {},
46 | "source": [
47 | "Let a = np.arange(200)\n",
48 | " 1. access the last element of the array\n",
49 | " 1. slice all but the last element of the array\n",
50 | " 1. slice the last 5 elements of the array\n",
51 | " 1. slice the first 5 elements of the array"
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": null,
57 | "metadata": {
58 | "collapsed": true
59 | },
60 | "outputs": [],
61 | "source": [
62 | "# Solution 1:\n"
63 | ]
64 | },
65 | {
66 | "cell_type": "markdown",
67 | "metadata": {},
68 | "source": [
69 | "## Part 2: Indexing and Assignment"
70 | ]
71 | },
72 | {
73 | "cell_type": "markdown",
74 | "metadata": {},
75 | "source": [
76 | "Create a sample array with shape (3,4).\n",
77 | " 1. Using single item assignments, place your favorite number in the four corners.\n",
78 | " 1. Make the first column equal to -1.\n",
79 | " 1. Make the last row equal to 99.\n",
80 | " 1. Make a 2x2 block in the bottom-center contain the values .25, .5, .75, and 1.0\n",
81 | " 1. Replace a row with the values: 2, 4, 8, and 16."
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": null,
87 | "metadata": {
88 | "collapsed": true
89 | },
90 | "outputs": [],
91 | "source": [
92 | "# Solution 2:\n"
93 | ]
94 | },
95 | {
96 | "cell_type": "markdown",
97 | "metadata": {},
98 | "source": [
99 | "## Part 3: Shift an Array"
100 | ]
101 | },
102 | {
103 | "cell_type": "markdown",
104 | "metadata": {},
105 | "source": [
106 | "We used slicing to do a right shift on a 1-D array. Do a left shift on a 1-D array."
107 | ]
108 | },
109 | {
110 | "cell_type": "code",
111 | "execution_count": null,
112 | "metadata": {
113 | "collapsed": true
114 | },
115 | "outputs": [],
116 | "source": [
117 | "# Solution 3:\n"
118 | ]
119 | },
120 | {
121 | "cell_type": "markdown",
122 | "metadata": {},
123 | "source": [
124 | "## Part 4: Element Replacement"
125 | ]
126 | },
127 | {
128 | "cell_type": "markdown",
129 | "metadata": {},
130 | "source": [
131 | "Can you replace every element of an array with a particular value (say, 42.0)?"
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "execution_count": null,
137 | "metadata": {
138 | "collapsed": true
139 | },
140 | "outputs": [],
141 | "source": [
142 | "# Solution 4:\n"
143 | ]
144 | },
145 | {
146 | "cell_type": "markdown",
147 | "metadata": {},
148 | "source": [
149 | "# Exercise: Optional Parts"
150 | ]
151 | },
152 | {
153 | "cell_type": "markdown",
154 | "metadata": {},
155 | "source": [
156 | "These two are more difficult. We won't answer them now (we will revisit them in a bit), but see if you can figure them out."
157 | ]
158 | },
159 | {
160 | "cell_type": "markdown",
161 | "metadata": {},
162 | "source": [
163 | "## Optional Part 5: Replacing Rows"
164 | ]
165 | },
166 | {
167 | "cell_type": "markdown",
168 | "metadata": {},
169 | "source": [
170 | "Can you replace every row with a particular row (for example, 2, 4, 8, 16)?"
171 | ]
172 | },
173 | {
174 | "cell_type": "code",
175 | "execution_count": null,
176 | "metadata": {
177 | "collapsed": true
178 | },
179 | "outputs": [],
180 | "source": [
181 | "# Solution 5:\n"
182 | ]
183 | },
184 | {
185 | "cell_type": "markdown",
186 | "metadata": {},
187 | "source": [
188 | "## Optional Part 6: Replacing Columns"
189 | ]
190 | },
191 | {
192 | "cell_type": "markdown",
193 | "metadata": {},
194 | "source": [
195 | "[Don't strain yourself] Can you replace every column with a particular column?"
196 | ]
197 | },
198 | {
199 | "cell_type": "code",
200 | "execution_count": null,
201 | "metadata": {
202 | "collapsed": true
203 | },
204 | "outputs": [],
205 | "source": [
206 | "# Solution 6:\n"
207 | ]
208 | }
209 | ],
210 | "metadata": {
211 | "continuum": {
212 | "depends": [
213 | "np_slicing",
214 | "np_intro",
215 | "ip_essentials",
216 | "ip_datatypes"
217 | ],
218 | "requires": [],
219 | "tag": "np_ex_slicing"
220 | },
221 | "kernelspec": {
222 | "display_name": "Python 3",
223 | "language": "python",
224 | "name": "python3"
225 | },
226 | "language_info": {
227 | "codemirror_mode": {
228 | "name": "ipython",
229 | "version": 3
230 | },
231 | "file_extension": ".py",
232 | "mimetype": "text/x-python",
233 | "name": "python",
234 | "nbconvert_exporter": "python",
235 | "pygments_lexer": "ipython3",
236 | "version": "3.4.4"
237 | }
238 | },
239 | "nbformat": 4,
240 | "nbformat_minor": 0
241 | }
242 |
--------------------------------------------------------------------------------
/2-NumPy_SciPy/NumPy/EX02_IndexingSlicing_soln.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Table of Contents\n",
8 | "* [Exercise: Numpy Indexing and Slicing](#Exercise:-Numpy-Indexing-and-Slicing)\n",
9 | "\t* [Part 1: Slicing](#Part-1:-Slicing)\n",
10 | "\t* [Part 2: Indexing and Assignment](#Part-2:-Indexing-and-Assignment)\n",
11 | "\t* [Part 3: Shift an Array](#Part-3:-Shift-an-Array)\n",
12 | "\t* [Part 4: Element Replacement](#Part-4:-Element-Replacement)\n",
13 | "* [Exercise: Optional Parts](#Exercise:-Optional-Parts)\n",
14 | "\t* [Optional Part 5: Replacing Rows](#Optional-Part-5:-Replacing-Rows)\n",
15 | "\t* [Optional Part 6: Replacing Columns](#Optional-Part-6:-Replacing-Columns)\n"
16 | ]
17 | },
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {},
21 | "source": [
22 | "# Exercise: Numpy Indexing and Slicing"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": null,
28 | "metadata": {
29 | "collapsed": true
30 | },
31 | "outputs": [],
32 | "source": [
33 | "import numpy as np"
34 | ]
35 | },
36 | {
37 | "cell_type": "markdown",
38 | "metadata": {},
39 | "source": [
40 | "## Part 1: Slicing"
41 | ]
42 | },
43 | {
44 | "cell_type": "markdown",
45 | "metadata": {},
46 | "source": [
47 | "Let a = np.arange(200)\n",
48 | " 1. access the last element of the array\n",
49 | " 1. slice all but the last element of the array\n",
50 | " 1. slice the last 5 elements of the array\n",
51 | " 1. slice the first 5 elements of the array"
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": null,
57 | "metadata": {
58 | "collapsed": false
59 | },
60 | "outputs": [],
61 | "source": [
62 | "# Solution 1:\n",
63 | "\n",
64 | "a = np.arange(200)"
65 | ]
66 | },
67 | {
68 | "cell_type": "code",
69 | "execution_count": null,
70 | "metadata": {
71 | "collapsed": false
72 | },
73 | "outputs": [],
74 | "source": [
75 | "# Solution 1.1: access the last element\n",
76 | "a[-1]"
77 | ]
78 | },
79 | {
80 | "cell_type": "code",
81 | "execution_count": null,
82 | "metadata": {
83 | "collapsed": false
84 | },
85 | "outputs": [],
86 | "source": [
87 | "# Solution 1.2: slice all but last element\n",
88 | "a[:-1]"
89 | ]
90 | },
91 | {
92 | "cell_type": "code",
93 | "execution_count": null,
94 | "metadata": {
95 | "collapsed": false
96 | },
97 | "outputs": [],
98 | "source": [
99 | "# Solution 1.3: slice the last 5 elements\n",
100 | "a[-5:]"
101 | ]
102 | },
103 | {
104 | "cell_type": "code",
105 | "execution_count": null,
106 | "metadata": {
107 | "collapsed": false
108 | },
109 | "outputs": [],
110 | "source": [
111 | "# Solution 1.4: slice the first 5 elements\n",
112 | "a[:5]"
113 | ]
114 | },
115 | {
116 | "cell_type": "markdown",
117 | "metadata": {},
118 | "source": [
119 | "## Part 2: Indexing and Assignment"
120 | ]
121 | },
122 | {
123 | "cell_type": "markdown",
124 | "metadata": {},
125 | "source": [
126 | "Create a sample array with shape (3,4).\n",
127 | " 1. Using single item assignments, place your favorite number in the four corners.\n",
128 | " 1. Make the first column equal to -1.\n",
129 | " 1. Make the last row equal to 99.\n",
130 | " 1. Make a 2x2 block in the bottom-center contain the values .25, .5, .75, and 1.0\n",
131 | " 1. Replace a row with the values: 2, 4, 8, and 16."
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "execution_count": null,
137 | "metadata": {
138 | "collapsed": false
139 | },
140 | "outputs": [],
141 | "source": [
142 | "# Solution 2:\n",
143 | "\n",
144 | "a = np.zeros(12).reshape(3,4)\n",
145 | "a"
146 | ]
147 | },
148 | {
149 | "cell_type": "code",
150 | "execution_count": null,
151 | "metadata": {
152 | "collapsed": false
153 | },
154 | "outputs": [],
155 | "source": [
156 | "# Solution 2.1: favorite number in four corners\n",
157 | "a[0,0] = 42\n",
158 | "a[0,-1] = 42\n",
159 | "a[-1,0] = 42\n",
160 | "a[-1,-1] = 42\n",
161 | "a"
162 | ]
163 | },
164 | {
165 | "cell_type": "code",
166 | "execution_count": null,
167 | "metadata": {
168 | "collapsed": false
169 | },
170 | "outputs": [],
171 | "source": [
172 | "# Solution 2.2: first column equal to -1\n",
173 | "a[:,0] = -1\n",
174 | "a"
175 | ]
176 | },
177 | {
178 | "cell_type": "code",
179 | "execution_count": null,
180 | "metadata": {
181 | "collapsed": false
182 | },
183 | "outputs": [],
184 | "source": [
185 | "# Solution 2.3: last row equal to 99\n",
186 | "a[-1,:] = 99\n",
187 | "a"
188 | ]
189 | },
190 | {
191 | "cell_type": "code",
192 | "execution_count": null,
193 | "metadata": {
194 | "collapsed": false
195 | },
196 | "outputs": [],
197 | "source": [
198 | "# Solution 2.4: make a 2x2 block in bottom-center with values .25, .5, .75, and 1.0\n",
199 | "a[1:,1:3] = [[0.25,0.5],[0.75,1.0]]\n",
200 | "a"
201 | ]
202 | },
203 | {
204 | "cell_type": "code",
205 | "execution_count": null,
206 | "metadata": {
207 | "collapsed": false
208 | },
209 | "outputs": [],
210 | "source": [
211 | "# Solution 2.5: Replace a row with the values: 2, 4, 8, and 16.\n",
212 | "a[0,:] = [2,4,8,16]\n",
213 | "a"
214 | ]
215 | },
216 | {
217 | "cell_type": "markdown",
218 | "metadata": {},
219 | "source": [
220 | "## Part 3: Shift an Array"
221 | ]
222 | },
223 | {
224 | "cell_type": "markdown",
225 | "metadata": {},
226 | "source": [
227 | "We used slicing to do a right shift on a 1-D array. Do a left shift on a 1-D array."
228 | ]
229 | },
230 | {
231 | "cell_type": "code",
232 | "execution_count": null,
233 | "metadata": {
234 | "collapsed": false
235 | },
236 | "outputs": [],
237 | "source": [
238 | "# Solution 3: don't use slicing for everything\n",
239 | "a = np.array([1,2,3,4])\n",
240 | "print(a)\n",
241 | "\n",
242 | "b = np.roll(a,-1)\n",
243 | "print(b)"
244 | ]
245 | },
246 | {
247 | "cell_type": "markdown",
248 | "metadata": {},
249 | "source": [
250 | "## Part 4: Element Replacement"
251 | ]
252 | },
253 | {
254 | "cell_type": "markdown",
255 | "metadata": {},
256 | "source": [
257 | "Can you replace every element of an array with a particular value (say, 42.0)?"
258 | ]
259 | },
260 | {
261 | "cell_type": "code",
262 | "execution_count": null,
263 | "metadata": {
264 | "collapsed": false
265 | },
266 | "outputs": [],
267 | "source": [
268 | "# Solution 4:\n",
269 | "b[:] = 42\n",
270 | "b"
271 | ]
272 | },
273 | {
274 | "cell_type": "markdown",
275 | "metadata": {},
276 | "source": [
277 | "# Exercise: Optional Parts"
278 | ]
279 | },
280 | {
281 | "cell_type": "markdown",
282 | "metadata": {},
283 | "source": [
284 | "These two are more difficult. We won't answer them now (we will revisit them in a bit), but see if you can figure them out."
285 | ]
286 | },
287 | {
288 | "cell_type": "markdown",
289 | "metadata": {},
290 | "source": [
291 | "## Optional Part 5: Replacing Rows"
292 | ]
293 | },
294 | {
295 | "cell_type": "markdown",
296 | "metadata": {},
297 | "source": [
298 | "Can you replace every row with a particular row (for example, 2, 4, 8, 16)?"
299 | ]
300 | },
301 | {
302 | "cell_type": "code",
303 | "execution_count": null,
304 | "metadata": {
305 | "collapsed": false
306 | },
307 | "outputs": [],
308 | "source": [
309 | "# Solution 5:\n",
310 | "a = np.zeros(12).reshape(3,4)\n",
311 | "a"
312 | ]
313 | },
314 | {
315 | "cell_type": "code",
316 | "execution_count": null,
317 | "metadata": {
318 | "collapsed": false
319 | },
320 | "outputs": [],
321 | "source": [
322 | "a[:,:] = [2,4,8,16]\n",
323 | "a"
324 | ]
325 | },
326 | {
327 | "cell_type": "markdown",
328 | "metadata": {},
329 | "source": [
330 | "## Optional Part 6: Replacing Columns"
331 | ]
332 | },
333 | {
334 | "cell_type": "markdown",
335 | "metadata": {},
336 | "source": [
337 | "[Don't strain yourself] Can you replace every column with a particular column?"
338 | ]
339 | },
340 | {
341 | "cell_type": "code",
342 | "execution_count": null,
343 | "metadata": {
344 | "collapsed": false
345 | },
346 | "outputs": [],
347 | "source": [
348 | "# Solution 6: prep\n",
349 | "a = np.arange(12).reshape(3,4)\n",
350 | "a"
351 | ]
352 | },
353 | {
354 | "cell_type": "code",
355 | "execution_count": null,
356 | "metadata": {
357 | "collapsed": false
358 | },
359 | "outputs": [],
360 | "source": [
361 | "# Solution 6: soln\n",
362 | "for col in range(a.shape[1]):\n",
363 | " a[:, col] = a[:,1]\n",
364 | "\n",
365 | "print(a)"
366 | ]
367 | }
368 | ],
369 | "metadata": {
370 | "continuum": {
371 | "depends": [
372 | "np_slicing",
373 | "np_intro",
374 | "np_ex_slicing",
375 | "ip_essentials",
376 | "ip_datatypes"
377 | ],
378 | "requires": [],
379 | "tag": "np_ex_slicing_soln"
380 | },
381 | "kernelspec": {
382 | "display_name": "Python 3",
383 | "language": "python",
384 | "name": "python3"
385 | },
386 | "language_info": {
387 | "codemirror_mode": {
388 | "name": "ipython",
389 | "version": 3
390 | },
391 | "file_extension": ".py",
392 | "mimetype": "text/x-python",
393 | "name": "python",
394 | "nbconvert_exporter": "python",
395 | "pygments_lexer": "ipython3",
396 | "version": "3.4.4"
397 | }
398 | },
399 | "nbformat": 4,
400 | "nbformat_minor": 0
401 | }
402 |
--------------------------------------------------------------------------------
/2-NumPy_SciPy/NumPy/EX03_VectorizedOperations.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Table of Contents\n",
8 | "* [Exercise: Numpy Vectorized Operations](#Exercise:-Numpy-Vectorized-Operations)\n",
9 | "\t* [Part 1: Timeit](#Part-1:-Timeit)\n",
10 | "\t* [Part 2: Ufuncs and Plotting](#Part-2:-Ufuncs-and-Plotting)\n",
11 | "\t\t* [Part 2.1](#Part-2.1)\n",
12 | "\t\t* [Part 2.2](#Part-2.2)\n",
13 | "\t* [Part 3: All Disappear](#Part-3:-All-Disappear)\n",
14 | "\t* [Part 4: Wallis Formula](#Part-4:-Wallis-Formula)\n",
15 | "\t\t* [Part 4.1](#Part-4.1)\n",
16 | "\t\t* [Part 4.2](#Part-4.2)\n",
17 | "\t\t* [Part 4.3](#Part-4.3)\n"
18 | ]
19 | },
20 | {
21 | "cell_type": "markdown",
22 | "metadata": {},
23 | "source": [
24 | "# Exercise: Numpy Vectorized Operations"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": null,
30 | "metadata": {
31 | "collapsed": false
32 | },
33 | "outputs": [],
34 | "source": [
35 | "import numpy as np"
36 | ]
37 | },
38 | {
39 | "cell_type": "markdown",
40 | "metadata": {},
41 | "source": [
42 | "## Part 1: Timeit"
43 | ]
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {},
48 | "source": [
49 | "Create a Python list with the floating-point values `[1.0, 2.0, 3.0, ..., 1E6]` and with Numpy. \n",
50 | "\n",
51 | "Time how long it takes to multiply each sequence by `np.pi`."
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": null,
57 | "metadata": {
58 | "collapsed": false
59 | },
60 | "outputs": [],
61 | "source": [
62 | "# Solution 1:\n"
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": null,
68 | "metadata": {
69 | "collapsed": false
70 | },
71 | "outputs": [],
72 | "source": [
73 | "%%timeit"
74 | ]
75 | },
76 | {
77 | "cell_type": "markdown",
78 | "metadata": {},
79 | "source": [
80 | "## Part 2: Ufuncs and Plotting"
81 | ]
82 | },
83 | {
84 | "cell_type": "code",
85 | "execution_count": null,
86 | "metadata": {
87 | "collapsed": true
88 | },
89 | "outputs": [],
90 | "source": [
91 | "import matplotlib.pyplot as plt\n",
92 | "%matplotlib inline"
93 | ]
94 | },
95 | {
96 | "cell_type": "markdown",
97 | "metadata": {},
98 | "source": [
99 | "Use numpy and matplotlib for the following:\n",
100 | "* **numpy** allows us to easily compute expressions like\n",
101 | "> $y=x^2$ using vectorized expression `y = x**2` where x is a numpy array\n",
102 | "\n",
103 | "* **matplotlib** lets us graph xy-values very quickly using: \n",
104 | "> `plt.plot(x, y)` where `x = [x1, x2, x3, ...]`, and `y = [y1, y2, y3, ...]` \n",
105 | "\n",
106 | "* Repeated `plt.plot` commands will go to the same graph. "
107 | ]
108 | },
109 | {
110 | "cell_type": "markdown",
111 | "metadata": {},
112 | "source": [
113 | "### Part 2.1"
114 | ]
115 | },
116 | {
117 | "cell_type": "markdown",
118 | "metadata": {},
119 | "source": [
120 | "Graph the following functions on the interval [-2.0, 2.0):\n",
121 | "\n",
122 | " * $y=x + 1$\n",
123 | " * $y=e^x$\n",
124 | " * $y=cos(x^2) + sin(x^2)$\n",
125 | " * $y=cos(x)^2 + sin(x)^2$"
126 | ]
127 | },
128 | {
129 | "cell_type": "code",
130 | "execution_count": null,
131 | "metadata": {
132 | "collapsed": false
133 | },
134 | "outputs": [],
135 | "source": [
136 | "# Solution 2.1:\n"
137 | ]
138 | },
139 | {
140 | "cell_type": "markdown",
141 | "metadata": {},
142 | "source": [
143 | "### Part 2.2"
144 | ]
145 | },
146 | {
147 | "cell_type": "markdown",
148 | "metadata": {},
149 | "source": [
150 | "Graph a parametric equation over $t$ on $[0,2\\pi]$ defined by:\n",
151 | " \n",
152 | " * $y(t) = sin(t)$\n",
153 | " * $x(t) = cos(t)$\n",
154 | "\n",
155 | "You may want to issue a matplotlib statement: \n",
156 | "> `plt.axis(\"equal\")` \n",
157 | "\n",
158 | "to ensure you don't get a skewed perspective on your result."
159 | ]
160 | },
161 | {
162 | "cell_type": "code",
163 | "execution_count": null,
164 | "metadata": {
165 | "collapsed": false
166 | },
167 | "outputs": [],
168 | "source": [
169 | "# Solution 2.2\n"
170 | ]
171 | },
172 | {
173 | "cell_type": "markdown",
174 | "metadata": {},
175 | "source": [
176 | "## Part 3: All Disappear"
177 | ]
178 | },
179 | {
180 | "cell_type": "markdown",
181 | "metadata": {},
182 | "source": [
183 | "A. Suppose, *poof*, `arr.all()` (and `np.all()`) just disappeared. Write a function `myAll` that replaces them.\n",
184 | "\n",
185 | "B. Define a function `noneTrue` that returns `True` when no element of an array is `True` and `False` otherwise."
186 | ]
187 | },
188 | {
189 | "cell_type": "code",
190 | "execution_count": null,
191 | "metadata": {
192 | "collapsed": false
193 | },
194 | "outputs": [],
195 | "source": [
196 | "# Reminder\n",
197 | "bool(1)"
198 | ]
199 | },
200 | {
201 | "cell_type": "code",
202 | "execution_count": null,
203 | "metadata": {
204 | "collapsed": false
205 | },
206 | "outputs": [],
207 | "source": [
208 | "# Solution 3A:\n"
209 | ]
210 | },
211 | {
212 | "cell_type": "code",
213 | "execution_count": null,
214 | "metadata": {
215 | "collapsed": false
216 | },
217 | "outputs": [],
218 | "source": [
219 | "# Solution 3B:\n"
220 | ]
221 | },
222 | {
223 | "cell_type": "markdown",
224 | "metadata": {},
225 | "source": [
226 | "## Part 4: Wallis Formula"
227 | ]
228 | },
229 | {
230 | "cell_type": "markdown",
231 | "metadata": {},
232 | "source": [
233 | "The value of $\\pi$ can be computed with the Wallis formula, developed in 1655.\n",
234 | "\n",
235 | "$$\\pi=2\\prod_{i=1}^{\\infty}\\frac{4i^2}{4i^2-1}$$"
236 | ]
237 | },
238 | {
239 | "cell_type": "markdown",
240 | "metadata": {},
241 | "source": [
242 | "### Part 4.1"
243 | ]
244 | },
245 | {
246 | "cell_type": "markdown",
247 | "metadata": {},
248 | "source": [
249 | "Implement this method using native Python"
250 | ]
251 | },
252 | {
253 | "cell_type": "code",
254 | "execution_count": null,
255 | "metadata": {
256 | "collapsed": false,
257 | "raises": "IndentationError"
258 | },
259 | "outputs": [],
260 | "source": [
261 | "# Solution 4.1\n",
262 | "def py_wallis(n):\n",
263 | " # your solution goes here\n",
264 | "\n",
265 | "print(py_wallis(100000))"
266 | ]
267 | },
268 | {
269 | "cell_type": "markdown",
270 | "metadata": {},
271 | "source": [
272 | "### Part 4.2"
273 | ]
274 | },
275 | {
276 | "cell_type": "markdown",
277 | "metadata": {},
278 | "source": [
279 | "Implement this method using Numpy vectorization."
280 | ]
281 | },
282 | {
283 | "cell_type": "code",
284 | "execution_count": null,
285 | "metadata": {
286 | "collapsed": false,
287 | "raises": "IndentationError"
288 | },
289 | "outputs": [],
290 | "source": [
291 | "# Solution 4.2\n",
292 | "def np_wallis(n):\n",
293 | " # your solution goes here\n",
294 | "\n",
295 | "print(np_wallis(100000))"
296 | ]
297 | },
298 | {
299 | "cell_type": "markdown",
300 | "metadata": {},
301 | "source": [
302 | "### Part 4.3"
303 | ]
304 | },
305 | {
306 | "cell_type": "markdown",
307 | "metadata": {},
308 | "source": [
309 | "How much faster is the Numpy implementation?"
310 | ]
311 | },
312 | {
313 | "cell_type": "code",
314 | "execution_count": null,
315 | "metadata": {
316 | "collapsed": true
317 | },
318 | "outputs": [],
319 | "source": [
320 | "# Solution 4.3\n"
321 | ]
322 | }
323 | ],
324 | "metadata": {
325 | "celltoolbar": "Edit Metadata",
326 | "continuum": {
327 | "depends": [
328 | "np_vectorization",
329 | "np_intro",
330 | "ip_essentials",
331 | "ip_datatypes",
332 | "ip_containers",
333 | "ip_functions"
334 | ],
335 | "requires": [],
336 | "tag": "np_ex_vectorization"
337 | },
338 | "kernelspec": {
339 | "display_name": "Python 2",
340 | "language": "python",
341 | "name": "python2"
342 | },
343 | "language_info": {
344 | "codemirror_mode": {
345 | "name": "ipython",
346 | "version": 2
347 | },
348 | "file_extension": ".py",
349 | "mimetype": "text/x-python",
350 | "name": "python",
351 | "nbconvert_exporter": "python",
352 | "pygments_lexer": "ipython2",
353 | "version": "2.7.11"
354 | }
355 | },
356 | "nbformat": 4,
357 | "nbformat_minor": 0
358 | }
359 |
--------------------------------------------------------------------------------
/2-NumPy_SciPy/NumPy/EX03_VectorizedOperations_soln.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Table of Contents\n",
8 | "* [Exercise: Numpy Vectorized Operations](#Exercise:-Numpy-Vectorized-Operations)\n",
9 | "\t* [Part 1: Timeit](#Part-1:-Timeit)\n",
10 | "\t* [Part 2: Ufuncs and Plotting](#Part-2:-Ufuncs-and-Plotting)\n",
11 | "\t\t* [Part 2.1](#Part-2.1)\n",
12 | "\t\t* [Part 2.2](#Part-2.2)\n",
13 | "\t* [Part 3: All Disappear](#Part-3:-All-Disappear)\n",
14 | "\t* [Part 4: Wallis Formula](#Part-4:-Wallis-Formula)\n",
15 | "\t\t* [Part 4.1](#Part-4.1)\n",
16 | "\t\t* [Part 4.2](#Part-4.2)\n",
17 | "\t\t* [Part 4.3](#Part-4.3)\n"
18 | ]
19 | },
20 | {
21 | "cell_type": "markdown",
22 | "metadata": {},
23 | "source": [
24 | "# Exercise: Numpy Vectorized Operations"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": null,
30 | "metadata": {
31 | "collapsed": false
32 | },
33 | "outputs": [],
34 | "source": [
35 | "import numpy as np"
36 | ]
37 | },
38 | {
39 | "cell_type": "markdown",
40 | "metadata": {},
41 | "source": [
42 | "## Part 1: Timeit"
43 | ]
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {},
48 | "source": [
49 | "Create a Python list containing the floating-point values `[1.0, 2.0, 3.0, ..., 1E6]`, and create the equivalent Numpy array. \n",
50 | "\n",
51 | "Time how long it takes to multiply each sequence by `np.pi`."
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": null,
57 | "metadata": {
58 | "collapsed": false
59 | },
60 | "outputs": [],
61 | "source": [
62 | "# Solution 1: the prompt asks for floats 1.0 ... 1E6, as a list and as an array\n",
63 | "\n",
64 | "list1 = [float(x) for x in range(1, 1000001)]\n",
65 | "array1 = np.arange(1.0, 1000001.0)"
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "execution_count": null,
71 | "metadata": {
72 | "collapsed": false
73 | },
74 | "outputs": [],
75 | "source": [
76 | "%%timeit\n",
77 | "list2 = [x*np.pi for x in list1]"
78 | ]
79 | },
80 | {
81 | "cell_type": "code",
82 | "execution_count": null,
83 | "metadata": {
84 | "collapsed": false
85 | },
86 | "outputs": [],
87 | "source": [
88 | "%%timeit\n",
89 | "array2 = np.pi*array1"
90 | ]
91 | },
92 | {
93 | "cell_type": "markdown",
94 | "metadata": {},
95 | "source": [
96 | "## Part 2: Ufuncs and Plotting"
97 | ]
98 | },
99 | {
100 | "cell_type": "code",
101 | "execution_count": null,
102 | "metadata": {
103 | "collapsed": true
104 | },
105 | "outputs": [],
106 | "source": [
107 | "import matplotlib.pyplot as plt\n",
108 | "%matplotlib inline"
109 | ]
110 | },
111 | {
112 | "cell_type": "markdown",
113 | "metadata": {},
114 | "source": [
115 | "Use numpy and matplotlib for the following:\n",
116 | "* **numpy** allows us to easily compute expressions like\n",
117 | "> $y=x^2$ using vectorized expression `y = x**2` where x is a numpy array\n",
118 | "\n",
119 | "* **matplotlib** lets us graph xy-values very quickly using: \n",
120 | "> `plt.plot(x, y)` where `x = [x1, x2, x3, ...]`, and `y = [y1, y2, y3, ...]` \n",
121 | "\n",
122 | "* Repeated `plt.plot` commands will go to the same graph. "
123 | ]
124 | },
125 | {
126 | "cell_type": "markdown",
127 | "metadata": {},
128 | "source": [
129 | "### Part 2.1"
130 | ]
131 | },
132 | {
133 | "cell_type": "markdown",
134 | "metadata": {},
135 | "source": [
136 | "Graph the following functions on the interval [-2.0, 2.0):\n",
137 | "\n",
138 | " * $y=x + 1$\n",
139 | " * $y=e^x$\n",
140 | " * $y=cos(x^2) + sin(x^2)$\n",
141 | " * $y=cos(x)^2 + sin(x)^2$"
142 | ]
143 | },
144 | {
145 | "cell_type": "code",
146 | "execution_count": null,
147 | "metadata": {
148 | "collapsed": false
149 | },
150 | "outputs": [],
151 | "source": [
152 | "# Solution 2.1:\n",
153 | "x = np.arange(-2,2, 0.01)\n",
154 | "\n",
155 | "y1 = x + 1\n",
156 | "y2 = np.exp(x)\n",
157 | "y3 = np.cos(x**2) + np.sin(x**2)\n",
158 | "y4 = np.cos(x)**2 + np.sin(x)**2\n",
159 | "\n",
160 | "plt.plot(x,y1,\n",
161 | " x,y2,\n",
162 | " x,y3,\n",
163 | " x,y4)"
164 | ]
165 | },
166 | {
167 | "cell_type": "markdown",
168 | "metadata": {},
169 | "source": [
170 | "### Part 2.2"
171 | ]
172 | },
173 | {
174 | "cell_type": "markdown",
175 | "metadata": {},
176 | "source": [
177 | "Graph a parametric equation over $t$ on $[0,2\\pi]$ defined by:\n",
178 | " \n",
179 | " * $y(t) = sin(t)$\n",
180 | " * $x(t) = cos(t)$\n",
181 | "\n",
182 | "You may want to issue a matplotlib statement: \n",
183 | "> `plt.axis(\"equal\")` \n",
184 | "\n",
185 | "to ensure you don't get a skewed perspective on your result."
186 | ]
187 | },
188 | {
189 | "cell_type": "code",
190 | "execution_count": null,
191 | "metadata": {
192 | "collapsed": false
193 | },
194 | "outputs": [],
195 | "source": [
196 | "# Solution 2.2: t on [0, 2*pi] per the prompt; equal aspect avoids a skewed circle\n",
197 | "t = np.linspace(0, 2*np.pi, 1000)\n",
198 | "y = np.sin(t)\n",
199 | "x = np.cos(t)\n",
200 | "plt.plot(x,y)\nplt.axis(\"equal\")"
201 | ]
202 | },
203 | {
204 | "cell_type": "markdown",
205 | "metadata": {},
206 | "source": [
207 | "## Part 3: All Disappear"
208 | ]
209 | },
210 | {
211 | "cell_type": "markdown",
212 | "metadata": {},
213 | "source": [
214 | "A. Suppose, *poof*, `arr.all()` (and `np.all()`) just disappeared. Write a function `myAll` that replaces them.\n",
215 | "\n",
216 | "B. Define a function `noneTrue` that returns `True` when no element of an array is `True` and `False` otherwise."
217 | ]
218 | },
219 | {
220 | "cell_type": "code",
221 | "execution_count": null,
222 | "metadata": {
223 | "collapsed": false
224 | },
225 | "outputs": [],
226 | "source": [
227 | "# Reminder\n",
228 | "bool(1)"
229 | ]
230 | },
231 | {
232 | "cell_type": "code",
233 | "execution_count": null,
234 | "metadata": {
235 | "collapsed": false
236 | },
237 | "outputs": [],
238 | "source": [
239 | "# Solution 3A: all_true == not_any_false\n",
240 | "\n",
241 | "def not_any_false(x):\n",
242 | " return not any(x==False)\n",
243 | "\n",
244 | "x1 = np.array([1,1])\n",
245 | "x2 = np.array([0,1])\n",
246 | "x3 = np.array([0,0])\n",
247 | "\n",
248 | "print( not_any_false(x1) )\n",
249 | "print( not_any_false(x2) )\n",
250 | "print( not_any_false(x3) )"
251 | ]
252 | },
253 | {
254 | "cell_type": "code",
255 | "execution_count": null,
256 | "metadata": {
257 | "collapsed": false
258 | },
259 | "outputs": [],
260 | "source": [
261 | "# Solution 3B: not_any_true\n",
262 | "\n",
263 | "def not_any_true(x):\n",
264 | " return not any(x==True)\n",
265 | "\n",
266 | "x1 = np.array([1,1])\n",
267 | "x2 = np.array([0,1])\n",
268 | "x3 = np.array([0,0])\n",
269 | "\n",
270 | "print( not_any_true(x1) )\n",
271 | "print( not_any_true(x2) )\n",
272 | "print( not_any_true(x3) ) "
273 | ]
274 | },
275 | {
276 | "cell_type": "markdown",
277 | "metadata": {},
278 | "source": [
279 | "## Part 4: Wallis Formula"
280 | ]
281 | },
282 | {
283 | "cell_type": "markdown",
284 | "metadata": {},
285 | "source": [
286 | "The value of $\\pi$ can be computed with the Wallis formula, developed in 1655.\n",
287 | "\n",
288 | "$$\\pi=2\\prod_{i=1}^{\\infty}\\frac{4i^2}{4i^2-1}$$"
289 | ]
290 | },
291 | {
292 | "cell_type": "markdown",
293 | "metadata": {},
294 | "source": [
295 | "### Part 4.1"
296 | ]
297 | },
298 | {
299 | "cell_type": "markdown",
300 | "metadata": {},
301 | "source": [
302 | "Implement this method using native Python"
303 | ]
304 | },
305 | {
306 | "cell_type": "code",
307 | "execution_count": null,
308 | "metadata": {
309 | "collapsed": false
310 | },
311 | "outputs": [],
312 | "source": [
313 | "# Solution 4.1\n",
314 | "def py_wallis(n):\n",
315 | " prod = 1.0\n",
316 | " for i in range(1,n):\n",
317 | " term1 = 4*(i**2)\n",
318 | " prod = prod * term1/(term1-1)\n",
319 | " return 2*prod\n",
320 | "\n",
321 | "print(py_wallis(100000))"
322 | ]
323 | },
324 | {
325 | "cell_type": "markdown",
326 | "metadata": {},
327 | "source": [
328 | "### Part 4.2"
329 | ]
330 | },
331 | {
332 | "cell_type": "markdown",
333 | "metadata": {},
334 | "source": [
335 | "Implement this method using Numpy vectorization."
336 | ]
337 | },
338 | {
339 | "cell_type": "code",
340 | "execution_count": null,
341 | "metadata": {
342 | "collapsed": false
343 | },
344 | "outputs": [],
345 | "source": [
346 | "# Solution 4.2 -- float dtype: on the Python 2 kernel an integer array would\n",
347 | "# floor-divide (4*x**2)//(4*x**2-1) to all ones and return 2.0 instead of ~pi\n",
348 | "def np_wallis(n):\n",
349 | "    x = np.arange(1, n, dtype=np.float64)\n",
350 | "    y = (4*x**2)/(4*x**2 - 1)\n",
351 | "    z = 2.0*y.prod()\n",
352 | "    return z\n",
353 | "\n",
354 | "print(np_wallis(100000))"
355 | ]
356 | },
356 | {
357 | "cell_type": "markdown",
358 | "metadata": {},
359 | "source": [
360 | "### Part 4.3"
361 | ]
362 | },
363 | {
364 | "cell_type": "markdown",
365 | "metadata": {},
366 | "source": [
367 | "How much faster is the Numpy implementation?"
368 | ]
369 | },
370 | {
371 | "cell_type": "code",
372 | "execution_count": null,
373 | "metadata": {
374 | "collapsed": true
375 | },
376 | "outputs": [],
377 | "source": [
378 | "# Solution 4.3"
379 | ]
380 | },
381 | {
382 | "cell_type": "code",
383 | "execution_count": null,
384 | "metadata": {
385 | "collapsed": false
386 | },
387 | "outputs": [],
388 | "source": [
389 | "n = 1000\n",
390 | "%timeit pi = py_wallis(n)"
391 | ]
392 | },
393 | {
394 | "cell_type": "code",
395 | "execution_count": null,
396 | "metadata": {
397 | "collapsed": false
398 | },
399 | "outputs": [],
400 | "source": [
401 | "n = 1000\n",
402 | "%timeit pi = np_wallis(n)"
403 | ]
404 | }
405 | ],
406 | "metadata": {
407 | "continuum": {
408 | "depends": [
409 | "np_vectorization",
410 | "np_intro",
411 | "np_ex_vectorization",
412 | "ip_essentials",
413 | "ip_datatypes",
414 | "ip_containers",
415 | "ip_functions"
416 | ],
417 | "requires": [],
418 | "tag": "np_ex_vectorization_soln"
419 | },
420 | "kernelspec": {
421 | "display_name": "Python 2",
422 | "language": "python",
423 | "name": "python2"
424 | },
425 | "language_info": {
426 | "codemirror_mode": {
427 | "name": "ipython",
428 | "version": 2
429 | },
430 | "file_extension": ".py",
431 | "mimetype": "text/x-python",
432 | "name": "python",
433 | "nbconvert_exporter": "python",
434 | "pygments_lexer": "ipython2",
435 | "version": "2.7.11"
436 | }
437 | },
438 | "nbformat": 4,
439 | "nbformat_minor": 0
440 | }
441 |
--------------------------------------------------------------------------------
/2-NumPy_SciPy/NumPy/img/broadcasting2D.lightbg.scaled-noalpha.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/2-NumPy_SciPy/NumPy/img/broadcasting2D.lightbg.scaled-noalpha.png
--------------------------------------------------------------------------------
/2-NumPy_SciPy/NumPy/img/ecosystem.lightbg.scaled-noalpha.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/2-NumPy_SciPy/NumPy/img/ecosystem.lightbg.scaled-noalpha.png
--------------------------------------------------------------------------------
/2-NumPy_SciPy/NumPy/img/mef_numpy_selection-noalpha.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/2-NumPy_SciPy/NumPy/img/mef_numpy_selection-noalpha.png
--------------------------------------------------------------------------------
/2-NumPy_SciPy/NumPy/img/mef_numpy_slice_01-noalpha.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/2-NumPy_SciPy/NumPy/img/mef_numpy_slice_01-noalpha.png
--------------------------------------------------------------------------------
/2-NumPy_SciPy/NumPy/img/mef_numpy_slice_02-noalpha.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/2-NumPy_SciPy/NumPy/img/mef_numpy_slice_02-noalpha.png
--------------------------------------------------------------------------------
/2-NumPy_SciPy/NumPy/img/numpyzerosdims-noalpha.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/2-NumPy_SciPy/NumPy/img/numpyzerosdims-noalpha.png
--------------------------------------------------------------------------------
/2-NumPy_SciPy/NumPy/img/ufunc.lightbg.scaled-noalpha.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/2-NumPy_SciPy/NumPy/img/ufunc.lightbg.scaled-noalpha.png
--------------------------------------------------------------------------------
/2-NumPy_SciPy/NumPy/tmp/.keep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/2-NumPy_SciPy/NumPy/tmp/.keep
--------------------------------------------------------------------------------
/2-NumPy_SciPy/README.md:
--------------------------------------------------------------------------------
1 | NumPy and SciPy tutorial materials
--------------------------------------------------------------------------------
/2-NumPy_SciPy/SciPy/1_Introduction.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Table of Contents\n",
8 | "* [Solving mathematical problems with Scientific Python](#Solving-mathematical-problems-with-Scientific-Python)\n",
9 | "* [SciPy - Library of scientific algorithms for Python](#SciPy---Library-of-scientific-algorithms-for-Python)\n",
10 | "\t* [Introduction](#Introduction)\n",
11 | "\t* [Further Reading](#Further-Reading)\n",
12 | "\t* [Special functions](#Special-functions)\n"
13 | ]
14 | },
15 | {
16 | "cell_type": "markdown",
17 | "metadata": {},
18 | "source": [
19 | "# Solving mathematical problems with Scientific Python"
20 | ]
21 | },
22 | {
23 | "cell_type": "markdown",
24 | "metadata": {},
25 | "source": [
26 | "# SciPy - Library of scientific algorithms for Python"
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {},
32 | "source": [
33 | "This curriculum builds on material by J. Robert Johansson from his \"Introduction to scientific computing with Python,\" generously made available under a [Creative Commons Attribution 3.0 Unported License](http://creativecommons.org/licenses/by/3.0/) at https://github.com/jrjohansson/scientific-python-lectures. The Continuum Analytics enhancements use the [Creative Commons Attribution-NonCommercial 4.0 International License](https://creativecommons.org/licenses/by-nc/4.0/).\n",
34 | "\n",
35 | "****\n"
36 | ]
37 | },
38 | {
39 | "cell_type": "markdown",
40 | "metadata": {},
41 | "source": [
42 | "## Introduction"
43 | ]
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {},
48 | "source": [
49 | "The SciPy framework builds on top of the low-level NumPy framework for multidimensional arrays, and provides a large number of higher-level scientific algorithms. Some of the topics that SciPy covers are:\n",
50 | "\n",
51 | "* Special functions ([scipy.special](http://docs.scipy.org/doc/scipy/reference/special.html))\n",
52 | "* Integration ([scipy.integrate](http://docs.scipy.org/doc/scipy/reference/integrate.html))\n",
53 | "* Optimization ([scipy.optimize](http://docs.scipy.org/doc/scipy/reference/optimize.html))\n",
54 | "* Interpolation ([scipy.interpolate](http://docs.scipy.org/doc/scipy/reference/interpolate.html))\n",
55 | "* Fourier Transforms ([scipy.fftpack](http://docs.scipy.org/doc/scipy/reference/fftpack.html))\n",
56 | "* Signal Processing ([scipy.signal](http://docs.scipy.org/doc/scipy/reference/signal.html))\n",
57 | "* Linear Algebra ([scipy.linalg](http://docs.scipy.org/doc/scipy/reference/linalg.html))\n",
58 | "* Sparse Eigenvalue Problems ([scipy.sparse](http://docs.scipy.org/doc/scipy/reference/sparse.html))\n",
59 | "* Statistics ([scipy.stats](http://docs.scipy.org/doc/scipy/reference/stats.html))\n",
60 | "* Multi-dimensional image processing ([scipy.ndimage](http://docs.scipy.org/doc/scipy/reference/ndimage.html))\n",
61 | "* File IO ([scipy.io](http://docs.scipy.org/doc/scipy/reference/io.html))\n",
62 | "\n",
63 | "Each of these submodules provides a number of functions and classes that can be used to solve problems in their respective topics.\n",
64 | "\n",
65 | "In this tutorial, we will look at how to use some of these subpackages."
66 | ]
67 | },
68 | {
69 | "cell_type": "markdown",
70 | "metadata": {},
71 | "source": [
72 | "## Further Reading"
73 | ]
74 | },
75 | {
76 | "cell_type": "markdown",
77 | "metadata": {},
78 | "source": [
79 | "* http://www.scipy.org - The official web page for the SciPy project.\n",
80 | "* http://docs.scipy.org/doc/scipy/reference/tutorial/index.html - A tutorial on how to get started using SciPy. \n",
81 | "* https://github.com/scipy/scipy/ - The SciPy source code."
82 | ]
83 | },
84 | {
85 | "cell_type": "markdown",
86 | "metadata": {},
87 | "source": [
88 | "If we only need to use part of the SciPy framework, we can selectively include only those modules we are interested in. For example, to include the linear algebra package under the name `la`, we can do:\n",
89 | "\n",
90 | "```python\n",
91 | "import scipy.linalg as la\n",
92 | "```"
93 | ]
94 | },
95 | {
96 | "cell_type": "markdown",
97 | "metadata": {},
98 | "source": [
99 | "While running `from scipy import *` may be convenient, it is not recommended, particularly because this will also import all of Numpy into the global namespace."
100 | ]
101 | },
102 | {
103 | "cell_type": "markdown",
104 | "metadata": {},
105 | "source": [
106 | "## Special functions"
107 | ]
108 | },
109 | {
110 | "cell_type": "markdown",
111 | "metadata": {},
112 | "source": [
113 | "A large number of mathematical special functions are important for many computational physics problems. SciPy provides implementations of a very extensive set of special functions. For details, see the list of functions in the reference documentation at http://docs.scipy.org/doc/scipy/reference/special.html#module-scipy.special. \n",
114 | "\n",
115 | "To demonstrate the typical usage of special functions, we will look in more detail at the Bessel functions:"
116 | ]
117 | },
118 | {
119 | "cell_type": "code",
120 | "execution_count": null,
121 | "metadata": {
122 | "collapsed": false
123 | },
124 | "outputs": [],
125 | "source": [
126 | "#\n",
127 | "# The scipy.special module includes a large number of Bessel functions\n",
128 | "# Here we will use the functions jn and yn, which are the Bessel functions \n",
129 | "# of the first and second kind and real-valued order. We also include the \n",
130 | "# function jn_zeros and yn_zeros that gives the zeroes of the functions jn\n",
131 | "# and yn. Bessel functions are useful in partial differential equations,\n",
132 | "# like the wave equation.\n",
133 | "from scipy.special import jn, yn, jn_zeros, yn_zeros\n",
134 | "import numpy as np\n",
135 | "import matplotlib.pyplot as plt\n",
136 | "%matplotlib inline"
137 | ]
138 | },
139 | {
140 | "cell_type": "code",
141 | "execution_count": null,
142 | "metadata": {
143 | "collapsed": false
144 | },
145 | "outputs": [],
146 | "source": [
147 | "n = 0 # order\n",
148 | "x = 0.0\n",
149 | "\n",
150 | "# Bessel function of first kind\n",
151 | "print(\"J_%d(%f) = %f\" % (n, x, jn(n, x)))\n",
152 | "\n",
153 | "x = 1.0\n",
154 | "# Bessel function of second kind\n",
155 | "print(\"Y_%d(%f) = %f\" % (n, x, yn(n, x)))"
156 | ]
157 | },
158 | {
159 | "cell_type": "code",
160 | "execution_count": null,
161 | "metadata": {
162 | "collapsed": false
163 | },
164 | "outputs": [],
165 | "source": [
166 | "x = np.linspace(0, 10, 100)\n",
167 | "\n",
168 | "fig, ax = plt.subplots()\n",
169 | "for n in range(4):\n",
170 | " ax.plot(x, jn(n, x), label=r\"$J_%d(x)$\" % n)\n",
171 | "ax.legend();"
172 | ]
173 | },
174 | {
175 | "cell_type": "code",
176 | "execution_count": null,
177 | "metadata": {
178 | "collapsed": false
179 | },
180 | "outputs": [],
181 | "source": [
182 | "# zeros of Bessel functions\n",
183 | "n = 0 # order\n",
184 | "m = 4 # number of roots to compute\n",
185 | "jn_zeros(n, m)"
186 | ]
187 | }
188 | ],
189 | "metadata": {
190 | "anaconda-cloud": {},
191 | "continuum": {
192 | "depends": [
193 | "np_join"
194 | ],
195 | "tag": "math_intro"
196 | },
197 | "kernelspec": {
198 | "display_name": "Python [conda env:python3]",
199 | "language": "python",
200 | "name": "conda-env-python3-py"
201 | },
202 | "language_info": {
203 | "codemirror_mode": {
204 | "name": "ipython",
205 | "version": 3
206 | },
207 | "file_extension": ".py",
208 | "mimetype": "text/x-python",
209 | "name": "python",
210 | "nbconvert_exporter": "python",
211 | "pygments_lexer": "ipython3",
212 | "version": "3.5.2"
213 | }
214 | },
215 | "nbformat": 4,
216 | "nbformat_minor": 0
217 | }
218 |
--------------------------------------------------------------------------------
/2-NumPy_SciPy/SciPy/2_Integration.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Table of Contents\n",
8 | "* [Learning Objectives:](#Learning-Objectives:)\n",
9 | "* [Integration](#Integration)\n",
10 | "\t* [Numerical integration: quadrature](#Numerical-integration:-quadrature)\n"
11 | ]
12 | },
13 | {
14 | "cell_type": "markdown",
15 | "metadata": {},
16 | "source": [
17 | "# Learning Objectives:"
18 | ]
19 | },
20 | {
21 | "cell_type": "markdown",
22 | "metadata": {},
23 | "source": [
24 | "After completion of this module, learners should be able to:\n",
25 | "\n",
26 | "* compute numerical integrals (quadrature) and solutions of initial-value ordinary differential equations"
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {},
32 | "source": [
33 | "# Integration"
34 | ]
35 | },
36 | {
37 | "cell_type": "markdown",
38 | "metadata": {},
39 | "source": [
40 | "## Numerical integration: quadrature"
41 | ]
42 | },
43 | {
44 | "cell_type": "markdown",
45 | "metadata": {},
46 | "source": [
47 | "Numerical evaluation of a function of the type\n",
48 | "\n",
49 | "$\\displaystyle \\int_a^b f(x) dx$\n",
50 | "\n",
51 | "is called *numerical quadrature*, or simply *quadrature*. SciPy provides a series of functions for different kind of quadrature, for example the `quad`, `dblquad` and `tplquad` for single, double and triple integrals, respectively.\n",
52 | "\n"
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": null,
58 | "metadata": {
59 | "collapsed": false
60 | },
61 | "outputs": [],
62 | "source": [
63 | "from scipy import Inf\n",
64 | "from scipy.special import jn, yn, jn_zeros, yn_zeros\n",
65 | "from scipy.integrate import quad, dblquad, tplquad\n",
66 | "import numpy as np"
67 | ]
68 | },
69 | {
70 | "cell_type": "markdown",
71 | "metadata": {},
72 | "source": [
73 | "The `quad` function takes a large number of optional arguments which can be used to fine-tune the behavior of the function (try `help(quad)` for details).\n",
74 | "\n",
75 | "The basic usage is as follows:"
76 | ]
77 | },
78 | {
79 | "cell_type": "code",
80 | "execution_count": null,
81 | "metadata": {
82 | "collapsed": false
83 | },
84 | "outputs": [],
85 | "source": [
86 | "# define a simple function for the integrand\n",
87 | "def f(x):\n",
88 | " return x"
89 | ]
90 | },
91 | {
92 | "cell_type": "code",
93 | "execution_count": null,
94 | "metadata": {
95 | "collapsed": false
96 | },
97 | "outputs": [],
98 | "source": [
99 | "x_lower = 0 # the lower limit of x\n",
100 | "x_upper = 1 # the upper limit of x\n",
101 | "\n",
102 | "val, abserr = quad(f, x_lower, x_upper)\n",
103 | "\n",
104 | "print(\"integral value =\", val, \", absolute error =\", abserr )"
105 | ]
106 | },
107 | {
108 | "cell_type": "markdown",
109 | "metadata": {},
110 | "source": [
111 | "If we need to pass extra arguments to the integrand function, we can use the `args` keyword argument:"
112 | ]
113 | },
114 | {
115 | "cell_type": "code",
116 | "execution_count": null,
117 | "metadata": {
118 | "collapsed": false
119 | },
120 | "outputs": [],
121 | "source": [
122 | "def integrand(x, n):\n",
123 | " \"\"\"\n",
124 | " Bessel function of first kind and order n. \n",
125 | " \"\"\"\n",
126 | " return jn(n, x)\n",
127 | "\n",
128 | "\n",
129 | "x_lower = 0 # the lower limit of x\n",
130 | "x_upper = 10 # the upper limit of x\n",
131 | "\n",
132 | "val, abserr = quad(integrand, x_lower, x_upper, args=(3,))\n",
133 | "\n",
134 | "print(val, abserr)"
135 | ]
136 | },
137 | {
138 | "cell_type": "markdown",
139 | "metadata": {},
140 | "source": [
141 | "For simple functions, we can use a lambda function (nameless function) instead of explicitly defining a function for the integrand:"
142 | ]
143 | },
144 | {
145 | "cell_type": "code",
146 | "execution_count": null,
147 | "metadata": {
148 | "collapsed": false
149 | },
150 | "outputs": [],
151 | "source": [
152 | "val, abserr = quad(lambda x: np.exp(-x ** 2), -Inf, Inf)\n",
153 | "\n",
154 | "print(\"numerical =\", val, abserr)\n",
155 | "\n",
156 | "analytical = np.sqrt(np.pi)\n",
157 | "print(\"analytical =\", analytical)"
158 | ]
159 | },
160 | {
161 | "cell_type": "markdown",
162 | "metadata": {},
163 | "source": [
164 | "As shown in the example above, we can also use 'Inf' or '-Inf' as integral limits.\n",
165 | "\n",
166 | "Higher-dimensional integration works in the same way:"
167 | ]
168 | },
169 | {
170 | "cell_type": "code",
171 | "execution_count": null,
172 | "metadata": {
173 | "collapsed": false
174 | },
175 | "outputs": [],
176 | "source": [
177 | "def integrand(x, y):\n",
178 | " return np.exp(-x**2-y**2)\n",
179 | "\n",
180 | "x_lower = 0 \n",
181 | "x_upper = 10\n",
182 | "y_lower = 0\n",
183 | "y_upper = 10\n",
184 | "\n",
185 | "val, abserr = dblquad(integrand, x_lower, x_upper, lambda x : y_lower, lambda x: y_upper)\n",
186 | "\n",
187 | "print(val, abserr)"
188 | ]
189 | },
190 | {
191 | "cell_type": "markdown",
192 | "metadata": {},
193 | "source": [
194 | "Note how we had to pass lambda functions for the limits for the y integration, since these in general can be functions of x."
195 | ]
196 | }
197 | ],
198 | "metadata": {
199 | "continuum": {
200 | "depends": [
201 | "math_intro"
202 | ],
203 | "tag": "math_integration"
204 | },
205 | "kernelspec": {
206 | "display_name": "Python [conda env:python3]",
207 | "language": "python",
208 | "name": "conda-env-python3-py"
209 | },
210 | "language_info": {
211 | "codemirror_mode": {
212 | "name": "ipython",
213 | "version": 3
214 | },
215 | "file_extension": ".py",
216 | "mimetype": "text/x-python",
217 | "name": "python",
218 | "nbconvert_exporter": "python",
219 | "pygments_lexer": "ipython3",
220 | "version": "3.5.2"
221 | }
222 | },
223 | "nbformat": 4,
224 | "nbformat_minor": 0
225 | }
226 |
--------------------------------------------------------------------------------
/2-NumPy_SciPy/SciPy/3_FFT.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Table of Contents\n",
8 | "* [Learning Objectives:](#Learning-Objectives:)\n",
9 | "* [Fourier transform](#Fourier-transform)\n"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "# Learning Objectives:"
17 | ]
18 | },
19 | {
20 | "cell_type": "markdown",
21 | "metadata": {},
22 | "source": [
23 | "After completion of this module, learners should be able to:\n",
24 | "\n",
25 | "* figure out how to apply Python library functions for statistical tests, for special functions, and for integral transforms (e.g., FFTs)"
26 | ]
27 | },
28 | {
29 | "cell_type": "markdown",
30 | "metadata": {},
31 | "source": [
32 | "# Fourier transform"
33 | ]
34 | },
35 | {
36 | "cell_type": "markdown",
37 | "metadata": {},
38 | "source": [
39 | "Fourier transforms are one of the universal tools in computational physics; they appear over and over again in different contexts. SciPy provides functions for accessing the classic [FFTPACK](http://www.netlib.org/fftpack/) library from NetLib, an efficient and well tested FFT library written in FORTRAN. The SciPy API has a few additional convenience functions, but overall the API is closely related to the original FORTRAN library.\n",
40 | "\n",
41 | "To use the `fftpack` module in a python program, include it using:"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": null,
47 | "metadata": {
48 | "collapsed": false
49 | },
50 | "outputs": [],
51 | "source": [
52 | "import numpy as np\n",
53 | "import scipy.fftpack as fft\n",
54 | "import matplotlib.pyplot as plt\n",
55 | "%matplotlib inline"
56 | ]
57 | },
58 | {
59 | "cell_type": "markdown",
60 | "metadata": {},
61 | "source": [
62 | "To demonstrate how to do a fast Fourier transform with SciPy, let's look at the FFT of the solution to the damped oscillator from the previous section:"
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": null,
68 | "metadata": {
69 | "collapsed": true
70 | },
71 | "outputs": [],
72 | "source": [
73 | "from scipy.integrate import odeint\n",
74 | "def dy(y, t, zeta, w0):\n",
75 | " \"\"\"\n",
76 | " The right-hand side of the damped oscillator ODE\n",
77 | " \"\"\"\n",
78 | " x, p = y[0], y[1]\n",
79 | " \n",
80 | " dx = p\n",
81 | " dp = -2 * zeta * w0 * p - w0**2 * x\n",
82 | "\n",
83 | " return [dx, dp]\n",
84 | "y0 = [1.0, 0.0]\n",
85 | "t = np.linspace(0, 10, 1000)\n",
86 | "w0 = 2*np.pi*1.0\n",
87 | "y1 = odeint(dy, y0, t, args=(0.0, w0)) # undamped\n",
88 | "y2 = odeint(dy, y0, t, args=(0.2, w0)) # under damped\n",
 89 | "y3 = odeint(dy, y0, t, args=(1.0, w0)) # critical damping\n",
90 | "y4 = odeint(dy, y0, t, args=(5.0, w0)) # over damped"
91 | ]
92 | },
93 | {
94 | "cell_type": "code",
95 | "execution_count": null,
96 | "metadata": {
97 | "collapsed": false
98 | },
99 | "outputs": [],
100 | "source": [
101 | "fig, ax = plt.subplots()\n",
102 | "ax.plot(t, y1[:,0], 'k', label=\"undamped\", linewidth=0.25)\n",
103 | "ax.plot(t, y2[:,0], 'r', label=\"under damped\")\n",
104 | "ax.plot(t, y3[:,0], 'b', label=r\"critical damping\")\n",
105 | "ax.plot(t, y4[:,0], 'g', label=\"over damped\")\n",
106 | "ax.legend();"
107 | ]
108 | },
109 | {
110 | "cell_type": "code",
111 | "execution_count": null,
112 | "metadata": {
113 | "collapsed": false
114 | },
115 | "outputs": [],
116 | "source": [
117 | "N = len(t)\n",
118 | "dt = t[1]-t[0]\n",
119 | "\n",
120 | "# calculate the fast fourier transform\n",
121 | "# y2 is the solution to the under-damped oscillator from the previous section\n",
122 | "F = fft.fft(y2[:,0]) \n",
123 | "\n",
124 | "# calculate the frequencies for the components in F\n",
125 | "w = fft.fftfreq(N, dt)"
126 | ]
127 | },
128 | {
129 | "cell_type": "code",
130 | "execution_count": null,
131 | "metadata": {
132 | "collapsed": false
133 | },
134 | "outputs": [],
135 | "source": [
136 | "fig, ax = plt.subplots(figsize=(9,3))\n",
137 | "ax.plot(w, abs(F));"
138 | ]
139 | },
140 | {
141 | "cell_type": "markdown",
142 | "metadata": {},
143 | "source": [
144 | "Since the signal is real, the spectrum is symmetric. We therefore only need to plot the part that corresponds to the positive frequencies. To extract that part of the `w` and `F`, we can use some of the indexing tricks for NumPy arrays we saw in Lecture 2:"
145 | ]
146 | },
147 | {
148 | "cell_type": "code",
149 | "execution_count": null,
150 | "metadata": {
151 | "collapsed": false
152 | },
153 | "outputs": [],
154 | "source": [
155 | "# select only indices for elements that corresponds to positive frequencies\n",
156 | "indices = np.where(w > 0) \n",
157 | "w_pos = w[indices]\n",
158 | "F_pos = F[indices]"
159 | ]
160 | },
161 | {
162 | "cell_type": "code",
163 | "execution_count": null,
164 | "metadata": {
165 | "collapsed": false
166 | },
167 | "outputs": [],
168 | "source": [
169 | "fig, ax = plt.subplots(figsize=(9,3))\n",
170 | "ax.plot(w_pos, abs(F_pos))\n",
171 | "ax.set_xlim(0, 5);"
172 | ]
173 | },
174 | {
175 | "cell_type": "markdown",
176 | "metadata": {},
177 | "source": [
178 | "As expected, we now see a peak in the spectrum that is centered around 1, which is the frequency we used in the damped oscillator example."
179 | ]
180 | }
181 | ],
182 | "metadata": {
183 | "continuum": {
184 | "depends": [
185 | "math_intro"
186 | ],
187 | "tag": "math_fft"
188 | },
189 | "kernelspec": {
190 | "display_name": "Python [conda env:python3]",
191 | "language": "python",
192 | "name": "conda-env-python3-py"
193 | },
194 | "language_info": {
195 | "codemirror_mode": {
196 | "name": "ipython",
197 | "version": 3
198 | },
199 | "file_extension": ".py",
200 | "mimetype": "text/x-python",
201 | "name": "python",
202 | "nbconvert_exporter": "python",
203 | "pygments_lexer": "ipython3",
204 | "version": "3.5.2"
205 | }
206 | },
207 | "nbformat": 4,
208 | "nbformat_minor": 0
209 | }
210 |
--------------------------------------------------------------------------------
/3-Analytics/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/3-Analytics/README.md
--------------------------------------------------------------------------------
/3-Analytics/pandas/EX01_ExcelFiles.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Table of Contents\n",
8 | "* [Learning Objectives](#Learning-Objectives)\n",
9 | "* [Pandas Exercise 3: Relational Normalization](#Pandas-Exercise-3:-Relational-Normalization)\n",
10 | "\t* [Background on Reading Excel](#Background-on-Reading-Excel)\n",
11 | "\t* [Background on Relational Normalization](#Background-on-Relational-Normalization)\n",
12 | "\t* [Background on Categorical Data](#Background-on-Categorical-Data)\n",
13 | "\t* [Set-up](#Set-up)\n",
14 | "\t* [Part 1: Read the data](#Part-1:-Read-the-data)\n",
15 | "\t* [Part 2: Normalize](#Part-2:-Normalize)\n",
16 | "\t* [Part 3: Create a Sqlite3 database](#Part-3:-Create-a-Sqlite3-database)\n",
17 | "\t* [Part 4: Compare file sizes](#Part-4:-Compare-file-sizes)\n",
18 | "\t* [Part 5: Optional](#Part-5:-Optional)\n",
19 | "\t* [Part 6: Optional](#Part-6:-Optional)\n"
20 | ]
21 | },
22 | {
23 | "cell_type": "markdown",
24 | "metadata": {},
25 | "source": [
26 | "# Learning Objectives"
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {},
32 | "source": [
33 | "After completion of this module, learners should be able to:\n",
34 | "* list various python modules used for reading Excel files\n",
35 | "* read an Excel data file into a pandas DataFrame\n",
 36 | "* use categoricals and other techniques to reduce data size\n",
37 | "* use pandas to convert an Excel file into an Sqlite database file"
38 | ]
39 | },
40 | {
41 | "cell_type": "markdown",
42 | "metadata": {},
43 | "source": [
44 | "# Pandas Exercise 3: Relational Normalization"
45 | ]
46 | },
47 | {
48 | "cell_type": "markdown",
49 | "metadata": {},
50 | "source": [
51 | "## Background on Reading Excel"
52 | ]
53 | },
54 | {
55 | "cell_type": "markdown",
56 | "metadata": {},
57 | "source": [
58 | "There are several 3rd party Python modules for working with Microsoft Excel spreadsheets. A list of them is collected at:\n",
59 | "\n",
60 | "* [Working with Excel Files in Python](http://www.python-excel.org/)\n",
61 | "\n",
62 | "I've used [openpyxl](https://openpyxl.readthedocs.org/en/latest/) successfully in some projects.\n",
63 | "\n",
64 | "However, within the Scientific Python toolstack, the most common way of accessing the Excel format is the [Pandas](http://pandas.pydata.org/) framework. This is heavier weight than other options if all you wanted to do was read Excel, but in a scientific context, you already need most of the requirements (NumPy, etc), and you probably want to be using Pandas for numerous other purposes anyway.\n",
65 | "\n",
 66 | "Pandas internally relies on `xlrd` to read Excel files, but provides a higher-level wrapper. You probably need to run:\n",
67 | "\n",
68 | "```bash\n",
69 | "conda install xlrd\n",
70 | "```"
71 | ]
72 | },
73 | {
74 | "cell_type": "markdown",
75 | "metadata": {},
76 | "source": [
77 | "## Background on Relational Normalization"
78 | ]
79 | },
80 | {
81 | "cell_type": "markdown",
82 | "metadata": {},
83 | "source": [
84 | "Description from [Wikipedia](https://en.wikipedia.org/wiki/Database_normalization):\n",
85 | "> *Database normalization ... is the process of organizing the columns (attributes) and tables (relations) of a relational database to minimize data redundancy. Normalization involves decomposing a table into less redundant tables without losing information*"
86 | ]
87 | },
88 | {
89 | "cell_type": "markdown",
90 | "metadata": {},
91 | "source": [
92 | "## Background on Categorical Data"
93 | ]
94 | },
95 | {
96 | "cell_type": "markdown",
97 | "metadata": {},
98 | "source": [
99 | "Description from the [documentation](https://pandas-docs.github.io/pandas-docs-travis/categorical.html):\n",
100 | "\n",
101 | "> *Categoricals are a pandas data type, which correspond to categorical variables in statistics: **a variable, which can take on only a limited, and usually fixed, number of possible values** (categories; levels in R). Examples are gender, social class, blood types, country affiliations, observation time or ratings via Likert scales.*"
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "execution_count": null,
107 | "metadata": {
108 | "collapsed": false
109 | },
110 | "outputs": [],
111 | "source": [
112 | "# Categorical example: notice the counts for each category\n",
113 | "import pandas as pd\n",
114 | "s = pd.Series(pd.Categorical([\"a\",\"b\",\"c\",\"c\",\"e\"], categories=[\"c\",\"a\",\"b\",\"d\"]))\n",
115 | "\n",
116 | "s.value_counts()"
117 | ]
118 | },
119 | {
120 | "cell_type": "code",
121 | "execution_count": null,
122 | "metadata": {
123 | "collapsed": false
124 | },
125 | "outputs": [],
126 | "source": [
127 | "# Categorical example: notice the NaN for a value that did not match any category\n",
128 | "\n",
129 | "s"
130 | ]
131 | },
132 | {
133 | "cell_type": "markdown",
134 | "metadata": {},
135 | "source": [
136 | "## Set-up"
137 | ]
138 | },
139 | {
140 | "cell_type": "code",
141 | "execution_count": null,
142 | "metadata": {
143 | "collapsed": false
144 | },
145 | "outputs": [],
146 | "source": [
147 | "## Optional: Uncomment to install the python module `xlrd` for reading Excel files\n",
148 | "## Recommendation: use the built-in pandas methods instead.\n",
149 | "\n",
150 | "# !conda install -y xlrd"
151 | ]
152 | },
153 | {
154 | "cell_type": "code",
155 | "execution_count": null,
156 | "metadata": {
157 | "collapsed": true
158 | },
159 | "outputs": [],
160 | "source": [
161 | "# Required: imports needed in this exercise\n",
162 | "%matplotlib inline\n",
163 | "import pandas as pd"
164 | ]
165 | },
166 | {
167 | "cell_type": "markdown",
168 | "metadata": {},
169 | "source": [
170 | "## Part 1: Read the data"
171 | ]
172 | },
173 | {
174 | "cell_type": "markdown",
175 | "metadata": {},
176 | "source": [
177 | "Read the NYC Harbor data from the excel data file ``data/nyc_harbor_wq_2006-2014.xlsx`` into DataFrame.\n",
178 | "\n",
179 | "*Note: This Excel file is roughly 24 MB in size, containing 300k rows of largely categorical data. It may take some time to load...*"
180 | ]
181 | },
182 | {
183 | "cell_type": "code",
184 | "execution_count": null,
185 | "metadata": {
186 | "collapsed": false,
187 | "scrolled": true
188 | },
189 | "outputs": [],
190 | "source": [
191 | "# Solution:\n"
192 | ]
193 | },
194 | {
195 | "cell_type": "markdown",
196 | "metadata": {},
197 | "source": [
198 | "## Part 2: Normalize"
199 | ]
200 | },
201 | {
202 | "cell_type": "markdown",
203 | "metadata": {},
204 | "source": [
205 | "A large fraction of all values in a given column are duplicates.\n",
206 | "* Use the unique `STATION` values as categories to reduce data duplication stored in memory"
207 | ]
208 | },
209 | {
210 | "cell_type": "code",
211 | "execution_count": null,
212 | "metadata": {
213 | "collapsed": false
214 | },
215 | "outputs": [],
216 | "source": [
217 | "# Solution:\n"
218 | ]
219 | },
220 | {
221 | "cell_type": "markdown",
222 | "metadata": {},
223 | "source": [
224 | "## Part 3: Create a Sqlite3 database"
225 | ]
226 | },
227 | {
228 | "cell_type": "markdown",
229 | "metadata": {},
230 | "source": [
231 | "Using the NYC Harbor data set, create an Sqlite3 single-file database containing all of the data inside the spreadsheet.\n",
232 | "\n",
233 | "* Store the data in its native types per column/cell (Pandas does a good job of inferring data types)"
234 | ]
235 | },
236 | {
237 | "cell_type": "code",
238 | "execution_count": null,
239 | "metadata": {
240 | "collapsed": true
241 | },
242 | "outputs": [],
243 | "source": [
244 | "#Solution\n"
245 | ]
246 | },
247 | {
248 | "cell_type": "markdown",
249 | "metadata": {},
250 | "source": [
251 | "## Part 4: Compare file sizes"
252 | ]
253 | },
254 | {
255 | "cell_type": "markdown",
256 | "metadata": {},
257 | "source": [
258 | "Write code that compares the file size of the resulting sqlite3 file compared to the original Excel file."
259 | ]
260 | },
261 | {
262 | "cell_type": "code",
263 | "execution_count": null,
264 | "metadata": {
265 | "collapsed": true
266 | },
267 | "outputs": [],
268 | "source": [
269 | "#Solution\n"
270 | ]
271 | },
272 | {
273 | "cell_type": "markdown",
274 | "metadata": {},
275 | "source": [
276 | "## Part 5: Optional"
277 | ]
278 | },
279 | {
280 | "cell_type": "markdown",
281 | "metadata": {},
282 | "source": [
283 | "Compose some interesting queries of the database to extract patterns or features of the data."
284 | ]
285 | },
286 | {
287 | "cell_type": "code",
288 | "execution_count": null,
289 | "metadata": {
290 | "collapsed": true
291 | },
292 | "outputs": [],
293 | "source": [
294 | "#Solution\n"
295 | ]
296 | },
297 | {
298 | "cell_type": "markdown",
299 | "metadata": {},
300 | "source": [
301 | "## Part 6: Optional"
302 | ]
303 | },
304 | {
305 | "cell_type": "markdown",
306 | "metadata": {},
307 | "source": [
308 | "If you have access configured, try the exercise using a general purpose RDBMS, such as MySQL, PostgreSQL, SQL Server, etc."
309 | ]
310 | },
311 | {
312 | "cell_type": "markdown",
313 | "metadata": {},
314 | "source": [
315 | "Related to the normalization, we might notice that our Pandas `DataFrame` itself is inefficient for the same reasons that normalization is desirable. A large number of copies of the same strings are stored within the same column `Series` objects. Moreover, in many cases what is stored are strings which need to be stored as Python objects, and processed much more slowly and indirectly than with basic numeric types that leverage their underlying `numpy` arrays. We can improve this quite a bit."
316 | ]
317 | }
318 | ],
319 | "metadata": {
320 | "anaconda-cloud": {},
321 | "continuum": {
322 | "depends": [
323 | "pd_intro",
324 | "pd_data_io"
325 | ],
326 | "requires": [
327 | "data/nyc_harbor_wq_2006-2014.xlsx"
328 | ],
329 | "tag": "pd_ex_excel"
330 | },
331 | "kernelspec": {
332 | "display_name": "Python [conda env:python3]",
333 | "language": "python",
334 | "name": "conda-env-python3-py"
335 | },
336 | "language_info": {
337 | "codemirror_mode": {
338 | "name": "ipython",
339 | "version": 3
340 | },
341 | "file_extension": ".py",
342 | "mimetype": "text/x-python",
343 | "name": "python",
344 | "nbconvert_exporter": "python",
345 | "pygments_lexer": "ipython3",
346 | "version": "3.5.2"
347 | }
348 | },
349 | "nbformat": 4,
350 | "nbformat_minor": 0
351 | }
352 |
--------------------------------------------------------------------------------
/3-Analytics/pandas/EX02_weather.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Table of Contents\n",
8 | "* [Timeseries with weather data](#Timeseries-with-weather-data)\n",
9 | "\t* [EX1](#EX1)\n",
10 | "\t* [EX2](#EX2)\n",
11 | "\t* [EX3](#EX3)\n"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {},
17 | "source": [
18 | "# Timeseries with weather data"
19 | ]
20 | },
21 | {
22 | "cell_type": "markdown",
23 | "metadata": {},
24 | "source": [
25 | "Weather data has been downloaded from [Wunderground](http://www.wunderground.com/history) and stored in `data/pittsburgh2013.csv`."
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": null,
31 | "metadata": {
32 | "collapsed": true
33 | },
34 | "outputs": [],
35 | "source": [
36 | "import pandas as pd\n",
37 | "import numpy as np\n",
38 | "%matplotlib inline"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": null,
44 | "metadata": {
45 | "collapsed": false
46 | },
47 | "outputs": [],
48 | "source": [
49 | "df = pd.read_csv('data/pittsburgh2013.csv', parse_dates=['Date'], index_col='Date')\n",
50 | "df.head()"
51 | ]
52 | },
53 | {
54 | "cell_type": "markdown",
55 | "metadata": {},
56 | "source": [
57 | "## EX1"
58 | ]
59 | },
60 | {
61 | "cell_type": "markdown",
62 | "metadata": {},
63 | "source": [
64 | "Compute the average precipitation for each month."
65 | ]
66 | },
67 | {
68 | "cell_type": "code",
69 | "execution_count": null,
70 | "metadata": {
71 | "collapsed": false
72 | },
73 | "outputs": [],
74 | "source": [
75 | "# your solution here"
76 | ]
77 | },
78 | {
79 | "cell_type": "markdown",
80 | "metadata": {},
81 | "source": [
82 | "## EX2"
83 | ]
84 | },
85 | {
86 | "cell_type": "markdown",
87 | "metadata": {},
88 | "source": [
89 | "Which month got the most precipitation?"
90 | ]
91 | },
92 | {
93 | "cell_type": "code",
94 | "execution_count": null,
95 | "metadata": {
96 | "collapsed": false
97 | },
98 | "outputs": [],
99 | "source": [
100 | "# your solution here"
101 | ]
102 | },
103 | {
104 | "cell_type": "markdown",
105 | "metadata": {},
106 | "source": [
107 | "## EX3"
108 | ]
109 | },
110 | {
111 | "cell_type": "markdown",
112 | "metadata": {},
113 | "source": [
114 | "Use the *Events* column to determine how many days a **Snow** event occurred in each month.\n",
115 | " * Notice that events may be hyphenated, e.g. **Snow-Rain**."
116 | ]
117 | },
118 | {
119 | "cell_type": "code",
120 | "execution_count": null,
121 | "metadata": {
122 | "collapsed": false
123 | },
124 | "outputs": [],
125 | "source": [
126 | "# your solution here"
127 | ]
128 | }
129 | ],
130 | "metadata": {
131 | "anaconda-cloud": {},
132 | "continuum": {
133 | "depends": [
134 | "pd_intro",
135 | "pd_series"
136 | ],
137 | "requires": [
138 | "data/pittsburgh2013.csv"
139 | ],
140 | "tag": "pd_ex_weather"
141 | },
142 | "kernelspec": {
143 | "display_name": "Python [conda env:python3]",
144 | "language": "python",
145 | "name": "conda-env-python3-py"
146 | },
147 | "language_info": {
148 | "codemirror_mode": {
149 | "name": "ipython",
150 | "version": 3
151 | },
152 | "file_extension": ".py",
153 | "mimetype": "text/x-python",
154 | "name": "python",
155 | "nbconvert_exporter": "python",
156 | "pygments_lexer": "ipython3",
157 | "version": "3.5.2"
158 | }
159 | },
160 | "nbformat": 4,
161 | "nbformat_minor": 0
162 | }
163 |
--------------------------------------------------------------------------------
/3-Analytics/pandas/data/201509-citibike-tripdata.csv.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/3-Analytics/pandas/data/201509-citibike-tripdata.csv.gz
--------------------------------------------------------------------------------
/3-Analytics/pandas/data/beer2.csv.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/3-Analytics/pandas/data/beer2.csv.gz
--------------------------------------------------------------------------------
/3-Analytics/pandas/data/nyc_harbor_wq_2006-2014.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/3-Analytics/pandas/data/nyc_harbor_wq_2006-2014.xlsx
--------------------------------------------------------------------------------
/3-Analytics/pandas/img/pydata_stack_model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/3-Analytics/pandas/img/pydata_stack_model.png
--------------------------------------------------------------------------------
/3-Analytics/sklearn/1_Intro.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Table of Contents\n",
8 | "* [Learning Objectives:](#Learning-Objectives:)\n",
9 | "* [Machine Learning with Scikit Learn](#Machine-Learning-with-Scikit-Learn)\n",
10 | "\t* [API and Terminology](#API-and-Terminology)\n",
11 | "\t\t* [Scikit Learn modules](#Scikit-Learn-modules)\n",
12 | "\t* [Chosing an Estimator](#Chosing-an-Estimator)\n"
13 | ]
14 | },
15 | {
16 | "cell_type": "markdown",
17 | "metadata": {},
18 | "source": [
19 | "# Learning Objectives:"
20 | ]
21 | },
22 | {
23 | "cell_type": "markdown",
24 | "metadata": {},
25 | "source": [
26 | "After completion of this module, learners should be able to:\n",
27 | "\n",
28 | "* Understand and explain estimators, models and scoring metrics\n",
29 | "* Import scikit-learn modules"
30 | ]
31 | },
32 | {
33 | "cell_type": "markdown",
34 | "metadata": {},
35 | "source": [
36 | "# Machine Learning with Scikit Learn"
37 | ]
38 | },
39 | {
40 | "cell_type": "markdown",
41 | "metadata": {},
42 | "source": [
43 | "`scikit-learn` is an open source Machine Learning Toolkit built on Numpy and Scipy. Methods available in `scikit-learn` can be used for supervised and unsupervised learning. Among the many features of `scikit-learn` are\n",
44 | "\n",
45 | "* classification\n",
46 | "* regression\n",
47 | "* clustering\n",
48 | "* support vector machines\n",
49 | "* random forests\n",
50 | "* gradient boosting\n",
51 | "* k-means\n",
52 | "* DBSCAN\n",
53 | "\n",
 54 | "The [User Guide](http://scikit-learn.org/stable/user_guide.html) and [Documentation](http://scikit-learn.org/stable/documentation.html) are the best place to learn how to use the methods available in `scikit-learn` and there are several [tutorials available online](http://scikit-learn.org/stable/tutorial/index.html)"
55 | ]
56 | },
57 | {
58 | "cell_type": "markdown",
59 | "metadata": {},
60 | "source": [
61 | "This course will provide an introduction to `sklearn` with a focus on highlighting how the methods work together to understand the performance of a given model."
62 | ]
63 | },
64 | {
65 | "cell_type": "markdown",
66 | "metadata": {},
67 | "source": [
68 | "## API and Terminology"
69 | ]
70 | },
71 | {
72 | "cell_type": "markdown",
73 | "metadata": {},
74 | "source": [
75 | "While the following definitions may be the most widely accepted in the fields of Machine Learning and Statistics, they are useful to help understand the `sklearn` modules and API\n",
76 | "\n",
77 | "* **estimator**: A method used to make a prediction for supervised and unsupervised learning\n",
78 | " * **classifier**: An estimator with a discrete response to input data. *Assign a label to each data point.* Classifiers implement a `fit` member function.\n",
79 | " * **regressor**: An estimator with a continuous response to input data. *Predict output value of each data point.*\n",
80 | " * **cluster**: Performs clustering of input data. *Discover grouping within the data set.*\n",
81 | " * **transformer**: Transforms input data according to a set of requirements. *Preprocess data to have zero mean and unit variance*\n",
82 | "* **model**: Nearly synonymous with **estimator**. A **model** may be a more concrete instance of an **estimator**.\n",
83 | "* **metric**: A set of scores given to a **model** or **estimator** to indicate its accuracy. *Estimators for supervised learning implement a `score` member function.*"
84 | ]
85 | },
86 | {
87 | "cell_type": "markdown",
88 | "metadata": {},
89 | "source": [
90 | "### Scikit Learn modules"
91 | ]
92 | },
93 | {
94 | "cell_type": "markdown",
95 | "metadata": {},
96 | "source": [
97 | "Each of the following modules must be individually imported. The modules listed here include **estimators** and higher-level methods to perform operations such as cross validation, grid search and pipelining."
98 | ]
99 | },
100 | {
101 | "cell_type": "code",
102 | "execution_count": null,
103 | "metadata": {
104 | "collapsed": false
105 | },
106 | "outputs": [],
107 | "source": [
108 | "import sklearn\n",
109 | "sklearn.__all__"
110 | ]
111 | },
112 | {
113 | "cell_type": "code",
114 | "execution_count": null,
115 | "metadata": {
116 | "collapsed": false,
117 | "scrolled": true
118 | },
119 | "outputs": [],
120 | "source": [
121 | "import sklearn.cluster\n",
122 | "help(sklearn.cluster)"
123 | ]
124 | },
125 | {
126 | "cell_type": "markdown",
127 | "metadata": {},
128 | "source": [
129 | "## Chosing an Estimator"
130 | ]
131 | },
132 | {
133 | "cell_type": "markdown",
134 | "metadata": {},
135 | "source": [
136 | "See the [Scikit Learn Flowchart](http://scikit-learn.org/stable/tutorial/machine_learning_map/)"
137 | ]
138 | },
139 | {
140 | "cell_type": "markdown",
141 | "metadata": {
142 | "collapsed": true
143 | },
144 | "source": [
145 | "As shown in the flowchart, the algorithms in scikit-learn mainly fall into:\n",
146 | " \n",
147 | "* Classification - Predicting the label or class membership of observation\n",
148 | "* Dimensionality reduction (Principal component analysis, independent component analysis)\n",
149 | "* Regression - Predicting a continuous response variable rather than class membership\n",
150 | "* Clustering - Unsupervised algorithms grouping similar observations"
151 | ]
152 | },
153 | {
154 | "cell_type": "markdown",
155 | "metadata": {},
156 | "source": [
157 | "In the scikit-learn notebooks we work with algorithms from each of these groups."
158 | ]
159 | },
160 | {
161 | "cell_type": "code",
162 | "execution_count": null,
163 | "metadata": {
164 | "collapsed": true
165 | },
166 | "outputs": [],
167 | "source": []
168 | }
169 | ],
170 | "metadata": {
171 | "anaconda-cloud": {},
172 | "continuum": {
173 | "depends": [],
174 | "tag": "ml_intro"
175 | },
176 | "kernelspec": {
177 | "display_name": "Python [conda env:python3]",
178 | "language": "python",
179 | "name": "conda-env-python3-py"
180 | },
181 | "language_info": {
182 | "codemirror_mode": {
183 | "name": "ipython",
184 | "version": 3
185 | },
186 | "file_extension": ".py",
187 | "mimetype": "text/x-python",
188 | "name": "python",
189 | "nbconvert_exporter": "python",
190 | "pygments_lexer": "ipython3",
191 | "version": "3.5.2"
192 | }
193 | },
194 | "nbformat": 4,
195 | "nbformat_minor": 0
196 | }
197 |
--------------------------------------------------------------------------------
/3-Analytics/sklearn/2_KNN_and_Validation.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Table of Contents\n",
8 | "* [Learning Objectives](#Learning-Objectives)\n",
9 | "* [Importing our libraries](#Importing-our-libraries)\n",
10 | "\t* [Some Simple Data](#Some-Simple-Data)\n",
11 | "\t* [A Simple kNN Classifier](#A-Simple-kNN-Classifier)\n",
12 | "\t* [Simple Evaluation](#Simple-Evaluation)\n",
13 | "\t* [Visualization using two features](#Visualization-using-two-features)\n",
14 | "\t* [Exercise (exploring grid_step and number of neighbors)](#Exercise-%28exploring-grid_step-and-number-of-neighbors%29)\n",
15 | "* [Simple Comparison](#Simple-Comparison)\n",
16 | "* [Synthetic Datasets](#Synthetic-Datasets)\n",
17 | "\t* [make_blobs](#make_blobs)\n",
18 | "\t* [make_classification](#make_classification)\n",
19 | "* [Downloading Common Datasets](#Downloading-Common-Datasets)\n",
20 | "\t* [Exercise](#Exercise)\n"
21 | ]
22 | },
23 | {
24 | "cell_type": "markdown",
25 | "metadata": {},
26 | "source": [
27 | "# Learning Objectives:"
28 | ]
29 | },
30 | {
31 | "cell_type": "markdown",
32 | "metadata": {},
33 | "source": [
34 | "After completion of this module, learners should be able to:\n",
35 | "\n",
36 | "* Explain what KNN classification and logistic regression are\n",
37 | "* Apply the KNN classifier\n",
38 | "* Develop training/testing sets and perform model validation.\n",
39 | "\n",
40 | "\n",
 41 | "* Work with principal component analysis and support vector machines.\n",
42 | "* Compare optimization and curve fitting techniques.\n",
43 | "\n",
44 | "K-Nearest neighbor algorithms fall into regression and classification. In classification, a K-nearest neighbor method uses local vote counts for class membership based on K nearest neighbors considered. A K==1 model considers only the nearest neighbor.\n",
45 | "\n",
46 | "Logistic regression is fitting a logistic distribution to continuous data to model a binomial or multinomial response. An example, described [here](https://en.wikipedia.org/wiki/Logistic_regression), is a logistic regression that predicts probability of success/failure on an exam given observations of passing/failing and the hours studied in advance.\n",
47 | " "
48 | ]
49 | },
50 | {
51 | "cell_type": "markdown",
52 | "metadata": {},
53 | "source": [
54 | "# Importing our libraries"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": null,
60 | "metadata": {
61 | "collapsed": false
62 | },
63 | "outputs": [],
64 | "source": [
65 | "import numpy as np\n",
66 | "import pandas as pd\n",
67 | "import matplotlib.pyplot as plt\n",
68 | "from sklearn import (cross_validation, datasets,\n",
69 | " decomposition,\n",
70 | " grid_search, linear_model, \n",
71 | " neighbors, metrics)\n",
72 | "%matplotlib inline"
73 | ]
74 | },
75 | {
76 | "cell_type": "markdown",
77 | "metadata": {},
78 | "source": [
79 | "## Some Simple Data"
80 | ]
81 | },
82 | {
83 | "cell_type": "code",
84 | "execution_count": null,
85 | "metadata": {
86 | "collapsed": false,
87 | "scrolled": true
88 | },
89 | "outputs": [],
90 | "source": [
91 | "iris = datasets.load_iris()\n",
92 | "examples = iris.data\n",
93 | "classes = iris.target\n",
94 | "print(iris.DESCR)"
95 | ]
96 | },
97 | {
98 | "cell_type": "code",
99 | "execution_count": null,
100 | "metadata": {
101 | "collapsed": false,
102 | "scrolled": true
103 | },
104 | "outputs": [],
105 | "source": [
106 | "# Let's take a look at the \"shape\" of the data\n",
107 | "df_iris = pd.DataFrame(iris.data, columns=iris.feature_names)\n",
108 | "df_iris['species'] = iris.target\n",
109 | "df_iris['species_name'] = [iris.target_names[i] for i in iris.target]\n",
110 | "df_iris"
111 | ]
112 | },
113 | {
114 | "cell_type": "code",
115 | "execution_count": null,
116 | "metadata": {
117 | "collapsed": false
118 | },
119 | "outputs": [],
120 | "source": [
121 | "# Create a training and a testing set from this data by choosing indices\n",
122 | "# (wait a few cells for a better API)\n",
123 | "\n",
124 | "# Random order of indices\n",
125 | "n_examples = len(examples)\n",
126 | "shuffled_indices = np.random.permutation(n_examples)\n",
127 | "\n",
128 | "# Pick a training/testing split\n",
129 | "train_pct = 0.8\n",
130 | "train_ct = int(n_examples * train_pct)\n",
131 | "\n",
132 | "# Select indices for training and testing\n",
133 | "train_idx, test_idx = shuffled_indices[:train_ct], shuffled_indices[train_ct:]\n",
134 | "train_idx, test_idx"
135 | ]
136 | },
137 | {
138 | "cell_type": "markdown",
139 | "metadata": {},
140 | "source": [
141 | "## A Simple kNN Classifier"
142 | ]
143 | },
144 | {
145 | "cell_type": "code",
146 | "execution_count": null,
147 | "metadata": {
148 | "collapsed": false
149 | },
150 | "outputs": [],
151 | "source": [
152 | "knn5 = neighbors.KNeighborsClassifier(n_neighbors=5)"
153 | ]
154 | },
155 | {
156 | "cell_type": "markdown",
157 | "metadata": {},
158 | "source": [
159 | "## Simple Evaluation"
160 | ]
161 | },
162 | {
163 | "cell_type": "code",
164 | "execution_count": null,
165 | "metadata": {
166 | "collapsed": false
167 | },
168 | "outputs": [],
169 | "source": [
170 | "knn5.fit(examples[train_idx], classes[train_idx])\n",
171 | "predictions = knn5.predict(examples[test_idx])\n",
172 | "print(metrics.accuracy_score(predictions, classes[test_idx]))"
173 | ]
174 | },
175 | {
176 | "cell_type": "markdown",
177 | "metadata": {},
178 | "source": [
179 | "## Visualization using two features"
180 | ]
181 | },
182 | {
183 | "cell_type": "code",
184 | "execution_count": null,
185 | "metadata": {
186 | "collapsed": true
187 | },
188 | "outputs": [],
189 | "source": [
190 | "datasets.make_classification?"
191 | ]
192 | },
193 | {
194 | "cell_type": "code",
195 | "execution_count": null,
196 | "metadata": {
197 | "collapsed": false
198 | },
199 | "outputs": [],
200 | "source": [
201 | "# the punch line is to predict for a large grid of data points\n",
202 | "# http://scikit-learn.org/stable/auto_examples/neighbors/plot_classification.html\n",
203 | "def KNN_2D_map(twodim):\n",
204 | " grid_step = 0.1\n",
205 | " knn5 = neighbors.KNeighborsClassifier(n_neighbors=5)\n",
206 | " knn5.fit(twodim, classes)\n",
207 | "\n",
208 | " # create testing data points on the standard \n",
209 | " # Cartesian grid (over our data range)\n",
210 | " # to color the background\n",
211 | " maxes = np.max(twodim, axis=0) + 2*grid_step\n",
212 | " mins = np.min(twodim, axis=0) - grid_step\n",
213 | "\n",
214 | " xs,ys = np.mgrid[mins[0]:maxes[0]:grid_step, \n",
215 | " mins[1]:maxes[1]:grid_step]\n",
216 | " grid_points = np.c_[xs.ravel(), ys.ravel()]\n",
217 | "\n",
218 | " p = knn5.predict(grid_points)\n",
219 | "\n",
220 | " # plot the predictions at the grid points\n",
221 | " fig = plt.figure(figsize=(10,5))\n",
222 | " ax = fig.gca()\n",
223 | " ax.pcolormesh(xs,ys,p.reshape(xs.shape))\n",
224 | "\n",
225 | " ax.set_xlim(mins[0], maxes[0]-grid_step)\n",
226 | " ax.set_ylim(mins[1], maxes[1]-grid_step)\n",
227 | " \n",
228 | "twodim = examples[:,:2] # select first two features\n",
229 | "KNN_2D_map(twodim)"
230 | ]
231 | },
232 | {
233 | "cell_type": "code",
234 | "execution_count": null,
235 | "metadata": {
236 | "collapsed": false
237 | },
238 | "outputs": [],
239 | "source": [
240 | "twodim2 = examples[:,2:] # choose different features\n",
241 | "KNN_2D_map(twodim2)"
242 | ]
243 | },
244 | {
245 | "cell_type": "markdown",
246 | "metadata": {},
247 | "source": [
248 | "## Exercise (exploring grid_step and number of neighbors)"
249 | ]
250 | },
251 | {
252 | "cell_type": "markdown",
253 | "metadata": {},
254 | "source": [
255 | "Quick question: why did we add an extra `grid_step` value to the maxes, above?\n",
256 | "\n",
257 | "Investigate what happens to the decision boundary as we raise or lower the number of neighbors. You could start answering this by trying a range of neighbor values: $k=3,5,10,15$. Could the `grid_step` parameter mislead us, if we aren't paying close attention?"
258 | ]
259 | },
260 | {
261 | "cell_type": "markdown",
262 | "metadata": {},
263 | "source": [
264 | "# Simple Comparison"
265 | ]
266 | },
267 | {
268 | "cell_type": "markdown",
269 | "metadata": {},
270 | "source": [
271 | "We'll learn about a more efficient comparison method in the next section"
272 | ]
273 | },
274 | {
275 | "cell_type": "code",
276 | "execution_count": null,
277 | "metadata": {
278 | "collapsed": false
279 | },
280 | "outputs": [],
281 | "source": [
282 | "knn5 = neighbors.KNeighborsClassifier(n_neighbors=5)\n",
283 | "logreg = linear_model.LogisticRegression()"
284 | ]
285 | },
286 | {
287 | "cell_type": "code",
288 | "execution_count": null,
289 | "metadata": {
290 | "collapsed": false
291 | },
292 | "outputs": [],
293 | "source": [
294 | "knn5.fit(examples[train_idx], classes[train_idx])\n",
295 | "logreg.fit(examples[train_idx], classes[train_idx])\n",
296 | "\n",
297 | "lr_preds = logreg.predict(examples[test_idx])\n",
298 | "knn5_preds = knn5.predict(examples[test_idx])\n",
299 | "\n",
300 | "for preds in [lr_preds, knn5_preds]:\n",
301 | " print(metrics.accuracy_score(preds, classes[test_idx]))"
302 | ]
303 | },
304 | {
305 | "cell_type": "markdown",
306 | "metadata": {},
307 | "source": [
308 | "# Synthetic Datasets"
309 | ]
310 | },
311 | {
312 | "cell_type": "markdown",
313 | "metadata": {},
314 | "source": [
315 | "## make_blobs"
316 | ]
317 | },
318 | {
319 | "cell_type": "markdown",
320 | "metadata": {},
321 | "source": [
322 | "`sklearn.datasets.make_blobs(n_samples=100, \n",
323 | " n_features=2,\n",
324 | " centers=3, # number of classes\n",
325 | " cluster_std=1.0) # shared -or- class-by-class`"
326 | ]
327 | },
328 | {
329 | "cell_type": "code",
330 | "execution_count": null,
331 | "metadata": {
332 | "collapsed": false
333 | },
334 | "outputs": [],
335 | "source": [
336 | "x, y = datasets.make_blobs(n_samples=50)\n",
337 | "plt.scatter(x[:,0], x[:,1], c=y, s=50)"
338 | ]
339 | },
340 | {
341 | "cell_type": "markdown",
342 | "metadata": {},
343 | "source": [
344 | "## make_classification"
345 | ]
346 | },
347 | {
348 | "cell_type": "markdown",
349 | "metadata": {},
350 | "source": [
351 | "`sklearn.datasets.make_classification()`\n",
352 | "\n",
353 | "Many, many arguments. See: http://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_classification.html\n",
354 | "\n",
355 | "For examples, see: http://scikit-learn.org/stable/auto_examples/datasets/plot_random_dataset.html"
356 | ]
357 | },
358 | {
359 | "cell_type": "code",
360 | "execution_count": null,
361 | "metadata": {
362 | "collapsed": false
363 | },
364 | "outputs": [],
365 | "source": [
366 | "x,y = datasets.make_classification(n_features=2, n_redundant=0, n_informative=2,\n",
367 | " n_clusters_per_class=1, n_classes=3)\n",
368 | "plt.scatter(x[:, 0], x[:, 1], c=y, s=50);"
369 | ]
370 | },
371 | {
372 | "cell_type": "markdown",
373 | "metadata": {},
374 | "source": [
375 | "# Downloading Common Datasets"
376 | ]
377 | },
378 | {
379 | "cell_type": "code",
380 | "execution_count": null,
381 | "metadata": {
382 | "collapsed": false
383 | },
384 | "outputs": [],
385 | "source": [
386 | "iris_dwn_1 = datasets.fetch_mldata('iris', data_home=\"./data\")\n",
387 | "print(iris_dwn_1.data.shape)\n",
388 | "print(iris_dwn_1.target.shape)"
389 | ]
390 | },
391 | {
392 | "cell_type": "code",
393 | "execution_count": null,
394 | "metadata": {
395 | "collapsed": false
396 | },
397 | "outputs": [],
398 | "source": [
399 | "iris_dwn_2 = datasets.fetch_mldata('datasets-UCI Iris',\n",
400 | " target_name='class', \n",
401 | " data_name='double0',\n",
402 | " data_home=\"./data\")\n",
403 | "print(iris_dwn_2.data.shape)\n",
404 | "print(iris_dwn_2.target.shape)"
405 | ]
406 | }
407 | ],
408 | "metadata": {
409 | "anaconda-cloud": {},
410 | "continuum": {
411 | "depends": [],
412 | "requires": [
413 | "data/wine.csv"
414 | ],
415 | "tag": "ml_knn"
416 | },
417 | "kernelspec": {
418 | "display_name": "Python [conda env:python3]",
419 | "language": "python",
420 | "name": "conda-env-python3-py"
421 | },
422 | "language_info": {
423 | "codemirror_mode": {
424 | "name": "ipython",
425 | "version": 3
426 | },
427 | "file_extension": ".py",
428 | "mimetype": "text/x-python",
429 | "name": "python",
430 | "nbconvert_exporter": "python",
431 | "pygments_lexer": "ipython3",
432 | "version": "3.5.2"
433 | }
434 | },
435 | "nbformat": 4,
436 | "nbformat_minor": 0
437 | }
438 |
--------------------------------------------------------------------------------
/3-Analytics/sklearn/3_ModelComparison.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Table of Contents\n",
8 | "* [Learning Objectives:](#Learning-Objectives:)\n",
9 | "* [Model Comparison](#Model-Comparison)\n",
10 | "\t* [Exercise](#Exercise)\n"
11 | ]
12 | },
13 | {
14 | "cell_type": "markdown",
15 | "metadata": {},
16 | "source": [
17 | "# Learning Objectives:"
18 | ]
19 | },
20 | {
21 | "cell_type": "markdown",
22 | "metadata": {},
23 | "source": [
24 | "After completion of this module, learners should be able to:\n",
25 | "\n",
26 | "* Perform model comparisons using CV scores\n",
27 | "\n",
28 | "We compare K-nearest neighbors classifiers, logistic regression, and a decision tree classifier. Background material on these classifiers can be found at:\n",
29 | "* [nearest neighbors](http://scikit-learn.org/stable/modules/neighbors.html)\n",
30 | "* [logistic regression](http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html)\n",
31 | "* [decision trees](http://scikit-learn.org/stable/modules/tree.html)"
32 | ]
33 | },
34 | {
35 | "cell_type": "markdown",
36 | "metadata": {},
37 | "source": [
38 | "# Model Comparison"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": null,
44 | "metadata": {
45 | "collapsed": true
46 | },
47 | "outputs": [],
48 | "source": [
49 | "# We saw some model comparison earlier. Let's add a few more models\n",
50 | "# (http://scikit-learn.org/stable/auto_examples/plot_classifier_comparison.html)\n",
51 | "%matplotlib inline\n",
52 | "from sklearn.tree import DecisionTreeClassifier\n",
53 | "from sklearn import neighbors, linear_model,cross_validation, datasets\n",
54 | "import matplotlib.pyplot as plt"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": null,
60 | "metadata": {
61 | "collapsed": true
62 | },
63 | "outputs": [],
64 | "source": [
65 | "iris = datasets.load_iris()\n",
66 | "examples = iris.data\n",
67 | "classes = iris.target\n",
68 | "n_examples = len(examples)"
69 | ]
70 | },
71 | {
72 | "cell_type": "code",
73 | "execution_count": null,
74 | "metadata": {
75 | "collapsed": false
76 | },
77 | "outputs": [],
78 | "source": [
79 | "knn20 = neighbors.KNeighborsClassifier(n_neighbors=20)\n",
80 | "knn5 = neighbors.KNeighborsClassifier(n_neighbors=5)\n",
81 | "logreg = linear_model.LogisticRegression()\n",
82 | "dectree = DecisionTreeClassifier()\n",
83 | "\n",
84 | "modelsAndMarker = [(knn20, 'x', 'knn20'), (knn5, 'o', 'knn5'), \n",
85 | " (logreg, '^', 'logreg'), (dectree, '.', 'dectree')]\n",
86 | "\n",
87 | "for mod, marker, label in modelsAndMarker:\n",
88 | " k_fold = cross_validation.KFold(n_examples, n_folds=10)\n",
89 | " cv_scores = cross_validation.cross_val_score(mod, examples, classes, \n",
90 | " cv=k_fold, \n",
91 | " scoring='accuracy', \n",
92 | " n_jobs=-1) # all CPUs \n",
93 | " plt.plot(cv_scores, marker=marker, label=label)\n",
94 | " \n",
95 | "plt.ylim(0.5, 1.05)\n",
96 | "plt.legend(bbox_to_anchor=(1.05, 1), loc=2)"
97 | ]
98 | },
99 | {
100 | "cell_type": "markdown",
101 | "metadata": {},
102 | "source": [
103 | "## Exercise"
104 | ]
105 | },
106 | {
107 | "cell_type": "markdown",
108 | "metadata": {},
109 | "source": [
110 | "In the \"Model Comparison\" code cell above, the k-folds are created inside the for-loop. What are the effects of moving that line outside (above) the loop? Is this strictly a benefit or are there any drawbacks? What factors might influence your answer?"
111 | ]
112 | },
113 | {
114 | "cell_type": "code",
115 | "execution_count": null,
116 | "metadata": {
117 | "collapsed": true
118 | },
119 | "outputs": [],
120 | "source": []
121 | }
122 | ],
123 | "metadata": {
124 | "anaconda-cloud": {},
125 | "continuum": {
126 | "depends": [
127 | "ml_kfold"
128 | ],
129 | "requires": [],
130 | "tag": "ml_compare"
131 | },
132 | "kernelspec": {
133 | "display_name": "Python [conda env:python3]",
134 | "language": "python",
135 | "name": "conda-env-python3-py"
136 | },
137 | "language_info": {
138 | "codemirror_mode": {
139 | "name": "ipython",
140 | "version": 3
141 | },
142 | "file_extension": ".py",
143 | "mimetype": "text/x-python",
144 | "name": "python",
145 | "nbconvert_exporter": "python",
146 | "pygments_lexer": "ipython3",
147 | "version": "3.5.2"
148 | }
149 | },
150 | "nbformat": 4,
151 | "nbformat_minor": 0
152 | }
153 |
--------------------------------------------------------------------------------
/3-Analytics/sklearn/4_RegressionModels.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Table of Contents\n",
8 | "* [Learning Objectives:](#Learning-Objectives:)\n",
9 | "* [Regression Models](#Regression-Models)\n"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "# Learning Objectives:"
17 | ]
18 | },
19 | {
20 | "cell_type": "markdown",
21 | "metadata": {},
22 | "source": [
23 | "After completion of this module, learners should be able to:\n",
24 | "\n",
25 | "* Apply the Linear Regression Model\n",
26 | "* Cross validate the model"
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {},
32 | "source": [
33 | "# Regression Models"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": null,
39 | "metadata": {
40 | "collapsed": false
41 | },
42 | "outputs": [],
43 | "source": [
44 | "import numpy as np\n",
45 | "from sklearn import datasets, linear_model, cross_validation, metrics\n",
46 | "diabetes_dataset = datasets.load_diabetes()\n",
47 | "dd_examples, dd_targets = diabetes_dataset.data, diabetes_dataset.target\n",
48 | "\n",
49 | "linreg = linear_model.LinearRegression()\n",
50 | "kfold = cross_validation.KFold(len(dd_examples), n_folds=3, shuffle=True)"
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": null,
56 | "metadata": {
57 | "collapsed": false
58 | },
59 | "outputs": [],
60 | "source": [
61 | "# manually extract a k-fold train/test split\n",
62 | "train, test = next(iter(kfold))\n",
63 | "linreg.fit(dd_examples[train], dd_targets[train])\n",
64 | "\n",
65 | "preds = linreg.predict(dd_examples[test])\n",
66 | "errors = preds - dd_targets[test]\n",
67 | "\n",
68 | "print(np.mean(errors**2))"
69 | ]
70 | },
71 | {
72 | "cell_type": "code",
73 | "execution_count": null,
74 | "metadata": {
75 | "collapsed": false
76 | },
77 | "outputs": [],
78 | "source": [
79 | "for train, test in kfold:\n",
80 | " preds = linreg.fit(dd_examples[train], dd_targets[train]).predict(dd_examples[test])\n",
81 | " print(metrics.mean_squared_error(preds, dd_targets[test]))"
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": null,
87 | "metadata": {
88 | "collapsed": false
89 | },
90 | "outputs": [],
91 | "source": [
92 | "cv_scores = cross_validation.cross_val_score(linreg, dd_examples, dd_targets, \n",
93 | " cv=kfold, \n",
94 | " scoring='mean_squared_error', \n",
95 | " n_jobs=-1) # all CPUs\n",
96 | "print(cv_scores)"
97 | ]
98 | },
99 | {
100 | "cell_type": "markdown",
101 | "metadata": {},
102 | "source": [
103 | "Additional regression metrics are described here:\n",
104 | "\n",
105 | "http://scikit-learn.org/stable/modules/model_evaluation.html#regression-metrics\n",
106 | "\n",
107 | "Their names are all importable from `sklearn.metrics`:\n",
108 | "\n",
109 | "`mean_absolute_error \n",
110 | "mean_squared_error \n",
111 | "median_absolute_error \n",
112 | "r2` \n"
113 | ]
114 | }
115 | ],
116 | "metadata": {
117 | "continuum": {
118 | "depends": [
119 | "ml_eval"
120 | ],
121 | "tag": "ml_regression"
122 | },
123 | "kernelspec": {
124 | "display_name": "Python [conda env:python3]",
125 | "language": "python",
126 | "name": "conda-env-python3-py"
127 | },
128 | "language_info": {
129 | "codemirror_mode": {
130 | "name": "ipython",
131 | "version": 3
132 | },
133 | "file_extension": ".py",
134 | "mimetype": "text/x-python",
135 | "name": "python",
136 | "nbconvert_exporter": "python",
137 | "pygments_lexer": "ipython3",
138 | "version": "3.5.2"
139 | }
140 | },
141 | "nbformat": 4,
142 | "nbformat_minor": 0
143 | }
144 |
--------------------------------------------------------------------------------
/3-Analytics/sklearn/EX01_CrossValidation.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Table of Contents\n",
8 | "* [Classification Exercise](#Classification-Exercise)\n"
9 | ]
10 | },
11 | {
12 | "cell_type": "markdown",
13 | "metadata": {},
14 | "source": [
15 | "After completion of this module, learners should be able to:\n",
16 | "\n",
17 | "* Be aware of the range of facilities in scikit-learn.\n",
18 | "* Apply classifiers, such as K-nearest neighbor, logistic regression, decision tree, and linear discriminant analysis."
19 | ]
20 | },
21 | {
22 | "cell_type": "markdown",
23 | "metadata": {},
24 | "source": [
25 | "# Classification Exercise"
26 | ]
27 | },
28 | {
29 | "cell_type": "markdown",
30 | "metadata": {},
31 | "source": [
32 | "1. Experiment with `make_gaussian_quantiles` to generate some data:\n",
33 | "\n",
34 | "http://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_gaussian_quantiles.html\n",
35 | "\n",
36 | "    2. It turns out that the wine dataset at mldata is broken. Here is how you can grab it directly from UCI (it will be downloaded to data/wine.csv in your local directory):\n",
37 | "\n",
38 | "```python\n",
39 | "import urllib\n",
40 | "url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data'\n",
41 | "urllib.request.urlretrieve(url, 'data/wine.csv')\n",
42 | "tbl = np.genfromtxt('data/wine.csv', delimiter=\",\")\n",
43 | "classes = tbl[:,0]\n",
44 | "examples = tbl[:,1:]\n",
45 | "```"
46 | ]
47 | },
48 | {
49 | "cell_type": "markdown",
50 | "metadata": {},
51 | "source": [
52 | "Pick two classifiers from `sklearn` (a partial list is given below) and compare their performance on your random dataset and the wine dataset. Use manual train/test splitting and KFold cross validation methods.\n",
53 | "\n",
54 | "\n",
55 | "```python\n",
56 | "from sklearn.neighbors import KNeighborsClassifier\n",
57 | "from sklearn.svm import SVC\n",
58 | "from sklearn.tree import DecisionTreeClassifier\n",
59 | "from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier\n",
60 | "from sklearn.naive_bayes import GaussianNB\n",
61 | "from sklearn.qda import QDA\n",
62 | "from sklearn.lda import LDA\n",
63 | "classifiers = [\n",
64 | " KNeighborsClassifier(3),\n",
65 | " SVC(kernel=\"linear\", C=0.025),\n",
66 | " SVC(gamma=2, C=1),\n",
67 | " DecisionTreeClassifier(max_depth=5),\n",
68 | " RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1), # ftrs @ split\n",
69 | " AdaBoostClassifier(),\n",
70 | " GaussianNB(),\n",
71 | " LDA(),\n",
72 | " QDA()]\n",
73 | "```"
74 | ]
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": null,
79 | "metadata": {
80 | "collapsed": true
81 | },
82 | "outputs": [],
83 | "source": [
84 | "import numpy as np\n",
85 | "import pandas as pd\n",
86 | "import matplotlib.pyplot as plt\n",
87 | "from sklearn import (cross_validation, datasets,\n",
88 | " decomposition,\n",
89 | " grid_search, linear_model, \n",
90 | " neighbors, metrics)\n",
91 | "%matplotlib inline"
92 | ]
93 | },
94 | {
95 | "cell_type": "code",
96 | "execution_count": null,
97 | "metadata": {
98 | "collapsed": true
99 | },
100 | "outputs": [],
101 | "source": []
102 | }
103 | ],
104 | "metadata": {
105 | "anaconda-cloud": {},
106 | "continuum": {
107 | "depends": [
108 | "ml_kfold",
109 | "ml_knn"
110 | ],
111 | "requires": [
112 | "data/wine.csv"
113 | ],
114 | "tag": "ml_ex_cv"
115 | },
116 | "kernelspec": {
117 | "display_name": "Python [conda env:python3]",
118 | "language": "python",
119 | "name": "conda-env-python3-py"
120 | },
121 | "language_info": {
122 | "codemirror_mode": {
123 | "name": "ipython",
124 | "version": 3
125 | },
126 | "file_extension": ".py",
127 | "mimetype": "text/x-python",
128 | "name": "python",
129 | "nbconvert_exporter": "python",
130 | "pygments_lexer": "ipython3",
131 | "version": "3.5.2"
132 | }
133 | },
134 | "nbformat": 4,
135 | "nbformat_minor": 0
136 | }
137 |
--------------------------------------------------------------------------------
/4-AcceleratedPython/Accel Python Offloading to Intel Xeon Phi (Co)processors.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/4-AcceleratedPython/Accel Python Offloading to Intel Xeon Phi (Co)processors.pdf
--------------------------------------------------------------------------------
/4-AcceleratedPython/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/4-AcceleratedPython/README.md
--------------------------------------------------------------------------------
/4-AcceleratedPython/numba/1_Numba_Basics.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Using `jit`\n",
8 | "\n",
9 | "We'll start with a trivial example but get to some more realistic applications shortly."
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "### Array sum\n",
17 | "\n",
18 | "The function below is a naive `sum` function that sums all the elements of a given array."
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 1,
24 | "metadata": {
25 | "collapsed": true
26 | },
27 | "outputs": [],
28 | "source": [
29 | "def sum_array(inp):\n",
30 | " J, I = inp.shape\n",
31 | " \n",
32 | " #this is a bad idea\n",
33 | " mysum = 0\n",
34 | " for j in range(J):\n",
35 | " for i in range(I):\n",
36 | " mysum += inp[j, i]\n",
37 | " \n",
38 | " return mysum"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": 2,
44 | "metadata": {
45 | "collapsed": true
46 | },
47 | "outputs": [],
48 | "source": [
49 | "import numpy"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": 3,
55 | "metadata": {
56 | "collapsed": false
57 | },
58 | "outputs": [],
59 | "source": [
60 | "arr = numpy.random.random((300, 300))"
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": 4,
66 | "metadata": {
67 | "collapsed": false
68 | },
69 | "outputs": [
70 | {
71 | "data": {
72 | "text/plain": [
73 | "45041.071854295071"
74 | ]
75 | },
76 | "execution_count": 4,
77 | "metadata": {},
78 | "output_type": "execute_result"
79 | }
80 | ],
81 | "source": [
82 | "sum_array(arr)"
83 | ]
84 | },
85 | {
86 | "cell_type": "code",
87 | "execution_count": 5,
88 | "metadata": {
89 | "collapsed": false
90 | },
91 | "outputs": [
92 | {
93 | "name": "stdout",
94 | "output_type": "stream",
95 | "text": [
96 | "10 loops, best of 3: 20.5 ms per loop\n"
97 | ]
98 | }
99 | ],
100 | "source": [
101 | "plain = %timeit -o sum_array(arr)"
102 | ]
103 | },
104 | {
105 | "cell_type": "markdown",
106 | "metadata": {},
107 | "source": [
108 | "# Let's get started"
109 | ]
110 | },
111 | {
112 | "cell_type": "code",
113 | "execution_count": 6,
114 | "metadata": {
115 | "collapsed": true
116 | },
117 | "outputs": [],
118 | "source": [
119 | "from numba import jit"
120 | ]
121 | },
122 | {
123 | "cell_type": "markdown",
124 | "metadata": {},
125 | "source": [
126 | "## As a function call"
127 | ]
128 | },
129 | {
130 | "cell_type": "code",
131 | "execution_count": 7,
132 | "metadata": {
133 | "collapsed": true
134 | },
135 | "outputs": [],
136 | "source": [
137 | "sum_array_numba = jit()(sum_array)"
138 | ]
139 | },
140 | {
141 | "cell_type": "markdown",
142 | "metadata": {},
143 | "source": [
144 | "What's up with the weird double `()`s? We'll cover that in a little bit."
145 | ]
146 | },
147 | {
148 | "cell_type": "code",
149 | "execution_count": 8,
150 | "metadata": {
151 | "collapsed": false
152 | },
153 | "outputs": [
154 | {
155 | "data": {
156 | "text/plain": [
157 | "45041.07185429507"
158 | ]
159 | },
160 | "execution_count": 8,
161 | "metadata": {},
162 | "output_type": "execute_result"
163 | }
164 | ],
165 | "source": [
166 | "sum_array_numba(arr)"
167 | ]
168 | },
169 | {
170 | "cell_type": "code",
171 | "execution_count": 9,
172 | "metadata": {
173 | "collapsed": false
174 | },
175 | "outputs": [
176 | {
177 | "name": "stdout",
178 | "output_type": "stream",
179 | "text": [
180 | "10000 loops, best of 3: 86.2 µs per loop\n"
181 | ]
182 | }
183 | ],
184 | "source": [
185 | "jitted = %timeit -o sum_array_numba(arr)"
186 | ]
187 | },
188 | {
189 | "cell_type": "code",
190 | "execution_count": 10,
191 | "metadata": {
192 | "collapsed": false
193 | },
194 | "outputs": [
195 | {
196 | "data": {
197 | "text/plain": [
198 | "238.1571011913437"
199 | ]
200 | },
201 | "execution_count": 10,
202 | "metadata": {},
203 | "output_type": "execute_result"
204 | }
205 | ],
206 | "source": [
207 | "plain.best / jitted.best"
208 | ]
209 | },
210 | {
211 | "cell_type": "markdown",
212 | "metadata": {},
213 | "source": [
214 | "## (more commonly) As a decorator"
215 | ]
216 | },
217 | {
218 | "cell_type": "code",
219 | "execution_count": 11,
220 | "metadata": {
221 | "collapsed": true
222 | },
223 | "outputs": [],
224 | "source": [
225 | "@jit\n",
226 | "def sum_array(inp):\n",
227 | " I, J = inp.shape\n",
228 | " \n",
229 | " mysum = 0\n",
230 | " for i in range(I):\n",
231 | " for j in range(J):\n",
232 | " mysum += inp[i, j]\n",
233 | " \n",
234 | " return mysum"
235 | ]
236 | },
237 | {
238 | "cell_type": "code",
239 | "execution_count": 12,
240 | "metadata": {
241 | "collapsed": false
242 | },
243 | "outputs": [
244 | {
245 | "data": {
246 | "text/plain": [
247 | "45041.07185429507"
248 | ]
249 | },
250 | "execution_count": 12,
251 | "metadata": {},
252 | "output_type": "execute_result"
253 | }
254 | ],
255 | "source": [
256 | "sum_array(arr)"
257 | ]
258 | },
259 | {
260 | "cell_type": "code",
261 | "execution_count": 13,
262 | "metadata": {
263 | "collapsed": false
264 | },
265 | "outputs": [
266 | {
267 | "name": "stdout",
268 | "output_type": "stream",
269 | "text": [
270 | "10000 loops, best of 3: 89.1 µs per loop\n"
271 | ]
272 | }
273 | ],
274 | "source": [
275 | "%timeit sum_array(arr)"
276 | ]
277 | },
278 | {
279 | "cell_type": "markdown",
280 | "metadata": {},
281 | "source": [
282 | "## How does this compare to NumPy?"
283 | ]
284 | },
285 | {
286 | "cell_type": "code",
287 | "execution_count": 14,
288 | "metadata": {
289 | "collapsed": false
290 | },
291 | "outputs": [
292 | {
293 | "name": "stdout",
294 | "output_type": "stream",
295 | "text": [
296 | "The slowest run took 5.33 times longer than the fastest. This could mean that an intermediate result is being cached.\n",
297 | "10000 loops, best of 3: 40.7 µs per loop\n"
298 | ]
299 | }
300 | ],
301 | "source": [
302 | "%timeit arr.sum()"
303 | ]
304 | },
305 | {
306 | "cell_type": "markdown",
307 | "metadata": {},
308 | "source": [
309 | "## When is Numba faster than NumPy?\n",
310 | "\n",
311 | "When doing more complex things, or when using less common integer types, like int16:"
312 | ]
313 | },
314 | {
315 | "cell_type": "code",
316 | "execution_count": 15,
317 | "metadata": {
318 | "collapsed": false
319 | },
320 | "outputs": [],
321 | "source": [
322 | "arr_int16 = (arr * 4096).astype(numpy.int16)"
323 | ]
324 | },
325 | {
326 | "cell_type": "code",
327 | "execution_count": 17,
328 | "metadata": {
329 | "collapsed": false
330 | },
331 | "outputs": [
332 | {
333 | "name": "stdout",
334 | "output_type": "stream",
335 | "text": [
336 | "10000 loops, best of 3: 20 µs per loop\n"
337 | ]
338 | }
339 | ],
340 | "source": [
341 | "jitted_int16 = %timeit -o sum_array_numba(arr_int16)"
342 | ]
343 | },
344 | {
345 | "cell_type": "code",
346 | "execution_count": 18,
347 | "metadata": {
348 | "collapsed": false
349 | },
350 | "outputs": [
351 | {
352 | "name": "stdout",
353 | "output_type": "stream",
354 | "text": [
355 | "The slowest run took 7.39 times longer than the fastest. This could mean that an intermediate result is being cached.\n",
356 | "10000 loops, best of 3: 108 µs per loop\n"
357 | ]
358 | }
359 | ],
360 | "source": [
361 | "numpy_int16 = %timeit -o arr_int16.sum()"
362 | ]
363 | },
364 | {
365 | "cell_type": "code",
366 | "execution_count": 19,
367 | "metadata": {
368 | "collapsed": false
369 | },
370 | "outputs": [
371 | {
372 | "data": {
373 | "text/plain": [
374 | "5.420978311244756"
375 | ]
376 | },
377 | "execution_count": 19,
378 | "metadata": {},
379 | "output_type": "execute_result"
380 | }
381 | ],
382 | "source": [
383 | "numpy_int16.best / jitted_int16.best"
384 | ]
385 | },
386 | {
387 | "cell_type": "markdown",
388 | "metadata": {},
389 | "source": [
390 | "NumPy doesn't have a specialized version of `sum()` for 16-bit integers, but Numba just generated one that was many times faster! Numba can take advantage of things like AVX support for packed integers while NumPy has to cast to a larger datatype to use one of the precompiled implementations."
391 | ]
392 | },
393 | {
394 | "cell_type": "markdown",
395 | "metadata": {},
396 | "source": [
397 | "## When does `numba` compile things?"
398 | ]
399 | },
400 | {
401 | "cell_type": "markdown",
402 | "metadata": {},
403 | "source": [
404 | "The first time you call the function. "
405 | ]
406 | }
407 | ],
408 | "metadata": {
409 | "anaconda-cloud": {},
410 | "kernelspec": {
411 | "display_name": "Python [conda env:python3]",
412 | "language": "python",
413 | "name": "conda-env-python3-py"
414 | },
415 | "language_info": {
416 | "codemirror_mode": {
417 | "name": "ipython",
418 | "version": 3
419 | },
420 | "file_extension": ".py",
421 | "mimetype": "text/x-python",
422 | "name": "python",
423 | "nbconvert_exporter": "python",
424 | "pygments_lexer": "ipython3",
425 | "version": "3.5.2"
426 | }
427 | },
428 | "nbformat": 4,
429 | "nbformat_minor": 0
430 | }
431 |
--------------------------------------------------------------------------------
/4-AcceleratedPython/numba/EX02_Direct_Summation-Solution.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "import numpy\n",
12 | "from numba import njit"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 2,
18 | "metadata": {
19 | "collapsed": true
20 | },
21 | "outputs": [],
22 | "source": [
23 | "particle_dtype = numpy.dtype({'names':['x','y','z','m','phi'], \n",
24 | " 'formats':[numpy.double, \n",
25 | " numpy.double, \n",
26 | " numpy.double, \n",
27 | " numpy.double, \n",
28 | " numpy.double]})"
29 | ]
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {},
34 | "source": [
35 | "# Exercise 1\n",
36 | "\n",
 37 |     "Write a function `create_n_random_particles` that takes the arguments `n` (number of particles), `m` (mass of every particle), and a domain within which to generate random coordinates (as in the class above).\n",
38 | "It should create an array with `n` elements and `dtype=particle_dtype` and then return that array.\n",
39 | "\n",
40 | "For each particle, the mass should be initialized to the value of `m` and the potential `phi` initialized to zero.\n",
41 | "\n",
42 | "For the `x` component of a given particle `p`, you might do something like\n",
43 | "\n",
44 | "```python\n",
45 | "p['x'] = domain * numpy.random.random()\n",
46 | "```"
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": 8,
52 | "metadata": {
53 | "collapsed": true
54 | },
55 | "outputs": [],
56 | "source": [
57 | "def create_n_random_particles(n, m, domain=1):\n",
58 | " '''\n",
59 | " Creates `n` particles with mass `m` with random coordinates\n",
60 | " between 0 and `domain`\n",
61 | " '''\n",
62 | " parts = numpy.zeros((n), dtype=particle_dtype)\n",
63 | " \n",
64 | " parts['x'] = numpy.random.random(size=n) * domain\n",
65 | " parts['y'] = numpy.random.random(size=n) * domain\n",
66 | " parts['z'] = numpy.random.random(size=n) * domain\n",
67 | " parts['m'] = m\n",
68 | " parts['phi'] = 0.0\n",
69 | "\n",
70 | " return parts"
71 | ]
72 | },
73 | {
74 | "cell_type": "markdown",
75 | "metadata": {},
76 | "source": [
77 | "Test it out!"
78 | ]
79 | },
80 | {
81 | "cell_type": "code",
82 | "execution_count": 10,
83 | "metadata": {
84 | "collapsed": false
85 | },
86 | "outputs": [
87 | {
88 | "data": {
89 | "text/plain": [
90 | "array([ (0.07865253058714916, 0.17845767290893022, 0.2782564508743751, 0.001, 0.0),\n",
91 | " (0.6098656647837719, 0.465900008549502, 0.7708386758735862, 0.001, 0.0),\n",
92 | " (0.5407396799472325, 0.43441139551555785, 0.5205542751741511, 0.001, 0.0),\n",
93 | " (0.6289394790346508, 0.5203392254721185, 0.510620859464995, 0.001, 0.0),\n",
94 | " (0.08541443823778716, 0.12960520559911615, 0.5964363323868767, 0.001, 0.0)], \n",
95 | " dtype=[('x', '= 4 and int(sys.argv[-1]) <= 64:
71 | thread_count=int(sys.argv[-1])
72 |
73 | start_time = time.time()
74 |
75 | pi = calcpi_threads(samples, thread_count)
76 |
77 | end_time = time.time()
78 |
79 | util.output(samples, pi, start_time, end_time)
80 |
--------------------------------------------------------------------------------
/5-AdvancedScaling/pi/util.py:
--------------------------------------------------------------------------------
1 | # File: util.py
2 | # Author: William Scullin
3 | # Date: 2015-11-28
4 | #
5 | # Utility functions used by all demo programs
6 | #
7 |
8 | """This module contains utility functions used by pi calculating
9 | demo programs.
10 | """
11 |
12 |
13 | from sys import argv
14 | from math import pi as const_pi
15 | from decimal import Decimal, InvalidOperation
16 |
17 |
def output(samples=0, pi=0, start_time=0, end_time=0):
    """Print the estimated pi value, its relative error, and the run time.

    samples    -- number of samples used for the estimate
    pi         -- the estimated value of pi
    start_time -- wall-clock time when the run started (seconds)
    end_time   -- wall-clock time when the run finished (seconds)
    """
    # Relative error against math.pi, expressed in percent.
    perr = (abs(const_pi - pi) / const_pi) * 100
    # print() calls instead of Python-2 print statements: the original
    # statements are a SyntaxError under the Python 3.5 environment the
    # repository README sets up; the parenthesized single-argument form
    # behaves identically on Python 2.
    # NOTE(review): '%02f' is a minimum field *width* of 2, not 2 decimal
    # places -- '%.2f' was probably intended; kept as-is to preserve output.
    print("Pi value is %f, with error %02f%%" % (pi, perr))
    print("Run time for %s samples was %s" % (samples, end_time - start_time))
23 |
24 |
def get_sample_count(samples=1.2e7):
    """Return the sample count taken from argv[1], or the default.

    argv[1] is parsed through Decimal so scientific notation like
    '1e7' is accepted; anything unparseable falls back to `samples`.
    """
    if len(argv) <= 1:
        return samples
    try:
        return int(Decimal(argv[1]))
    except (ValueError, InvalidOperation):
        # Non-numeric argument: keep the caller-supplied default.
        return samples
33 |
--------------------------------------------------------------------------------
/5-AdvancedScaling/pi/util.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/5-AdvancedScaling/pi/util.pyc
--------------------------------------------------------------------------------
/5-AdvancedScaling/scaling_python_with_mpi.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/5-AdvancedScaling/scaling_python_with_mpi.pdf
--------------------------------------------------------------------------------
/5-AdvancedScaling/scaling_python_with_mpi.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/5-AdvancedScaling/scaling_python_with_mpi.pptx
--------------------------------------------------------------------------------
/6-Profiling/Profiling.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Profiling Tricks in Jupyter"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {
14 | "collapsed": false
15 | },
16 | "outputs": [],
17 | "source": [
18 | "import numpy as np"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 2,
24 | "metadata": {
25 | "collapsed": true
26 | },
27 | "outputs": [],
28 | "source": [
29 | "def filter_bad_values_try1(values, lower, upper):\n",
30 | " good = []\n",
31 | " for v in values:\n",
32 | " if lower < v < upper:\n",
33 | " good.append(v)\n",
34 | " return np.array(good)"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": 3,
40 | "metadata": {
41 | "collapsed": true
42 | },
43 | "outputs": [],
44 | "source": [
45 | "def filter_bad_values_try4(values, lower, upper):\n",
46 | " selector_lower = lower < values\n",
47 | " selector_upper = values < upper\n",
48 | " selector = selector_lower & selector_upper\n",
49 | " return values[selector]"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": 4,
55 | "metadata": {
56 | "collapsed": false
57 | },
58 | "outputs": [
59 | {
60 | "name": "stdout",
61 | "output_type": "stream",
62 | "text": [
63 | "500065\n",
64 | "500065\n"
65 | ]
66 | }
67 | ],
68 | "source": [
69 | "a = np.random.uniform(-2000, 2000, 1000000)\n",
70 | "print(len(filter_bad_values_try1(a, -1000, 1000)))\n",
71 | "print(len(filter_bad_values_try4(a, -1000, 1000)))"
72 | ]
73 | },
74 | {
75 | "cell_type": "markdown",
76 | "metadata": {},
77 | "source": [
78 | "## %time"
79 | ]
80 | },
81 | {
82 | "cell_type": "code",
83 | "execution_count": 5,
84 | "metadata": {
85 | "collapsed": false
86 | },
87 | "outputs": [
88 | {
89 | "name": "stdout",
90 | "output_type": "stream",
91 | "text": [
92 | "CPU times: user 620 ms, sys: 10.3 ms, total: 630 ms\n",
93 | "Wall time: 629 ms\n",
94 | "CPU times: user 8.75 ms, sys: 1.17 ms, total: 9.91 ms\n",
95 | "Wall time: 9.79 ms\n"
96 | ]
97 | },
98 | {
99 | "data": {
100 | "text/plain": [
101 | "array([ 450.2143654 , -948.85901315, -293.47172022, ..., -775.49455528,\n",
102 | " -986.98275299, 601.84069558])"
103 | ]
104 | },
105 | "execution_count": 5,
106 | "metadata": {},
107 | "output_type": "execute_result"
108 | }
109 | ],
110 | "source": [
111 | "%time filter_bad_values_try1(a, -1000, 1000)\n",
112 | "%time filter_bad_values_try4(a, -1000, 1000)"
113 | ]
114 | },
115 | {
116 | "cell_type": "markdown",
117 | "metadata": {},
118 | "source": [
119 | "## %timeit"
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": 6,
125 | "metadata": {
126 | "collapsed": false
127 | },
128 | "outputs": [
129 | {
130 | "name": "stdout",
131 | "output_type": "stream",
132 | "text": [
133 | "100 loops, best of 3: 8.82 ms per loop\n"
134 | ]
135 | }
136 | ],
137 | "source": [
138 | "%timeit filter_bad_values_try4(a, -1000, 1000)"
139 | ]
140 | },
141 | {
142 | "cell_type": "markdown",
143 | "metadata": {},
144 | "source": [
145 | "## %%prun - Profile a cell with cProfile"
146 | ]
147 | },
148 | {
149 | "cell_type": "code",
150 | "execution_count": 7,
151 | "metadata": {
152 | "collapsed": false
153 | },
154 | "outputs": [
155 | {
156 | "name": "stdout",
157 | "output_type": "stream",
158 | "text": [
159 | " "
160 | ]
161 | }
162 | ],
163 | "source": [
164 | "%%prun \n",
165 | "b = np.random.uniform(-2000, 2000, 1000000)\n",
166 | "filter_bad_values_try1(b, -1000, 1000)\n",
167 | "filter_bad_values_try4(b, -1000, 1000)"
168 | ]
169 | },
170 | {
171 | "cell_type": "markdown",
172 | "metadata": {},
173 | "source": [
174 | "## %lprun - Profile line execution with line_profiler"
175 | ]
176 | },
177 | {
178 | "cell_type": "code",
179 | "execution_count": 8,
180 | "metadata": {
181 | "collapsed": false
182 | },
183 | "outputs": [
184 | {
185 | "name": "stdout",
186 | "output_type": "stream",
187 | "text": [
188 | "Requirement already up-to-date: line_profiler in /Users/stan/anaconda/envs/sc2016/lib/python3.5/site-packages\n",
189 | "Requirement already up-to-date: IPython>=0.13 in /Users/stan/anaconda/envs/sc2016/lib/python3.5/site-packages (from line_profiler)\n"
190 | ]
191 | }
192 | ],
193 | "source": [
194 | "!pip install --upgrade line_profiler"
195 | ]
196 | },
197 | {
198 | "cell_type": "code",
199 | "execution_count": 9,
200 | "metadata": {
201 | "collapsed": false
202 | },
203 | "outputs": [],
204 | "source": [
205 | "%load_ext line_profiler"
206 | ]
207 | },
208 | {
209 | "cell_type": "code",
210 | "execution_count": 10,
211 | "metadata": {
212 | "collapsed": false
213 | },
214 | "outputs": [],
215 | "source": [
216 | "%lprun -f filter_bad_values_try1 filter_bad_values_try1(a, -1000, 1000)"
217 | ]
218 | },
219 | {
220 | "cell_type": "code",
221 | "execution_count": null,
222 | "metadata": {
223 | "collapsed": true
224 | },
225 | "outputs": [],
226 | "source": []
227 | }
228 | ],
229 | "metadata": {
230 | "anaconda-cloud": {},
231 | "kernelspec": {
232 | "display_name": "Python [conda env:sc2016]",
233 | "language": "python",
234 | "name": "conda-env-sc2016-py"
235 | },
236 | "language_info": {
237 | "codemirror_mode": {
238 | "name": "ipython",
239 | "version": 3
240 | },
241 | "file_extension": ".py",
242 | "mimetype": "text/x-python",
243 | "name": "python",
244 | "nbconvert_exporter": "python",
245 | "pygments_lexer": "ipython3",
246 | "version": "3.5.2"
247 | }
248 | },
249 | "nbformat": 4,
250 | "nbformat_minor": 1
251 | }
252 |
--------------------------------------------------------------------------------
/6-Profiling/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/6-Profiling/README.md
--------------------------------------------------------------------------------
/6-Profiling/vtune/VTune_Python_Tutorial.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/6-Profiling/vtune/VTune_Python_Tutorial.pdf
--------------------------------------------------------------------------------
/6-Profiling/vtune/demo.py:
--------------------------------------------------------------------------------
class Encoder:
    """Toy character encoder used to compare string-building strategies."""

    # Characters with no mapping pass through unchanged.
    CHAR_MAP = {'a': 'b', 'b': 'c'}

    def __init__(self, input):
        self.input = input

    def process_slow(self):
        # Deliberately builds the result with repeated string
        # concatenation -- this is the "slow" path the profiling
        # demo is meant to expose; do not optimize it away.
        encoded = ''
        for char in self.input:
            encoded += self.CHAR_MAP.get(char, char)
        return encoded

    def process_fast(self):
        # Collect the pieces and join once at the end.
        pieces = [self.CHAR_MAP.get(char, char) for char in self.input]
        return ''.join(pieces)
17 |
18 |
--------------------------------------------------------------------------------
/6-Profiling/vtune/run.py:
--------------------------------------------------------------------------------
1 | import demo
2 | import time
3 |
def slow_encode(input):
    """Encode *input* through the concatenation-based slow path."""
    encoder = demo.Encoder(input)
    return encoder.process_slow()
6 |
def fast_encode(input):
    """Encode *input* through the list-join fast path."""
    encoder = demo.Encoder(input)
    return encoder.process_fast()
9 |
if __name__ == '__main__':
    payload = 'a' * 10000000  # ten million 'a' characters
    start = time.time()
    s1 = slow_encode(payload)
    slow_stop = time.time()
    print('slow: %.2f sec' % (slow_stop - start))
    s2 = fast_encode(payload)
    print('fast: %.2f sec' % (time.time() - slow_stop))
18 |
--------------------------------------------------------------------------------
/6-Profiling/vtune/run_th.py:
--------------------------------------------------------------------------------
1 | import demo
2 | import time
3 | import threading
4 |
def slow_encode(input):
    """Encode *input* through the concatenation-based slow path."""
    encoder = demo.Encoder(input)
    return encoder.process_slow()
7 |
def fast_encode(input):
    """Encode *input* through the list-join fast path."""
    encoder = demo.Encoder(input)
    return encoder.process_fast()
10 |
if __name__ == '__main__':
    payload = 'a' * 10000000  # ten million 'a' characters
    # Run the slow and fast encoders on concurrent threads so a
    # profiler can observe both at once.
    workers = [
        threading.Thread(target=slow_encode, args=(payload,)),
        threading.Thread(target=fast_encode, args=(payload,)),
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
19 |
--------------------------------------------------------------------------------
/6-Profiling/vtune/t_0.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import time
3 |
# Python 2/3 compatibility: probe for the Python-2-only xrange builtin
# and alias it to range on Python 3, so the rest of the module can use
# xrange unconditionally.
try:
    xrange
except NameError:
    # python3
    xrange = range
9 |
class BigObject:
    """Object whose str() yields a large (~39 KB) string.

    Used by the logging demos to make message formatting expensive.
    """

    # Built once at class-definition time and shared by every instance.
    STR_VALUE = ''.join(map(str, xrange(10000)))

    def __str__(self):
        return self.STR_VALUE
14 |
def makeParams():
    """Return a '{0}{1}...' format template plus 50 BigObject args for it."""
    objects = tuple(BigObject() for _ in xrange(50))
    placeholders = ['{%d}' % i for i in xrange(len(objects))]
    return ''.join(placeholders), objects
19 |
def doLog():
    """Log the same expensive message 1000 times, formatted eagerly.

    template.format(*objects) is evaluated *before* logging.info runs,
    so every BigObject.__str__ call happens whether or not the record
    is emitted -- this is the slow variant of the demo.
    """
    template, objects = makeParams()
    for _ in xrange(1000):
        logging.info(template.format(*objects))
24 |
def main():
    """Configure logging, run the demo once, and report wall-clock time."""
    logging.basicConfig()

    started = time.time()
    doLog()
    elapsed = time.time() - started
    print('run took: %.3f' % elapsed)


if __name__ == '__main__':
    main()
35 |
--------------------------------------------------------------------------------
/6-Profiling/vtune/t_1.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import time
3 |
# Python 2/3 compatibility: probe for the Python-2-only xrange builtin
# and alias it to range on Python 3, so the rest of the module can use
# xrange unconditionally.
try:
    xrange
except NameError:
    # python3
    xrange = range
9 |
class BigObject:
    """Object whose str() yields a large (~39 KB) string.

    Used by the logging demos to make message formatting expensive.
    """

    # Built once at class-definition time and shared by every instance.
    STR_VALUE = ''.join(map(str, xrange(10000)))

    def __str__(self):
        return self.STR_VALUE
14 |
def makeParams():
    """Return a '{0}{1}...' format template plus 50 BigObject args for it."""
    objects = tuple(BigObject() for _ in xrange(50))
    placeholders = ['{%d}' % i for i in xrange(len(objects))]
    return ''.join(placeholders), objects
19 |
def doLog():
    """Log 1000 records, handing the arguments to logging unformatted.

    The template and objects are passed as logging args, so formatting
    (and the expensive BigObject.__str__ calls) is deferred until the
    record is actually emitted -- the fast variant of the demo.
    """
    template, objects = makeParams()
    for _ in xrange(1000):
        logging.info(template, *objects)
24 |
def main():
    """Configure logging, run the demo once, and report wall-clock time."""
    logging.basicConfig()

    started = time.time()
    doLog()
    elapsed = time.time() - started
    print('run took: %.3f' % elapsed)


if __name__ == '__main__':
    main()
35 |
--------------------------------------------------------------------------------
/6-Profiling/vtune/test_class_sample.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import sys
4 | if sys.version_info < (3,0,0):
5 | from thread import get_ident
6 | else:
7 | from threading import get_ident
8 | import threading
9 | import time
10 |
class WaitClass:
    """Callable that blocks in time.sleep() for a fixed duration."""

    def __init__(self, time):
        # The parameter deliberately shadows the `time` module inside
        # __init__ only; __call__ still sees the module.
        self.time = time

    def __call__(self):
        thread_id = get_ident()
        print('START task_noop_waiter: %s\n' % thread_id, end='')
        time.sleep(self.time)
        print('STOP task_noop_waiter: %s\n' % thread_id, end='')
20 |
21 |
def do_work(self):
    """Busy-spin for self.time seconds, consuming CPU the whole time."""
    ident = get_ident()
    print('START1 task_cpu_eater: %s\n' % ident, end='')
    deadline = time.time() + self.time
    # Spin rather than sleep so the profiler attributes real CPU time here.
    while time.time() < deadline:
        pass
    print('STOP1 task_cpu_eater: %s\n' % ident, end='')
29 |
class SpinClass:
    """Callable that burns CPU for a fixed duration via do_work()."""

    def __init__(self, time):
        self.time = time

    def __call__(self):
        do_work(self)
36 |
def main():
    """Run a sleeping thread and a CPU-spinning thread side by side."""
    ident = get_ident()
    print('START main: %s\n' % ident, end='')
    waiter = WaitClass(9)   # blocks ~9 s in sleep
    spinner = SpinClass(7)  # spins ~7 s of CPU
    workers = [threading.Thread(target=waiter),
               threading.Thread(target=spinner)]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    print('STOP main: %s\n' % ident, end='')


if __name__ == '__main__':
    main()
52 |
--------------------------------------------------------------------------------
/6-Profiling/vtune/webinar/main.py:
--------------------------------------------------------------------------------
1 | from slowpoke import SlowpokeCore
2 | import logging
3 | import time
4 |
def makeParams():
    """Return a '{0}{1}...' format template plus 50 SlowpokeCore args for it.

    Uses range() instead of the Python-2-only xrange: unlike t_0.py/t_1.py,
    this script has no compatibility shim, so xrange raised NameError under
    the Python 3 environment the repository README sets up. On Python 2 the
    output is identical.
    """
    objects = tuple(SlowpokeCore(50000) for _ in range(50))
    template = ''.join('{%d}' % i for i in range(len(objects)))
    return template, objects
9 |
def doLog():
    """Log the eagerly formatted message 1000 times.

    template.format(*objects) runs before logging filters the record, so
    each SlowpokeCore.__str__ executes on every iteration -- this is the
    hot spot the VTune webinar profiles.
    """
    template, objects = makeParams()
    # range(), not the Python-2-only xrange, so this runs on Python 3.
    for _ in range(1000):
        logging.info(template.format(*objects))
14 |
def main():
    """Configure logging, time one doLog() pass, and print the duration."""
    logging.basicConfig()
    t0 = time.time()
    doLog()
    t1 = time.time()
    print('run took: %.3f' % (t1 - t0))


if __name__ == '__main__':
    main()
24 |
--------------------------------------------------------------------------------
/6-Profiling/vtune/webinar/pythonset.txt:
--------------------------------------------------------------------------------
1 | set VS90COMNTOOLS=C:\Users\kpoleary\AppData\Local\Programs\Common\Microsoft\Visual C++ for Python\9.0\VC\bin
2 |
--------------------------------------------------------------------------------
/6-Profiling/vtune/webinar/slowpoke/__init__.py:
--------------------------------------------------------------------------------
# Explicit relative import: the implicit Python-2-only form
# (`from core import ...`) raises ImportError under Python 3's
# absolute-import default; the explicit form works on 2.5+ and 3.x.
from .core import SlowpokeCore


class Slowpoke(SlowpokeCore):
    """Public alias of the compiled SlowpokeCore extension type."""
    pass


__all__ = ['Slowpoke', 'SlowpokeCore']
7 |
--------------------------------------------------------------------------------
/6-Profiling/vtune/webinar/slowpoke/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/6-Profiling/vtune/webinar/slowpoke/__init__.pyc
--------------------------------------------------------------------------------
/6-Profiling/vtune/webinar/slowpoke/compile.bat1:
--------------------------------------------------------------------------------
1 | python -d setup.py build_ext --inplace
2 |
--------------------------------------------------------------------------------
/6-Profiling/vtune/webinar/slowpoke/core.pyd:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContinuumIO/supercomputing2016-python/9b10892f4368519c989d31de62497710dec5a852/6-Profiling/vtune/webinar/slowpoke/core.pyd
--------------------------------------------------------------------------------
/6-Profiling/vtune/webinar/slowpoke/core.pyx:
--------------------------------------------------------------------------------
import math
# Cython extension type used by the VTune profiling webinar; its __str__
# deliberately performs O(N^2 / divisor) arithmetic so a clear native hot
# spot appears in profiler traces.
cdef class SlowpokeCore:
    cdef public object N        # workload size handed to doWork()
    cdef public int divisor     # scales down the outer loop iteration count
    def __init__(self, N):
        self.N = N
        self.divisor = 1

    # NOTE(review): `N / self.divisor` relies on integer-division semantics
    # for the range() bound; under Cython language_level=3 this becomes true
    # division -- confirm the build uses the legacy (level 2) semantics.
    cdef double doWork(self, int N) except *:
        cdef int i, j, k
        cdef double res
        res = 1
        for j in range(N / self.divisor):
            k = 1
            for i in range(N):
                k += 1
            res += k
        return math.log(res)

    def __str__(self):
        # Formatting an instance triggers the expensive computation.
        return 'SlowpokeCore: %f' % self.doWork(self.N)
--------------------------------------------------------------------------------
/6-Profiling/vtune/webinar/slowpoke/setup.py:
--------------------------------------------------------------------------------
from distutils.core import setup
from distutils.extension import Extension
from Cython.Distutils import build_ext

# Build the `core` Cython extension. The MSVC flags keep debug
# information (/Z7 embeds it in the object file, /DEBUG preserves
# symbols at link time) so profilers can resolve the native frames.
setup(
    cmdclass={'build_ext': build_ext},
    ext_modules=[
        Extension(
            'core',
            sources=["core.pyx"],
            extra_compile_args=['/Z7'],
            extra_link_args=['/DEBUG'],
        ),
    ],
)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # supercomputing2016-python
2 | Materials for the Supercomputing 2016 tutorial on high performance Python
3 |
4 | # Environment
5 |
6 | ```
7 | conda create -n sc2016 python=3.5 jupyter pandas scipy bokeh matplotlib numba line_profiler pandas-datareader xlwt xlrd sqlalchemy scikit-learn pytables
8 | ```
9 |
--------------------------------------------------------------------------------