├── .gitignore ├── README.md ├── chapter1 ├── 12-installation.ipynb ├── 13-nbui.ipynb ├── 14-python.ipynb ├── 15-ten.ipynb └── images │ ├── ipython-console.png │ ├── kernel-selector.png │ ├── markdown-both.png │ ├── markdown.png │ ├── nbui-1.png │ ├── nbui-2.png │ ├── nbui-3.png │ ├── notebook-terminal.png │ ├── notebook.png │ ├── pager.png │ ├── slider.png │ ├── tab-completion.png │ ├── text-editor.png │ ├── unicode-completion.png │ └── youtube.png ├── chapter2 ├── 21-exploring.ipynb ├── 22-manipulating.ipynb ├── 23-groupby.ipynb ├── cleaning │ ├── README.md │ ├── cleanup.ipynb │ └── subset.ipynb └── data │ ├── .gitignore │ ├── nycTaxiFareData2013.torrent │ └── nycTaxiTripData2013.torrent ├── chapter3 ├── 31-primer.ipynb ├── 32-creating.ipynb ├── 33-basic.ipynb └── 34-computing.ipynb ├── chapter4 ├── 41-notebook.ipynb ├── 42-mpl.ipynb ├── 43-image.ipynb └── 44-other.ipynb ├── chapter5 ├── 51-numba.ipynb ├── 52-cython.ipynb ├── 53-parallel.ipynb └── 54-further.ipynb ├── chapter6 ├── 61-magic.ipynb ├── 62-kernel.ipynb ├── 63-rich.ipynb └── 64-nbapp.ipynb └── utils └── gentoc.py /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | *intro.ipynb 3 | *summary.ipynb 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # IPython minibook, second edition 2 | 3 | This repository contains all the code examples as IPython notebooks. 4 | 5 | ## Table of contents 6 | 7 | ### 1. Getting started with IPython 8 | 9 | * 1.1. What are Python, IPython, and Jupyter? 10 | * [1.2. Installing Python with Anaconda](http://nbviewer.ipython.org/github/ipython-books/minibook-2nd-code/blob/master/chapter1/12-installation.ipynb) (**Complete sample!**) 11 | * [1.3. 
Introducing the Notebook](http://nbviewer.ipython.org/github/ipython-books/minibook-2nd-code/blob/master/chapter1/13-nbui.ipynb) (**Complete sample!**) 12 | * [1.4. A crash course on Python](http://nbviewer.ipython.org/github/ipython-books/minibook-2nd-code/blob/master/chapter1/14-python.ipynb) (**Complete sample!**) 13 | * [1.5. Ten Jupyter/IPython essentials](http://nbviewer.ipython.org/github/ipython-books/minibook-2nd-code/blob/master/chapter1/15-ten.ipynb) 14 | * 1.6. Summary 15 | 16 | ### 2. Interactive data analysis with pandas 17 | 18 | * [2.1. Exploring a dataset in the Notebook](http://nbviewer.ipython.org/github/ipython-books/minibook-2nd-code/blob/master/chapter2/21-exploring.ipynb) 19 | * [2.2. Manipulating data](http://nbviewer.ipython.org/github/ipython-books/minibook-2nd-code/blob/master/chapter2/22-manipulating.ipynb) 20 | * [2.3. Complex operations](http://nbviewer.ipython.org/github/ipython-books/minibook-2nd-code/blob/master/chapter2/23-groupby.ipynb) 21 | * 2.4. Summary 22 | 23 | ### 3. Numerical computing with NumPy 24 | 25 | * [3.1. A primer to vector computing](http://nbviewer.ipython.org/github/ipython-books/minibook-2nd-code/blob/master/chapter3/31-primer.ipynb) 26 | * [3.2. Creating and loading arrays](http://nbviewer.ipython.org/github/ipython-books/minibook-2nd-code/blob/master/chapter3/32-creating.ipynb) 27 | * [3.3. Basic array manipulations](http://nbviewer.ipython.org/github/ipython-books/minibook-2nd-code/blob/master/chapter3/33-basic.ipynb) 28 | * [3.4. Computing with NumPy arrays](http://nbviewer.ipython.org/github/ipython-books/minibook-2nd-code/blob/master/chapter3/34-computing.ipynb) (**Complete sample!**) 29 | * 3.5. Summary 30 | 31 | ### 4. Interactive plotting and Graphical Interfaces 32 | 33 | * [4.1. Choosing a plotting backend](http://nbviewer.ipython.org/github/ipython-books/minibook-2nd-code/blob/master/chapter4/41-notebook.ipynb) 34 | * [4.2. 
matplotlib and seaborn essentials](http://nbviewer.ipython.org/github/ipython-books/minibook-2nd-code/blob/master/chapter4/42-mpl.ipynb) 35 | * [4.3. Image processing](http://nbviewer.ipython.org/github/ipython-books/minibook-2nd-code/blob/master/chapter4/43-image.ipynb) 36 | * 4.4. Further plotting and visualization libraries 37 | * 4.5. Summary 38 | 39 | ### 5. High-performance and parallel computing 40 | 41 | * [5.1. Accelerating Python code with Numba](http://nbviewer.ipython.org/github/ipython-books/minibook-2nd-code/blob/master/chapter5/51-numba.ipynb) 42 | * [5.2. Writing C in Python with Cython](http://nbviewer.ipython.org/github/ipython-books/minibook-2nd-code/blob/master/chapter5/52-cython.ipynb) 43 | * [5.3. Distributing tasks on several cores with IPython.parallel](http://nbviewer.ipython.org/github/ipython-books/minibook-2nd-code/blob/master/chapter5/53-parallel.ipynb) 44 | * 5.4. Further high-performance computing techniques 45 | * 5.5. Summary 46 | 47 | ### 6. Customizing IPython 48 | 49 | * [6.1. Creating a custom magic command in an IPython extension](http://nbviewer.ipython.org/github/ipython-books/minibook-2nd-code/blob/master/chapter6/61-magic.ipynb) 50 | * [6.2. Writing a new Jupyter kernel](http://nbviewer.ipython.org/github/ipython-books/minibook-2nd-code/blob/master/chapter6/62-kernel.ipynb) 51 | * [6.3. Displaying rich HTML elements in the Notebook](http://nbviewer.ipython.org/github/ipython-books/minibook-2nd-code/blob/master/chapter6/63-rich.ipynb) 52 | * [6.4. Customizing the Notebook interface with JavaScript](http://nbviewer.ipython.org/github/ipython-books/minibook-2nd-code/blob/master/chapter6/64-nbapp.ipynb) 53 | * 6.5. 
Summary 54 | 55 | -------------------------------------------------------------------------------- /chapter1/12-installation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Installing Python with Anaconda" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "> **This is a sample section from [Learning IPython for Interactive Computing and Data Visualization, second edition](http://ipython-books.github.io/minibook/).**" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "Although Python is an open-source, cross-platform language, installing it with the usual scientific packages used to be overly complicated. Fortunately, there is an all-in-one scientific Python distribution, **Anaconda** (by Continuum Analytics), that is free, cross-platform, and easy to install. Anaconda comes with Jupyter and all of the scientific packages we will use in this book. There are other distributions and installation options (like Canopy, WinPython, Python(x, y), and others), but for the purpose of this book we will use Anaconda throughout." 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "> TIP (Running Jupyter in the cloud): You can also use Jupyter directly from your web browser, without installing anything on your local computer: go to http://try.jupyter.org. Note that the notebooks created there are not saved. Let's also mention a similar service, [Wakari](https://wakari.io), by Continuum Analytics." 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "Anaconda comes with a package manager named **conda**, which lets you manage your Python distribution and install new packages." 
36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "> TIP (Miniconda): **Miniconda** (http://conda.pydata.org/miniconda.html) is a light version of Anaconda that gives you the ability to only install the packages you need." 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "\n", 50 | "### Downloading Anaconda" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "The first step is to [download Anaconda from Continuum Analytics' website](http://continuum.io/downloads). This is actually not the easiest part since several versions are available. Three properties define a particular version:" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "* The **operating system** (OS): Linux, Mac OS X, or Windows. This will depend on the computer you want to install Python on.\n", 65 | "* 32-bit or 64-bit: **you want the 64-bit version**, unless you're on an old or low-end computer. The 64-bit version will allow you to manipulate large datasets.\n", 66 | "* The version of Python: 2.7, or 3.4 (or later). In this book, **we will use Python 3.4**, because that's the latest version available at the time of this writing. However, it is easy to temporarily switch to a Python 2.7 environment with Anaconda if necessary (see below)." 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "> INFO (Python 2 or Python 3?): Python 3 brought a few backward-incompatible changes. This is why many people are still using Python 2.7 at this time, even though Python 3 was released in 2008. **We will use Python 3 in this book, and we recommend that newcomers learn Python 3**. If you need to use legacy Python code that hasn't yet been updated to Python 3, you can use conda to temporarily switch to a Python 2 interpreter."
74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "Once you have found the right link for your OS and Python 3 64-bit, you can download the package. You should then find it in your `downloads` directory (depending on your OS and your browser's settings)." 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "\n", 88 | "### Installing Anaconda" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "The Anaconda installer comes in different flavors depending on your OS:" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "* Linux: the installer is a bash `.sh` script. Run it with a command like `bash Anaconda3-2.3.0-Linux-x86_64.sh` (if necessary, replace the filename by the one you downloaded).\n", 103 | "* The Mac graphical installer is a `.pkg` file that you can run with a double-click.\n", 104 | "* The Windows graphical installer is a `.exe` file that you can run with a double-click." 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "Then, follow the instructions to install Anaconda on your computer. A few remarks:" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "* You don't need administrator rights to install Anaconda; in most cases, you can choose to install it in your personal user account.\n", 119 | "* Choose to put Anaconda in your system path, so that Anaconda's Python is the system default." 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "> INFO (Graphical launcher): Anaconda comes with a graphical launcher that you can use to start IPython, manage environments, and so on. 
You will find more details at http://docs.continuum.io/anaconda-launcher/" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "\n", 134 | "### Before you get started..." 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "Before you get started with Anaconda, there are a few things you need to know:" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "* Opening a terminal\n", 149 | "* Finding your home directory\n", 150 | "* Manipulating your system's PATH" 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "metadata": {}, 156 | "source": [ 157 | "You can skip this paragraph if you already know how to do these things." 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": [ 164 | "#### Opening a terminal" 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": {}, 170 | "source": [ 171 | "A **terminal** is a command-line application that lets you interact with your computer by typing commands with the keyboard, instead of clicking on windows with the mouse. While most computer users only know Graphical User Interfaces, developers and scientists generally need to know how to use the command-line interface for advanced usage. To use the command-line interface, follow the instructions that are specific to your OS:" 172 | ] 173 | }, 174 | { 175 | "cell_type": "markdown", 176 | "metadata": {}, 177 | "source": [ 178 | "* On Windows, you can use **Powershell**. Press the *Windows* and *R* keys, type `powershell` in the *Run* box, and press *Enter*. You will find more information about Powershell at https://blog.udemy.com/powershell-tutorial/. 
Alternatively, you can use the older Windows terminal by typing `cmd` in the *Run* box.\n", 179 | "* On Mac OS X, you can open the Terminal application, for example by pressing *Cmd-Space*, typing `terminal`, and pressing *Enter*.\n", 180 | "* On Linux, you can open the Terminal from your application manager." 181 | ] 182 | }, 183 | { 184 | "cell_type": "markdown", 185 | "metadata": {}, 186 | "source": [ 187 | "In a terminal, use the `cd /path/to/directory` command to move to a given directory. For example, `cd ~` moves to your home directory, which is introduced in the next paragraph." 188 | ] 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "metadata": {}, 193 | "source": [ 194 | "\n", 195 | "#### Finding your home directory" 196 | ] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "metadata": {}, 201 | "source": [ 202 | "Your **home directory** is specific to your user account on your computer. It generally contains your applications' settings. It is often referred to as `~`. Depending on the OS, the location of the home directory is as follows:" 203 | ] 204 | }, 205 | { 206 | "cell_type": "markdown", 207 | "metadata": {}, 208 | "source": [ 209 | "* On Windows, its location is `C:\\Users\\YourName\\` where `YourName` is the name of your account\n", 210 | "* On Mac OS X, its location is `/Users/YourName/` where `YourName` is the name of your account\n", 211 | "* On Linux, its location is generally `/home/yourname/` where `yourname` is the name of your account" 212 | ] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "metadata": {}, 217 | "source": [ 218 | "For example, the directory `~/anaconda3` refers to `C:\\Users\\YourName\\anaconda3\\` on Windows and `/home/yourname/anaconda3/` on Linux." 
219 | ] 220 | }, 221 | { 222 | "cell_type": "markdown", 223 | "metadata": {}, 224 | "source": [ 225 | "\n", 226 | "#### Manipulating your system's PATH" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "The **PATH** is a global variable (also called an **environment variable**) defined by your operating system with the list of directories where executable programs are located. If you type a command like `python` in your terminal, you generally need to have a `python` (or `python.exe` on Windows) executable in one of the directories listed in the PATH. If that's not the case, an error may be raised." 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": {}, 239 | "source": [ 240 | "You can manually add directories to your system's PATH:" 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": {}, 246 | "source": [ 247 | "* On Windows, press the *Windows* and *R* keys, type `rundll32.exe sysdm.cpl,EditEnvironmentVariables`, and press *Enter*. You can then edit the PATH variable and append `;C:\\path\\to\\directory` if you want to add that directory. You will find more detailed instructions at http://www.computerhope.com/issues/ch000549.htm.\n", 248 | "* On Mac OS X, edit or create the file `~/.bash_profile` and add `export PATH=\"$PATH:/path/to/directory\"` at the end of the file.\n", 249 | "* On Linux, edit or create the file `~/.bashrc` and add `export PATH=\"$PATH:/path/to/directory\"` at the end of the file." 250 | ] 251 | }, 252 | { 253 | "cell_type": "markdown", 254 | "metadata": {}, 255 | "source": [ 256 | "\n", 257 | "### Testing your installation" 258 | ] 259 | }, 260 | { 261 | "cell_type": "markdown", 262 | "metadata": {}, 263 | "source": [ 264 | "To test Anaconda once it has been installed, open a terminal and type `python`. This opens a **Python console**, not to be confused with the **OS terminal**. 
The Python console is identified with a `>>>` prompt string, whereas the OS terminal is identified with a `$` (Linux/OS X) or `>` (Windows) prompt string. These strings are displayed in the terminal, often preceded by your computer's name, your login, and the current directory (for example, `yourname@computer:~$` on Linux or `PS C:\\Users\\YourName>` on Windows). You can type commands after the prompt string. After typing `python`, you should see something like the following:" 265 | ] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "metadata": {}, 270 | "source": [ 271 | "```bash\n", 272 | "$ python\n", 273 | "Python 3.4.3 |Anaconda 2.3.0 (64-bit)| (default, Jun 4 2015, 15:29:08)\n", 274 | "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)] on linux\n", 275 | "Type \"help\", \"copyright\", \"credits\" or \"license\" for more information.\n", 276 | ">>>\n", 277 | "```" 278 | ] 279 | }, 280 | { 281 | "cell_type": "markdown", 282 | "metadata": {}, 283 | "source": [ 284 | "What matters is that `Anaconda` or `Continuum Analytics` is mentioned here. Otherwise, typing `python` might have launched your system's default Python, which is *not* the one you want to use in this book." 285 | ] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": {}, 290 | "source": [ 291 | "If you have this problem, you may need to add the path to the Anaconda executables to your PATH. For example, this path will be `~/anaconda3/bin` if you chose to install Anaconda in `~/anaconda3`. The `bin` directory contains Anaconda executables including `python`." 292 | ] 293 | }, 294 | { 295 | "cell_type": "markdown", 296 | "metadata": {}, 297 | "source": [ 298 | "If you have any problem installing and testing Anaconda, you can ask for help on the mailing list (see the link at the end of this section)." 299 | ] 300 | }, 301 | { 302 | "cell_type": "markdown", 303 | "metadata": {}, 304 | "source": [ 305 | "Next, exit the Python prompt by typing `exit()` and pressing *Enter*." 
306 | ] 307 | }, 308 | { 309 | "cell_type": "markdown", 310 | "metadata": {}, 311 | "source": [ 312 | "\n", 313 | "### Managing environments" 314 | ] 315 | }, 316 | { 317 | "cell_type": "markdown", 318 | "metadata": {}, 319 | "source": [ 320 | "Anaconda lets you create different isolated Python environments. For example, you can have a Python 2 distribution for the rare cases where you need to temporarily switch to Python 2." 321 | ] 322 | }, 323 | { 324 | "cell_type": "markdown", 325 | "metadata": {}, 326 | "source": [ 327 | "To create a new environment for Python 2, type the following command in an OS terminal:" 328 | ] 329 | }, 330 | { 331 | "cell_type": "markdown", 332 | "metadata": {}, 333 | "source": [ 334 | "```bash\n", 335 | "$ conda create -n py2 anaconda python=2.7\n", 336 | "```" 337 | ] 338 | }, 339 | { 340 | "cell_type": "markdown", 341 | "metadata": {}, 342 | "source": [ 343 | "This will create a new isolated environment named `py2` based on the original Anaconda distribution, but with Python 2.7. You could also use the command `conda env`: type `conda env -h` to see the details." 344 | ] 345 | }, 346 | { 347 | "cell_type": "markdown", 348 | "metadata": {}, 349 | "source": [ 350 | "You can now activate your `py2` environment by typing the following command in a terminal:" 351 | ] 352 | }, 353 | { 354 | "cell_type": "markdown", 355 | "metadata": {}, 356 | "source": [ 357 | "* Windows: `activate py2` (note that you might have problems with Powershell, see https://github.com/conda/conda/issues/626, or use the old `cmd` terminal)\n", 358 | "* Linux and Mac OS X: `source activate py2`" 359 | ] 360 | }, 361 | { 362 | "cell_type": "markdown", 363 | "metadata": {}, 364 | "source": [ 365 | "Now, you should see a `(py2)` prefix in front of your terminal prompt. Typing `python` in your terminal with the `py2` environment activated will open a Python 2 interpreter." 
366 | ] 367 | }, 368 | { 369 | "cell_type": "markdown", 370 | "metadata": {}, 371 | "source": [ 372 | "Type `deactivate` on Windows or `source deactivate` on Linux/Mac OS X to deactivate the environment in the terminal." 373 | ] 374 | }, 375 | { 376 | "cell_type": "markdown", 377 | "metadata": {}, 378 | "source": [ 379 | "### Common conda commands" 380 | ] 381 | }, 382 | { 383 | "cell_type": "markdown", 384 | "metadata": {}, 385 | "source": [ 386 | "Here is a list of common commands:" 387 | ] 388 | }, 389 | { 390 | "cell_type": "markdown", 391 | "metadata": {}, 392 | "source": [ 393 | "* `conda help`: display the list of conda commands.\n", 394 | "* `conda list`: list all packages installed in the current environment.\n", 395 | "* `conda info`: display system information.\n", 396 | "* `conda env list`: display the list of environments installed. The currently active one is marked by a star `*`.\n", 397 | "* `conda install somepackage`: install a Python package (replace `somepackage` with the name of the package you want to install).\n", 398 | "* `conda install somepackage=0.7`: install a specific version of a package.\n", 399 | "* `conda update somepackage`: update a Python package to the latest available version.\n", 400 | "* `conda update anaconda`: update the Anaconda distribution, that is, bring all of its packages to the versions shipped with the latest Anaconda release.\n", 401 | "* `conda update conda`: update conda itself.\n", 402 | "* `conda update --all`: update every package installed in the current environment to its latest available version.\n", 403 | "* `conda remove somepackage`: uninstall a Python package.\n", 404 | "* `conda remove -n myenv --all`: remove the environment named `myenv` (replace this with the name of the environment you want to remove).\n", 405 | "* `conda clean -t`: remove the old tarballs that are left over after installation and updates." 406 | ] 407 | }, 408 | { 409 | "cell_type": "markdown", 410 | "metadata": {}, 411 | "source": [ 412 | "Some commands ask for confirmation (you need to press `y` to confirm). You can also use the `-y` option to avoid the confirmation prompt."
413 | ] 414 | }, 415 | { 416 | "cell_type": "markdown", 417 | "metadata": {}, 418 | "source": [ 419 | "If `conda install somepackage` fails, you can try `pip install somepackage` instead. This will use the **Python Package Index (PyPI)** instead of Anaconda. Many scientific Anaconda packages are easier to install than the corresponding PyPI packages, because they are precompiled for your platform. However, many packages are available on PyPI but not on Anaconda." 420 | ] 421 | }, 422 | { 423 | "cell_type": "markdown", 424 | "metadata": {}, 425 | "source": [ 426 | "Here are some references:" 427 | ] 428 | }, 429 | { 430 | "cell_type": "markdown", 431 | "metadata": {}, 432 | "source": [ 433 | "* pip documentation at https://pip.pypa.io/en/stable/\n", 434 | "* PyPI repository at https://pypi.python.org/pypi" 435 | ] 436 | }, 437 | { 438 | "cell_type": "markdown", 439 | "metadata": {}, 440 | "source": [ 441 | "### References" 442 | ] 443 | }, 444 | { 445 | "cell_type": "markdown", 446 | "metadata": {}, 447 | "source": [ 448 | "Here are a few references about Anaconda:" 449 | ] 450 | }, 451 | { 452 | "cell_type": "markdown", 453 | "metadata": {}, 454 | "source": [ 455 | "* Continuum Analytics' website: http://continuum.io/\n", 456 | "* Anaconda main page: https://store.continuum.io/cshop/anaconda/\n", 457 | "* Anaconda downloads: http://continuum.io/downloads\n", 458 | "* List of Anaconda packages: http://docs.continuum.io/anaconda/pkg-docs\n", 459 | "* Conda main page: http://conda.io/\n", 460 | "* Anaconda mailing list: https://groups.google.com/a/continuum.io/forum/#!forum/anaconda\n", 461 | "* Continuum Analytics Twitter account at https://twitter.com/ContinuumIO\n", 462 | "* Conda FAQ: http://conda.pydata.org/docs/faq.html\n", 463 | "* Curated list of Python packages at http://awesome-python.com/" 464 | ] 465 | }, 466 | { 467 | "cell_type": "markdown", 468 | "metadata": {}, 469 | "source": [ 470 | "\n", 471 | "### Downloading the notebooks" 472 | ] 473 | }, 474 | { 
475 | "cell_type": "markdown", 476 | "metadata": {}, 477 | "source": [ 478 | "All of this book's code is available on GitHub as notebooks. We recommend that you download the notebooks and experiment with them as you're working through the book." 479 | ] 480 | }, 481 | { 482 | "cell_type": "markdown", 483 | "metadata": {}, 484 | "source": [ 485 | "> INFO (Git and GitHub): **GitHub** is a popular online service that hosts open source projects. It is based on the **Git Distributed Version Control System (DVCS)**. Git keeps track of file changes and enables collaborative work on a given project. Learning a version control system like Git is highly recommended for all programmers. Not using a version control system when working with code or even text documents is now considered bad practice. You will find several references at https://help.github.com/articles/good-resources-for-learning-git-and-github/. The IPython Cookbook also contains several recipes about Git and best interactive programming practices." 486 | ] 487 | }, 488 | { 489 | "cell_type": "markdown", 490 | "metadata": {}, 491 | "source": [ 492 | "Here is how to download the book's notebooks:" 493 | ] 494 | }, 495 | { 496 | "cell_type": "markdown", 497 | "metadata": {}, 498 | "source": [ 499 | "* Install git: http://git-scm.com/downloads\n", 500 | "* Check your git installation: open a new OS terminal and type `git version`. You should see the version of git and not an error message.\n", 501 | "* Type the following command:" 502 | ] 503 | }, 504 | { 505 | "cell_type": "markdown", 506 | "metadata": {}, 507 | "source": [ 508 | "```bash\n", 509 | "$ git clone https://github.com/ipython-books/minibook-2nd-code.git \"$HOME/minibook\"\n", 510 | "```" 511 | ] 512 | }, 513 | { 514 | "cell_type": "markdown", 515 | "metadata": {}, 516 | "source": [ 517 | "This will download the very latest version of the code into a `minibook` subdirectory in your home directory. You can also choose another directory."
518 | ] 519 | }, 520 | { 521 | "cell_type": "markdown", 522 | "metadata": {}, 523 | "source": [ 524 | "From this directory, you can update to the latest version at any time by typing `git pull`." 525 | ] 526 | }, 527 | { 528 | "cell_type": "markdown", 529 | "metadata": {}, 530 | "source": [ 531 | "> TIP (Notebooks on GitHub): Notebook documents stored on GitHub (with the file extension `.ipynb`) are automatically rendered on the GitHub website." 532 | ] 533 | } 534 | ], 535 | "metadata": { 536 | "kernelspec": { 537 | "display_name": "Python 3", 538 | "language": "python", 539 | "name": "python3" 540 | }, 541 | "language_info": { 542 | "codemirror_mode": { 543 | "name": "ipython", 544 | "version": 3 545 | }, 546 | "file_extension": ".py", 547 | "mimetype": "text/x-python", 548 | "name": "python", 549 | "nbconvert_exporter": "python", 550 | "pygments_lexer": "ipython3", 551 | "version": "3.4.3" 552 | } 553 | }, 554 | "nbformat": 4, 555 | "nbformat_minor": 0 556 | } 557 | -------------------------------------------------------------------------------- /chapter1/13-nbui.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Introducing the Notebook" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "> **This is a sample section from [Learning IPython for Interactive Computing and Data Visualization, second edition](http://ipython-books.github.io/minibook/).**" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "Originally, IPython provided an enhanced command-line console to run Python code interactively. The Jupyter Notebook is a more recent and more sophisticated alternative to the console. Today, both tools are available, and we recommend that you learn to use both." 
22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "### Launching the IPython console" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "To run the IPython console, type `ipython` in an OS terminal. There, you can write Python commands and see the results instantly. Here is a screenshot:" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "![IPython console](images/ipython-console.png)" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "The IPython console is most convenient when you have a command-line-based workflow and you want to execute some quick Python commands." 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "You can exit the IPython console by typing `exit`." 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "> INFO (The Qt console): Let's mention the **Qt console**, which is similar to the IPython console, but offers additional features such as multiline editing, enhanced tab completion, image support, and so on. The Qt console can also be integrated within a graphical application written with Python and Qt. See http://jupyter.org/qtconsole/stable/ for more information." 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "\n", 71 | "### Launching the Jupyter Notebook" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "To run the Jupyter Notebook, open an OS terminal, go to `~/minibook/` (or into the directory where you've downloaded the book's notebooks), and type `jupyter notebook`. This will start the Jupyter server and open a new window in your browser (if that's not the case, go to the following URL: `http://localhost:8888`). 
Here is a screenshot of Jupyter's entry point, the **Notebook dashboard**:" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "![The Notebook dashboard](images/nbui-1.png)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "> INFO: At the time of writing, the following browsers are officially supported: Chrome 13 and greater; Safari 5 and greater; Firefox 6 or greater. Other browsers may work also. Your mileage may vary." 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "The Notebook is most convenient when you start a complex analysis project that will involve a substantial amount of interactive experimentation with your code. Other common use-cases include keeping track of your interactive session (like a lab notebook), or writing technical documents that involve code, equations, and figures." 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "In the rest of this section, we will focus on the Notebook interface." 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "> TIP (Closing the Notebook server): To close the Notebook server, go to the OS terminal where you launched the server from, and press *Ctrl-C*. You may need to confirm with *y*." 
114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "\n", 121 | "### The Notebook dashboard" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "The dashboard contains several tabs:" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "* *Files* shows all files and notebooks in the current directory.\n", 136 | "* *Running* shows all kernels currently running on your computer.\n", 137 | "* *Clusters* lets you launch kernels for parallel computing (covered in *Chapter 5, High-performance and parallel computing*)." 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "A **notebook** is an interactive document containing code, text, and other elements. A notebook is saved in a file with the `.ipynb` extension. This file is a plain text file storing a JSON data structure." 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "A **kernel** is a process running an interactive session. When using IPython, this kernel is a Python process. There are kernels in many languages other than Python." 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "> INFO (Notebook and notebook): We follow the convention of using the term *notebook* for a file, and *Notebook* for the application and the web interface." 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | "source": [ 165 | "In Jupyter, notebooks and kernels are strongly separated. A notebook is a file, whereas a kernel is a process. The kernel receives snippets of code from the Notebook interface, executes them, and sends the outputs and possible errors back to the Notebook interface. Thus, in general, the kernel has no notion of the Notebook.
A notebook is persistent (it's a file), whereas a kernel may be closed at the end of an interactive session and it is therefore not persistent. When a notebook is re-opened, it needs to be re-executed." 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "In general, no more than one Notebook interface can be connected to a given kernel. However, several IPython consoles can be connected to a given kernel." 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "\n", 180 | "### The Notebook user interface" 181 | ] 182 | }, 183 | { 184 | "cell_type": "markdown", 185 | "metadata": {}, 186 | "source": [ 187 | "To create a new notebook, click on the *New* button, and select `Notebook (Python 3)`. A new browser tab opens and shows the Notebook interface:" 188 | ] 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "metadata": {}, 193 | "source": [ 194 | "![A new notebook](images/nbui-2.png)" 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": {}, 200 | "source": [ 201 | "Here are the main components of the interface, from top to bottom:" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "* The *notebook name*, that you can change by clicking on it. This is also the name of the `.ipynb` file.\n", 209 | "* The *menu bar* gives you access to several actions pertaining to either the notebook or the kernel.\n", 210 | "* To the right of the menu bar is the *Kernel* name. You can change the kernel language of your notebook from the *Kernel* menu. We will see in *Chapter 6, Customizing IPython* how to manage different kernel languages.\n", 211 | "* The *toolbar* contains icons for common actions. In particular, the dropdown menu showing `Code` lets you change the type of a cell.\n", 212 | "* Below is the main component of the UI: the actual Notebook. It consists of a linear list of *cells*. 
We will detail below the structure of a cell." 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "\n", 220 | "### Structure of a notebook cell" 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": {}, 226 | "source": [ 227 | "There are two main types of cells: Markdown cells and code cells." 228 | ] 229 | }, 230 | { 231 | "cell_type": "markdown", 232 | "metadata": {}, 233 | "source": [ 234 | "* A **Markdown cell** contains rich text. In addition to classic formatting options like bold or italics, we can add links, images, HTML elements, LaTeX mathematical equations, and more. We will cover Markdown in more detail in the *Ten Jupyter/IPython essentials* section of this chapter.\n", 235 | "* A **code cell** contains code to be executed by the kernel. The programming language corresponds to the kernel's language. We will only use Python in this book, but you can use many other languages." 236 | ] 237 | }, 238 | { 239 | "cell_type": "markdown", 240 | "metadata": {}, 241 | "source": [ 242 | "You can change the type of a cell by first clicking on a cell to select it, and then choosing the cell's type in the toolbar's dropdown menu showing `Markdown` or `Code`." 243 | ] 244 | }, 245 | { 246 | "cell_type": "markdown", 247 | "metadata": {}, 248 | "source": [ 249 | "\n", 250 | "#### Markdown cells" 251 | ] 252 | }, 253 | { 254 | "cell_type": "markdown", 255 | "metadata": {}, 256 | "source": [ 257 | "Here is a screenshot of a Markdown cell:" 258 | ] 259 | }, 260 | { 261 | "cell_type": "markdown", 262 | "metadata": {}, 263 | "source": [ 264 | "![A Markdown cell](images/markdown-both.png)" 265 | ] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "metadata": {}, 270 | "source": [ 271 | "The top panel shows the cell in edit mode, while the bottom one shows it in render mode. The edit mode lets you edit the text, while the render mode lets you display the rendered cell. 
We will explain the differences between these modes in greater detail below." 272 | ] 273 | }, 274 | { 275 | "cell_type": "markdown", 276 | "metadata": {}, 277 | "source": [ 278 | "\n", 279 | "#### Code cells" 280 | ] 281 | }, 282 | { 283 | "cell_type": "markdown", 284 | "metadata": {}, 285 | "source": [ 286 | "Here is a screenshot of a complex code cell:" 287 | ] 288 | }, 289 | { 290 | "cell_type": "markdown", 291 | "metadata": {}, 292 | "source": [ 293 | "![Structure of a code cell](images/nbui-3.png)" 294 | ] 295 | }, 296 | { 297 | "cell_type": "markdown", 298 | "metadata": {}, 299 | "source": [ 300 | "This code cell contains several parts:" 301 | ] 302 | }, 303 | { 304 | "cell_type": "markdown", 305 | "metadata": {}, 306 | "source": [ 307 | "* The **prompt number** shows the cell's number. This number increases every time you run the cell. Since you can run cells of a notebook out of order, nothing guarantees that code numbers are linearly increasing in a given notebook.\n", 308 | "* The **input area** contains a multiline text editor that lets you write one or several lines of code with syntax highlighting.\n", 309 | "* The **widget area** may contain graphical controls; here, it displays a slider.\n", 310 | "* The **output area** can contain multiple outputs, here:\n", 311 | " * Standard output (text in black)\n", 312 | " * Error output (text with a red background)\n", 313 | " * Rich output (an HTML table and an image here)" 314 | ] 315 | }, 316 | { 317 | "cell_type": "markdown", 318 | "metadata": {}, 319 | "source": [ 320 | "\n", 321 | "### The Notebook modal interface" 322 | ] 323 | }, 324 | { 325 | "cell_type": "markdown", 326 | "metadata": {}, 327 | "source": [ 328 | "The Notebook implements a modal interface similar to some text editors such as vim. Mastering this interface may represent a small learning curve for some users." 
329 | ] 330 | }, 331 | { 332 | "cell_type": "markdown", 333 | "metadata": {}, 334 | "source": [ 335 | "* Use the **edit mode** to write code (the selected cell has a green border, and a pen icon appears at the top right of the interface). Click inside a cell to enable the edit mode for this cell (you need to double-click with Markdown cells).\n", 336 | "* Use the **command mode** to operate on cells (the selected cell has a gray border, and there is no pen icon). Click outside the text area of a cell to enable the command mode (you can also press the *Escape* key)." 337 | ] 338 | }, 339 | { 340 | "cell_type": "markdown", 341 | "metadata": {}, 342 | "source": [ 343 | "Keyboard shortcuts are available in the Notebook interface. Type `h` to show them. We review here the most common ones (for Windows and Linux; shortcuts for Mac OS X may be slightly different)." 344 | ] 345 | }, 346 | { 347 | "cell_type": "markdown", 348 | "metadata": {}, 349 | "source": [ 350 | "\n", 351 | "#### Keyboard shortcuts available in both modes" 352 | ] 353 | }, 354 | { 355 | "cell_type": "markdown", 356 | "metadata": {}, 357 | "source": [ 358 | "Here are a few keyboard shortcuts that are always available when a cell is selected:" 359 | ] 360 | }, 361 | { 362 | "cell_type": "markdown", 363 | "metadata": {}, 364 | "source": [ 365 | "* `Ctrl`-`Enter`: run the cell\n", 366 | "* `Shift`-`Enter`: run the cell and select the cell below\n", 367 | "* `Alt`-`Enter`: run the cell and insert a new cell below\n", 368 | "* `Ctrl`-`s`: save the notebook" 369 | ] 370 | }, 371 | { 372 | "cell_type": "markdown", 373 | "metadata": {}, 374 | "source": [ 375 | "\n", 376 | "#### Keyboard shortcuts available in edit mode" 377 | ] 378 | }, 379 | { 380 | "cell_type": "markdown", 381 | "metadata": {}, 382 | "source": [ 383 | "In edit mode, you can type code as usual, and you have access to the following keyboard shortcuts:" 384 | ] 385 | }, 386 | { 387 | "cell_type": "markdown", 388 | "metadata": {}, 389 | "source": 
[ 390 | "* `Esc`: switch to command mode\n", 391 | "* `Ctrl`-`Shift`-`-`: split the cell" 392 | ] 393 | }, 394 | { 395 | "cell_type": "markdown", 396 | "metadata": {}, 397 | "source": [ 398 | "\n", 399 | "#### Keyboard shortcuts available in command mode" 400 | ] 401 | }, 402 | { 403 | "cell_type": "markdown", 404 | "metadata": {}, 405 | "source": [ 406 | "In command mode, keystrokes are bound to cell operations. **Don't write code in command mode** or unexpected things will happen! For example, typing `dd` in command mode will delete the selected cell! Here are some keyboard shortcuts available in command mode:" 407 | ] 408 | }, 409 | { 410 | "cell_type": "markdown", 411 | "metadata": {}, 412 | "source": [ 413 | "* `Enter`: switch to edit mode\n", 414 | "* `Up` or `k`: select the previous cell\n", 415 | "* `Down` or `j`: select the next cell\n", 416 | "* `y` / `m`: change the cell type to code cell / Markdown cell\n", 417 | "* `a` / `b`: insert a new cell above / below the current cell\n", 418 | "* `x` / `c` / `v`: cut / copy / paste the current cell\n", 419 | "* `dd`: delete the current cell\n", 420 | "* `z`: undo the last delete operation\n", 421 | "* `Shift`-`=`: merge the cell below\n", 422 | "* `h`: display the help menu with the list of keyboard shortcuts" 423 | ] 424 | }, 425 | { 426 | "cell_type": "markdown", 427 | "metadata": {}, 428 | "source": [ 429 | "Spending some time learning these shortcuts is highly recommended." 
430 | ] 431 | }, 432 | { 433 | "cell_type": "markdown", 434 | "metadata": {}, 435 | "source": [ 436 | "### References" 437 | ] 438 | }, 439 | { 440 | "cell_type": "markdown", 441 | "metadata": {}, 442 | "source": [ 443 | "Here are a few references:" 444 | ] 445 | }, 446 | { 447 | "cell_type": "markdown", 448 | "metadata": {}, 449 | "source": [ 450 | "* Main documentation of Jupyter at http://jupyter.readthedocs.org/en/latest/\n", 451 | "* Jupyter Notebook interface explained at http://jupyter-notebook.readthedocs.org/en/latest/notebook.html" 452 | ] 453 | } 454 | ], 455 | "metadata": { 456 | "kernelspec": { 457 | "display_name": "Python 3", 458 | "language": "python", 459 | "name": "python3" 460 | }, 461 | "language_info": { 462 | "codemirror_mode": { 463 | "name": "ipython", 464 | "version": 3 465 | }, 466 | "file_extension": ".py", 467 | "mimetype": "text/x-python", 468 | "name": "python", 469 | "nbconvert_exporter": "python", 470 | "pygments_lexer": "ipython3", 471 | "version": "3.4.3" 472 | } 473 | }, 474 | "nbformat": 4, 475 | "nbformat_minor": 0 476 | } 477 | -------------------------------------------------------------------------------- /chapter1/14-python.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## A crash course on Python" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "> **This is a sample chapter from [Learning IPython for Interactive Computing and Data Visualization, second edition](http://ipython-books.github.io/minibook/).**" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "If you don't know Python, read this section to learn the fundamentals. Python is a very accessible language and is even taught to school children. If you have ever programmed, it will only take you a few minutes to learn the basics." 
22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "### Hello world" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "Open a new notebook and type the following in the first cell:" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 1, 41 | "metadata": { 42 | "collapsed": false 43 | }, 44 | "outputs": [ 45 | { 46 | "data": { 47 | "text/plain": [ 48 | "Hello world!" 49 | ] 50 | }, 51 | "execution_count": 1, 52 | "metadata": {}, 53 | "output_type": "execute_result" 54 | } 55 | ], 56 | "source": [ 57 | "print(\"Hello world!\")" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "> TIP (Prompt string): Note that the convention chosen in this book is to show Python code (also called the `input`) prefixed with `In [x]: ` (which shouldn't be typed). This is the standard IPython prompt. Here, you should just type `print(\"Hello world!\")` and then press `Shift`-`Enter`." 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "Congratulations! You are now a Python programmer." 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "### Variables" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "Let's use Python as a calculator." 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 2, 91 | "metadata": { 92 | "collapsed": false 93 | }, 94 | "outputs": [ 95 | { 96 | "data": { 97 | "text/plain": [ 98 | "4" 99 | ] 100 | }, 101 | "execution_count": 2, 102 | "metadata": {}, 103 | "output_type": "execute_result" 104 | } 105 | ], 106 | "source": [ 107 | "2 * 2" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "Here, `2 * 2` is an _expression statement_. 
This operation is performed, the result is returned, and IPython displays it in the notebook cell's output." 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "> TIP (Division): In Python 3, `3 / 2` returns `1.5` (floating-point division), whereas it returns `1` in Python 2 (integer division). This can be a source of errors when porting Python 2 code to Python 3. It is recommended to always use the explicit `3.0 / 2.0` for floating-point division (by using floating-point numbers) and `3 // 2` for integer division. Both syntaxes work in Python 2 and Python 3. See http://python3porting.com/differences.html#integer-division for more details." 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "Other built-in mathematical operators include `+`, `-`, `**` for the exponentiation, and others. You will find more details at https://docs.python.org/3/reference/expressions.html#the-power-operator." 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "**Variables** form a fundamental concept of any programming language. A variable has a name and a value. 
Here is how to create a new variable in Python:" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 3, 141 | "metadata": { 142 | "collapsed": true 143 | }, 144 | "outputs": [], 145 | "source": [ 146 | "a = 2" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "And here is how to use an existing variable:" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 4, 159 | "metadata": { 160 | "collapsed": false 161 | }, 162 | "outputs": [ 163 | { 164 | "data": { 165 | "text/plain": [ 166 | "6" 167 | ] 168 | }, 169 | "execution_count": 4, 170 | "metadata": {}, 171 | "output_type": "execute_result" 172 | } 173 | ], 174 | "source": [ 175 | "a * 3" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "Several variables can be defined at once (this is called **unpacking**):" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 5, 188 | "metadata": { 189 | "collapsed": true 190 | }, 191 | "outputs": [], 192 | "source": [ 193 | "a, b = 2, 6" 194 | ] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "metadata": {}, 199 | "source": [ 200 | "There are different types of variables. Here, we have used a number (more precisely, an **integer**). Other important types include **floating-point numbers** to represent real numbers, **strings** to represent text, and **booleans** to represent `True/False` values. 
Here are a few examples:" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": 6, 206 | "metadata": { 207 | "collapsed": false 208 | }, 209 | "outputs": [ 210 | { 211 | "data": { 212 | "text/plain": [ 213 | "pi is about 3.1415" 214 | ] 215 | }, 216 | "execution_count": 6, 217 | "metadata": {}, 218 | "output_type": "execute_result" 219 | } 220 | ], 221 | "source": [ 222 | "somefloat = 3.1415\n", 223 | "sometext = 'pi is about' # You can also use double quotes.\n", 224 | "print(sometext, somefloat) # Display several variables." 225 | ] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": {}, 230 | "source": [ 231 | "Note how we used the `#` character to write **comments**. Whereas Python discards the comments completely, adding comments in the code is important when the code is to be read by other humans (including yourself in the future)." 232 | ] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "metadata": {}, 237 | "source": [ 238 | "### String escaping" 239 | ] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "metadata": {}, 244 | "source": [ 245 | "String escaping refers to the ability to insert special characters in a string. For example, how can you insert `'` and `\"`, given that these characters are used to delimit a string in Python code? The backslash `\\` is the go-to escape character in Python (and in many other languages too). 
Here are a few examples:" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 7, 251 | "metadata": { 252 | "collapsed": false 253 | }, 254 | "outputs": [ 255 | { 256 | "data": { 257 | "text/plain": [ 258 | "Hello \"world\"\n", 259 | "A list:\n", 260 | "* item 1\n", 261 | "* item 2\n", 262 | "C:\\path\\on\\windows\n", 263 | "C:\\path\\on\\windows" 264 | ] 265 | }, 266 | "execution_count": 7, 267 | "metadata": {}, 268 | "output_type": "execute_result" 269 | } 270 | ], 271 | "source": [ 272 | "print(\"Hello \\\"world\\\"\")\n", 273 | "print(\"A list:\\n* item 1\\n* item 2\")\n", 274 | "print(\"C:\\\\path\\\\on\\\\windows\")\n", 275 | "print(r\"C:\\path\\on\\windows\")" 276 | ] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "metadata": {}, 281 | "source": [ 282 | "The special character `\\n` is the **new line** (or line feed) character. To insert a backslash, you need to escape it, which explains why it needs to be doubled as `\\\\`." 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "metadata": {}, 288 | "source": [ 289 | "You can also disable escaping by using **raw literals** with a `r` prefix before the string, like in the last example above. In this case, backslashes are considered as normal characters." 290 | ] 291 | }, 292 | { 293 | "cell_type": "markdown", 294 | "metadata": {}, 295 | "source": [ 296 | "This is convenient when writing Windows paths, since Windows uses backslash separators instead of forward slashes like on Unix systems. **A very common error on Windows is forgetting to escape backslashes in paths**: writing `\"C:\\path\"` may lead to subtle errors." 
297 | ] 298 | }, 299 | { 300 | "cell_type": "markdown", 301 | "metadata": {}, 302 | "source": [ 303 | "You will find the list of special characters in Python at https://docs.python.org/3.4/reference/lexical_analysis.html#string-and-bytes-literals" 304 | ] 305 | }, 306 | { 307 | "cell_type": "markdown", 308 | "metadata": {}, 309 | "source": [ 310 | "### Lists" 311 | ] 312 | }, 313 | { 314 | "cell_type": "markdown", 315 | "metadata": {}, 316 | "source": [ 317 | "A list contains a sequence of items. You can concisely instruct Python to perform repeated actions on the elements of a list. Let's first create a list of numbers:" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": 8, 323 | "metadata": { 324 | "collapsed": true 325 | }, 326 | "outputs": [], 327 | "source": [ 328 | "items = [1, 3, 0, 4, 1]" 329 | ] 330 | }, 331 | { 332 | "cell_type": "markdown", 333 | "metadata": {}, 334 | "source": [ 335 | "Note the syntax we used to create the list: square brackets `[]`, and commas `,` to separate the items." 336 | ] 337 | }, 338 | { 339 | "cell_type": "markdown", 340 | "metadata": {}, 341 | "source": [ 342 | "The *built-in* function `len()` returns the number of elements in a list:" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": 9, 348 | "metadata": { 349 | "collapsed": false 350 | }, 351 | "outputs": [ 352 | { 353 | "data": { 354 | "text/plain": [ 355 | "5" 356 | ] 357 | }, 358 | "execution_count": 9, 359 | "metadata": {}, 360 | "output_type": "execute_result" 361 | } 362 | ], 363 | "source": [ 364 | "len(items)" 365 | ] 366 | }, 367 | { 368 | "cell_type": "markdown", 369 | "metadata": {}, 370 | "source": [ 371 | "> INFO (Built-in functions): Python comes with a set of built-in functions, including `print()`, `len()`, `max()`, functional routines like `filter()` and `map()`, and container-related routines like `all()`, `any()`, `range()` and `sorted()`. 
You will find the full list of built-in functions at https://docs.python.org/3.4/library/functions.html." 372 | ] 373 | }, 374 | { 375 | "cell_type": "markdown", 376 | "metadata": {}, 377 | "source": [ 378 | "Now, let's compute the sum of all elements in the list. Python provides a _built-in_ function for this:" 379 | ] 380 | }, 381 | { 382 | "cell_type": "code", 383 | "execution_count": 10, 384 | "metadata": { 385 | "collapsed": false 386 | }, 387 | "outputs": [ 388 | { 389 | "data": { 390 | "text/plain": [ 391 | "9" 392 | ] 393 | }, 394 | "execution_count": 10, 395 | "metadata": {}, 396 | "output_type": "execute_result" 397 | } 398 | ], 399 | "source": [ 400 | "sum(items)" 401 | ] 402 | }, 403 | { 404 | "cell_type": "markdown", 405 | "metadata": {}, 406 | "source": [ 407 | "We can also access individual elements in the list, using the following syntax:" 408 | ] 409 | }, 410 | { 411 | "cell_type": "code", 412 | "execution_count": 11, 413 | "metadata": { 414 | "collapsed": false 415 | }, 416 | "outputs": [ 417 | { 418 | "data": { 419 | "text/plain": [ 420 | "1" 421 | ] 422 | }, 423 | "execution_count": 11, 424 | "metadata": {}, 425 | "output_type": "execute_result" 426 | } 427 | ], 428 | "source": [ 429 | "items[0]" 430 | ] 431 | }, 432 | { 433 | "cell_type": "code", 434 | "execution_count": 12, 435 | "metadata": { 436 | "collapsed": false 437 | }, 438 | "outputs": [ 439 | { 440 | "data": { 441 | "text/plain": [ 442 | "1" 443 | ] 444 | }, 445 | "execution_count": 12, 446 | "metadata": {}, 447 | "output_type": "execute_result" 448 | } 449 | ], 450 | "source": [ 451 | "items[-1]" 452 | ] 453 | }, 454 | { 455 | "cell_type": "markdown", 456 | "metadata": {}, 457 | "source": [ 458 | "Note that indexing starts at `0` in Python: the first element of the list is indexed by `0`, the second by `1`, and so on. Also, `-1` refers to the last element, `-2`, to the penultimate element, and so on." 
459 | ] 460 | }, 461 | { 462 | "cell_type": "markdown", 463 | "metadata": {}, 464 | "source": [ 465 | "The same syntax can be used to alter elements in the list:" 466 | ] 467 | }, 468 | { 469 | "cell_type": "code", 470 | "execution_count": 13, 471 | "metadata": { 472 | "collapsed": false 473 | }, 474 | "outputs": [ 475 | { 476 | "data": { 477 | "text/plain": [ 478 | "[1, 9, 0, 4, 1]" 479 | ] 480 | }, 481 | "execution_count": 13, 482 | "metadata": {}, 483 | "output_type": "execute_result" 484 | } 485 | ], 486 | "source": [ 487 | "items[1] = 9\n", 488 | "items" 489 | ] 490 | }, 491 | { 492 | "cell_type": "markdown", 493 | "metadata": {}, 494 | "source": [ 495 | "We can access sublists with the following syntax:" 496 | ] 497 | }, 498 | { 499 | "cell_type": "code", 500 | "execution_count": 14, 501 | "metadata": { 502 | "collapsed": false 503 | }, 504 | "outputs": [ 505 | { 506 | "data": { 507 | "text/plain": [ 508 | "[9, 0]" 509 | ] 510 | }, 511 | "execution_count": 14, 512 | "metadata": {}, 513 | "output_type": "execute_result" 514 | } 515 | ], 516 | "source": [ 517 | "items[1:3]" 518 | ] 519 | }, 520 | { 521 | "cell_type": "markdown", 522 | "metadata": {}, 523 | "source": [ 524 | "Here, `1:3` represents a **slice** going from element `1` _included_ (this is the second element of the list) to element `3` _excluded_. Thus, we get a sublist with the second and third elements of the original list. The first-included/last-excluded asymmetry leads to an intuitive treatment of overlaps between consecutive slices. Also, note that slicing a list returns a new list (a shallow copy), not a dynamic *view* of the original list; changing elements in the sublist does not change them in the original list." 
525 | ] 526 | }, 527 | { 528 | "cell_type": "markdown", 529 | "metadata": {}, 530 | "source": [ 531 | "Python provides several other types of containers:" 532 | ] 533 | }, 534 | { 535 | "cell_type": "markdown", 536 | "metadata": {}, 537 | "source": [ 538 | "**Tuples** are immutable and contain a fixed number of elements:" 539 | ] 540 | }, 541 | { 542 | "cell_type": "code", 543 | "execution_count": 15, 544 | "metadata": { 545 | "collapsed": false 546 | }, 547 | "outputs": [ 548 | { 549 | "data": { 550 | "text/plain": [ 551 | "2" 552 | ] 553 | }, 554 | "execution_count": 15, 555 | "metadata": {}, 556 | "output_type": "execute_result" 557 | } 558 | ], 559 | "source": [ 560 | "my_tuple = (1, 2, 3)\n", 561 | "my_tuple[1]" 562 | ] 563 | }, 564 | { 565 | "cell_type": "markdown", 566 | "metadata": {}, 567 | "source": [ 568 | "**Dictionaries** contain key-value pairs. They are extremely useful and common:" 569 | ] 570 | }, 571 | { 572 | "cell_type": "code", 573 | "execution_count": 16, 574 | "metadata": { 575 | "collapsed": false 576 | }, 577 | "outputs": [ 578 | { 579 | "data": { 580 | "text/plain": [ 581 | "a: 1" 582 | ] 583 | }, 584 | "execution_count": 16, 585 | "metadata": {}, 586 | "output_type": "execute_result" 587 | } 588 | ], 589 | "source": [ 590 | "my_dict = {'a': 1, 'b': 2, 'c': 3}\n", 591 | "print('a:', my_dict['a'])" 592 | ] 593 | }, 594 | { 595 | "cell_type": "code", 596 | "execution_count": 17, 597 | "metadata": { 598 | "collapsed": false 599 | }, 600 | "outputs": [ 601 | { 602 | "data": { 603 | "text/plain": [ 604 | "dict_keys(['c', 'a', 'b'])" 605 | ] 606 | }, 607 | "execution_count": 17, 608 | "metadata": {}, 609 | "output_type": "execute_result" 610 | } 611 | ], 612 | "source": [ 613 | "print(my_dict.keys())" 614 | ] 615 | }, 616 | { 617 | "cell_type": "markdown", 618 | "metadata": {}, 619 | "source": [ 620 | "There is no notion of order in a dictionary. 
However, the native **collections** module provides an `OrderedDict` structure that keeps the insertion order (see https://docs.python.org/3.4/library/collections.html)." 621 | ] 622 | }, 623 | { 624 | "cell_type": "markdown", 625 | "metadata": {}, 626 | "source": [ 627 | "**Sets**, like mathematical sets, contain distinct elements:" 628 | ] 629 | }, 630 | { 631 | "cell_type": "code", 632 | "execution_count": 18, 633 | "metadata": { 634 | "collapsed": false 635 | }, 636 | "outputs": [ 637 | { 638 | "data": { 639 | "text/plain": [ 640 | "{1, 2, 3}" 641 | ] 642 | }, 643 | "execution_count": 18, 644 | "metadata": {}, 645 | "output_type": "execute_result" 646 | } 647 | ], 648 | "source": [ 649 | "my_set = set([1, 2, 3, 2, 1])\n", 650 | "my_set" 651 | ] 652 | }, 653 | { 654 | "cell_type": "markdown", 655 | "metadata": {}, 656 | "source": [ 657 | "> INFO (Mutable and immutable objects): A Python object is **mutable** if its value can change after it has been created. Otherwise, it is **immutable**. For example, a string is immutable; to change it, a new string needs to be created. A list, a dictionary, or a set is mutable; elements can be added or removed. By contrast, a tuple is immutable, and it is not possible to change the elements it contains without recreating the tuple. See https://docs.python.org/3.4/reference/datamodel.html for more details." 
658 | ] 659 | }, 660 | { 661 | "cell_type": "markdown", 662 | "metadata": {}, 663 | "source": [ 664 | "### Loops" 665 | ] 666 | }, 667 | { 668 | "cell_type": "markdown", 669 | "metadata": {}, 670 | "source": [ 671 | "We can run through all elements of a list using a `for` loop:" 672 | ] 673 | }, 674 | { 675 | "cell_type": "code", 676 | "execution_count": 19, 677 | "metadata": { 678 | "collapsed": false 679 | }, 680 | "outputs": [ 681 | { 682 | "data": { 683 | "text/plain": [ 684 | "1\n", 685 | "9\n", 686 | "0\n", 687 | "4\n", 688 | "1" 689 | ] 690 | }, 691 | "execution_count": 19, 692 | "metadata": {}, 693 | "output_type": "execute_result" 694 | } 695 | ], 696 | "source": [ 697 | "for item in items:\n", 698 | " print(item)" 699 | ] 700 | }, 701 | { 702 | "cell_type": "markdown", 703 | "metadata": {}, 704 | "source": [ 705 | "There are several things to note here:" 706 | ] 707 | }, 708 | { 709 | "cell_type": "markdown", 710 | "metadata": {}, 711 | "source": [ 712 | "* The `for item in items` syntax means that a temporary variable named `item` is created at every iteration. This variable contains the value of every item in the list, one at a time.\n", 713 | "* Note the colon `:` at the end of the `for` statement. Forgetting it will lead to a syntax error!\n", 714 | "* The statement `print(item)` will be executed for all items in the list.\n", 715 | "* Note the four spaces before `print`: this is called the **indentation**. You will find more details about indentation in the next subsection." 
716 | ] 717 | }, 718 | { 719 | "cell_type": "markdown", 720 | "metadata": {}, 721 | "source": [ 722 | "Python supports a concise syntax to perform a given operation on all elements of a list:" 723 | ] 724 | }, 725 | { 726 | "cell_type": "code", 727 | "execution_count": 20, 728 | "metadata": { 729 | "collapsed": false 730 | }, 731 | "outputs": [ 732 | { 733 | "data": { 734 | "text/plain": [ 735 | "[1, 81, 0, 16, 1]" 736 | ] 737 | }, 738 | "execution_count": 20, 739 | "metadata": {}, 740 | "output_type": "execute_result" 741 | } 742 | ], 743 | "source": [ 744 | "squares = [item * item for item in items]\n", 745 | "squares" 746 | ] 747 | }, 748 | { 749 | "cell_type": "markdown", 750 | "metadata": {}, 751 | "source": [ 752 | "This is called a **list comprehension**. A new list is created here; it contains the squares of all numbers in the list. This concise syntax leads to highly readable and *Pythonic* code." 753 | ] 754 | }, 755 | { 756 | "cell_type": "markdown", 757 | "metadata": {}, 758 | "source": [ 759 | "### Indentation" 760 | ] 761 | }, 762 | { 763 | "cell_type": "markdown", 764 | "metadata": {}, 765 | "source": [ 766 | "Indentation refers to the spaces that may appear at the beginning of some lines of code. This is a particular aspect of Python's syntax." 767 | ] 768 | }, 769 | { 770 | "cell_type": "markdown", 771 | "metadata": {}, 772 | "source": [ 773 | "In most programming languages, indentation is optional and is generally used to make the code visually clearer. But in Python, indentation also has a syntactic meaning. Particular indentation rules need to be followed for Python code to be correct." 774 | ] 775 | }, 776 | { 777 | "cell_type": "markdown", 778 | "metadata": {}, 779 | "source": [ 780 | "In general, there are two ways to indent some text: by inserting a *tab character* (also referred to as `\\t`), or by inserting a number of spaces (typically, four). It is recommended to use spaces instead of tab characters. 
Your text editor should be configured such that the *Tab* key on the keyboard inserts four spaces instead of a tab character." 781 | ] 782 | }, 783 | { 784 | "cell_type": "markdown", 785 | "metadata": {}, 786 | "source": [ 787 | "In the Notebook, indentation is automatically configured properly; so you shouldn't worry about this issue. The question only arises if you use another text editor for your Python code." 788 | ] 789 | }, 790 | { 791 | "cell_type": "markdown", 792 | "metadata": {}, 793 | "source": [ 794 | "Finally, what is the meaning of indentation? In Python, indentation delimits coherent blocks of code, for example, the contents of a loop, a conditional branch, a function, and other objects. Where other languages such as C or JavaScript use curly braces to delimit such blocks, Python uses indentation." 795 | ] 796 | }, 797 | { 798 | "cell_type": "markdown", 799 | "metadata": {}, 800 | "source": [ 801 | "### Conditional branches" 802 | ] 803 | }, 804 | { 805 | "cell_type": "markdown", 806 | "metadata": {}, 807 | "source": [ 808 | "Sometimes, you need to perform different operations on your data depending on some condition. 
For example, let's display all even numbers in our list:" 809 | ] 810 | }, 811 | { 812 | "cell_type": "code", 813 | "execution_count": 21, 814 | "metadata": { 815 | "collapsed": false 816 | }, 817 | "outputs": [ 818 | { 819 | "data": { 820 | "text/plain": [ 821 | "0\n", 822 | "4" 823 | ] 824 | }, 825 | "execution_count": 21, 826 | "metadata": {}, 827 | "output_type": "execute_result" 828 | } 829 | ], 830 | "source": [ 831 | "for item in items:\n", 832 | " if item % 2 == 0:\n", 833 | " print(item)" 834 | ] 835 | }, 836 | { 837 | "cell_type": "markdown", 838 | "metadata": {}, 839 | "source": [ 840 | "Again, here are several things to note:" 841 | ] 842 | }, 843 | { 844 | "cell_type": "markdown", 845 | "metadata": {}, 846 | "source": [ 847 | "* An `if` statement is followed by a boolean expression.\n", 848 | "* If `a` and `b` are two integers, the **modulo** operator `a % b` returns the remainder from the division of `a` by `b`. Here, `item % 2` is 0 for even numbers, and 1 for odd numbers.\n", 849 | "* The equality is represented by a double equal sign `==` to avoid confusion with the _assignment_ operator `=` that we use when we create variables.\n", 850 | "* Like with the `for` loop, the `if` statement ends with a colon `:`.\n", 851 | "* The part of the code that is executed when the condition is satisfied follows the `if` statement. It is indented. Indentation is cumulative: since this `if` is inside a `for` loop, there are eight spaces before the `print(item)` statement." 852 | ] 853 | }, 854 | { 855 | "cell_type": "markdown", 856 | "metadata": {}, 857 | "source": [ 858 | "Python supports a concise syntax to select all elements in a list that satisfy certain properties. 
Here is how to create a sublist with only even numbers:" 859 | ] 860 | }, 861 | { 862 | "cell_type": "code", 863 | "execution_count": 22, 864 | "metadata": { 865 | "collapsed": false 866 | }, 867 | "outputs": [ 868 | { 869 | "data": { 870 | "text/plain": [ 871 | "[0, 4]" 872 | ] 873 | }, 874 | "execution_count": 22, 875 | "metadata": {}, 876 | "output_type": "execute_result" 877 | } 878 | ], 879 | "source": [ 880 | "even = [item for item in items if item % 2 == 0]\n", 881 | "even" 882 | ] 883 | }, 884 | { 885 | "cell_type": "markdown", 886 | "metadata": {}, 887 | "source": [ 888 | "This is also a form of list comprehension." 889 | ] 890 | }, 891 | { 892 | "cell_type": "markdown", 893 | "metadata": {}, 894 | "source": [ 895 | "### Functions" 896 | ] 897 | }, 898 | { 899 | "cell_type": "markdown", 900 | "metadata": {}, 901 | "source": [ 902 | "Code is typically organized into functions. A **function** encapsulates part of your code. Functions allow you to reuse bits of functionality without copy-pasting the code. Here is a function that tells whether an integer number is even or not:" 903 | ] 904 | }, 905 | { 906 | "cell_type": "code", 907 | "execution_count": 23, 908 | "metadata": { 909 | "collapsed": true 910 | }, 911 | "outputs": [], 912 | "source": [ 913 | "def is_even(number):\n", 914 | " \"\"\"Return whether an integer is even or not.\"\"\"\n", 915 | " return number % 2 == 0" 916 | ] 917 | }, 918 | { 919 | "cell_type": "markdown", 920 | "metadata": {}, 921 | "source": [ 922 | "There are several things to note here:" 923 | ] 924 | }, 925 | { 926 | "cell_type": "markdown", 927 | "metadata": {}, 928 | "source": [ 929 | "* A function is defined with the `def` keyword.\n", 930 | "* After `def` comes the function name. A general convention in Python is to only use lowercase characters, and separate words with an underscore `_`. 
A function name generally starts with a verb.\n", 931 | "* The function name is followed by parentheses, with one or several variable names called the **arguments**. These are the **inputs** of the function. There is a single argument here, named `number`.\n", 932 | "* No type is specified for the argument. This is because Python is **dynamically typed**; you could pass a variable of any type. This function would work fine with floating point numbers, for example (the modulo operation works with floating point numbers in addition to integers).\n", 933 | "* The body of the function is indented (and note the colon `:` at the end of the `def` statement).\n", 934 | "* There is a **docstring** wrapped by triple quotes `\"\"\"`. This is a particular form of comment that explains what the function does. It is not mandatory, but it is strongly recommended to write docstrings for the functions exposed to the user.\n", 935 | "* The `return` keyword in the body of the function specifies the **output** of the function. Here, the output is a Boolean, obtained from the expression `number % 2 == 0`. It is possible to return several values; just use a comma to separate them (in this case, a tuple of Booleans would be returned)." 
936 | ] 937 | }, 938 | { 939 | "cell_type": "markdown", 940 | "metadata": {}, 941 | "source": [ 942 | "Once a function is defined, it can be called like this:" 943 | ] 944 | }, 945 | { 946 | "cell_type": "code", 947 | "execution_count": 24, 948 | "metadata": { 949 | "collapsed": false 950 | }, 951 | "outputs": [ 952 | { 953 | "data": { 954 | "text/plain": [ 955 | "False" 956 | ] 957 | }, 958 | "execution_count": 24, 959 | "metadata": {}, 960 | "output_type": "execute_result" 961 | } 962 | ], 963 | "source": [ 964 | "is_even(3)" 965 | ] 966 | }, 967 | { 968 | "cell_type": "code", 969 | "execution_count": 25, 970 | "metadata": { 971 | "collapsed": false 972 | }, 973 | "outputs": [ 974 | { 975 | "data": { 976 | "text/plain": [ 977 | "True" 978 | ] 979 | }, 980 | "execution_count": 25, 981 | "metadata": {}, 982 | "output_type": "execute_result" 983 | } 984 | ], 985 | "source": [ 986 | "is_even(4)" 987 | ] 988 | }, 989 | { 990 | "cell_type": "markdown", 991 | "metadata": {}, 992 | "source": [ 993 | "Here, 3 and 4 are successively passed as arguments to the function." 994 | ] 995 | }, 996 | { 997 | "cell_type": "markdown", 998 | "metadata": {}, 999 | "source": [ 1000 | "### Positional and keyword arguments" 1001 | ] 1002 | }, 1003 | { 1004 | "cell_type": "markdown", 1005 | "metadata": {}, 1006 | "source": [ 1007 | "A Python function can accept an arbitrary number of arguments, called **positional arguments**. It can also accept optional named arguments, called **keyword arguments**. Here is an example:" 1008 | ] 1009 | }, 1010 | { 1011 | "cell_type": "code", 1012 | "execution_count": 26, 1013 | "metadata": { 1014 | "collapsed": true 1015 | }, 1016 | "outputs": [], 1017 | "source": [ 1018 | "def remainder(number, divisor=2):\n", 1019 | " return number % divisor" 1020 | ] 1021 | }, 1022 | { 1023 | "cell_type": "markdown", 1024 | "metadata": {}, 1025 | "source": [ 1026 | "The second argument of this function, `divisor`, is optional. 
If it is not provided by the caller, it will default to the number 2, as shown here:" 1027 | ] 1028 | }, 1029 | { 1030 | "cell_type": "code", 1031 | "execution_count": 27, 1032 | "metadata": { 1033 | "collapsed": false 1034 | }, 1035 | "outputs": [ 1036 | { 1037 | "data": { 1038 | "text/plain": [ 1039 | "1" 1040 | ] 1041 | }, 1042 | "execution_count": 27, 1043 | "metadata": {}, 1044 | "output_type": "execute_result" 1045 | } 1046 | ], 1047 | "source": [ 1048 | "remainder(5)" 1049 | ] 1050 | }, 1051 | { 1052 | "cell_type": "markdown", 1053 | "metadata": {}, 1054 | "source": [ 1055 | "There are two equivalent ways of specifying a keyword argument when calling a function:" 1056 | ] 1057 | }, 1058 | { 1059 | "cell_type": "code", 1060 | "execution_count": 28, 1061 | "metadata": { 1062 | "collapsed": false 1063 | }, 1064 | "outputs": [ 1065 | { 1066 | "data": { 1067 | "text/plain": [ 1068 | "2" 1069 | ] 1070 | }, 1071 | "execution_count": 28, 1072 | "metadata": {}, 1073 | "output_type": "execute_result" 1074 | } 1075 | ], 1076 | "source": [ 1077 | "remainder(5, 3)" 1078 | ] 1079 | }, 1080 | { 1081 | "cell_type": "code", 1082 | "execution_count": 29, 1083 | "metadata": { 1084 | "collapsed": false 1085 | }, 1086 | "outputs": [ 1087 | { 1088 | "data": { 1089 | "text/plain": [ 1090 | "2" 1091 | ] 1092 | }, 1093 | "execution_count": 29, 1094 | "metadata": {}, 1095 | "output_type": "execute_result" 1096 | } 1097 | ], 1098 | "source": [ 1099 | "remainder(5, divisor=3)" 1100 | ] 1101 | }, 1102 | { 1103 | "cell_type": "markdown", 1104 | "metadata": {}, 1105 | "source": [ 1106 | "In the first case, `3` is understood as the second argument, `divisor`. In the second case, the name of the argument is given explicitly by the caller. This second syntax is clearer and less error-prone than the first one." 
1107 | ] 1108 | }, 1109 | { 1110 | "cell_type": "markdown", 1111 | "metadata": {}, 1112 | "source": [ 1113 | "Functions can also accept arbitrary sets of positional and keyword arguments, using the following syntax:" 1114 | ] 1115 | }, 1116 | { 1117 | "cell_type": "code", 1118 | "execution_count": 30, 1119 | "metadata": { 1120 | "collapsed": true 1121 | }, 1122 | "outputs": [], 1123 | "source": [ 1124 | "def f(*args, **kwargs):\n", 1125 | " print(\"Positional arguments:\", args)\n", 1126 | " print(\"Keyword arguments:\", kwargs)" 1127 | ] 1128 | }, 1129 | { 1130 | "cell_type": "code", 1131 | "execution_count": 31, 1132 | "metadata": { 1133 | "collapsed": false 1134 | }, 1135 | "outputs": [ 1136 | { 1137 | "data": { 1138 | "text/plain": [ 1139 | "Positional arguments: (1, 2)\n", 1140 | "Keyword arguments: {'c': 3, 'd': 4}" 1141 | ] 1142 | }, 1143 | "execution_count": 31, 1144 | "metadata": {}, 1145 | "output_type": "execute_result" 1146 | } 1147 | ], 1148 | "source": [ 1149 | "f(1, 2, c=3, d=4)" 1150 | ] 1151 | }, 1152 | { 1153 | "cell_type": "markdown", 1154 | "metadata": {}, 1155 | "source": [ 1156 | "Inside the function, `args` is a tuple containing positional arguments, and `kwargs` is a dictionary containing keyword arguments." 1157 | ] 1158 | }, 1159 | { 1160 | "cell_type": "markdown", 1161 | "metadata": {}, 1162 | "source": [ 1163 | "### Passage by assignment" 1164 | ] 1165 | }, 1166 | { 1167 | "cell_type": "markdown", 1168 | "metadata": {}, 1169 | "source": [ 1170 | "When passing a parameter to a Python function, a *reference* to the object is actually passed (**passage by assignment**):" 1171 | ] 1172 | }, 1173 | { 1174 | "cell_type": "markdown", 1175 | "metadata": {}, 1176 | "source": [ 1177 | "* If the passed object is mutable, it can be modified by the function.\n", 1178 | "* If the passed object is immutable, it cannot be modified by the function." 
1179 | ] 1180 | }, 1181 | { 1182 | "cell_type": "markdown", 1183 | "metadata": {}, 1184 | "source": [ 1185 | "Here is an example:" 1186 | ] 1187 | }, 1188 | { 1189 | "cell_type": "code", 1190 | "execution_count": 32, 1191 | "metadata": { 1192 | "collapsed": false 1193 | }, 1194 | "outputs": [ 1195 | { 1196 | "data": { 1197 | "text/plain": [ 1198 | "[1, 2, 3]" 1199 | ] 1200 | }, 1201 | "execution_count": 32, 1202 | "metadata": {}, 1203 | "output_type": "execute_result" 1204 | } 1205 | ], 1206 | "source": [ 1207 | "my_list = [1, 2]\n", 1208 | "\n", 1209 | "def add(some_list, value):\n", 1210 | " some_list.append(value)\n", 1211 | "\n", 1212 | "add(my_list, 3)\n", 1213 | "my_list" 1214 | ] 1215 | }, 1216 | { 1217 | "cell_type": "markdown", 1218 | "metadata": {}, 1219 | "source": [ 1220 | "The function `add()` modifies an object defined outside it (in this case, the object `my_list`); we say this function has **side-effects**. A function with no side-effects is called a **pure function**: it doesn't modify anything in the outer context, and it deterministically returns the same result for any given set of inputs. Pure functions are to be preferred over functions with side-effects." 1221 | ] 1222 | }, 1223 | { 1224 | "cell_type": "markdown", 1225 | "metadata": {}, 1226 | "source": [ 1227 | "Knowing this can help you spot subtle bugs. There are further related concepts that are useful to know, including function scopes, naming, binding, and more. 
Here are a couple of links:" 1228 | ] 1229 | }, 1230 | { 1231 | "cell_type": "markdown", 1232 | "metadata": {}, 1233 | "source": [ 1234 | "* Passage by reference at https://docs.python.org/3/faq/programming.html#how-do-i-write-a-function-with-output-parameters-call-by-reference\n", 1235 | "* Naming, binding, and scope at https://docs.python.org/3.4/reference/executionmodel.html" 1236 | ] 1237 | }, 1238 | { 1239 | "cell_type": "markdown", 1240 | "metadata": {}, 1241 | "source": [ 1242 | "### Errors" 1243 | ] 1244 | }, 1245 | { 1246 | "cell_type": "markdown", 1247 | "metadata": {}, 1248 | "source": [ 1249 | "Let's talk about errors in Python. As you learn, you will inevitably come across errors and exceptions. The Python interpreter will most of the time tell you what the problem is, and where it occurred. It is important to understand the vocabulary used by Python so that you can more quickly find and correct your errors." 1250 | ] 1251 | }, 1252 | { 1253 | "cell_type": "markdown", 1254 | "metadata": {}, 1255 | "source": [ 1256 | "Let's see an example:" 1257 | ] 1258 | }, 1259 | { 1260 | "cell_type": "code", 1261 | "execution_count": 33, 1262 | "metadata": { 1263 | "collapsed": true 1264 | }, 1265 | "outputs": [], 1266 | "source": [ 1267 | "def divide(a, b):\n", 1268 | " return a / b" 1269 | ] 1270 | }, 1271 | { 1272 | "cell_type": "code", 1273 | "execution_count": 34, 1274 | "metadata": { 1275 | "collapsed": false 1276 | }, 1277 | "outputs": [ 1278 | { 1279 | "data": { 1280 | "text/plain": [ 1281 | "---------------------------------------------------------\n", 1282 | "ZeroDivisionError Traceback (most recent call last)\n", 1283 | " in ()\n", 1284 | "----> 1 divide(1, 0)\n", 1285 | "\n", 1286 | " in divide(a, b)\n", 1287 | " 1 def divide(a, b):\n", 1288 | "----> 2 return a / b\n", 1289 | "\n", 1290 | "ZeroDivisionError: division by zero" 1291 | ] 1292 | }, 1293 | "execution_count": 34, 1294 | "metadata": {}, 1295 | "output_type": "execute_result" 1296 | } 1297 | 
], 1298 | "source": [ 1299 | "divide(1, 0)" 1300 | ] 1301 | }, 1302 | { 1303 | "cell_type": "markdown", 1304 | "metadata": {}, 1305 | "source": [ 1306 | "Here, we defined a `divide()` function, and called it to divide 1 by 0. Dividing a number by 0 is an error in Python. Here, a `ZeroDivisionError` **exception** was raised. An exception is a particular type of error that can be raised at any point in a program. It is propagated from the innards of the code up to the command that launched the code. It can be caught and processed at any point. You will find more details about exceptions at https://docs.python.org/3/tutorial/errors.html, and common exception types at https://docs.python.org/3/library/exceptions.html#bltin-exceptions." 1307 | ] 1308 | }, 1309 | { 1310 | "cell_type": "markdown", 1311 | "metadata": {}, 1312 | "source": [ 1313 | "The error message you see contains the **stack trace** and the exception's type and message. The stack trace shows all function calls between the raised exception and the script calling point." 1314 | ] 1315 | }, 1316 | { 1317 | "cell_type": "markdown", 1318 | "metadata": {}, 1319 | "source": [ 1320 | "The top frame, indicated by the first arrow `---->`, shows the entry point of the code execution. Here, it is `divide(1, 0)` which was called directly in the Notebook. The error occurred while this function was called." 1321 | ] 1322 | }, 1323 | { 1324 | "cell_type": "markdown", 1325 | "metadata": {}, 1326 | "source": [ 1327 | "The next and last frame is indicated by the second arrow. It corresponds to line 2 in our function `divide(a, b)`. It is the last frame in the stack trace: this means that the error occurred there." 1328 | ] 1329 | }, 1330 | { 1331 | "cell_type": "markdown", 1332 | "metadata": {}, 1333 | "source": [ 1334 | "We will see later in this chapter how to **debug** such errors interactively in IPython and in the Jupyter Notebook. 
Knowing how to navigate up and down in the stack trace is critical when debugging complex Python code." 1335 | ] 1336 | }, 1337 | { 1338 | "cell_type": "markdown", 1339 | "metadata": {}, 1340 | "source": [ 1341 | "\n", 1342 | "\n", 1343 | "### Object-oriented programming" 1344 | ] 1345 | }, 1346 | { 1347 | "cell_type": "markdown", 1348 | "metadata": {}, 1349 | "source": [ 1350 | "**Object-oriented programming** (or OOP) is a relatively advanced topic. Although we won't use it much in this book, it is useful to know the basics. Also, mastering OOP is often essential when you start to have a large code base." 1351 | ] 1352 | }, 1353 | { 1354 | "cell_type": "markdown", 1355 | "metadata": {}, 1356 | "source": [ 1357 | "In Python, everything is an **object**. A number, a string, a function is an object. An object is an instance of a **type** (also known as *class*). An object has **attributes** and **methods**, as specified by its type. An attribute is a variable bound to an object, giving some information about it. A method is a function that applies to the object." 1358 | ] 1359 | }, 1360 | { 1361 | "cell_type": "markdown", 1362 | "metadata": {}, 1363 | "source": [ 1364 | "For example, the object `'hello'` is an instance of the built-in `str` type (string). The `type()` function returns the type of an object, as shown here:" 1365 | ] 1366 | }, 1367 | { 1368 | "cell_type": "code", 1369 | "execution_count": 35, 1370 | "metadata": { 1371 | "collapsed": false 1372 | }, 1373 | "outputs": [ 1374 | { 1375 | "data": { 1376 | "text/plain": [ 1377 | "str" 1378 | ] 1379 | }, 1380 | "execution_count": 35, 1381 | "metadata": {}, 1382 | "output_type": "execute_result" 1383 | } 1384 | ], 1385 | "source": [ 1386 | "type('hello')" 1387 | ] 1388 | }, 1389 | { 1390 | "cell_type": "markdown", 1391 | "metadata": {}, 1392 | "source": [ 1393 | "There are native types, like `str` or `int` (integer), and custom types, also called classes, that can be created by the user." 
1394 | ] 1395 | }, 1396 | { 1397 | "cell_type": "markdown", 1398 | "metadata": {}, 1399 | "source": [ 1400 | "In IPython, you can discover the attributes and methods of any object with the dot syntax and tab completion. For example, typing `'hello'.u` and pressing *Tab* automatically shows us the existence of the `upper()` method:" 1401 | ] 1402 | }, 1403 | { 1404 | "cell_type": "code", 1405 | "execution_count": 36, 1406 | "metadata": { 1407 | "collapsed": false 1408 | }, 1409 | "outputs": [ 1410 | { 1411 | "data": { 1412 | "text/plain": [ 1413 | "'HELLO'" 1414 | ] 1415 | }, 1416 | "execution_count": 36, 1417 | "metadata": {}, 1418 | "output_type": "execute_result" 1419 | } 1420 | ], 1421 | "source": [ 1422 | "'hello'.upper()" 1423 | ] 1424 | }, 1425 | { 1426 | "cell_type": "markdown", 1427 | "metadata": {}, 1428 | "source": [ 1429 | "Here, `upper()` is a method available to all `str` objects; it returns an uppercase copy of a string." 1430 | ] 1431 | }, 1432 | { 1433 | "cell_type": "markdown", 1434 | "metadata": {}, 1435 | "source": [ 1436 | "A useful string method is `format()`. This simple and convenient templating system lets you generate strings dynamically:" 1437 | ] 1438 | }, 1439 | { 1440 | "cell_type": "code", 1441 | "execution_count": 37, 1442 | "metadata": { 1443 | "collapsed": false 1444 | }, 1445 | "outputs": [ 1446 | { 1447 | "data": { 1448 | "text/plain": [ 1449 | "Hello Python!" 1450 | ] 1451 | }, 1452 | "execution_count": 37, 1453 | "metadata": {}, 1454 | "output_type": "execute_result" 1455 | } 1456 | ], 1457 | "source": [ 1458 | "'Hello {0:s}!'.format('Python')" 1459 | ] 1460 | }, 1461 | { 1462 | "cell_type": "markdown", 1463 | "metadata": {}, 1464 | "source": [ 1465 | "The `{0:s}` syntax means \"replace this with the first argument of `format()` which should be a string\". The variable type after the colon is especially useful for numbers, where you can specify how to display the number (for example, `.3f` to display three decimals). 
The `0` makes it possible to replace a given value several times in a given string. You can also use a name instead of a position, for example `'Hello {name}!'.format(name='Python')`." 1466 | ] 1467 | }, 1468 | { 1469 | "cell_type": "markdown", 1470 | "metadata": {}, 1471 | "source": [ 1472 | "Some methods are prefixed with an underscore `_`; they are private and are generally not meant to be used directly. IPython's tab completion won't show you these private attributes and methods unless you explicitly type `_` before pressing *Tab*." 1473 | ] 1474 | }, 1475 | { 1476 | "cell_type": "markdown", 1477 | "metadata": {}, 1478 | "source": [ 1479 | "In practice, the most important thing to remember is that appending a dot `.` to any Python object and pressing *Tab* in IPython will show you a lot of functionality pertaining to that object." 1480 | ] 1481 | }, 1482 | { 1483 | "cell_type": "markdown", 1484 | "metadata": {}, 1485 | "source": [ 1486 | "### Functional programming" 1487 | ] 1488 | }, 1489 | { 1490 | "cell_type": "markdown", 1491 | "metadata": {}, 1492 | "source": [ 1493 | "Python is a multi-paradigm language; it notably supports imperative, object-oriented, and functional programming models. Python functions are objects and can be handled like other objects. In particular, they can be passed as arguments to other functions (also called **higher-order functions**). This is the essence of **functional programming**." 1494 | ] 1495 | }, 1496 | { 1497 | "cell_type": "markdown", 1498 | "metadata": {}, 1499 | "source": [ 1500 | "**Decorators** provide a convenient syntax construct to define higher-order functions. 
Here is an example using the `is_even()` function from the previous **Functions** section:" 1501 | ] 1502 | }, 1503 | { 1504 | "cell_type": "code", 1505 | "execution_count": 38, 1506 | "metadata": { 1507 | "collapsed": true 1508 | }, 1509 | "outputs": [], 1510 | "source": [ 1511 | "def show_output(func):\n", 1512 | " def wrapped(*args, **kwargs):\n", 1513 | " output = func(*args, **kwargs)\n", 1514 | " print(\"The result is:\", output)\n", 1515 | " return wrapped" 1516 | ] 1517 | }, 1518 | { 1519 | "cell_type": "markdown", 1520 | "metadata": {}, 1521 | "source": [ 1522 | "The `show_output()` function transforms an arbitrary function `func()` to a new function, named `wrapped()`, that displays the result of the function:" 1523 | ] 1524 | }, 1525 | { 1526 | "cell_type": "code", 1527 | "execution_count": 39, 1528 | "metadata": { 1529 | "collapsed": false 1530 | }, 1531 | "outputs": [ 1532 | { 1533 | "data": { 1534 | "text/plain": [ 1535 | "The result is: False" 1536 | ] 1537 | }, 1538 | "execution_count": 39, 1539 | "metadata": {}, 1540 | "output_type": "execute_result" 1541 | } 1542 | ], 1543 | "source": [ 1544 | "f = show_output(is_even)\n", 1545 | "f(3)" 1546 | ] 1547 | }, 1548 | { 1549 | "cell_type": "markdown", 1550 | "metadata": {}, 1551 | "source": [ 1552 | "Equivalently, this higher-order function can also be used with a decorator:" 1553 | ] 1554 | }, 1555 | { 1556 | "cell_type": "code", 1557 | "execution_count": 40, 1558 | "metadata": { 1559 | "collapsed": true 1560 | }, 1561 | "outputs": [], 1562 | "source": [ 1563 | "@show_output\n", 1564 | "def square(x):\n", 1565 | " return x * x" 1566 | ] 1567 | }, 1568 | { 1569 | "cell_type": "code", 1570 | "execution_count": 41, 1571 | "metadata": { 1572 | "collapsed": false 1573 | }, 1574 | "outputs": [ 1575 | { 1576 | "data": { 1577 | "text/plain": [ 1578 | "The result is: 9" 1579 | ] 1580 | }, 1581 | "execution_count": 41, 1582 | "metadata": {}, 1583 | "output_type": "execute_result" 1584 | } 1585 | ], 1586 | 
"source": [ 1587 | "square(3)" 1588 | ] 1589 | }, 1590 | { 1591 | "cell_type": "markdown", 1592 | "metadata": {}, 1593 | "source": [ 1594 | "You can find more information about Python decorators at https://en.wikipedia.org/wiki/Python_syntax_and_semantics#Decorators and at http://thecodeship.com/patterns/guide-to-python-function-decorators/." 1595 | ] 1596 | }, 1597 | { 1598 | "cell_type": "markdown", 1599 | "metadata": {}, 1600 | "source": [ 1601 | "### Python 2 and 3" 1602 | ] 1603 | }, 1604 | { 1605 | "cell_type": "markdown", 1606 | "metadata": {}, 1607 | "source": [ 1608 | "Let's finish this section with a few notes about Python 2 and Python 3 compatibility issues." 1609 | ] 1610 | }, 1611 | { 1612 | "cell_type": "markdown", 1613 | "metadata": {}, 1614 | "source": [ 1615 | "There are still Python 2 programs and libraries that are not compatible with Python 3. Therefore, it is sometimes useful to be aware of the differences between the two versions. One of the most obvious differences is that `print` is a statement in Python 2, whereas it is a function in Python 3. Therefore, `print \"Hello\"` (without parentheses) works in Python 2 but not in Python 3, while `print(\"Hello\")` works in both Python 2 and Python 3." 
1616 | ] 1617 | }, 1618 | { 1619 | "cell_type": "markdown", 1620 | "metadata": {}, 1621 | "source": [ 1622 | "There are several non-mutually exclusive options to write portable code that works with both versions:" 1623 | ] 1624 | }, 1625 | { 1626 | "cell_type": "markdown", 1627 | "metadata": {}, 1628 | "source": [ 1629 | "* **`__future__`**: a builtin module supporting backward-incompatible Python syntax\n", 1630 | "* **2to3**: a builtin Python module to port Python 2 code to Python 3\n", 1631 | "* **six**: an external lightweight library for writing compatible code" 1632 | ] 1633 | }, 1634 | { 1635 | "cell_type": "markdown", 1636 | "metadata": {}, 1637 | "source": [ 1638 | "Here are a few references:" 1639 | ] 1640 | }, 1641 | { 1642 | "cell_type": "markdown", 1643 | "metadata": {}, 1644 | "source": [ 1645 | "* Official Python 2/3 wiki page at https://wiki.python.org/moin/Python2orPython3\n", 1646 | "* *Porting to Python 3* book at http://python3porting.com/bookindex.html\n", 1647 | "* 2to3 at https://docs.python.org/3.4/library/2to3.html\n", 1648 | "* six at https://pythonhosted.org/six/\n", 1649 | "* `__future__` at https://docs.python.org/3.4/library/__future__.html\n", 1650 | "* The IPython Cookbook contains an in-depth recipe about choosing between Python 2 and 3, and how to support both." 1651 | ] 1652 | }, 1653 | { 1654 | "cell_type": "markdown", 1655 | "metadata": {}, 1656 | "source": [ 1657 | "\n", 1658 | "### Going beyond the basics" 1659 | ] 1660 | }, 1661 | { 1662 | "cell_type": "markdown", 1663 | "metadata": {}, 1664 | "source": [ 1665 | "You now know the fundamentals of Python, the bare minimum that you will need in this book. As you can imagine, there is much more to say about Python." 1666 | ] 1667 | }, 1668 | { 1669 | "cell_type": "markdown", 1670 | "metadata": {}, 1671 | "source": [ 1672 | "There are a few further basic concepts that are often useful and that we cannot cover here, unfortunately. 
You are highly encouraged to have a look at them in the references given at the end of this section:" 1673 | ] 1674 | }, 1675 | { 1676 | "cell_type": "markdown", 1677 | "metadata": {}, 1678 | "source": [ 1679 | "* `range` and `enumerate`\n", 1680 | "* `pass`, `break`, and `continue`, to be used in loops\n", 1681 | "* working with files\n", 1682 | "* creating and importing modules\n", 1683 | "* the Python standard library provides a wide range of functionality (OS, network, file systems, compression, mathematics, and more)" 1684 | ] 1685 | }, 1686 | { 1687 | "cell_type": "markdown", 1688 | "metadata": {}, 1689 | "source": [ 1690 | "Here are some slightly more advanced concepts that you might find useful if you want to strengthen your Python skills:" 1691 | ] 1692 | }, 1693 | { 1694 | "cell_type": "markdown", 1695 | "metadata": {}, 1696 | "source": [ 1697 | "* regular expressions for advanced string processing\n", 1698 | "* lambda functions for defining small anonymous functions\n", 1699 | "* generators for controlling custom loops\n", 1700 | "* exceptions for handling errors\n", 1701 | "* `with` statements for safely handling contexts\n", 1702 | "* advanced object-oriented programming\n", 1703 | "* metaprogramming for modifying Python code dynamically\n", 1704 | "* the `pickle` module for persisting Python objects on disk and exchanging them across a network" 1705 | ] 1706 | }, 1707 | { 1708 | "cell_type": "markdown", 1709 | "metadata": {}, 1710 | "source": [ 1711 | "Finally, here are a few references:" 1712 | ] 1713 | }, 1714 | { 1715 | "cell_type": "markdown", 1716 | "metadata": {}, 1717 | "source": [ 1718 | "* Getting started with Python: https://www.python.org/about/gettingstarted/\n", 1719 | "* A Python tutorial: https://docs.python.org/3/tutorial/index.html\n", 1720 | "* The Python Standard Library: https://docs.python.org/3/library/index.html\n", 1721 | "* Interactive tutorial: http://www.learnpython.org/\n", 1722 | "* Codecademy Python course: 
http://www.codecademy.com/tracks/python\n", 1723 | "* Language reference (expert level): https://docs.python.org/3/reference/index.html\n", 1724 | "* Python Cookbook, by David Beazley and Brian K. Jones, O'Reilly Media (advanced level, highly recommended if you want to become a Python expert)." 1725 | ] 1726 | } 1727 | ], 1728 | "metadata": { 1729 | "kernelspec": { 1730 | "display_name": "Python 3", 1731 | "language": "python", 1732 | "name": "python3" 1733 | }, 1734 | "language_info": { 1735 | "codemirror_mode": { 1736 | "name": "ipython", 1737 | "version": 3 1738 | }, 1739 | "file_extension": ".py", 1740 | "mimetype": "text/x-python", 1741 | "name": "python", 1742 | "nbconvert_exporter": "python", 1743 | "pygments_lexer": "ipython3", 1744 | "version": "3.4.3" 1745 | } 1746 | }, 1747 | "nbformat": 4, 1748 | "nbformat_minor": 0 1749 | } 1750 | -------------------------------------------------------------------------------- /chapter1/15-ten.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": "## Ten Jupyter/IPython essentials" 7 | }, 8 | { 9 | "cell_type": "markdown", 10 | "metadata": {}, 11 | "source": "### Using IPython as an extended shell" 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "data": { 20 | "text/plain": "'/home/cyrille/minibook/chapter1'" 21 | }, 22 | "execution_count": 1, 23 | "metadata": {}, 24 | "output_type": "execute_result" 25 | } 26 | ], 27 | "source": "%pwd" 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 2, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": "!wget https://raw.githubusercontent.com/ipython-books/minibook-2nd-data/master/facebook.zip" 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 3, 39 | "metadata": {}, 40 | "outputs": [ 41 | { 42 | "data": { 43 | "text/plain": "facebook.zip [...]" 44 | }, 45 
| "execution_count": 3, 46 | "metadata": {}, 47 | "output_type": "execute_result" 48 | } 49 | ], 50 | "source": "%ls" 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 4, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": "!unzip facebook.zip" 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 5, 62 | "metadata": {}, 63 | "outputs": [ 64 | { 65 | "data": { 66 | "text/plain": "facebook facebook.zip [...]" 67 | }, 68 | "execution_count": 5, 69 | "metadata": {}, 70 | "output_type": "execute_result" 71 | } 72 | ], 73 | "source": "%ls" 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 6, 78 | "metadata": {}, 79 | "outputs": [ 80 | { 81 | "data": { 82 | "text/plain": "/home/cyrille/minibook/chapter1/facebook" 83 | }, 84 | "execution_count": 6, 85 | "metadata": {}, 86 | "output_type": "execute_result" 87 | } 88 | ], 89 | "source": "%cd facebook" 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 7, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": "%bookmark fbdata" 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 8, 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "data": { 105 | "text/plain": "0.circles 1684.circles 3437.circles 3980.circles 686.circles\n0.edges 1684.edges 3437.edges 3980.edges 686.edges\n107.circles 1912.circles 348.circles 414.circles 698.circles\n107.edges 1912.edges 348.edges 414.edges 698.edges" 106 | }, 107 | "execution_count": 8, 108 | "metadata": {}, 109 | "output_type": "execute_result" 110 | } 111 | ], 112 | "source": "%ls" 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 9, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": "files = !ls -1 -S | grep .edges" 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 10, 124 | "metadata": {}, 125 | "outputs": [ 126 | { 127 | "data": { 128 | "text/plain": "['1912.edges',\n '107.edges',\n '1684.edges',\n '3437.edges',\n '348.edges',\n '0.edges',\n '414.edges',\n 
'686.edges',\n '698.edges',\n '3980.edges']" 129 | }, 130 | "execution_count": 10, 131 | "metadata": {}, 132 | "output_type": "execute_result" 133 | } 134 | ], 135 | "source": "files" 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 11, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": "import os\nfrom operator import itemgetter\n# Get the name and file size of all .edges files.\nfiles = [(file, os.stat(file).st_size)\n for file in os.listdir('.')\n if file.endswith('.edges')]\n# Sort the list with the second item (file size),\n# in decreasing order.\nfiles = sorted(files,\n key=itemgetter(1),\n reverse=True)\n# Only keep the first item (file name), in the same order.\nfiles = [file for (file, size) in files]" 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 12, 147 | "metadata": {}, 148 | "outputs": [ 149 | { 150 | "data": { 151 | "text/plain": "2290 2363\n2346 2025\n2140 2428\n2201 2506\n2425 2557" 152 | }, 153 | "execution_count": 12, 154 | "metadata": {}, 155 | "output_type": "execute_result" 156 | } 157 | ], 158 | "source": "!head -n5 {files[0]}" 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": "### Learning magic commands" 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 13, 168 | "metadata": {}, 169 | "outputs": [ 170 | { 171 | "data": { 172 | "text/plain": "Available line magics:\n%alias %alias_magic %autocall %automagic %autosave %bookmark %cat %cd %clear %colors %config %connect_info %cp %debug %dhist %dirs %doctest_mode %ed %edit %env %gui %hist %history %install_default_config %install_ext %install_profiles %killbgscripts %ldir %less %lf %lk %ll %load %load_ext %loadpy %logoff %logon %logstart %logstate %logstop %ls %lsmagic %lx %macro %magic %man %matplotlib %mkdir %more %mv %notebook %page %pastebin %pdb %pdef %pdoc %pfile %pinfo %pinfo2 %popd %pprint %precision %profile %prun %psearch %psource %pushd %pwd %pycat %pylab %qtconsole %quickref 
%recall %rehashx %reload_ext %rep %rerun %reset %reset_selective %rm %rmdir %run %save %sc %set_env %store %sx %system %tb %time %timeit %unalias %unload_ext %who %who_ls %whos %xdel %xmode\n\nAvailable cell magics:\n%%! %%HTML %%SVG %%bash %%capture %%debug %%file %%html %%javascript %%latex %%perl %%prun %%pypy %%python %%python2 %%python3 %%ruby %%script %%sh %%svg %%sx %%system %%time %%timeit %%writefile\n\nAutomagic is ON, % prefix IS NOT needed for line magics." 173 | }, 174 | "execution_count": 13, 175 | "metadata": {}, 176 | "output_type": "execute_result" 177 | } 178 | ], 179 | "source": "%lsmagic" 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 14, 184 | "metadata": {}, 185 | "outputs": [], 186 | "source": "%history?" 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 15, 191 | "metadata": {}, 192 | "outputs": [ 193 | { 194 | "data": { 195 | "text/plain": "files = !ls -1 -S | grep .edges\nfiles\n!head -n5 {files[0]}\n%lsmagic\n%history?" 196 | }, 197 | "execution_count": 15, 198 | "metadata": {}, 199 | "output_type": "execute_result" 200 | } 201 | ], 202 | "source": "%history -l 5" 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 16, 207 | "metadata": {}, 208 | "outputs": [ 209 | { 210 | "data": { 211 | "text/plain": "1440" 212 | }, 213 | "execution_count": 16, 214 | "metadata": {}, 215 | "output_type": "execute_result" 216 | } 217 | ], 218 | "source": "# how many minutes in a day?\n24 * 60" 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": 17, 223 | "metadata": {}, 224 | "outputs": [ 225 | { 226 | "data": { 227 | "text/plain": "525600" 228 | }, 229 | "execution_count": 17, 230 | "metadata": {}, 231 | "output_type": "execute_result" 232 | } 233 | ], 234 | "source": "# and in a year?\n_ * 365" 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 18, 239 | "metadata": {}, 240 | "outputs": [], 241 | "source": "%%capture output\n%ls" 242 | }, 243 | { 244 | 
"cell_type": "code", 245 | "execution_count": 19, 246 | "metadata": {}, 247 | "outputs": [ 248 | { 249 | "data": { 250 | "text/plain": "0.circles 1684.circles 3437.circles 3980.circles 686.circles\n0.edges 1684.edges 3437.edges 3980.edges 686.edges\n107.circles 1912.circles 348.circles 414.circles 698.circles\n107.edges 1912.edges 348.edges 414.edges 698.edges" 251 | }, 252 | "execution_count": 19, 253 | "metadata": {}, 254 | "output_type": "execute_result" 255 | } 256 | ], 257 | "source": "output.stdout" 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": 20, 262 | "metadata": {}, 263 | "outputs": [ 264 | { 265 | "data": { 266 | "text/plain": "_HEY\nfacebook\nfacebook.zip\n[...]" 267 | }, 268 | "execution_count": 20, 269 | "metadata": {}, 270 | "output_type": "execute_result" 271 | } 272 | ], 273 | "source": "%%bash\ncd ..\ntouch _HEY\nls\nrm _HEY\ncd facebook" 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": 21, 278 | "metadata": {}, 279 | "outputs": [ 280 | { 281 | "data": { 282 | "text/plain": "GHCi, version 7.6.3: http://www.haskell.org/ghc/ :? for help\nLoading package ghc-prim ... linking ... done.\nLoading package integer-gmp ... linking ... done.\nLoading package base ... linking ... done.\nPrelude> Hello world!\nPrelude> Leaving GHCi." 283 | }, 284 | "execution_count": 21, 285 | "metadata": {}, 286 | "output_type": "execute_result" 287 | } 288 | ], 289 | "source": "%%script ghci\nputStrLn \"Hello world!\"" 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": 22, 294 | "metadata": {}, 295 | "outputs": [ 296 | { 297 | "data": { 298 | "text/plain": "Writing myfile.txt" 299 | }, 300 | "execution_count": 22, 301 | "metadata": {}, 302 | "output_type": "execute_result" 303 | } 304 | ], 305 | "source": "%%writefile myfile.txt\nHello world!" 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 23, 310 | "metadata": {}, 311 | "outputs": [ 312 | { 313 | "data": { 314 | "text/plain": "Hello world!" 
315 | }, 316 | "execution_count": 23, 317 | "metadata": {}, 318 | "output_type": "execute_result" 319 | } 320 | ], 321 | "source": "!more myfile.txt" 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": 24, 326 | "metadata": {}, 327 | "outputs": [], 328 | "source": "!rm myfile.txt" 329 | }, 330 | { 331 | "cell_type": "markdown", 332 | "metadata": {}, 333 | "source": "### Mastering tab completion" 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": 25, 338 | "metadata": {}, 339 | "outputs": [ 340 | { 341 | "data": { 342 | "text/plain": "(bookmark:fbdata) -> /home/cyrille/minibook/chapter1/facebook\n/home/cyrille/minibook/chapter1/facebook\n0.circles 1684.circles 3437.circles 3980.circles 686.circles\n0.edges 1684.edges 3437.edges 3980.edges 686.edges\n107.circles 1912.circles 348.circles 414.circles 698.circles\n107.edges 1912.edges 348.edges 414.edges 698.edges" 343 | }, 344 | "execution_count": 25, 345 | "metadata": {}, 346 | "output_type": "execute_result" 347 | } 348 | ], 349 | "source": "%cd fbdata\n%ls" 350 | }, 351 | { 352 | "cell_type": "markdown", 353 | "metadata": {}, 354 | "source": "### Writing interactive documents in the Notebook with Markdown" 355 | }, 356 | { 357 | "cell_type": "markdown", 358 | "metadata": {}, 359 | "source": "### Creating interactive widgets in the Notebook" 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": 26, 364 | "metadata": {}, 365 | "outputs": [], 366 | "source": "from IPython.display import YouTubeVideo\nYouTubeVideo('j9YpkSX7NNM')" 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": 27, 371 | "metadata": {}, 372 | "outputs": [ 373 | { 374 | "data": { 375 | "text/plain": "'The square of 7 is 49.'" 376 | }, 377 | "execution_count": 27, 378 | "metadata": {}, 379 | "output_type": "execute_result" 380 | } 381 | ], 382 | "source": "from ipywidgets import interact # IPython.html.widgets before IPython 4.0\n@interact(x=(0, 10))\ndef square(x):\n return(\"The 
square of %d is %d.\" % (x, x**2))" 383 | }, 384 | { 385 | "cell_type": "markdown", 386 | "metadata": {}, 387 | "source": "### Running Python scripts from IPython" 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": 28, 392 | "metadata": {}, 393 | "outputs": [ 394 | { 395 | "data": { 396 | "text/plain": "(bookmark:fbdata) -> /home/cyrille/minibook/chapter1/facebook\n/home/cyrille/minibook/chapter1/facebook" 397 | }, 398 | "execution_count": 28, 399 | "metadata": {}, 400 | "output_type": "execute_result" 401 | } 402 | ], 403 | "source": "%cd fbdata\n%cd .." 404 | }, 405 | { 406 | "cell_type": "code", 407 | "execution_count": 29, 408 | "metadata": {}, 409 | "outputs": [ 410 | { 411 | "data": { 412 | "text/plain": "Overwriting egos.py" 413 | }, 414 | "execution_count": 29, 415 | "metadata": {}, 416 | "output_type": "execute_result" 417 | } 418 | ], 419 | "source": "%%writefile egos.py\nimport sys\nimport os\n# We retrieve the folder as the first positional argument\n# to the command-line call\nif len(sys.argv) > 1:\n folder = sys.argv[1]\n# We list all files in the specified folder\nfiles = os.listdir(folder)\n# ids contains the list of identifiers\nidentifiers = [int(file.split('.')[0]) for file in files]\n# Finally, we remove duplicates with set(), and sort the list\n# with sorted().\nids = sorted(set(identifiers))" 420 | }, 421 | { 422 | "cell_type": "code", 423 | "execution_count": 30, 424 | "metadata": {}, 425 | "outputs": [], 426 | "source": "%run egos.py facebook" 427 | }, 428 | { 429 | "cell_type": "code", 430 | "execution_count": 31, 431 | "metadata": {}, 432 | "outputs": [ 433 | { 434 | "data": { 435 | "text/plain": "[0, 107, 348, 414, 686, 698, 1684, 1912, 3437, 3980]" 436 | }, 437 | "execution_count": 31, 438 | "metadata": {}, 439 | "output_type": "execute_result" 440 | } 441 | ], 442 | "source": "ids" 443 | }, 444 | { 445 | "cell_type": "code", 446 | "execution_count": 32, 447 | "metadata": {}, 448 | "outputs": [], 449 | "source": "folder
= 'facebook'" 450 | }, 451 | { 452 | "cell_type": "code", 453 | "execution_count": 33, 454 | "metadata": {}, 455 | "outputs": [], 456 | "source": "%run egos.py" 457 | }, 458 | { 459 | "cell_type": "code", 460 | "execution_count": 34, 461 | "metadata": {}, 462 | "outputs": [], 463 | "source": "%run -i egos.py" 464 | }, 465 | { 466 | "cell_type": "code", 467 | "execution_count": 35, 468 | "metadata": {}, 469 | "outputs": [ 470 | { 471 | "data": { 472 | "text/plain": "[0, 107, 348, 414, 686, 698, 1684, 1912, 3437, 3980]" 473 | }, 474 | "execution_count": 35, 475 | "metadata": {}, 476 | "output_type": "execute_result" 477 | } 478 | ], 479 | "source": "ids" 480 | }, 481 | { 482 | "cell_type": "markdown", 483 | "metadata": {}, 484 | "source": "### Introspecting Python objects" 485 | }, 486 | { 487 | "cell_type": "code", 488 | "execution_count": 36, 489 | "metadata": {}, 490 | "outputs": [], 491 | "source": "import networkx" 492 | }, 493 | { 494 | "cell_type": "code", 495 | "execution_count": 37, 496 | "metadata": {}, 497 | "outputs": [], 498 | "source": "networkx.Graph?" 
499 | }, 500 | { 501 | "cell_type": "markdown", 502 | "metadata": {}, 503 | "source": "### Debugging Python code" 504 | }, 505 | { 506 | "cell_type": "markdown", 507 | "metadata": {}, 508 | "source": "### Benchmarking Python code" 509 | }, 510 | { 511 | "cell_type": "code", 512 | "execution_count": 38, 513 | "metadata": {}, 514 | "outputs": [ 515 | { 516 | "data": { 517 | "text/plain": "(bookmark:fbdata) -> /home/cyrille/minibook/chapter1/facebook\n/home/cyrille/minibook/chapter1/facebook" 518 | }, 519 | "execution_count": 38, 520 | "metadata": {}, 521 | "output_type": "execute_result" 522 | } 523 | ], 524 | "source": "%cd fbdata" 525 | }, 526 | { 527 | "cell_type": "code", 528 | "execution_count": 39, 529 | "metadata": {}, 530 | "outputs": [], 531 | "source": "import networkx" 532 | }, 533 | { 534 | "cell_type": "code", 535 | "execution_count": 40, 536 | "metadata": {}, 537 | "outputs": [], 538 | "source": "graph = networkx.read_edgelist('107.edges')" 539 | }, 540 | { 541 | "cell_type": "code", 542 | "execution_count": 41, 543 | "metadata": {}, 544 | "outputs": [ 545 | { 546 | "data": { 547 | "text/plain": "(1034, 26749)" 548 | }, 549 | "execution_count": 41, 550 | "metadata": {}, 551 | "output_type": "execute_result" 552 | } 553 | ], 554 | "source": "len(graph.nodes()), len(graph.edges())" 555 | }, 556 | { 557 | "cell_type": "code", 558 | "execution_count": 42, 559 | "metadata": {}, 560 | "outputs": [ 561 | { 562 | "data": { 563 | "text/plain": "True" 564 | }, 565 | "execution_count": 42, 566 | "metadata": {}, 567 | "output_type": "execute_result" 568 | } 569 | ], 570 | "source": "networkx.is_connected(graph)" 571 | }, 572 | { 573 | "cell_type": "code", 574 | "execution_count": 43, 575 | "metadata": {}, 576 | "outputs": [ 577 | { 578 | "data": { 579 | "text/plain": "100 loops, best of 3: 5.92 ms per loop" 580 | }, 581 | "execution_count": 43, 582 | "metadata": {}, 583 | "output_type": "execute_result" 584 | } 585 | ], 586 | "source": "%timeit 
networkx.is_connected(graph)" 587 | }, 588 | { 589 | "cell_type": "markdown", 590 | "metadata": {}, 591 | "source": "### Profiling Python code" 592 | }, 593 | { 594 | "cell_type": "code", 595 | "execution_count": 44, 596 | "metadata": {}, 597 | "outputs": [], 598 | "source": "import networkx" 599 | }, 600 | { 601 | "cell_type": "code", 602 | "execution_count": 45, 603 | "metadata": {}, 604 | "outputs": [], 605 | "source": "def ncomponents(file):\n graph = networkx.read_edgelist(file)\n return networkx.number_connected_components(graph)" 606 | }, 607 | { 608 | "cell_type": "code", 609 | "execution_count": 46, 610 | "metadata": {}, 611 | "outputs": [], 612 | "source": "import glob\ndef ncomponents_files():\n return [(file, ncomponents(file))\n for file in sorted(glob.glob('*.edges'))]" 613 | }, 614 | { 615 | "cell_type": "code", 616 | "execution_count": 47, 617 | "metadata": {}, 618 | "outputs": [ 619 | { 620 | "data": { 621 | "text/plain": "0.edges 5 component(s)\n107.edges 1 component(s)\n1684.edges 4 component(s)\n1912.edges 2 component(s)\n3437.edges 2 component(s)\n348.edges 1 component(s)\n3980.edges 4 component(s)\n414.edges 2 component(s)\n686.edges 1 component(s)\n698.edges 3 component(s)" 622 | }, 623 | "execution_count": 47, 624 | "metadata": {}, 625 | "output_type": "execute_result" 626 | } 627 | ], 628 | "source": "for file, n in ncomponents_files():\n print(file.ljust(12), n, 'component(s)')" 629 | }, 630 | { 631 | "cell_type": "code", 632 | "execution_count": 48, 633 | "metadata": {}, 634 | "outputs": [ 635 | { 636 | "data": { 637 | "text/plain": "1 loops, best of 3: 634 ms per loop" 638 | }, 639 | "execution_count": 48, 640 | "metadata": {}, 641 | "output_type": "execute_result" 642 | } 643 | ], 644 | "source": "%timeit ncomponents_files()" 645 | }, 646 | { 647 | "cell_type": "code", 648 | "execution_count": 49, 649 | "metadata": {}, 650 | "outputs": [ 651 | { 652 | "data": { 653 | "text/plain": "2391070 function calls in 1.038 seconds\n\nOrdered by: 
cumulative time\n\nncalls tottime percall cumtime percall filename:lineno(function)\n 1 0.000 0.000 1.038 1.038 {built-in method exec}\n 1 0.000 0.000 1.038 1.038 :1()\n 10 0.000 0.000 0.995 0.100 :1(read_edgelist)\n 10 0.000 0.000 0.995 0.100 decorators.py:155(_open_file)\n 10 0.376 0.038 0.995 0.099 edgelist.py:174(parse_edgelist)\n170174 0.279 0.000 0.350 0.000 graph.py:648(add_edge)\n170184 0.059 0.000 0.095 0.000 edgelist.py:366()\n 10 0.000 0.000 0.021 0.002 connected.py:98(number_connected_components)\n 35 0.001 0.000 0.021 0.001 connected.py:22(connected_components)" 654 | }, 655 | "execution_count": 49, 656 | "metadata": {}, 657 | "output_type": "execute_result" 658 | } 659 | ], 660 | "source": "%prun -s cumtime ncomponents_files()" 661 | } 662 | ], 663 | "metadata": {}, 664 | "nbformat": 4, 665 | "nbformat_minor": 0 666 | } -------------------------------------------------------------------------------- /chapter1/images/ipython-console.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipython-books/minibook-2nd-code/641e6e3cb119f52110dce719b1488ff28ad2f801/chapter1/images/ipython-console.png -------------------------------------------------------------------------------- /chapter1/images/kernel-selector.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipython-books/minibook-2nd-code/641e6e3cb119f52110dce719b1488ff28ad2f801/chapter1/images/kernel-selector.png -------------------------------------------------------------------------------- /chapter1/images/markdown-both.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipython-books/minibook-2nd-code/641e6e3cb119f52110dce719b1488ff28ad2f801/chapter1/images/markdown-both.png -------------------------------------------------------------------------------- /chapter1/images/markdown.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipython-books/minibook-2nd-code/641e6e3cb119f52110dce719b1488ff28ad2f801/chapter1/images/markdown.png -------------------------------------------------------------------------------- /chapter1/images/nbui-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipython-books/minibook-2nd-code/641e6e3cb119f52110dce719b1488ff28ad2f801/chapter1/images/nbui-1.png -------------------------------------------------------------------------------- /chapter1/images/nbui-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipython-books/minibook-2nd-code/641e6e3cb119f52110dce719b1488ff28ad2f801/chapter1/images/nbui-2.png -------------------------------------------------------------------------------- /chapter1/images/nbui-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipython-books/minibook-2nd-code/641e6e3cb119f52110dce719b1488ff28ad2f801/chapter1/images/nbui-3.png -------------------------------------------------------------------------------- /chapter1/images/notebook-terminal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipython-books/minibook-2nd-code/641e6e3cb119f52110dce719b1488ff28ad2f801/chapter1/images/notebook-terminal.png -------------------------------------------------------------------------------- /chapter1/images/notebook.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipython-books/minibook-2nd-code/641e6e3cb119f52110dce719b1488ff28ad2f801/chapter1/images/notebook.png -------------------------------------------------------------------------------- /chapter1/images/pager.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipython-books/minibook-2nd-code/641e6e3cb119f52110dce719b1488ff28ad2f801/chapter1/images/pager.png -------------------------------------------------------------------------------- /chapter1/images/slider.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipython-books/minibook-2nd-code/641e6e3cb119f52110dce719b1488ff28ad2f801/chapter1/images/slider.png -------------------------------------------------------------------------------- /chapter1/images/tab-completion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipython-books/minibook-2nd-code/641e6e3cb119f52110dce719b1488ff28ad2f801/chapter1/images/tab-completion.png -------------------------------------------------------------------------------- /chapter1/images/text-editor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipython-books/minibook-2nd-code/641e6e3cb119f52110dce719b1488ff28ad2f801/chapter1/images/text-editor.png -------------------------------------------------------------------------------- /chapter1/images/unicode-completion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipython-books/minibook-2nd-code/641e6e3cb119f52110dce719b1488ff28ad2f801/chapter1/images/unicode-completion.png -------------------------------------------------------------------------------- /chapter1/images/youtube.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipython-books/minibook-2nd-code/641e6e3cb119f52110dce719b1488ff28ad2f801/chapter1/images/youtube.png -------------------------------------------------------------------------------- 
/chapter2/21-exploring.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": "## Exploring a dataset in the Notebook" 7 | }, 8 | { 9 | "cell_type": "markdown", 10 | "metadata": {}, 11 | "source": "### Provenance of the data" 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": "### Downloading and loading a dataset" 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 1, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": "import numpy as np\nimport pandas as pd\nimport matplotlib.pyplot as plt\n%matplotlib inline" 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": "%cd ~/minibook/chapter2/" 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 3, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": "!wget https://raw.githubusercontent.com/ipython-books/minibook-2nd-data/master/nyc_taxi.zip\n!unzip nyc_taxi.zip" 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 4, 42 | "metadata": {}, 43 | "outputs": [ 44 | { 45 | "data": { 46 | "text/plain": "nyc_data.csv nyc_fare.csv [...]" 47 | }, 48 | "execution_count": 4, 49 | "metadata": {}, 50 | "output_type": "execute_result" 51 | } 52 | ], 53 | "source": "%ls data" 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 5, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": "data_filename = 'data/nyc_data.csv'\nfare_filename = 'data/nyc_fare.csv'" 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 6, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": "data = pd.read_csv(data_filename, parse_dates=['pickup_datetime',\n 'dropoff_datetime'])\nfare = pd.read_csv(fare_filename, parse_dates=['pickup_datetime'])" 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 7, 72 | "metadata": {}, 73 | "outputs": [], 74 | 
"source": "data.head(3)" 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": "### Making plots with matplotlib" 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 8, 84 | "metadata": {}, 85 | "outputs": [ 86 | { 87 | "data": { 88 | "text/plain": "Index(['medallion',\n ...\n 'pickup_datetime',\n 'dropoff_datetime',\n 'passenger_count',\n 'trip_time_in_secs',\n 'trip_distance',\n 'pickup_longitude',\n 'pickup_latitude',\n 'dropoff_longitude',\n 'dropoff_latitude'], dtype='object')" 89 | }, 90 | "execution_count": 8, 91 | "metadata": {}, 92 | "output_type": "execute_result" 93 | } 94 | ], 95 | "source": "data.columns" 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 9, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": "p_lng = data.pickup_longitude\np_lat = data.pickup_latitude\nd_lng = data.dropoff_longitude\nd_lat = data.dropoff_latitude" 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 10, 107 | "metadata": {}, 108 | "outputs": [ 109 | { 110 | "data": { 111 | "text/plain": "0 -73.955925\n1 -74.005501\n...\n846943 -73.978477\n846944 -73.987206\nName: pickup_longitude, Length: 846945, dtype: float64" 112 | }, 113 | "execution_count": 10, 114 | "metadata": {}, 115 | "output_type": "execute_result" 116 | } 117 | ], 118 | "source": "p_lng" 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 11, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": "def lat_lng_to_pixels(lat, lng):\n lat_rad = lat * np.pi / 180.0\n lat_rad = np.log(np.tan((lat_rad + np.pi / 2.0) / 2.0))\n x = 100 * (lng + 180.0) / 360.0\n y = 100 * (lat_rad - np.pi) / (2.0 * np.pi)\n return (x, y)" 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 12, 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": "px, py = lat_lng_to_pixels(p_lat, p_lng)" 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 13, 137 | "metadata": {}, 138 | "outputs": [ 139 | { 140 | 
"data": { 141 | "text/plain": "0 29.456688\n1 29.442916\n...\n846943 29.450423\n846944 29.447998\nName: pickup_longitude, dtype: float64" 142 | }, 143 | "execution_count": 13, 144 | "metadata": {}, 145 | "output_type": "execute_result" 146 | } 147 | ], 148 | "source": "px" 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 14, 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": "plt.scatter(px, py)" 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 15, 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": "plt.figure(figsize=(8, 6))\nplt.scatter(px, py, s=.1, alpha=.03)\nplt.axis('equal')\nplt.xlim(29.40, 29.55)\nplt.ylim(-37.63, -37.54)\nplt.axis('off')" 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": {}, 167 | "source": "### Descriptive statistics with pandas and seaborn" 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 16, 172 | "metadata": {}, 173 | "outputs": [ 174 | { 175 | "data": { 176 | "text/plain": "(846945, 29.417137499999995, 29.714313055555561)" 177 | }, 178 | "execution_count": 16, 179 | "metadata": {}, 180 | "output_type": "execute_result" 181 | } 182 | ], 183 | "source": "px.count(), px.min(), px.max()" 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 17, 188 | "metadata": {}, 189 | "outputs": [ 190 | { 191 | "data": { 192 | "text/plain": "(29.451345807768575, 29.449418333333337, 0.0097616942794720614)" 193 | }, 194 | "execution_count": 17, 195 | "metadata": {}, 196 | "output_type": "execute_result" 197 | } 198 | ], 199 | "source": "px.mean(), px.median(), px.std()" 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 18, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": "!conda install seaborn -q -y" 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 19, 211 | "metadata": {}, 212 | "outputs": [ 213 | { 214 | "data": { 215 | "text/plain": "'0.6.0'" 216 | }, 217 | "execution_count": 19, 218 | 
"metadata": {}, 219 | "output_type": "execute_result" 220 | } 221 | ], 222 | "source": "import seaborn as sns\nsns.__version__" 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 20, 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": "data.trip_distance.hist(bins=np.linspace(0., 10., 100))" 230 | } 231 | ], 232 | "metadata": {}, 233 | "nbformat": 4, 234 | "nbformat_minor": 0 235 | } -------------------------------------------------------------------------------- /chapter2/22-manipulating.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": "## Manipulating data" 7 | }, 8 | { 9 | "cell_type": "code", 10 | "execution_count": 1, 11 | "metadata": {}, 12 | "outputs": [], 13 | "source": "import numpy as np\nimport pandas as pd\nimport matplotlib.pyplot as plt\n%matplotlib inline\ndata = pd.read_csv('data/nyc_data.csv', parse_dates=['pickup_datetime',\n 'dropoff_datetime'])\nfare = pd.read_csv('data/nyc_fare.csv', parse_dates=['pickup_datetime'])" 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": "### Selecting data" 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": {}, 24 | "outputs": [ 25 | { 26 | "data": { 27 | "text/plain": " trip_distance trip_time_in_secs\n0 0.61 300\n1 3.28 960\n2 1.50 386" 28 | }, 29 | "execution_count": 2, 30 | "metadata": {}, 31 | "output_type": "execute_result" 32 | } 33 | ], 34 | "source": "data[['trip_distance', 'trip_time_in_secs']].head(3)" 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 3, 39 | "metadata": {}, 40 | "outputs": [ 41 | { 42 | "data": { 43 | "text/plain": "medallion 76942C3205E17D7E7FE5A9F709D16434\nhack_license 25BA06A87905667AA1FE5990E33F0E2E\nvendor_id VTS\nrate_code 1\nstore_and_fwd_flag NaN\npickup_datetime 2013-01-01 00:00:00\ndropoff_datetime 2013-01-01 00:05:00\npassenger_count 
3\ntrip_time_in_secs 300\ntrip_distance 0.61\npickup_longitude -73.95592\npickup_latitude 40.78189\ndropoff_longitude -73.96318\ndropoff_latitude 40.77783\nName: 0, dtype: object" 44 | }, 45 | "execution_count": 3, 46 | "metadata": {}, 47 | "output_type": "execute_result" 48 | } 49 | ], 50 | "source": "data.loc[0]" 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 4, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": "data.loc[[0, 100000]]" 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 5, 62 | "metadata": {}, 63 | "outputs": [ 64 | { 65 | "data": { 66 | "text/plain": " trip_distance trip_time_in_secs\n1000 1.00 441\n1010 3.80 691\n....\n1990 0.13 60\n2000 9.60 963" 67 | }, 68 | "execution_count": 5, 69 | "metadata": {}, 70 | "output_type": "execute_result" 71 | } 72 | ], 73 | "source": "data.loc[1000:2000:10,\n ['trip_distance', 'trip_time_in_secs']]" 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 6, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": "data.loc[data.trip_distance>50]" 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 7, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": "from ipywidgets import interact" 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 8, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": "@interact\ndef show_nrows(distance_threshold=(0, 200)):\n return len(data.loc[data.trip_distance > distance_threshold])" 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": {}, 99 | "source": "### Computing with numbers" 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 9, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": "data['trip_time_in_mins'] = data.trip_time_in_secs / 60.0" 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 10, 111 | "metadata": {}, 112 | "outputs": [ 113 | { 114 | "data": { 115 | "text/plain": " trip_time_in_secs trip_time_in_mins\n0 300 5.000000\n1 960 16.000000\n2 
386 6.433333" 116 | }, 117 | "execution_count": 10, 118 | "metadata": {}, 119 | "output_type": "execute_result" 120 | } 121 | ], 122 | "source": "data[['trip_time_in_secs', 'trip_time_in_mins']].head(3)" 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 11, 127 | "metadata": {}, 128 | "outputs": [ 129 | { 130 | "data": { 131 | "text/plain": "0 0.61\n1 3.28\n2 1.50\n3 0.00\n4 1.31\nName: trip_distance, dtype: float64" 132 | }, 133 | "execution_count": 11, 134 | "metadata": {}, 135 | "output_type": "execute_result" 136 | } 137 | ], 138 | "source": "a = data.trip_distance[:5]\na" 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 12, 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "data": { 147 | "text/plain": "2 1.50\n3 0.00\n4 1.31\n5 5.81\nName: trip_distance, dtype: float64" 148 | }, 149 | "execution_count": 12, 150 | "metadata": {}, 151 | "output_type": "execute_result" 152 | } 153 | ], 154 | "source": "b = data.trip_distance[2:6]\nb" 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 13, 159 | "metadata": {}, 160 | "outputs": [ 161 | { 162 | "data": { 163 | "text/plain": "0 NaN\n1 NaN\n2 3.00\n3 0.00\n4 2.62\n5 NaN\nName: trip_distance, dtype: float64" 164 | }, 165 | "execution_count": 13, 166 | "metadata": {}, 167 | "output_type": "execute_result" 168 | } 169 | ], 170 | "source": "a + b" 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": "### Working with text" 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 14, 180 | "metadata": {}, 181 | "outputs": [ 182 | { 183 | "data": { 184 | "text/plain": "0 76942C3205E17D7E7FE5A9F709D16434\n1 517C6B330DBB3F055D007B07512628B3\n2 ED15611F168E41B33619C83D900FE266\nName: medallion, dtype: object" 185 | }, 186 | "execution_count": 14, 187 | "metadata": {}, 188 | "output_type": "execute_result" 189 | } 190 | ], 191 | "source": "data.medallion.head(3)" 192 | }, 193 | { 194 | "cell_type": "code", 195 | 
"execution_count": 15, 196 | "metadata": {}, 197 | "outputs": [ 198 | { 199 | "data": { 200 | "text/plain": "0 7694\n1 517C\n2 ED15\nName: medallion, dtype: object" 201 | }, 202 | "execution_count": 15, 203 | "metadata": {}, 204 | "output_type": "execute_result" 205 | } 206 | ], 207 | "source": "data.medallion.str.slice(0, 4).head(3)" 208 | }, 209 | { 210 | "cell_type": "markdown", 211 | "metadata": {}, 212 | "source": "### Working with dates and times" 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 16, 217 | "metadata": {}, 218 | "outputs": [ 219 | { 220 | "data": { 221 | "text/plain": "0 1\n200000 6\n400000 5\n600000 0\n800000 1\ndtype: int64" 222 | }, 223 | "execution_count": 16, 224 | "metadata": {}, 225 | "output_type": "execute_result" 226 | } 227 | ], 228 | "source": "data.pickup_datetime.dt.dayofweek[::200000]" 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": 17, 233 | "metadata": {}, 234 | "outputs": [ 235 | { 236 | "data": { 237 | "text/plain": "7716" 238 | }, 239 | "execution_count": 17, 240 | "metadata": {}, 241 | "output_type": "execute_result" 242 | } 243 | ], 244 | "source": "day_p = data.pickup_datetime.dt.day\nday_d = data.dropoff_datetime.dt.day\nselection = (day_p != day_d)\nprint(len(data.loc[selection]))\ndata.loc[selection].head(3)" 245 | }, 246 | { 247 | "cell_type": "markdown", 248 | "metadata": {}, 249 | "source": "### Handling missing data" 250 | } 251 | ], 252 | "metadata": {}, 253 | "nbformat": 4, 254 | "nbformat_minor": 0 255 | } -------------------------------------------------------------------------------- /chapter2/23-groupby.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": "## Complex operations" 7 | }, 8 | { 9 | "cell_type": "code", 10 | "execution_count": 1, 11 | "metadata": {}, 12 | "outputs": [], 13 | "source": "import numpy as np\nimport pandas as pd\nimport 
matplotlib.pyplot as plt\nimport seaborn\n%matplotlib inline\ndata = pd.read_csv('data/nyc_data.csv', parse_dates=['pickup_datetime',\n 'dropoff_datetime'])\nfare = pd.read_csv('data/nyc_fare.csv', parse_dates=['pickup_datetime'])" 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": "### Group-by" 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": "weekly = data.groupby(data.pickup_datetime.dt.weekofyear)" 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 3, 30 | "metadata": {}, 31 | "outputs": [ 32 | { 33 | "data": { 34 | "text/plain": "52" 35 | }, 36 | "execution_count": 3, 37 | "metadata": {}, 38 | "output_type": "execute_result" 39 | } 40 | ], 41 | "source": "len(weekly)" 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 4, 46 | "metadata": {}, 47 | "outputs": [ 48 | { 49 | "data": { 50 | "text/plain": "1 17042\n2 15941\n3 17017\ndtype: int64" 51 | }, 52 | "execution_count": 4, 53 | "metadata": {}, 54 | "output_type": "execute_result" 55 | } 56 | ], 57 | "source": "y = weekly.size()\ny.head(3)" 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 5, 62 | "metadata": {}, 63 | "outputs": [ 64 | { 65 | "data": { 66 | "text/plain": "1 2013-01-01 00:00:00\n2 2013-01-07 00:03:00\n3 2013-01-14 00:00:51\nName: pickup_datetime, dtype: datetime64[ns]" 67 | }, 68 | "execution_count": 5, 69 | "metadata": {}, 70 | "output_type": "execute_result" 71 | } 72 | ], 73 | "source": "x = weekly.pickup_datetime.first()\nx.head(3)" 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 6, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": "pd.Series(y.values, index=x).plot()\nplt.ylim(0) # Set the lower y value to 0.\nplt.xlabel('Week') # Label of the x axis.\nplt.ylabel('Taxi rides') # Label of the y axis." 
81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": "### Joins" 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 7, 90 | "metadata": {}, 91 | "outputs": [ 92 | { 93 | "data": { 94 | "text/plain": "13407\n tip_amount\nmedallion\n00005007A9F30E289E760362F69E4EAD 1.815854\n000318C2E3E6381580E5C99910A60668 2.857222\n000351EDC735C079246435340A54C7C1 2.099111" 95 | }, 96 | "execution_count": 7, 97 | "metadata": {}, 98 | "output_type": "execute_result" 99 | } 100 | ], 101 | "source": "tip = fare[['medallion', 'tip_amount']] \\\n .loc[fare.tip_amount>0].groupby('medallion').mean()\nprint(len(tip))\ntip.head(3)" 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 8, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": "tip.hist(bins=np.linspace(0., 6., 100))\nplt.xlabel('Average tip')\nplt.ylabel('Number of taxis')" 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 9, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": "data_merged = pd.merge(data, tip, how='left', left_on='medallion', right_index=True)\ndata_merged.head(3)" 116 | } 117 | ], 118 | "metadata": {}, 119 | "nbformat": 4, 120 | "nbformat_minor": 0 121 | } -------------------------------------------------------------------------------- /chapter2/cleaning/README.md: -------------------------------------------------------------------------------- 1 | NYC taxi dataset 2013 2 | ===================== 3 | 4 | Instructions to create your own subset of the data: 5 | 6 | * Clone this repository on your computer with `git clone https://github.com/ipython-books/minibook-2nd-code.git` 7 | 8 | * Use a BitTorrent client (like http://www.utorrent.com/) to download the `nycTaxiFareData2013.torrent` and `nycTaxiTripData2013.torrent` datasets in `../data` (they have been obtained at http://chriswhong.com/open-data/foil_nyc_taxi/). 
9 | 10 | * Extract the two downloaded `tripData2013.zip` and `faredata2013.zip` files into the `/minibook-2nd-code/chapter2/data` directory. 11 | 12 | * You now have 24 zip files named `trip_data_1.csv.zip`, ..., `trip_data_12.csv.zip`, `trip_fare_1.csv.zip`, ..., `trip_fare_12.csv.zip` in the `/minibook-2nd-code/chapter2/data` directory. 13 | 14 | * Start a notebook server in the current directory (`minibook-2nd-code/chapter2/cleaning/`) with `jupyter notebook`, and open the `subset.ipynb` notebook. 15 | 16 | * You can tweak the `step = 200` line at the top of the notebook. Use a lower value to get a larger subset. The proportion of the subset is `1/step` (so 0.5% with step = 200). 17 | 18 | * Run this notebook. After several minutes, you will get two files, `trip_data_subset.csv` and `trip_fare_subset.csv`, in the data directory. These are the files we will be working on in this chapter and the next. 19 | -------------------------------------------------------------------------------- /chapter2/cleaning/cleanup.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Cleanup" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Let's do some clean-up."
15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": { 21 | "collapsed": true 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "import numpy as np\n", 26 | "import pandas as pd\n", 27 | "import matplotlib.pyplot as plt\n", 28 | "%matplotlib inline" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": { 35 | "collapsed": true 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "data_filename = '../data/trip_data_subset.csv'\n", 40 | "fare_filename = '../data/trip_fare_subset.csv'" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": { 47 | "collapsed": true 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "data = pd.read_csv(data_filename)\n", 52 | "fare = pd.read_csv(fare_filename)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "## Cleanup headers" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "First, we remove extra spaces in the headers." 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": { 73 | "collapsed": false 74 | }, 75 | "outputs": [], 76 | "source": [ 77 | "fare.columns = [col.strip() for col in fare.columns]" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "## Reorder rows by date" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "Then, we reorder the rows by date." 
92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": { 98 | "collapsed": false 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "data.pickup_datetime = pd.to_datetime(data.pickup_datetime)\n", 103 | "data.dropoff_datetime = pd.to_datetime(data.dropoff_datetime)\n", 104 | "fare.pickup_datetime = pd.to_datetime(fare.pickup_datetime)" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": { 111 | "collapsed": false 112 | }, 113 | "outputs": [], 114 | "source": [ 115 | "sorted_index = data.sort('pickup_datetime').index" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": { 122 | "collapsed": false 123 | }, 124 | "outputs": [], 125 | "source": [ 126 | "data = data.ix[sorted_index]\n", 127 | "fare = fare.ix[sorted_index]" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "We now remove the index." 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": { 141 | "collapsed": false 142 | }, 143 | "outputs": [], 144 | "source": [ 145 | "data = data.reset_index()\n", 146 | "del data['index']" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "metadata": { 153 | "collapsed": true 154 | }, 155 | "outputs": [], 156 | "source": [ 157 | "fare = fare.reset_index()\n", 158 | "del fare['index']" 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | "source": [ 165 | "## Cleaning up the coordinates" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "We find a rectangle around Manhattan (using http://www.openstreetmap.org/)." 
173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": { 179 | "collapsed": true 180 | }, 181 | "outputs": [], 182 | "source": [ 183 | "lon_min, lon_max = -74.1, -73\n", 184 | "lat_min, lat_max = 40, 41" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": {}, 190 | "source": [ 191 | "We get rid of values outside this area." 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": { 198 | "collapsed": true 199 | }, 200 | "outputs": [], 201 | "source": [ 202 | "lon_p, lat_p = data.pickup_longitude, data.pickup_latitude\n", 203 | "lon_d, lat_d = data.dropoff_longitude, data.dropoff_latitude" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "metadata": { 210 | "collapsed": true 211 | }, 212 | "outputs": [], 213 | "source": [ 214 | "to_keep = ((lon_p > lon_min) &\n", 215 | " (lon_p < lon_max) &\n", 216 | " (lat_p > lat_min) &\n", 217 | " (lat_p < lat_max) &\n", 218 | " (lon_d > lon_min) &\n", 219 | " (lon_d < lon_max) &\n", 220 | " (lat_d > lat_min) &\n", 221 | " (lat_d < lat_max))" 222 | ] 223 | }, 224 | { 225 | "cell_type": "markdown", 226 | "metadata": {}, 227 | "source": [ 228 | "We keep those rows where the coordinates are within the rectangle." 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": { 235 | "collapsed": true 236 | }, 237 | "outputs": [], 238 | "source": [ 239 | "data = data.ix[to_keep]\n", 240 | "fare = fare.ix[to_keep]" 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": {}, 246 | "source": [ 247 | "## Save the cleaned datasets" 248 | ] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "metadata": {}, 253 | "source": [ 254 | "Finally, we save the cleaned-up datasets." 
255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": null, 260 | "metadata": { 261 | "collapsed": true 262 | }, 263 | "outputs": [], 264 | "source": [ 265 | "data.to_csv('../data/nyc_data.csv', index=False)\n", 266 | "fare.to_csv('../data/nyc_fare.csv', index=False)" 267 | ] 268 | } 269 | ], 270 | "metadata": { 271 | "kernelspec": { 272 | "display_name": "Python 3", 273 | "language": "python", 274 | "name": "python3" 275 | }, 276 | "language_info": { 277 | "codemirror_mode": { 278 | "name": "ipython", 279 | "version": 3 280 | }, 281 | "file_extension": ".py", 282 | "mimetype": "text/x-python", 283 | "name": "python", 284 | "nbconvert_exporter": "python", 285 | "pygments_lexer": "ipython3", 286 | "version": "3.4.3" 287 | } 288 | }, 289 | "nbformat": 4, 290 | "nbformat_minor": 0 291 | } 292 | -------------------------------------------------------------------------------- /chapter2/cleaning/subset.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Making a subset of the data" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "In this notebook, we create a subset of the NYC taxi dataset. This subset only contains 0.5% of all rows." 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "## Instructions" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "* It is recommended to have **100GB of free space** on your hard drive.\n", 29 | "* This notebook should be in the `minibook-2nd-code/chapter2/cleaning/` directory.\n", 30 | "* The original data files should be in the `minibook-2nd-code/chapter2/data/` directory.\n", 31 | "* The 24 data files are named `trip_data_1.csv.zip`, ..., `trip_fare_12.csv.zip`.
**Make sure to download these files and put them in the `data` directory**.\n", 32 | "* This notebook will create two files, `trip_data_subset.csv` and `trip_fare_subset.csv`, in the `data` directory.\n", 33 | "* Once the 24 data files have been put in `data/`, click on `Cell > Run all` above to launch the extraction process. **It will take several minutes**." 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "The procedure implemented here is memory-efficient in that the full ZIP files are not first extracted on disk. Unzipping occurs on the fly. We make heavy use of Python generators and iterators." 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": { 47 | "collapsed": true 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "# Relative path of the directory containing the data.\n", 52 | "data_dir = '../data'\n", 53 | "# Only keep one out of 'step' lines ('200' = 0.5%).\n", 54 | "step = 200\n", 55 | "# In every file, stop after 'stop' lines (None=until the end).\n", 56 | "stop = None # type 400 to test the script and generate tiny subsets."
57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": { 63 | "collapsed": true 64 | }, 65 | "outputs": [], 66 | "source": [ 67 | "import os\n", 68 | "import os.path as op\n", 69 | "import re\n", 70 | "import zipfile\n", 71 | "import glob\n", 72 | "from itertools import chain, islice" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": { 79 | "collapsed": true 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "def _csv_filename(zip_filename):\n", 84 | " \"\"\"Return the filename of the CSV in a ZIP file.\"\"\"\n", 85 | " return op.splitext(op.basename(zip_filename))[0]" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": { 92 | "collapsed": false 93 | }, 94 | "outputs": [], 95 | "source": [ 96 | "def _iter_lines(zip_filename):\n", 97 | " \"\"\"Iterate over all rows from a zipped CSV file.\"\"\"\n", 98 | " print(\"Processing file {file}...\".format(file=zip_filename))\n", 99 | " csv_filename = _csv_filename(zip_filename)\n", 100 | " with zipfile.ZipFile(zip_filename) as z:\n", 101 | " with z.open(csv_filename) as f:\n", 102 | " for line in f:\n", 103 | " yield line" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": { 110 | "collapsed": true 111 | }, 112 | "outputs": [], 113 | "source": [ 114 | "def _iter_all_lines(files, step=None, stop=None):\n", 115 | " \"\"\"Iterate over rows from several zipped CSV files.\"\"\"\n", 116 | " # Keep the header row in the first file, but not in the other files.\n", 117 | " return chain(*[islice(_iter_lines(f), min(1, i), stop, step)\n", 118 | " for i, f in enumerate(files)])" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": { 125 | "collapsed": true 126 | }, 127 | "outputs": [], 128 | "source": [ 129 | "def _extract_number(filename):\n", 130 | " \"\"\"Return the month number appearing in a ZIP 
file.\"\"\"\n", 131 | " r = re.search(r'([\\d]+)', filename)\n", 132 | " if r:\n", 133 | " return int(r.group(1))" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": { 140 | "collapsed": true 141 | }, 142 | "outputs": [], 143 | "source": [ 144 | "def _zip_filenames(name):\n", 145 | " \"\"\"Return the ordered list of ZIP filenames.\"\"\"\n", 146 | " return sorted(glob.glob(op.join(data_dir, 'trip_{name}_*.zip'.format(name=name))), \n", 147 | " key=_extract_number)" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "metadata": { 154 | "collapsed": false 155 | }, 156 | "outputs": [], 157 | "source": [ 158 | "def _make_extract(step=None, stop=None):\n", 159 | " \"\"\"Create the subset data.\"\"\"\n", 160 | " for name in ('fare', 'data'):\n", 161 | " # Output CSV filename (with the subset data).\n", 162 | " filename = op.join(data_dir, 'trip_{name}_subset.csv'.format(name=name))\n", 163 | " # List of zipped CSV files.\n", 164 | " files = _zip_filenames(name)\n", 165 | " with open(filename, 'wb') as f:\n", 166 | " # Iterate over a subset of the rows from all files.\n", 167 | " for line in _iter_all_lines(files, step=step, stop=stop):\n", 168 | " f.write(line)\n", 169 | " print(\"*** Done! {0} has been successfully created. ***\\n\".format(filename))" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "Make the subset data (**this will take a while**)."
177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "metadata": { 183 | "collapsed": false, 184 | "scrolled": true 185 | }, 186 | "outputs": [], 187 | "source": [ 188 | "_make_extract(step=step, stop=stop)" 189 | ] 190 | } 191 | ], 192 | "metadata": { 193 | "kernelspec": { 194 | "display_name": "Python 3", 195 | "language": "python", 196 | "name": "python3" 197 | }, 198 | "language_info": { 199 | "codemirror_mode": { 200 | "name": "ipython", 201 | "version": 3 202 | }, 203 | "file_extension": ".py", 204 | "mimetype": "text/x-python", 205 | "name": "python", 206 | "nbconvert_exporter": "python", 207 | "pygments_lexer": "ipython3", 208 | "version": "3.4.3" 209 | } 210 | }, 211 | "nbformat": 4, 212 | "nbformat_minor": 0 213 | } 214 | -------------------------------------------------------------------------------- /chapter2/data/.gitignore: -------------------------------------------------------------------------------- 1 | *.csv 2 | *.zip 3 | -------------------------------------------------------------------------------- /chapter2/data/nycTaxiFareData2013.torrent: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipython-books/minibook-2nd-code/641e6e3cb119f52110dce719b1488ff28ad2f801/chapter2/data/nycTaxiFareData2013.torrent -------------------------------------------------------------------------------- /chapter2/data/nycTaxiTripData2013.torrent: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipython-books/minibook-2nd-code/641e6e3cb119f52110dce719b1488ff28ad2f801/chapter2/data/nycTaxiTripData2013.torrent -------------------------------------------------------------------------------- /chapter3/31-primer.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": "## A primer to 
vector computing" 7 | }, 8 | { 9 | "cell_type": "markdown", 10 | "metadata": {}, 11 | "source": "### Multidimensional arrays" 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": "### The ndarray" 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": "### Vector operations on ndarrays" 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": "### Example: how fast are vector computations in NumPy?" 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 1, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": "from random import random\nlist_1 = [random() for _ in range(1000000)]\nlist_2 = [random() for _ in range(1000000)]" 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 2, 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "data": { 42 | "text/plain": "[0.843375384328939, 1.507485612134079, 1.4119777108063973]" 43 | }, 44 | "execution_count": 2, 45 | "metadata": {}, 46 | "output_type": "execute_result" 47 | } 48 | ], 49 | "source": "out = [x + y for (x, y) in zip(list_1, list_2)]\nout[:3]" 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 3, 54 | "metadata": {}, 55 | "outputs": [ 56 | { 57 | "data": { 58 | "text/plain": "10 loops, best of 3: 69.7 ms per loop" 59 | }, 60 | "execution_count": 3, 61 | "metadata": {}, 62 | "output_type": "execute_result" 63 | } 64 | ], 65 | "source": "%timeit [x + y for (x, y) in zip(list_1, list_2)]" 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 4, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": "import numpy as np\narr_1 = np.array(list_1)\narr_2 = np.array(list_2)" 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 5, 77 | "metadata": {}, 78 | "outputs": [ 79 | { 80 | "data": { 81 | "text/plain": "(list, numpy.ndarray)" 82 | }, 83 | "execution_count": 5, 84 | "metadata": {}, 85 | "output_type": "execute_result" 86 | } 87 | ], 88 | "source": "type(list_1), type(arr_1)" 89 
| }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 6, 93 | "metadata": {}, 94 | "outputs": [ 95 | { 96 | "data": { 97 | "text/plain": "(1000000,)" 98 | }, 99 | "execution_count": 6, 100 | "metadata": {}, 101 | "output_type": "execute_result" 102 | } 103 | ], 104 | "source": "arr_1.shape" 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 7, 109 | "metadata": {}, 110 | "outputs": [ 111 | { 112 | "data": { 113 | "text/plain": "dtype('float64')" 114 | }, 115 | "execution_count": 7, 116 | "metadata": {}, 117 | "output_type": "execute_result" 118 | } 119 | ], 120 | "source": "arr_1.dtype" 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 8, 125 | "metadata": {}, 126 | "outputs": [ 127 | { 128 | "data": { 129 | "text/plain": "array([ 0.84337538, 1.50748561, 1.41197771])" 130 | }, 131 | "execution_count": 8, 132 | "metadata": {}, 133 | "output_type": "execute_result" 134 | } 135 | ], 136 | "source": "sum_arr = arr_1 + arr_2\nsum_arr[:3]" 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 9, 141 | "metadata": {}, 142 | "outputs": [ 143 | { 144 | "data": { 145 | "text/plain": "1000 loops, best of 3: 1.57 ms per loop" 146 | }, 147 | "execution_count": 9, 148 | "metadata": {}, 149 | "output_type": "execute_result" 150 | } 151 | ], 152 | "source": "%timeit arr_1 + arr_2" 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": "### How an ndarray is stored in memory" 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": {}, 162 | "source": "### Why operations on ndarrays are fast" 163 | } 164 | ], 165 | "metadata": {}, 166 | "nbformat": 4, 167 | "nbformat_minor": 0 168 | } -------------------------------------------------------------------------------- /chapter3/32-creating.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": "## Creating and 
loading arrays" 7 | }, 8 | { 9 | "cell_type": "markdown", 10 | "metadata": {}, 11 | "source": "### Creating arrays" 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "data": { 20 | "text/plain": "ones [ 1. 1. 1. 1. 1.]\narange [0 1 2 3 4]\nlinspace [ 0. 0.25 0.5 0.75 1. ]\nrandom [ 0.68361911 0.33585308 0.70733934]\ncustom [2 3 5]" 21 | }, 22 | "execution_count": 1, 23 | "metadata": {}, 24 | "output_type": "execute_result" 25 | } 26 | ], 27 | "source": "import numpy as np\nprint(\"ones\", np.ones(5))\nprint(\"arange\", np.arange(5))\nprint(\"linspace\", np.linspace(0., 1., 5))\nprint(\"random\", np.random.uniform(size=3))\nprint(\"custom\", np.array([2, 3, 5]))" 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 2, 32 | "metadata": {}, 33 | "outputs": [ 34 | { 35 | "data": { 36 | "text/plain": "array([[1, 2],\n [3, 4]])" 37 | }, 38 | "execution_count": 2, 39 | "metadata": {}, 40 | "output_type": "execute_result" 41 | } 42 | ], 43 | "source": "np.array([[1, 2], [3, 4]])" 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 3, 48 | "metadata": {}, 49 | "outputs": [ 50 | { 51 | "data": { 52 | "text/plain": "array([1, 1, 1, 1, 1])" 53 | }, 54 | "execution_count": 3, 55 | "metadata": {}, 56 | "output_type": "execute_result" 57 | } 58 | ], 59 | "source": "np.ones(5, dtype=np.int64)" 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 4, 64 | "metadata": {}, 65 | "outputs": [ 66 | { 67 | "data": { 68 | "text/plain": "array([ 0., 1., 2., 3., 4.])" 69 | }, 70 | "execution_count": 4, 71 | "metadata": {}, 72 | "output_type": "execute_result" 73 | } 74 | ], 75 | "source": "np.arange(5).astype(np.float64)" 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": "### Loading arrays from files" 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 5, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": "import pandas as pd" 88 | }, 89 
| { 90 | "cell_type": "code", 91 | "execution_count": 6, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": "data = pd.read_csv('../chapter2/data/nyc_data.csv')" 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 7, 99 | "metadata": {}, 100 | "outputs": [ 101 | { 102 | "data": { 103 | "text/plain": "array([[-73.955925, 40.781887],\n [-74.005501, 40.745735],\n ...,\n [-73.978477, 40.772945],\n [-73.987206, 40.750568]])" 104 | }, 105 | "execution_count": 7, 106 | "metadata": {}, 107 | "output_type": "execute_result" 108 | } 109 | ], 110 | "source": "pickup = data[['pickup_longitude', 'pickup_latitude']].values\npickup" 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 8, 115 | "metadata": {}, 116 | "outputs": [ 117 | { 118 | "data": { 119 | "text/plain": "(846945, 2)" 120 | }, 121 | "execution_count": 8, 122 | "metadata": {}, 123 | "output_type": "execute_result" 124 | } 125 | ], 126 | "source": "pickup.shape" 127 | } 128 | ], 129 | "metadata": {}, 130 | "nbformat": 4, 131 | "nbformat_minor": 0 132 | } -------------------------------------------------------------------------------- /chapter3/33-basic.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": "## Basic array manipulations" 7 | }, 8 | { 9 | "cell_type": "code", 10 | "execution_count": 1, 11 | "metadata": {}, 12 | "outputs": [], 13 | "source": "import numpy as np" 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": "x = np.arange(1, 11)" 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 3, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/plain": "array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])" 30 | }, 31 | "execution_count": 3, 32 | "metadata": {}, 33 | "output_type": "execute_result" 34 | } 35 | ], 36 | "source": "x" 37 | }, 38 | { 39 | 
"cell_type": "code", 40 | "execution_count": 4, 41 | "metadata": {}, 42 | "outputs": [ 43 | { 44 | "data": { 45 | "text/plain": "array([[ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]])" 46 | }, 47 | "execution_count": 4, 48 | "metadata": {}, 49 | "output_type": "execute_result" 50 | } 51 | ], 52 | "source": "x_row = x.reshape((1, -1))\nx_row" 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 5, 57 | "metadata": {}, 58 | "outputs": [ 59 | { 60 | "data": { 61 | "text/plain": "array([[ 1],\n [ 2],\n [ 3],\n [ 4],\n [ 5],\n [ 6],\n [ 7],\n [ 8],\n [ 9],\n [10]])" 62 | }, 63 | "execution_count": 5, 64 | "metadata": {}, 65 | "output_type": "execute_result" 66 | } 67 | ], 68 | "source": "x_col = x[:, np.newaxis]\nx_col" 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 6, 73 | "metadata": {}, 74 | "outputs": [ 75 | { 76 | "data": { 77 | "text/plain": "array([[ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n [ 2, 4, 6, 8, 10, 12, 14, 16, 18, 20],\n [ 3, 6, 9, 12, 15, 18, 21, 24, 27, 30],\n [ 4, 8, 12, 16, 20, 24, 28, 32, 36, 40],\n [ 5, 10, 15, 20, 25, 30, 35, 40, 45, 50],\n [ 6, 12, 18, 24, 30, 36, 42, 48, 54, 60],\n [ 7, 14, 21, 28, 35, 42, 49, 56, 63, 70],\n [ 8, 16, 24, 32, 40, 48, 56, 64, 72, 80],\n [ 9, 18, 27, 36, 45, 54, 63, 72, 81, 90],\n [ 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]])" 78 | }, 79 | "execution_count": 6, 80 | "metadata": {}, 81 | "output_type": "execute_result" 82 | } 83 | ], 84 | "source": "np.dot(x_col, x_row)" 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 7, 89 | "metadata": {}, 90 | "outputs": [ 91 | { 92 | "data": { 93 | "text/plain": "array([[ 1, 4, 9, 16, 25, 36, 49, 64, 81, 100]])" 94 | }, 95 | "execution_count": 7, 96 | "metadata": {}, 97 | "output_type": "execute_result" 98 | } 99 | ], 100 | "source": "x_row * x_row" 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 8, 105 | "metadata": {}, 106 | "outputs": [ 107 | { 108 | "data": { 109 | "text/plain": "array([[ 1, 2, 3, ..., 9, 10],\n [ 2, 4, 6, ..., 
18, 20],\n ...\n [ 9, 18, 27, ..., 81, 90],\n [ 10, 20, 30, ..., 90, 100]])" 110 | }, 111 | "execution_count": 8, 112 | "metadata": {}, 113 | "output_type": "execute_result" 114 | } 115 | ], 116 | "source": "x_row * x_col" 117 | } 118 | ], 119 | "metadata": {}, 120 | "nbformat": 4, 121 | "nbformat_minor": 0 122 | } -------------------------------------------------------------------------------- /chapter4/41-notebook.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": "## Choosing a plotting backend" 7 | }, 8 | { 9 | "cell_type": "markdown", 10 | "metadata": {}, 11 | "source": "### Inline plots" 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": "import numpy as np\nimport matplotlib.pyplot as plt" 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": "%matplotlib inline" 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 3, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": "plt.imshow(np.random.rand(10, 10), interpolation='none')" 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": "### Exported figures" 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": "### GUI toolkits" 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 4, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": "%matplotlib qt\nplt.imshow(np.random.rand(10, 10), interpolation='none')" 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": "### Dynamic inline plots" 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 5, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": "import matplotlib\nmatplotlib.use('nbagg')" 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 6, 66 | "metadata": 
{}, 67 | "outputs": [], 68 | "source": "plt.imshow(np.random.rand(10, 10), interpolation='none')\nplt.show()" 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": "### Web-based visualization" 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 7, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": "import numpy as np\nimport matplotlib.pyplot as plt\n%matplotlib inline\nimport mpld3\nmpld3.enable_notebook()" 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 8, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": "plt.imshow(np.random.rand(10, 10), interpolation='none')" 88 | } 89 | ], 90 | "metadata": {}, 91 | "nbformat": 4, 92 | "nbformat_minor": 0 93 | } -------------------------------------------------------------------------------- /chapter4/42-mpl.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": "## matplotlib and seaborn essentials" 7 | }, 8 | { 9 | "cell_type": "code", 10 | "execution_count": 1, 11 | "metadata": {}, 12 | "outputs": [], 13 | "source": "import numpy as np\nimport matplotlib.pyplot as plt\nimport seaborn\n%matplotlib inline" 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": "### Common plots with matplotlib" 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": "y = np.random.randn(1000)" 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 3, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": "plt.plot(y)" 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 4, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": "x = np.linspace(-10., 10., 1000)\ny = np.sin(3 * x) * np.exp(-.1 * x**2)" 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 5, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": "plt.plot(x, 
y)" 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 6, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": "x = np.linspace(-5., 5., 100)\ny = np.sin(3 * x) * np.exp(-.1 * x ** 2)" 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 7, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": "plt.plot(x, y, '--^',\n lw=3, color='#fdbb84',\n mfc='#2b8cbe', ms=8)" 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 8, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": "x = np.random.randn(100)\ny = x + np.random.randn(100)" 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 9, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": "plt.scatter(x, y)" 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": "### Customizing matplotlib figures" 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 10, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": "# Left panel.\nplt.subplot(1, 2, 1)\nx = np.linspace(-10., 10., 1000)\nplt.plot(x, np.sin(x), '-r', label='sinus')\nplt.plot(x, np.cos(x), ':g', lw=1, label='cosinus')\nplt.xticks([-10, 0, 10])\nplt.yticks([-1, 0, 1])\nplt.ylim(-2, 2)\nplt.xlabel(\"x axis\")\nplt.ylabel(\"y axis\")\nplt.title(\"Two plots\")\nplt.legend()\n\n# Right panel.\nplt.subplot(1, 2, 2, polar=True)\nx = np.linspace(0, 2 * np.pi, 1000)\nplt.plot(x, 1 + 2 * np.cos(6 * x))\nplt.yticks([])\nplt.xlim(-.1, 3.1)\nplt.ylim(-.1, 3.1)\nplt.xticks(np.linspace(0, 5 * np.pi / 3, 6))\nplt.title(\"A polar plot\")\nplt.grid(color='k', linewidth=1, linestyle=':')" 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": "### Interacting with matplotlib figures in the Notebook" 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 11, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": "from ipywidgets import interact" 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 12, 103 | "metadata": {}, 104 | 
"outputs": [], 105 | "source": "x = np.linspace(-5., 5., 1000)" 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 13, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": "@interact\ndef plot_sin(a=(1, 10)):\n plt.plot(x, np.sin(a*x))\n plt.ylim(-1, 1)" 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 14, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": "%matplotlib qt" 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 15, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": "lines = plt.plot([0, 1], [0, 1], 'b')" 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 16, 131 | "metadata": {}, 132 | "outputs": [ 133 | { 134 | "data": { 135 | "text/plain": "[]" 136 | }, 137 | "execution_count": 16, 138 | "metadata": {}, 139 | "output_type": "execute_result" 140 | } 141 | ], 142 | "source": "lines" 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 17, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": "lines[0].set_color('r')\nplt.draw()" 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": "### High-level plotting with seaborn" 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 18, 159 | "metadata": {}, 160 | "outputs": [ 161 | { 162 | "data": { 163 | "text/plain": " sepal_length sepal_width petal_length petal_width species\n0 5.1 3.5 1.4 0.2 setosa\n1 4.9 3.0 1.4 0.2 setosa\n2 4.7 3.2 1.3 0.2 setosa" 164 | }, 165 | "execution_count": 18, 166 | "metadata": {}, 167 | "output_type": "execute_result" 168 | } 169 | ], 170 | "source": "df = seaborn.load_dataset(\"iris\")\ndf.head(3)" 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 19, 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": "seaborn.pairplot(df, hue=\"species\", size=2.5)" 178 | } 179 | ], 180 | "metadata": {}, 181 | "nbformat": 4, 182 | "nbformat_minor": 0 183 | } 
-------------------------------------------------------------------------------- /chapter4/43-image.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": "## Image processing" 7 | }, 8 | { 9 | "cell_type": "code", 10 | "execution_count": 1, 11 | "metadata": {}, 12 | "outputs": [], 13 | "source": "import numpy as np\nimport skimage\nfrom skimage import img_as_float\nimport skimage.filters as skif\nfrom skimage.color import rgb2gray\nimport skimage.data as skid\nimport skimage.exposure as skie\nfrom IPython.html.widgets import interact\nimport matplotlib.pyplot as plt\nimport seaborn\n%matplotlib inline" 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": "chelsea = skid.chelsea()" 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 3, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/plain": "((300, 451, 3), dtype('uint8'))" 30 | }, 31 | "execution_count": 3, 32 | "metadata": {}, 33 | "output_type": "execute_result" 34 | } 35 | ], 36 | "source": "chelsea.shape, chelsea.dtype" 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 4, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": "plt.imshow(chelsea)\nplt.axis('off')" 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 5, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": "img = rgb2gray(chelsea)" 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 6, 55 | "metadata": {}, 56 | "outputs": [ 57 | { 58 | "data": { 59 | "text/plain": "((300, 451), dtype('float64'))" 60 | }, 61 | "execution_count": 6, 62 | "metadata": {}, 63 | "output_type": "execute_result" 64 | } 65 | ], 66 | "source": "img.shape, img.dtype" 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 7, 71 | "metadata": {}, 72 | "outputs": [ 73 | { 74 | "data": { 75 | 
"text/plain": "array([[ 0.4852, 0.4852, ..., 0.1169, 0.1169],\n [ 0.4969, 0.4930, ..., 0.1225, 0.1272 ],\n ...,\n [ 0.4248, 0.3688, ..., 0.5544, 0.5583]])" 76 | }, 77 | "execution_count": 7, 78 | "metadata": {}, 79 | "output_type": "execute_result" 80 | } 81 | ], 82 | "source": "img" 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 8, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": "p2, p98 = np.percentile(img, (2, 98))" 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 9, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": "img_rescale = skie.rescale_intensity(img, in_range=(p2, p98))" 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 10, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": "img_eq = skie.equalize_hist(img)" 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 11, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": "img_adapteq = img_as_float(skie.equalize_adapthist(img, clip_limit=0.03))" 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 12, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": "hist_types = dict([('Contrast stretching', img_rescale),\n ('Histogram equalization', img_eq),\n ('Adaptive equalization', img_adapteq)])" 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 13, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": "@interact(hist_type=list(hist_types.keys()))\ndef display_result(hist_type):\n result = hist_types[hist_type]\n\n # We display the processed grayscale image on the left.\n plt.subplot(121)\n plt.imshow(result, cmap='gray')\n plt.axis('off')\n\n # We display the histogram on the right.\n plt.subplot(122)\n plt.hist(result.ravel(), bins=np.linspace(0., 1., 256),\n histtype='step', color='black')\n\n plt.show()" 125 | } 126 | ], 127 | "metadata": {}, 128 | "nbformat": 4, 129 | "nbformat_minor": 0 130 | } 
-------------------------------------------------------------------------------- /chapter4/44-other.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": "## Further plotting and visualization libraries" 7 | }, 8 | { 9 | "cell_type": "markdown", 10 | "metadata": {}, 11 | "source": "### High-level plotting" 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": "### Maps and geometry" 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": "### 3D visualization" 22 | } 23 | ], 24 | "metadata": {}, 25 | "nbformat": 4, 26 | "nbformat_minor": 0 27 | } -------------------------------------------------------------------------------- /chapter5/51-numba.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": "## Accelerating Python code with Numba" 7 | }, 8 | { 9 | "cell_type": "code", 10 | "execution_count": 1, 11 | "metadata": {}, 12 | "outputs": [], 13 | "source": "import math\nimport random\nimport numpy as np\nfrom numba import jit, vectorize, float64\nimport matplotlib.pyplot as plt\nimport seaborn\n%matplotlib inline" 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": "### Random walk" 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": "def step():\n return 1. if random.random() > .5 else -1." 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 3, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": "def walk(n):\n x = np.zeros(n)\n dx = 1. 
/ n\n for i in range(n - 1):\n x_new = x[i] + dx * step()\n if x_new > 5e-3:\n x[i + 1] = 0.\n else:\n x[i + 1] = x_new\n return x" 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 4, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": "n = 100000\nx = walk(n)" 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 5, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": "plt.plot(x)" 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 6, 51 | "metadata": {}, 52 | "outputs": [ 53 | { 54 | "data": { 55 | "text/plain": "10 loops, best of 3: 57.6 ms per loop" 56 | }, 57 | "execution_count": 6, 58 | "metadata": {}, 59 | "output_type": "execute_result" 60 | } 61 | ], 62 | "source": "%%timeit\nwalk(n)" 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 7, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": "@jit(nopython=True)\ndef step_numba():\n return 1. if random.random() > .5 else -1." 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 8, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": "@jit(nopython=True)\ndef walk_numba(n):\n x = np.zeros(n)\n dx = 1. / n\n for i in range(n - 1):\n x_new = x[i] + dx * step_numba()\n if x_new > 5e-3:\n x[i + 1] = 0.\n else:\n x[i + 1] = x_new\n return x" 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 9, 81 | "metadata": {}, 82 | "outputs": [ 83 | { 84 | "data": { 85 | "text/plain": "The slowest run took 81.94 times longer than the fastest. 
This could mean that an intermediate result is being cached\n1000 loops, best of 3: 1.89 ms per loop" 86 | }, 87 | "execution_count": 9, 88 | "metadata": {}, 89 | "output_type": "execute_result" 90 | } 91 | ], 92 | "source": "%%timeit\nwalk_numba(n)" 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": "### Universal functions" 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 10, 102 | "metadata": {}, 103 | "outputs": [ 104 | { 105 | "data": { 106 | "text/plain": "1 loops, best of 3: 689 ms per loop" 107 | }, 108 | "execution_count": 10, 109 | "metadata": {}, 110 | "output_type": "execute_result" 111 | } 112 | ], 113 | "source": "x = np.random.rand(10000000)\n%timeit np.cos(2*x**2 + 3*x + 4*np.exp(x**3))" 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 11, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": "@vectorize\ndef kernel(x):\n return np.cos(2*x**2 + 3*x + 4*np.exp(x**3))" 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 12, 125 | "metadata": {}, 126 | "outputs": [ 127 | { 128 | "data": { 129 | "text/plain": "-0.98639139715432589" 130 | }, 131 | "execution_count": 12, 132 | "metadata": {}, 133 | "output_type": "execute_result" 134 | } 135 | ], 136 | "source": "kernel(1.)" 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 13, 141 | "metadata": {}, 142 | "outputs": [ 143 | { 144 | "data": { 145 | "text/plain": "1 loops, best of 3: 324 ms per loop" 146 | }, 147 | "execution_count": 13, 148 | "metadata": {}, 149 | "output_type": "execute_result" 150 | } 151 | ], 152 | "source": "%timeit kernel(x)" 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 14, 157 | "metadata": {}, 158 | "outputs": [ 159 | { 160 | "data": { 161 | "text/plain": "10 loops, best of 3: 122 ms per loop" 162 | }, 163 | "execution_count": 14, 164 | "metadata": {}, 165 | "output_type": "execute_result" 166 | } 167 | ], 168 | "source": "import numexpr\n%timeit 
numexpr.evaluate('cos(2*x**2 + 3*x + 4*exp(x**3))')" 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 15, 173 | "metadata": {}, 174 | "outputs": [ 175 | { 176 | "data": { 177 | "text/plain": "4" 178 | }, 179 | "execution_count": 15, 180 | "metadata": {}, 181 | "output_type": "execute_result" 182 | } 183 | ], 184 | "source": "numexpr.detect_number_of_cores()" 185 | } 186 | ], 187 | "metadata": {}, 188 | "nbformat": 4, 189 | "nbformat_minor": 0 190 | } -------------------------------------------------------------------------------- /chapter5/52-cython.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": "## Writing C in Python with Cython" 7 | }, 8 | { 9 | "cell_type": "markdown", 10 | "metadata": {}, 11 | "source": "### Installing Cython and a C compiler for Python" 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": "### Implementing the Eratosthenes Sieve in Python and Cython" 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 1, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": "def primes_python(n):\n primes = [False, False] + [True] * (n - 2)\n i = 2\n while i < n:\n # We do not deal with composite numbers.\n if not primes[i]:\n i += 1\n continue\n k = i * i\n # We mark multiples of i as composite numbers.\n while k < n:\n primes[k] = False\n k += i\n i += 1\n # We return all numbers marked with True.\n return [i for i in range(2, n) if primes[i]]" 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "metadata": {}, 29 | "outputs": [ 30 | { 31 | "data": { 32 | "text/plain": "[2, 3, 5, 7, 11, 13, 17, 19]" 33 | }, 34 | "execution_count": 2, 35 | "metadata": {}, 36 | "output_type": "execute_result" 37 | } 38 | ], 39 | "source": "primes_python(20)" 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 3, 44 | "metadata": {}, 45 | "outputs": [], 
46 | "source": "n = 10000" 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 4, 51 | "metadata": {}, 52 | "outputs": [ 53 | { 54 | "data": { 55 | "text/plain": "100 loops, best of 3: 4 ms per loop" 56 | }, 57 | "execution_count": 4, 58 | "metadata": {}, 59 | "output_type": "execute_result" 60 | } 61 | ], 62 | "source": "%timeit primes_python(n)" 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 5, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": "%load_ext Cython" 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 6, 74 | "metadata": { 75 | "scrolled": true 76 | }, 77 | "outputs": [], 78 | "source": "%%cython\ndef primes_cython_1(n):\n primes = [False, False] + [True] * (n - 2)\n i = 2\n while i < n:\n # We do not deal with composite numbers.\n if not primes[i]:\n i += 1\n continue\n k = i * i\n # We mark multiples of i as composite numbers.\n while k < n:\n primes[k] = False\n k += i\n i += 1\n # We return all numbers marked with True.\n return [i for i in range(2, n) if primes[i]]" 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 7, 83 | "metadata": {}, 84 | "outputs": [ 85 | { 86 | "data": { 87 | "text/plain": "[2, 3, 5, 7, 11, 13, 17, 19]" 88 | }, 89 | "execution_count": 7, 90 | "metadata": {}, 91 | "output_type": "execute_result" 92 | } 93 | ], 94 | "source": "primes_cython_1(20)" 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 8, 99 | "metadata": {}, 100 | "outputs": [ 101 | { 102 | "data": { 103 | "text/plain": "100 loops, best of 3: 1.99 ms per loop" 104 | }, 105 | "execution_count": 8, 106 | "metadata": {}, 107 | "output_type": "execute_result" 108 | } 109 | ], 110 | "source": "%timeit primes_cython_1(n)" 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 9, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": "%%cython -a\ndef primes_cython_2(int n):\n # Note the type declarations below:\n cdef list primes = [False, False] + [True] * (n - 2)\n cdef 
int i = 2\n cdef int k = 0\n # The rest of the function is unchanged.\n while i < n:\n # We do not deal with composite numbers.\n if not primes[i]:\n i += 1\n continue\n k = i * i\n # We mark multiples of i as composite numbers.\n while k < n:\n primes[k] = False\n k += i\n i += 1\n # We return all numbers marked with True.\n return [i for i in range(2, n) if primes[i]]" 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 10, 122 | "metadata": {}, 123 | "outputs": [ 124 | { 125 | "data": { 126 | "text/plain": "1000 loops, best of 3: 266 \u00b5s per loop" 127 | }, 128 | "execution_count": 10, 129 | "metadata": {}, 130 | "output_type": "execute_result" 131 | } 132 | ], 133 | "source": "%timeit primes_cython_2(n)" 134 | } 135 | ], 136 | "metadata": {}, 137 | "nbformat": 4, 138 | "nbformat_minor": 0 139 | } -------------------------------------------------------------------------------- /chapter5/53-parallel.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": "## Distributing tasks on several cores with IPython.parallel" 7 | }, 8 | { 9 | "cell_type": "code", 10 | "execution_count": 1, 11 | "metadata": {}, 12 | "outputs": [], 13 | "source": "import numpy as np\nfrom ipyparallel import Client # IPython.parallel before IPython 4.0" 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": "rc = Client()" 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": "### Direct interface" 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 3, 30 | "metadata": {}, 31 | "outputs": [ 32 | { 33 | "data": { 34 | "text/plain": "[0, 1, 2, 3]" 35 | }, 36 | "execution_count": 3, 37 | "metadata": {}, 38 | "output_type": "execute_result" 39 | } 40 | ], 41 | "source": "rc.ids" 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 4, 46 | 
"metadata": {}, 47 | "outputs": [], 48 | "source": "%px import os, time" 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 5, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "data": { 57 | "text/plain": "[stdout:0] 11173\n[stdout:1] 11174\n[stdout:2] 11175\n[stdout:3] 11176" 58 | }, 59 | "execution_count": 5, 60 | "metadata": {}, 61 | "output_type": "execute_result" 62 | } 63 | ], 64 | "source": "%px print(os.getpid())" 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 6, 69 | "metadata": {}, 70 | "outputs": [ 71 | { 72 | "data": { 73 | "text/plain": "[stdout:0] 11173\n[stdout:1] 11174\n[stdout:2] 11175" 74 | }, 75 | "execution_count": 6, 76 | "metadata": {}, 77 | "output_type": "execute_result" 78 | } 79 | ], 80 | "source": "%%px --targets :-1\nprint(os.getpid())" 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 7, 85 | "metadata": {}, 86 | "outputs": [ 87 | { 88 | "data": { 89 | "text/plain": "<DirectView [0, 1, 2]>" 90 | }, 91 | "execution_count": 7, 92 | "metadata": {}, 93 | "output_type": "execute_result" 94 | } 95 | ], 96 | "source": "view = rc[:-1]\nview" 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": "### Load-balanced interface" 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 8, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": "v = rc.load_balanced_view()" 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 9, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": "def sample(n):\n import numpy as np\n # Random coordinates.\n x, y = np.random.rand(2, n)\n # Square distances to the origin.\n r_square = x ** 2 + y ** 2\n # Number of points in the quarter disc.\n return (r_square <= 1).sum()" 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 10, 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": "def pi(n_in, n):\n return 4. 
* float(n_in) / n" 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 11, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": "n = 100000000" 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 12, 134 | "metadata": {}, 135 | "outputs": [ 136 | { 137 | "data": { 138 | "text/plain": "3.14174968" 139 | }, 140 | "execution_count": 12, 141 | "metadata": {}, 142 | "output_type": "execute_result" 143 | } 144 | ], 145 | "source": "pi(sample(n), n)" 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 13, 150 | "metadata": {}, 151 | "outputs": [ 152 | { 153 | "data": { 154 | "text/plain": "1 loops, best of 3: 2.65 s per loop" 155 | }, 156 | "execution_count": 13, 157 | "metadata": {}, 158 | "output_type": "execute_result" 159 | } 160 | ], 161 | "source": "%timeit pi(sample(n), n)" 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 14, 166 | "metadata": {}, 167 | "outputs": [], 168 | "source": "args = [n // 100] * 100" 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 15, 173 | "metadata": {}, 174 | "outputs": [], 175 | "source": "ar = v.map(sample, args)" 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 16, 180 | "metadata": {}, 181 | "outputs": [ 182 | { 183 | "data": { 184 | "text/plain": "(False, 12)" 185 | }, 186 | "execution_count": 16, 187 | "metadata": {}, 188 | "output_type": "execute_result" 189 | } 190 | ], 191 | "source": "ar.ready(), ar.progress" 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 17, 196 | "metadata": {}, 197 | "outputs": [ 198 | { 199 | "data": { 200 | "text/plain": "(1.428284, 4.042367000000002)" 201 | }, 202 | "execution_count": 17, 203 | "metadata": {}, 204 | "output_type": "execute_result" 205 | } 206 | ], 207 | "source": "ar.elapsed, ar.serial_time" 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 18, 212 | "metadata": {}, 213 | "outputs": [ 214 | { 215 | "data": { 216 | "text/plain": 
"3.141666" 217 | }, 218 | "execution_count": 18, 219 | "metadata": {}, 220 | "output_type": "execute_result" 221 | } 222 | ], 223 | "source": "pi(np.sum(ar.result()), n)" 224 | } 225 | ], 226 | "metadata": {}, 227 | "nbformat": 4, 228 | "nbformat_minor": 0 229 | } 230 | -------------------------------------------------------------------------------- /chapter5/54-further.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": "## Further high-performance computing techniques" 7 | }, 8 | { 9 | "cell_type": "markdown", 10 | "metadata": {}, 11 | "source": "### MPI" 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": "### Distributed computing" 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": "### C/C++ with Python" 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": "### GPU computing" 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": "### PyPy" 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": "### Julia" 37 | } 38 | ], 39 | "metadata": {}, 40 | "nbformat": 4, 41 | "nbformat_minor": 0 42 | } -------------------------------------------------------------------------------- /chapter6/61-magic.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": "## Creating a custom magic command in an IPython extension" 7 | }, 8 | { 9 | "cell_type": "code", 10 | "execution_count": 1, 11 | "metadata": {}, 12 | "outputs": [], 13 | "source": "from IPython.core.magic import register_cell_magic" 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": "@register_cell_magic\ndef cpp(line, cell):\n \"\"\"Compile, execute C++ code, and 
return the standard output.\"\"\"\n\n # We first retrieve the current IPython interpreter instance.\n ip = get_ipython()\n\n # We define the source and executable filenames.\n source_filename = '_temp.cpp'\n program_filename = '_temp'\n\n # We write the code to the C++ file.\n with open(source_filename, 'w') as f:\n f.write(cell)\n\n # We compile the C++ code into an executable.\n compile = ip.getoutput(\"g++ {0:s} -o {1:s}\".format(\n source_filename, program_filename))\n\n # We execute the executable and return the output.\n output = ip.getoutput('./{0:s}'.format(program_filename))\n\n print('\\n'.join(output))" 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 3, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/plain": "Hello world!" 30 | }, 31 | "execution_count": 3, 32 | "metadata": {}, 33 | "output_type": "execute_result" 34 | } 35 | ], 36 | "source": "%%cpp\n#include <iostream>\nint main()\n{\n std::cout << \"Hello world!\";\n}" 37 | } 38 | ], 39 | "metadata": {}, 40 | "nbformat": 4, 41 | "nbformat_minor": 0 42 | } -------------------------------------------------------------------------------- /chapter6/62-kernel.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": "## Writing a new Jupyter kernel" 7 | }, 8 | { 9 | "cell_type": "code", 10 | "execution_count": 1, 11 | "metadata": {}, 12 | "outputs": [], 13 | "source": "%mkdir cpp" 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [ 20 | { 21 | "data": { 22 | "text/plain": "Writing cpp/cpp_kernel.py" 23 | }, 24 | "execution_count": 2, 25 | "metadata": {}, 26 | "output_type": "execute_result" 27 | } 28 | ], 29 | "source": "%%writefile cpp/cpp_kernel.py\n\nimport os\nimport os.path as op\nimport tempfile\n\n# We import the `getoutput()` function provided by IPython.\n# It allows us to do system calls from 
Python.\nfrom IPython.utils.process import getoutput\n\ndef exec_cpp(code):\n \"\"\"Compile, execute C++ code, and return the standard output.\"\"\"\n\n # We create a temporary directory. This directory will\n # be deleted at the end of the 'with' context.\n # All created files will be in this directory.\n with tempfile.TemporaryDirectory() as tmpdir:\n\n # We define the source and executable filenames.\n source_path = op.join(tmpdir, 'temp.cpp')\n program_path = op.join(tmpdir, 'temp')\n\n # We write the code to the C++ file.\n with open(source_path, 'w') as f:\n f.write(code)\n\n # We compile the C++ code into an executable.\n os.system(\"g++ {0:s} -o {1:s}\".format(\n source_path, program_path))\n\n # We execute the program and return the output.\n return getoutput(program_path)" 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 3, 34 | "metadata": {}, 35 | "outputs": [ 36 | { 37 | "data": { 38 | "text/plain": "Appending to cpp/cpp_kernel.py" 39 | }, 40 | "execution_count": 3, 41 | "metadata": {}, 42 | "output_type": "execute_result" 43 | } 44 | ], 45 | "source": "%%writefile -a cpp/cpp_kernel.py\n\n\"\"\"C++ wrapper kernel.\"\"\"\nfrom ipykernel.kernelbase import Kernel\n\nclass CppKernel(Kernel):\n\n # Kernel information.\n implementation = 'C++'\n implementation_version = '1.0'\n language = 'c++'\n language_version = '1.0'\n language_info = {'name': 'c++',\n 'mimetype': 'text/plain'}\n banner = \"C++ kernel\"\n\n def do_execute(self, code, silent,\n store_history=True,\n user_expressions=None,\n allow_stdin=False):\n \"\"\"This function is called when a code cell is executed.\"\"\"\n\n if not silent:\n # We run the C++ code and get the output.\n output = exec_cpp(code)\n\n # We send back the result to the frontend.\n stream_content = {'name': 'stdout', 'text': output}\n self.send_response(self.iopub_socket, 'stream', stream_content)\n\n return {'status': 'ok',\n # The base class increments the execution count\n 'execution_count': 
self.execution_count,\n 'payload': [],\n 'user_expressions': {},\n }\n\nif __name__ == '__main__':\n from ipykernel.kernelapp import IPKernelApp\n IPKernelApp.launch_instance(kernel_class=CppKernel)" 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 4, 50 | "metadata": {}, 51 | "outputs": [ 52 | { 53 | "data": { 54 | "text/plain": "Writing cpp/kernel.json" 55 | }, 56 | "execution_count": 4, 57 | "metadata": {}, 58 | "output_type": "execute_result" 59 | } 60 | ], 61 | "source": "%%writefile cpp/kernel.json\n{\n \"argv\": [\"python\",\n \"cpp/cpp_kernel.py\",\n \"-f\",\n \"{connection_file}\"\n ],\n \"display_name\": \"C++\"\n}" 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 5, 66 | "metadata": {}, 67 | "outputs": [ 68 | { 69 | "data": { 70 | "text/plain": "[InstallKernelSpec] Installed kernelspec cpp in /Users/cyrille/Library/Jupyter/kernels/cpp" 71 | }, 72 | "execution_count": 5, 73 | "metadata": {}, 74 | "output_type": "execute_result" 75 | } 76 | ], 77 | "source": "!jupyter kernelspec install --replace --user cpp" 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 6, 82 | "metadata": {}, 83 | "outputs": [ 84 | { 85 | "data": { 86 | "text/plain": "Available kernels:\n cpp\n python3" 87 | }, 88 | "execution_count": 6, 89 | "metadata": {}, 90 | "output_type": "execute_result" 91 | } 92 | ], 93 | "source": "!jupyter kernelspec list" 94 | } 95 | ], 96 | "metadata": {}, 97 | "nbformat": 4, 98 | "nbformat_minor": 0 99 | } -------------------------------------------------------------------------------- /chapter6/63-rich.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": "## Displaying rich HTML elements in the Notebook" 7 | }, 8 | { 9 | "cell_type": "markdown", 10 | "metadata": {}, 11 | "source": "### Displaying SVG in the Notebook" 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 
16 | "metadata": {}, 17 | "outputs": [], 18 | "source": "def svg_disc(radius, color):\n return \"\"\"\n \n \"\"\".format(radius, color)" 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": "class Disc(object):\n def __init__(self, radius, color='red'):\n self.radius = radius\n self.color = color\n\n def _repr_svg_(self):\n return svg_disc(self.radius, self.color)" 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 3, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": "Disc(60, 'purple')" 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": "### JavaScript and D3 in the Notebook" 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 4, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": "from IPython.display import display_javascript" 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 5, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": "JS_TEMPLATE = \"\"\"\n// We load the d3.js library from the Web.\nrequire.config({paths: {d3: \"http://d3js.org/d3.v3.min\"}});\nrequire([\"d3\"], function(d3) {\n // Example from http://bost.ocks.org/mike/bar/\n\n // Define the data.\n var data = %s;\n\n // We normalize the data.\n var x = d3.scale.linear()\n .domain([0, d3.max(data)])\n .range([0, 420]);\n\n // We define a categorical color map.\n var color = d3.scale.category10();\n\n // We create the chart.\n d3.select(\".chart\")\n .selectAll(\"div\")\n .data(data)\n .enter().append(\"div\")\n .style(\"width\", function(d) { return x(d) + \"px\"; })\n .text(function(d) { return d; });\n\n});\n\"\"\"" 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 6, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": "my_list = [2, 3, 5, 7, 11, 13]" 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 7, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": "JS = JS_TEMPLATE % str(my_list)" 66 | }, 67 | { 68 
| "cell_type": "code", 69 | "execution_count": 8, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": "%%HTML\n\n
" 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 9, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": "display_javascript(JS, raw=True)" 80 | } 81 | ], 82 | "metadata": {}, 83 | "nbformat": 4, 84 | "nbformat_minor": 0 85 | } -------------------------------------------------------------------------------- /chapter6/64-nbapp.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": "## Customizing the Notebook interface with JavaScript" 7 | }, 8 | { 9 | "cell_type": "code", 10 | "execution_count": 1, 11 | "metadata": {}, 12 | "outputs": [], 13 | "source": "%%javascript\n\n// This function allows us to add buttons\n// to the Notebook toolbar.\nIPython.toolbar.add_buttons_group([\n{\n\n // The button's label.\n 'label': 'Renumber all code cells',\n\n // The button's icon.\n // See a list of Font-Awesome icons here:\n // http://fortawesome.github.io/Font-Awesome/icons/\n 'icon': 'fa-list-ol',\n\n // The callback function called when the button is pressed.\n 'callback': function () {\n\n // We retrieve the lists of all cells.\n var cells = IPython.notebook.get_cells();\n\n // We only keep the code cells.\n cells = cells.filter(function(c)\n {\n return c instanceof IPython.CodeCell;\n })\n\n // We set the input prompt of all code cells.\n for (var i = 0; i < cells.length; i++) {\n cells[i].set_input_prompt(i + 1);\n }\n }\n}]);" 14 | } 15 | ], 16 | "metadata": {}, 17 | "nbformat": 4, 18 | "nbformat_minor": 0 19 | } -------------------------------------------------------------------------------- /utils/gentoc.py: -------------------------------------------------------------------------------- 1 | """Generate the TOC in README.""" 2 | 3 | import os 4 | import os.path as op 5 | import json 6 | 7 | 8 | _TOC_HEADER = '## Table of contents' 9 | _CHAPTER_HEADERS = { 10 | 1: 'Getting started with IPython', 11 | 2: 'Interactive data 
analysis with pandas', 12 | 3: 'Numerical computing with NumPy', 13 | 4: 'Interactive plotting and Graphical Interfaces', 14 | 5: 'High-performance and parallel computing', 15 | 6: 'Customizing IPython', 16 | } 17 | 18 | 19 | def _get_readme_before_toc(file): 20 | with open(file, 'r') as f: 21 | readme = f.read() 22 | index = readme.index(_TOC_HEADER) 23 | before = readme[:index] 24 | return before 25 | 26 | 27 | def _iter_chapters(root): 28 | for i in range(1, 7): 29 | yield op.realpath(op.join(root, 'chapter{0:d}'.format(i))) 30 | 31 | 32 | def _iter_notebooks(chapter_path): 33 | for file in sorted(os.listdir(chapter_path)): 34 | if file.endswith('.ipynb'): 35 | with open(op.join(chapter_path, file), 'r') as f: 36 | text = f.read() 37 | yield op.join(chapter_path, file), 'execution_count' in text 38 | 39 | 40 | def _get_chapter_header(num): 41 | name = _CHAPTER_HEADERS[num] 42 | return '### {num}. {name}'.format(num=num, name=name) 43 | 44 | 45 | def _notebook_title(path): 46 | with open(path, 'r') as f: 47 | d = json.load(f) 48 | cells = d['cells'] 49 | if not cells: 50 | return 51 | title = d['cells'][0]['source'].replace('#', '').strip() 52 | a, b = op.basename(path)[:2] 53 | return '{0}.{1}. 
{2}'.format(a, b, title) 54 | 55 | 56 | def _nbviewer(path): 57 | dir = op.split(op.dirname(path))[-1] 58 | file = op.basename(path) 59 | return "http://nbviewer.ipython.org/github/ipython-books/minibook-2nd-code/blob/master/" + dir + '/' + file 60 | 61 | 62 | def _iter_all(root): 63 | for i, chapter in enumerate(_iter_chapters(root)): 64 | yield '{0}\n\n'.format(_get_chapter_header(i + 1)) 65 | for notebook, display_link in _iter_notebooks(chapter): 66 | title = _notebook_title(notebook) 67 | if not title: 68 | continue 69 | if display_link: 70 | yield '* [{0}]({1})\n'.format(title, _nbviewer(notebook)) 71 | else: 72 | yield '* {0}\n'.format(title) 73 | yield '\n' 74 | 75 | 76 | def generate(root): 77 | readme_file = op.join(root, 'README.md') 78 | before_toc = _get_readme_before_toc(readme_file) 79 | with open(readme_file, 'w') as f: 80 | f.write(before_toc) 81 | f.write(_TOC_HEADER + '\n\n') 82 | for item in _iter_all(root): 83 | f.write(item) 84 | 85 | 86 | if __name__ == '__main__': 87 | curdir = os.path.dirname(os.path.realpath(__file__)) 88 | root = op.realpath(op.join(curdir, '../')) 89 | generate(root) 90 | --------------------------------------------------------------------------------