├── .gitignore ├── .gitmodules ├── LICENSE-CODE ├── LICENSE-TEXT ├── README.md ├── environment.yml ├── notebooks ├── 00.00-Preface.ipynb ├── 01.00-IPython-Beyond-Normal-Python.ipynb ├── 01.01-Help-And-Documentation.ipynb ├── 01.02-Shell-Keyboard-Shortcuts.ipynb ├── 01.03-Magic-Commands.ipynb ├── 01.04-Input-Output-History.ipynb ├── 01.05-IPython-And-Shell-Commands.ipynb ├── 01.06-Errors-and-Debugging.ipynb ├── 01.07-Timing-and-Profiling.ipynb ├── 01.08-More-IPython-Resources.ipynb ├── 02.00-Introduction-to-NumPy.ipynb ├── 02.01-Understanding-Data-Types.ipynb ├── 02.02-The-Basics-Of-NumPy-Arrays.ipynb ├── 02.03-Computation-on-arrays-ufuncs.ipynb ├── 02.04-Computation-on-arrays-aggregates.ipynb ├── 02.05-Computation-on-arrays-broadcasting.ipynb ├── 02.06-Boolean-Arrays-and-Masks.ipynb ├── 02.07-Fancy-Indexing.ipynb ├── 02.08-Sorting.ipynb ├── 02.09-Structured-Data-NumPy.ipynb ├── 03.00-Introduction-to-Pandas.ipynb ├── 03.01-Introducing-Pandas-Objects.ipynb ├── 03.02-Data-Indexing-and-Selection.ipynb ├── 03.03-Operations-in-Pandas.ipynb ├── 03.04-Missing-Values.ipynb ├── 03.05-Hierarchical-Indexing.ipynb ├── 03.06-Concat-And-Append.ipynb ├── 03.07-Merge-and-Join.ipynb ├── 03.08-Aggregation-and-Grouping.ipynb ├── 03.09-Pivot-Tables.ipynb ├── 03.10-Working-With-Strings.ipynb ├── 03.11-Working-with-Time-Series.ipynb ├── 03.12-Performance-Eval-and-Query.ipynb ├── 03.13-Further-Resources.ipynb ├── 04.00-Introduction-To-Matplotlib.ipynb ├── 04.01-Simple-Line-Plots.ipynb ├── 04.02-Simple-Scatter-Plots.ipynb ├── 04.03-Errorbars.ipynb ├── 04.04-Density-and-Contour-Plots.ipynb ├── 04.05-Histograms-and-Binnings.ipynb ├── 04.06-Customizing-Legends.ipynb ├── 04.07-Customizing-Colorbars.ipynb ├── 04.08-Multiple-Subplots.ipynb ├── 04.09-Text-and-Annotation.ipynb ├── 04.10-Customizing-Ticks.ipynb ├── 04.11-Settings-and-Stylesheets.ipynb ├── 04.12-Three-Dimensional-Plotting.ipynb ├── 04.13-Geographic-Data-With-Basemap.ipynb ├── 04.14-Visualization-With-Seaborn.ipynb ├── 
04.15-Further-Resources.ipynb ├── 05.00-Machine-Learning.ipynb ├── 05.01-What-Is-Machine-Learning.ipynb ├── 05.02-Introducing-Scikit-Learn.ipynb ├── 05.03-Hyperparameters-and-Model-Validation.ipynb ├── 05.04-Feature-Engineering.ipynb ├── 05.05-Naive-Bayes.ipynb ├── 05.06-Linear-Regression.ipynb ├── 05.07-Support-Vector-Machines.ipynb ├── 05.08-Random-Forests.ipynb ├── 05.09-Principal-Component-Analysis.ipynb ├── 05.10-Manifold-Learning.ipynb ├── 05.11-K-Means.ipynb ├── 05.12-Gaussian-Mixtures.ipynb ├── 05.13-Kernel-Density-Estimation.ipynb ├── 05.14-Image-Features.ipynb ├── 05.15-Learning-More.ipynb ├── 06.00-Figure-Code.ipynb ├── Index.ipynb ├── data │ ├── BicycleWeather.csv │ ├── Seattle2014.csv │ ├── births.csv │ ├── california_cities.csv │ ├── president_heights.csv │ ├── state-abbrevs.csv │ ├── state-areas.csv │ └── state-population.csv ├── figures │ ├── 02.05-broadcasting.png │ ├── 03.08-split-apply-combine.png │ ├── 05.01-classification-1.png │ ├── 05.01-classification-2.png │ ├── 05.01-classification-3.png │ ├── 05.01-clustering-1.png │ ├── 05.01-clustering-2.png │ ├── 05.01-dimesionality-1.png │ ├── 05.01-dimesionality-2.png │ ├── 05.01-regression-1.png │ ├── 05.01-regression-2.png │ ├── 05.01-regression-3.png │ ├── 05.01-regression-4.png │ ├── 05.02-samples-features.png │ ├── 05.03-2-fold-CV.png │ ├── 05.03-5-fold-CV.png │ ├── 05.03-bias-variance-2.png │ ├── 05.03-bias-variance.png │ ├── 05.03-learning-curve.png │ ├── 05.03-validation-curve.png │ ├── 05.05-gaussian-NB.png │ ├── 05.06-gaussian-basis.png │ ├── 05.08-decision-tree-levels.png │ ├── 05.08-decision-tree-overfitting.png │ ├── 05.08-decision-tree.png │ ├── 05.09-PCA-rotation.png │ ├── 05.09-digits-pca-components.png │ ├── 05.09-digits-pixel-components.png │ ├── 05.10-LLE-vs-MDS.png │ ├── 05.11-expectation-maximization.png │ ├── 05.12-covariance-type.png │ ├── Data_Science_VD.png │ ├── PDSH-cover-small.png │ ├── PDSH-cover.png │ ├── array_vs_list.png │ └── cint_vs_pyint.png └── helpers_05_08.py ├── 
requirements.txt ├── tools ├── README.md ├── add_book_info.py ├── add_navigation.py ├── fix_kernelspec.py └── generate_contents.py └── website ├── .gitignore ├── Makefile ├── README.md ├── content └── favicon.ico ├── copy_notebooks.py ├── fabfile.py ├── pelicanconf.py ├── publishconf.py └── theme ├── README.md ├── static ├── css │ └── icons.css └── font │ ├── icons.eot │ ├── icons.svg │ ├── icons.ttf │ └── icons.woff └── templates ├── _includes ├── analytics.html └── disqus_thread.html ├── about.html ├── archives.html ├── article.html ├── base.html ├── booksection.html ├── index.html ├── ipynb.css ├── main.css ├── main.less ├── page.html ├── pygments.css └── tag.html /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | 91 | 92 | # Emacs 93 | *~ 94 | 95 | 96 | # Temporary data files 97 | notebooks/recipeitems-latest.json 98 | notebooks/FremontBridge.csv 99 | notebooks/gistemp250.nc 100 | notebooks/marathon-data.csv 101 | notebooks/my_figure.png 102 | notebooks/hello.png -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "website/plugins/ipynb"] 2 | path = website/plugins/ipynb 3 | url = git://github.com/danielfrg/pelican-ipynb.git 4 | [submodule "website/plugins/pelican-plugins"] 5 | path = website/plugins/pelican-plugins 6 | url = git://github.com/getpelican/pelican-plugins.git 7 | -------------------------------------------------------------------------------- /LICENSE-CODE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Jacob VanderPlas 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and 
associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python Data Science Handbook 2 | 3 | [![Binder](https://mybinder.org/badge.svg)](https://mybinder.org/v2/gh/jakevdp/PythonDataScienceHandbook/master?filepath=notebooks%2FIndex.ipynb) 4 | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/jakevdp/PythonDataScienceHandbook/blob/master/notebooks/Index.ipynb) 5 | 6 | This repository contains the entire [Python Data Science Handbook](http://shop.oreilly.com/product/0636920034919.do), in the form of (free!) Jupyter notebooks. 
7 | 8 | ![cover image](notebooks/figures/PDSH-cover.png) 9 | 10 | ## How to Use this Book 11 | 12 | - Read the book in its entirety online at https://jakevdp.github.io/PythonDataScienceHandbook/ 13 | 14 | - Run the code using the Jupyter notebooks available in this repository's [notebooks](notebooks) directory. 15 | 16 | - Launch executable versions of these notebooks using [Google Colab](http://colab.research.google.com): [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/jakevdp/PythonDataScienceHandbook/blob/master/notebooks/Index.ipynb) 17 | 18 | - Launch a live notebook server with these notebooks using [binder](https://beta.mybinder.org/): [![Binder](https://mybinder.org/badge.svg)](https://mybinder.org/v2/gh/jakevdp/PythonDataScienceHandbook/master?filepath=notebooks%2FIndex.ipynb) 19 | 20 | - Buy the printed book through [O'Reilly Media](http://shop.oreilly.com/product/0636920034919.do) 21 | 22 | ## About 23 | 24 | The book was written and tested with Python 3.5, though other Python versions (including Python 2.7) should work in nearly all cases. 25 | 26 | The book introduces the core libraries essential for working with data in Python: particularly [IPython](http://ipython.org), [NumPy](http://numpy.org), [Pandas](http://pandas.pydata.org), [Matplotlib](http://matplotlib.org), [Scikit-Learn](http://scikit-learn.org), and related packages. 27 | Familiarity with Python as a language is assumed; if you need a quick introduction to the language itself, see the free companion project, 28 | [A Whirlwind Tour of Python](https://github.com/jakevdp/WhirlwindTourOfPython): it's a fast-paced introduction to the Python language aimed at researchers and scientists. 29 | 30 | See [Index.ipynb](http://nbviewer.jupyter.org/github/jakevdp/PythonDataScienceHandbook/blob/master/notebooks/Index.ipynb) for an index of the notebooks available to accompany the text. 
31 | 32 | ## Software 33 | 34 | The code in the book was tested with Python 3.5, though most (but not all) will also work correctly with Python 2.7 and other older Python versions. 35 | 36 | The packages I used to run the code in the book are listed in [requirements.txt](requirements.txt) (Note that some of these exact version numbers may not be available on your platform: you may have to tweak them for your own use). 37 | To install the requirements using [conda](http://conda.pydata.org), run the following at the command-line: 38 | 39 | ``` 40 | $ conda install --file requirements.txt 41 | ``` 42 | 43 | To create a stand-alone environment named ``PDSH`` with Python 3.5 and all the required package versions, run the following: 44 | 45 | ``` 46 | $ conda create -n PDSH python=3.5 --file requirements.txt 47 | ``` 48 | 49 | You can read more about using conda environments in the [Managing Environments](http://conda.pydata.org/docs/using/envs.html) section of the conda documentation. 50 | 51 | 52 | ## License 53 | 54 | ### Code 55 | The code in this repository, including all code samples in the notebooks listed above, is released under the [MIT license](LICENSE-CODE). Read more at the [Open Source Initiative](https://opensource.org/licenses/MIT). 56 | 57 | ### Text 58 | The text content of the book is released under the [CC-BY-NC-ND license](LICENSE-TEXT). Read more at [Creative Commons](https://creativecommons.org/licenses/by-nc-nd/3.0/us/legalcode). 
59 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: data-science-handbook 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.5 6 | - pip: 7 | - -r requirements.txt -------------------------------------------------------------------------------- /notebooks/00.00-Preface.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "\n", 10 | "*This notebook contains an excerpt from the [Python Data Science Handbook](http://shop.oreilly.com/product/0636920034919.do) by Jake VanderPlas; the content is available [on GitHub](https://github.com/jakevdp/PythonDataScienceHandbook).*\n", 11 | "\n", 12 | "*The text is released under the [CC-BY-NC-ND license](https://creativecommons.org/licenses/by-nc-nd/3.0/us/legalcode), and code is released under the [MIT license](https://opensource.org/licenses/MIT). 
If you find this content useful, please consider supporting the work by [buying the book](http://shop.oreilly.com/product/0636920034919.do)!*" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "\n", 20 | "| [Contents](Index.ipynb) | [IPython: Beyond Normal Python](01.00-IPython-Beyond-Normal-Python.ipynb) >\n", 21 | "\n", 22 | "\"Open\n" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "# Preface" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "## What Is Data Science?\n", 37 | "\n", 38 | "This is a book about doing data science with Python, which immediately begs the question: what is *data science*?\n", 39 | "It's a surprisingly hard definition to nail down, especially given how ubiquitous the term has become.\n", 40 | "Vocal critics have variously dismissed the term as a superfluous label (after all, what science doesn't involve data?) or a simple buzzword that only exists to salt resumes and catch the eye of overzealous tech recruiters.\n", 41 | "\n", 42 | "In my mind, these critiques miss something important.\n", 43 | "Data science, despite its hype-laden veneer, is perhaps the best label we have for the cross-disciplinary set of skills that are becoming increasingly important in many applications across industry and academia.\n", 44 | "This cross-disciplinary piece is key: in my mind, the best existing definition of data science is illustrated by Drew Conway's Data Science Venn Diagram, first published on his blog in September 2010:" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "![Data Science Venn Diagram](figures/Data_Science_VD.png)\n", 52 | "\n", 53 | "(Source: [Drew Conway](http://drewconway.com/zia/2013/3/26/the-data-science-venn-diagram). 
Used by permission.)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "While some of the intersection labels are a bit tongue-in-cheek, this diagram captures the essence of what I think people mean when they say \"data science\": it is fundamentally an *interdisciplinary* subject.\n", 61 | "Data science comprises three distinct and overlapping areas: the skills of a *statistician* who knows how to model and summarize datasets (which are growing ever larger); the skills of a *computer scientist* who can design and use algorithms to efficiently store, process, and visualize this data; and the *domain expertise*—what we might think of as \"classical\" training in a subject—necessary both to formulate the right questions and to put their answers in context.\n", 62 | "\n", 63 | "With this in mind, I would encourage you to think of data science not as a new domain of knowledge to learn, but a new set of skills that you can apply within your current area of expertise.\n", 64 | "Whether you are reporting election results, forecasting stock returns, optimizing online ad clicks, identifying microorganisms in microscope photos, seeking new classes of astronomical objects, or working with data in any other field, the goal of this book is to give you the ability to ask and answer new questions about your chosen subject area." 
65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "## Who Is This Book For?\n", 72 | "\n", 73 | "In my teaching both at the University of Washington and at various tech-focused conferences and meetups, one of the most common questions I have heard is this: \"how should I learn Python?\"\n", 74 | "The people asking are generally technically minded students, developers, or researchers, often with an already strong background in writing code and using computational and numerical tools.\n", 75 | "Most of these folks don't want to learn Python *per se*, but want to learn the language with the aim of using it as a tool for data-intensive and computational science.\n", 76 | "While a large patchwork of videos, blog posts, and tutorials for this audience is available online, I've long been frustrated by the lack of a single good answer to this question; that is what inspired this book.\n", 77 | "\n", 78 | "The book is not meant to be an introduction to Python or to programming in general; I assume the reader has familiarity with the Python language, including defining functions, assigning variables, calling methods of objects, controlling the flow of a program, and other basic tasks.\n", 79 | "Instead it is meant to help Python users learn to use Python's data science stack–libraries such as IPython, NumPy, Pandas, Matplotlib, Scikit-Learn, and related tools–to effectively store, manipulate, and gain insight from data." 
80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "## Why Python?\n", 87 | "\n", 88 | "Python has emerged over the last couple decades as a first-class tool for scientific computing tasks, including the analysis and visualization of large datasets.\n", 89 | "This may have come as a surprise to early proponents of the Python language: the language itself was not specifically designed with data analysis or scientific computing in mind.\n", 90 | "The usefulness of Python for data science stems primarily from the large and active ecosystem of third-party packages: *NumPy* for manipulation of homogeneous array-based data, *Pandas* for manipulation of heterogeneous and labeled data, *SciPy* for common scientific computing tasks, *Matplotlib* for publication-quality visualizations, *IPython* for interactive execution and sharing of code, *Scikit-Learn* for machine learning, and many more tools that will be mentioned in the following pages.\n", 91 | "\n", 92 | "If you are looking for a guide to the Python language itself, I would suggest the sister project to this book, \"[A Whirlwind Tour of the Python Language](https://github.com/jakevdp/WhirlwindTourOfPython)\".\n", 93 | "This short report provides a tour of the essential features of the Python language, aimed at data scientists who already are familiar with one or more other programming languages." 
94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": {}, 99 | "source": [ 100 | "### Python 2 vs Python 3\n", 101 | "\n", 102 | "This book uses the syntax of Python 3, which contains language enhancements that are not compatible with the 2.x series of Python.\n", 103 | "Though Python 3.0 was first released in 2008, adoption has been relatively slow, particularly in the scientific and web development communities.\n", 104 | "This is primarily because it took some time for many of the essential third-party packages and toolkits to be made compatible with the new language internals.\n", 105 | "Since early 2014, however, stable releases of the most important tools in the data science ecosystem have been fully compatible with both Python 2 and 3, and so this book will use the newer Python 3 syntax.\n", 106 | "However, the vast majority of code snippets in this book will also work without modification in Python 2: in cases where a Py2-incompatible syntax is used, I will make every effort to note it explicitly." 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "## Outline of the Book\n", 114 | "\n", 115 | "Each chapter of this book focuses on a particular package or tool that contributes a fundamental piece of the Python Data Science story.\n", 116 | "\n", 117 | "1. IPython and Jupyter: these packages provide the computational environment in which many Python-using data scientists work.\n", 118 | "2. NumPy: this library provides the ``ndarray`` for efficient storage and manipulation of dense data arrays in Python.\n", 119 | "3. Pandas: this library provides the ``DataFrame`` for efficient storage and manipulation of labeled/columnar data in Python.\n", 120 | "4. Matplotlib: this library provides capabilities for a flexible range of data visualizations in Python.\n", 121 | "5. 
Scikit-Learn: this library provides efficient & clean Python implementations of the most important and established machine learning algorithms.\n", 122 | "\n", 123 | "The PyData world is certainly much larger than these five packages, and is growing every day.\n", 124 | "With this in mind, I make every attempt through these pages to provide references to other interesting efforts, projects, and packages that are pushing the boundaries of what can be done in Python.\n", 125 | "Nevertheless, these five are currently fundamental to much of the work being done in the Python data science space, and I expect they will remain important even as the ecosystem continues growing around them." 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "## Using Code Examples\n", 133 | "\n", 134 | "Supplemental material (code examples, figures, etc.) is available for download at http://github.com/jakevdp/PythonDataScienceHandbook/. This book is here to help you get your job done. In general, if example code is offered with this book, you may use it in your programs and documentation. You do not need to contact us for permission unless you’re reproducing a significant portion of the code. For example, writing a program that uses several chunks of code from this book does not require permission. Selling or distributing a CD-ROM of examples from O’Reilly books does require permission. Answering a question by citing this book and quoting example code does not require permission. Incorporating a significant amount of example code from this book into your product’s documentation does require permission.\n", 135 | "\n", 136 | "We appreciate, but do not require, attribution. An attribution usually includes the title, author, publisher, and ISBN. For example:\n", 137 | "\n", 138 | "> *The Python Data Science Handbook* by Jake VanderPlas (O’Reilly). 
Copyright 2016 Jake VanderPlas, 978-1-491-91205-8.\n", 139 | "\n", 140 | "If you feel your use of code examples falls outside fair use or the permission given above, feel free to contact us at permissions@oreilly.com." 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "## Installation Considerations\n", 148 | "\n", 149 | "Installing Python and the suite of libraries that enable scientific computing is straightforward. This section will outline some of the considerations when setting up your computer.\n", 150 | "\n", 151 | "Though there are various ways to install Python, the one I would suggest for use in data science is the Anaconda distribution, which works similarly whether you use Windows, Linux, or Mac OS X.\n", 152 | "The Anaconda distribution comes in two flavors:\n", 153 | "\n", 154 | "- [Miniconda](http://conda.pydata.org/miniconda.html) gives you the Python interpreter itself, along with a command-line tool called ``conda`` which operates as a cross-platform package manager geared toward Python packages, similar in spirit to the apt or yum tools that Linux users might be familiar with.\n", 155 | "\n", 156 | "- [Anaconda](https://www.continuum.io/downloads) includes both Python and conda, and additionally bundles a suite of other pre-installed packages geared toward scientific computing. 
Because of the size of this bundle, expect the installation to consume several gigabytes of disk space.\n", 157 | "\n", 158 | "Any of the packages included with Anaconda can also be installed manually on top of Miniconda; for this reason I suggest starting with Miniconda.\n", 159 | "\n", 160 | "To get started, download and install the Miniconda package–make sure to choose a version with Python 3–and then install the core packages used in this book:\n", 161 | "\n", 162 | "```\n", 163 | "[~]$ conda install numpy pandas scikit-learn matplotlib seaborn jupyter\n", 164 | "```\n", 165 | "\n", 166 | "Throughout the text, we will also make use of other more specialized tools in Python's scientific ecosystem; installation is usually as easy as typing **``conda install packagename``**.\n", 167 | "For more information on conda, including information about creating and using conda environments (which I would *highly* recommend), refer to [conda's online documentation](http://conda.pydata.org/docs/)." 
168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "metadata": {}, 173 | "source": [ 174 | "\n", 175 | "| [Contents](Index.ipynb) | [IPython: Beyond Normal Python](01.00-IPython-Beyond-Normal-Python.ipynb) >\n", 176 | "\n", 177 | "\"Open\n" 178 | ] 179 | } 180 | ], 181 | "metadata": { 182 | "anaconda-cloud": {}, 183 | "kernelspec": { 184 | "display_name": "Python 3", 185 | "language": "python", 186 | "name": "python3" 187 | }, 188 | "language_info": { 189 | "codemirror_mode": { 190 | "name": "ipython", 191 | "version": 3 192 | }, 193 | "file_extension": ".py", 194 | "mimetype": "text/x-python", 195 | "name": "python", 196 | "nbconvert_exporter": "python", 197 | "pygments_lexer": "ipython3", 198 | "version": "3.5.1" 199 | } 200 | }, 201 | "nbformat": 4, 202 | "nbformat_minor": 0 203 | } 204 | -------------------------------------------------------------------------------- /notebooks/01.00-IPython-Beyond-Normal-Python.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "\n", 10 | "*This notebook contains an excerpt from the [Python Data Science Handbook](http://shop.oreilly.com/product/0636920034919.do) by Jake VanderPlas; the content is available [on GitHub](https://github.com/jakevdp/PythonDataScienceHandbook).*\n", 11 | "\n", 12 | "*The text is released under the [CC-BY-NC-ND license](https://creativecommons.org/licenses/by-nc-nd/3.0/us/legalcode), and code is released under the [MIT license](https://opensource.org/licenses/MIT). 
If you find this content useful, please consider supporting the work by [buying the book](http://shop.oreilly.com/product/0636920034919.do)!*" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "\n", 20 | "< [Preface](00.00-Preface.ipynb) | [Contents](Index.ipynb) | [Help and Documentation in IPython](01.01-Help-And-Documentation.ipynb) >\n", 21 | "\n", 22 | "\"Open\n" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "# IPython: Beyond Normal Python" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "There are many options for development environments for Python, and I'm often asked which one I use in my own work.\n", 37 | "My answer sometimes surprises people: my preferred environment is [IPython](http://ipython.org/) plus a text editor (in my case, Emacs or Atom depending on my mood).\n", 38 | "IPython (short for *Interactive Python*) was started in 2001 by Fernando Perez as an enhanced Python interpreter, and has since grown into a project aiming to provide, in Perez's words, \"Tools for the entire life cycle of research computing.\"\n", 39 | "If Python is the engine of our data science task, you might think of IPython as the interactive control panel.\n", 40 | "\n", 41 | "As well as being a useful interactive interface to Python, IPython also provides a number of useful syntactic additions to the language; we'll cover the most useful of these additions here.\n", 42 | "In addition, IPython is closely tied with the [Jupyter project](http://jupyter.org), which provides a browser-based notebook that is useful for development, collaboration, sharing, and even publication of data science results.\n", 43 | "The IPython notebook is actually a special case of the broader Jupyter notebook structure, which encompasses notebooks for Julia, R, and other programming languages.\n", 44 | "As an example of the usefulness of the notebook format, 
look no further than the page you are reading: the entire manuscript for this book was composed as a set of IPython notebooks.\n", 45 | "\n", 46 | "IPython is about using Python effectively for interactive scientific and data-intensive computing.\n", 47 | "This chapter will start by stepping through some of the IPython features that are useful to the practice of data science, focusing especially on the syntax it offers beyond the standard features of Python.\n", 48 | "Next, we will go into a bit more depth on some of the more useful \"magic commands\" that can speed up common tasks in creating and using data science code.\n", 49 | "Finally, we will touch on some of the features of the notebook that make it useful in understanding data and sharing results." 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "## Shell or Notebook?\n", 57 | "\n", 58 | "There are two primary means of using IPython that we'll discuss in this chapter: the IPython shell and the IPython notebook.\n", 59 | "The bulk of the material in this chapter is relevant to both, and the examples will switch between them depending on what is most convenient.\n", 60 | "In the few sections that are relevant to just one or the other, we will explicitly state that fact.\n", 61 | "Before we start, some words on how to launch the IPython shell and IPython notebook." 
62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "### Launching the IPython Shell\n", 69 | "\n", 70 | "This chapter, like most of this book, is not designed to be absorbed passively.\n", 71 | "I recommend that as you read through it, you follow along and experiment with the tools and syntax we cover: the muscle-memory you build through doing this will be far more useful than the simple act of reading about it.\n", 72 | "Start by launching the IPython interpreter by typing **``ipython``** on the command-line; alternatively, if you've installed a distribution like Anaconda or EPD, there may be a launcher specific to your system (we'll discuss this more fully in [Help and Documentation in IPython](01.01-Help-And-Documentation.ipynb)).\n", 73 | "\n", 74 | "Once you do this, you should see a prompt like the following:\n", 75 | "```\n", 76 | "IPython 4.0.1 -- An enhanced Interactive Python.\n", 77 | "? -> Introduction and overview of IPython's features.\n", 78 | "%quickref -> Quick reference.\n", 79 | "help -> Python's own help system.\n", 80 | "object? -> Details about 'object', use 'object??' for extra details.\n", 81 | "In [1]:\n", 82 | "```\n", 83 | "With that, you're ready to follow along." 
84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "### Launching the Jupyter Notebook\n", 91 | "\n", 92 | "The Jupyter notebook is a browser-based graphical interface to the IPython shell, and builds on it a rich set of dynamic display capabilities.\n", 93 | "As well as executing Python/IPython statements, the notebook allows the user to include formatted text, static and dynamic visualizations, mathematical equations, JavaScript widgets, and much more.\n", 94 | "Furthermore, these documents can be saved in a way that lets other people open them and execute the code on their own systems.\n", 95 | "\n", 96 | "Though the IPython notebook is viewed and edited through your web browser window, it must connect to a running Python process in order to execute code.\n", 97 | "This process (known as a \"kernel\") can be started by running the following command in your system shell:\n", 98 | "\n", 99 | "```\n", 100 | "$ jupyter notebook\n", 101 | "```\n", 102 | "\n", 103 | "This command will launch a local web server that will be visible to your browser.\n", 104 | "It immediately spits out a log showing what it is doing; that log will look something like this:\n", 105 | "\n", 106 | "```\n", 107 | "$ jupyter notebook\n", 108 | "[NotebookApp] Serving notebooks from local directory: /Users/jakevdp/PythonDataScienceHandbook\n", 109 | "[NotebookApp] 0 active kernels \n", 110 | "[NotebookApp] The IPython Notebook is running at: http://localhost:8888/\n", 111 | "[NotebookApp] Use Control-C to stop this server and shut down all kernels (twice to skip confirmation).\n", 112 | "```\n", 113 | "\n", 114 | "Upon issuing the command, your default browser should automatically open and navigate to the listed local URL;\n", 115 | "the exact address will depend on your system.\n", 116 | "If the browser does not open automatically, you can open a window and manually open this address (*http://localhost:8888/* in this example)." 
117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "\n", 124 | "< [Preface](00.00-Preface.ipynb) | [Contents](Index.ipynb) | [Help and Documentation in IPython](01.01-Help-And-Documentation.ipynb) >\n", 125 | "\n", 126 | "\"Open\n" 127 | ] 128 | } 129 | ], 130 | "metadata": { 131 | "anaconda-cloud": {}, 132 | "kernelspec": { 133 | "display_name": "Python 3", 134 | "language": "python", 135 | "name": "python3" 136 | }, 137 | "language_info": { 138 | "codemirror_mode": { 139 | "name": "ipython", 140 | "version": 3 141 | }, 142 | "file_extension": ".py", 143 | "mimetype": "text/x-python", 144 | "name": "python", 145 | "nbconvert_exporter": "python", 146 | "pygments_lexer": "ipython3", 147 | "version": "3.5.1" 148 | } 149 | }, 150 | "nbformat": 4, 151 | "nbformat_minor": 0 152 | } 153 | -------------------------------------------------------------------------------- /notebooks/01.02-Shell-Keyboard-Shortcuts.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "\n", 10 | "*This notebook contains an excerpt from the [Python Data Science Handbook](http://shop.oreilly.com/product/0636920034919.do) by Jake VanderPlas; the content is available [on GitHub](https://github.com/jakevdp/PythonDataScienceHandbook).*\n", 11 | "\n", 12 | "*The text is released under the [CC-BY-NC-ND license](https://creativecommons.org/licenses/by-nc-nd/3.0/us/legalcode), and code is released under the [MIT license](https://opensource.org/licenses/MIT). 
If you find this content useful, please consider supporting the work by [buying the book](http://shop.oreilly.com/product/0636920034919.do)!*" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "\n", 20 | "< [Help and Documentation in IPython](01.01-Help-And-Documentation.ipynb) | [Contents](Index.ipynb) | [IPython Magic Commands](01.03-Magic-Commands.ipynb) >\n", 21 | "\n", 22 | "\"Open\n" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "# Keyboard Shortcuts in the IPython Shell" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "If you spend any amount of time on the computer, you've probably found a use for keyboard shortcuts in your workflow.\n", 37 | "Most familiar perhaps are the Cmd-C and Cmd-V (or Ctrl-C and Ctrl-V) for copying and pasting in a wide variety of programs and systems.\n", 38 | "Power-users tend to go even further: popular text editors like Emacs, Vim, and others provide users an incredible range of operations through intricate combinations of keystrokes.\n", 39 | "\n", 40 | "The IPython shell doesn't go this far, but does provide a number of keyboard shortcuts for fast navigation while typing commands.\n", 41 | "These shortcuts are not in fact provided by IPython itself, but through its dependency on the GNU Readline library: as such, some of the following shortcuts may differ depending on your system configuration.\n", 42 | "Also, while some of these shortcuts do work in the browser-based notebook, this section is primarily about shortcuts in the IPython shell.\n", 43 | "\n", 44 | "Once you get accustomed to these, they can be very useful for quickly performing certain commands without moving your hands from the \"home\" keyboard position.\n", 45 | "If you're an Emacs user or if you have experience with Linux-style shells, the following will be very familiar.\n", 46 | "We'll group these shortcuts into a few 
categories: *navigation shortcuts*, *text entry shortcuts*, *command history shortcuts*, and *miscellaneous shortcuts*." 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "## Navigation shortcuts\n", 54 | "\n", 55 | "While the use of the left and right arrow keys to move backward and forward in the line is quite obvious, there are other options that don't require moving your hands from the \"home\" keyboard position:\n", 56 | "\n", 57 | "| Keystroke | Action |\n", 58 | "|-----------------------------------|--------------------------------------------|\n", 59 | "| ``Ctrl-a`` | Move cursor to the beginning of the line |\n", 60 | "| ``Ctrl-e`` | Move cursor to the end of the line |\n", 61 | "| ``Ctrl-b`` or the left arrow key | Move cursor back one character |\n", 62 | "| ``Ctrl-f`` or the right arrow key | Move cursor forward one character |" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "## Text Entry Shortcuts\n", 70 | "\n", 71 | "While everyone is familiar with using the Backspace key to delete the previous character, reaching for the key often requires some minor finger gymnastics, and it only deletes a single character at a time.\n", 72 | "In IPython there are several shortcuts for removing some portion of the text you're typing.\n", 73 | "The most immediately useful of these are the commands to delete entire lines of text.\n", 74 | "You'll know these have become second-nature if you find yourself using a combination of Ctrl-b and Ctrl-d instead of reaching for Backspace to delete the previous character!\n", 75 | "\n", 76 | "| Keystroke | Action |\n", 77 | "|-------------------------------|--------------------------------------------------|\n", 78 | "| Backspace key | Delete previous character in line |\n", 79 | "| ``Ctrl-d`` | Delete next character in line |\n", 80 | "| ``Ctrl-k`` | Cut text from cursor to end of line |\n", 81 | "| ``Ctrl-u`` | Cut text from beginning 
of line to cursor |\n", 82 | "| ``Ctrl-y`` | Yank (i.e. paste) text that was previously cut |\n", 83 | "| ``Ctrl-t`` | Transpose (i.e., switch) previous two characters |" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "## Command History Shortcuts\n", 91 | "\n", 92 | "Perhaps the most impactful shortcuts discussed here are the ones IPython provides for navigating the command history.\n", 93 | "This command history goes beyond your current IPython session: your entire command history is stored in a SQLite database in your IPython profile directory.\n", 94 | "The most straightforward way to access these is with the up and down arrow keys to step through the history, but other options exist as well:\n", 95 | "\n", 96 | "| Keystroke | Action |\n", 97 | "|-------------------------------------|--------------------------------------------|\n", 98 | "| ``Ctrl-p`` (or the up arrow key) | Access previous command in history |\n", 99 | "| ``Ctrl-n`` (or the down arrow key) | Access next command in history |\n", 100 | "| ``Ctrl-r`` | Reverse-search through command history |" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "The reverse-search can be particularly useful.\n", 108 | "Recall that in the previous section we defined a function called ``square``.\n", 109 | "Let's reverse-search our Python history from a new IPython shell and find this definition again.\n", 110 | "When you press Ctrl-r in the IPython terminal, you'll see the following prompt:\n", 111 | "\n", 112 | "```ipython\n", 113 | "In [1]:\n", 114 | "(reverse-i-search)`': \n", 115 | "```\n", 116 | "\n", 117 | "If you start typing characters at this prompt, IPython will auto-fill the most recent command, if any, that matches those characters:\n", 118 | "\n", 119 | "```ipython\n", 120 | "In [1]: \n", 121 | "(reverse-i-search)`sqa': square??\n", 122 | "```\n", 123 | "\n", 124 | "At any point, you can add more 
characters to refine the search, or press Ctrl-r again to search further for another command that matches the query. If you followed along in the previous section, pressing Ctrl-r twice more gives:\n", 125 | "\n", 126 | "```ipython\n", 127 | "In [1]: \n", 128 | "(reverse-i-search)`sqa': def square(a):\n", 129 | " \"\"\"Return the square of a\"\"\"\n", 130 | " return a ** 2\n", 131 | "```\n", 132 | "\n", 133 | "Once you have found the command you're looking for, press Return and the search will end.\n", 134 | "We can then use the retrieved command, and carry on with our session:\n", 135 | "\n", 136 | "```ipython\n", 137 | "In [1]: def square(a):\n", 138 | " \"\"\"Return the square of a\"\"\"\n", 139 | " return a ** 2\n", 140 | "\n", 141 | "In [2]: square(2)\n", 142 | "Out[2]: 4\n", 143 | "```\n", 144 | "\n", 145 | "Note that Ctrl-p/Ctrl-n or the up/down arrow keys can also be used to search through history, but only by matching characters at the beginning of the line.\n", 146 | "That is, if you type **``def``** and then press Ctrl-p, it would find the most recent command (if any) in your history that begins with the characters ``def``." 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "## Miscellaneous Shortcuts\n", 154 | "\n", 155 | "Finally, there are a few miscellaneous shortcuts that don't fit into any of the preceding categories, but are nevertheless useful to know:\n", 156 | "\n", 157 | "| Keystroke | Action |\n", 158 | "|-------------------------------|--------------------------------------------|\n", 159 | "| ``Ctrl-l`` | Clear terminal screen |\n", 160 | "| ``Ctrl-c`` | Interrupt current Python command |\n", 161 | "| ``Ctrl-d`` | Exit IPython session |\n", 162 | "\n", 163 | "The Ctrl-c in particular can be useful when you inadvertently start a very long-running job." 
164 | ] 165 | }, 166 | { 167 | "cell_type": "markdown", 168 | "metadata": {}, 169 | "source": [ 170 | "While some of the shortcuts discussed here may seem a bit tedious at first, they quickly become automatic with practice.\n", 171 | "Once you develop that muscle memory, I suspect you will even find yourself wishing they were available in other contexts." 172 | ] 173 | }, 174 | { 175 | "cell_type": "markdown", 176 | "metadata": {}, 177 | "source": [ 178 | "\n", 179 | "< [Help and Documentation in IPython](01.01-Help-And-Documentation.ipynb) | [Contents](Index.ipynb) | [IPython Magic Commands](01.03-Magic-Commands.ipynb) >\n", 180 | "\n", 181 | "\"Open\n" 182 | ] 183 | } 184 | ], 185 | "metadata": { 186 | "anaconda-cloud": {}, 187 | "kernelspec": { 188 | "display_name": "Python 3", 189 | "language": "python", 190 | "name": "python3" 191 | }, 192 | "language_info": { 193 | "codemirror_mode": { 194 | "name": "ipython", 195 | "version": 3 196 | }, 197 | "file_extension": ".py", 198 | "mimetype": "text/x-python", 199 | "name": "python", 200 | "nbconvert_exporter": "python", 201 | "pygments_lexer": "ipython3", 202 | "version": "3.5.1" 203 | } 204 | }, 205 | "nbformat": 4, 206 | "nbformat_minor": 0 207 | } 208 | -------------------------------------------------------------------------------- /notebooks/01.03-Magic-Commands.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "\n", 10 | "*This notebook contains an excerpt from the [Python Data Science Handbook](http://shop.oreilly.com/product/0636920034919.do) by Jake VanderPlas; the content is available [on GitHub](https://github.com/jakevdp/PythonDataScienceHandbook).*\n", 11 | "\n", 12 | "*The text is released under the [CC-BY-NC-ND license](https://creativecommons.org/licenses/by-nc-nd/3.0/us/legalcode), and code is released under the [MIT 
license](https://opensource.org/licenses/MIT). If you find this content useful, please consider supporting the work by [buying the book](http://shop.oreilly.com/product/0636920034919.do)!*" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "\n", 20 | "< [Keyboard Shortcuts in the IPython Shell](01.02-Shell-Keyboard-Shortcuts.ipynb) | [Contents](Index.ipynb) | [Input and Output History](01.04-Input-Output-History.ipynb) >\n", 21 | "\n", 22 | "\"Open\n" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "# IPython Magic Commands" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "The previous two sections showed how IPython lets you use and explore Python efficiently and interactively.\n", 37 | "Here we'll begin discussing some of the enhancements that IPython adds on top of the normal Python syntax.\n", 38 | "These are known in IPython as *magic commands*, and are prefixed by the ``%`` character.\n", 39 | "These magic commands are designed to succinctly solve various common problems in standard data analysis.\n", 40 | "Magic commands come in two flavors: *line magics*, which are denoted by a single ``%`` prefix and operate on a single line of input, and *cell magics*, which are denoted by a double ``%%`` prefix and operate on multiple lines of input.\n", 41 | "We'll demonstrate and discuss a few brief examples here, and come back to more focused discussion of several useful magic commands later in the chapter." 
42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "## Pasting Code Blocks: ``%paste`` and ``%cpaste``\n", 49 | "\n", 50 | "When working in the IPython interpreter, one common gotcha is that pasting multi-line code blocks can lead to unexpected errors, especially when indentation and interpreter markers are involved.\n", 51 | "A common case is that you find some example code on a website and want to paste it into your interpreter.\n", 52 | "Consider the following simple function:\n", 53 | "\n", 54 | "``` python\n", 55 | ">>> def donothing(x):\n", 56 | "... return x\n", 57 | "\n", 58 | "```\n", 59 | "The code is formatted as it would appear in the Python interpreter, and if you copy and paste this directly into IPython you get an error:\n", 60 | "\n", 61 | "```ipython\n", 62 | "In [2]: >>> def donothing(x):\n", 63 | " ...: ... return x\n", 64 | " ...: \n", 65 | " File \"\", line 2\n", 66 | " ... return x\n", 67 | " ^\n", 68 | "SyntaxError: invalid syntax\n", 69 | "```\n", 70 | "\n", 71 | "In the direct paste, the interpreter is confused by the additional prompt characters.\n", 72 | "But never fear–IPython's ``%paste`` magic function is designed to handle this exact type of multi-line, marked-up input:\n", 73 | "\n", 74 | "```ipython\n", 75 | "In [3]: %paste\n", 76 | ">>> def donothing(x):\n", 77 | "... 
return x\n", 78 | "\n", 79 | "## -- End pasted text --\n", 80 | "```\n", 81 | "\n", 82 | "The ``%paste`` command both enters and executes the code, so now the function is ready to be used:\n", 83 | "\n", 84 | "```ipython\n", 85 | "In [4]: donothing(10)\n", 86 | "Out[4]: 10\n", 87 | "```\n", 88 | "\n", 89 | "A command with a similar intent is ``%cpaste``, which opens up an interactive multiline prompt in which you can paste one or more chunks of code to be executed in a batch:\n", 90 | "\n", 91 | "```ipython\n", 92 | "In [5]: %cpaste\n", 93 | "Pasting code; enter '--' alone on the line to stop or use Ctrl-D.\n", 94 | ":>>> def donothing(x):\n", 95 | ":... return x\n", 96 | ":--\n", 97 | "```\n", 98 | "\n", 99 | "These magic commands, like others we'll see, make available functionality that would be difficult or impossible in a standard Python interpreter." 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "## Running External Code: ``%run``\n", 107 | "As you begin developing more extensive code, you will likely find yourself working in both IPython for interactive exploration, as well as a text editor to store code that you want to reuse.\n", 108 | "Rather than running this code in a new window, it can be convenient to run it within your IPython session.\n", 109 | "This can be done with the ``%run`` magic.\n", 110 | "\n", 111 | "For example, imagine you've created a ``myscript.py`` file with the following contents:\n", 112 | "\n", 113 | "```python\n", 114 | "#-------------------------------------\n", 115 | "# file: myscript.py\n", 116 | "\n", 117 | "def square(x):\n", 118 | " \"\"\"square a number\"\"\"\n", 119 | " return x ** 2\n", 120 | "\n", 121 | "for N in range(1, 4):\n", 122 | " print(N, \"squared is\", square(N))\n", 123 | "```\n", 124 | "\n", 125 | "You can execute this from your IPython session as follows:\n", 126 | "\n", 127 | "```ipython\n", 128 | "In [6]: %run myscript.py\n", 129 | "1 squared is 1\n", 
130 | "2 squared is 4\n", 131 | "3 squared is 9\n", 132 | "```\n", 133 | "\n", 134 | "Note also that after you've run this script, any functions defined within it are available for use in your IPython session:\n", 135 | "\n", 136 | "```ipython\n", 137 | "In [7]: square(5)\n", 138 | "Out[7]: 25\n", 139 | "```\n", 140 | "\n", 141 | "There are several options to fine-tune how your code is run; you can see the documentation in the normal way, by typing **``%run?``** in the IPython interpreter." 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "## Timing Code Execution: ``%timeit``\n", 149 | "Another example of a useful magic function is ``%timeit``, which will automatically determine the execution time of the single-line Python statement that follows it.\n", 150 | "For example, we may want to check the performance of a list comprehension:\n", 151 | "\n", 152 | "```ipython\n", 153 | "In [8]: %timeit L = [n ** 2 for n in range(1000)]\n", 154 | "1000 loops, best of 3: 325 µs per loop\n", 155 | "```\n", 156 | "\n", 157 | "The benefit of ``%timeit`` is that for short commands it will automatically perform multiple runs in order to attain more robust results.\n", 158 | "For multi-line statements, adding a second ``%`` sign will turn this into a cell magic that can handle multiple lines of input.\n", 159 | "For example, here's the equivalent construction with a ``for``-loop:\n", 160 | "\n", 161 | "```ipython\n", 162 | "In [9]: %%timeit\n", 163 | " ...: L = []\n", 164 | " ...: for n in range(1000):\n", 165 | " ...: L.append(n ** 2)\n", 166 | " ...: \n", 167 | "1000 loops, best of 3: 373 µs per loop\n", 168 | "```\n", 169 | "\n", 170 | "We can immediately see that list comprehensions are about 10% faster than the equivalent ``for``-loop construction in this case.\n", 171 | "We'll explore ``%timeit`` and other approaches to timing and profiling code in [Profiling and Timing Code](01.07-Timing-and-Profiling.ipynb)." 
172 | ] 173 | }, 174 | { 175 | "cell_type": "markdown", 176 | "metadata": {}, 177 | "source": [ 178 | "## Help on Magic Functions: ``?``, ``%magic``, and ``%lsmagic``\n", 179 | "\n", 180 | "Like normal Python functions, IPython magic functions have docstrings, and this useful\n", 181 | "documentation can be accessed in the standard manner.\n", 182 | "So, for example, to read the documentation of the ``%timeit`` magic simply type this:\n", 183 | "\n", 184 | "```ipython\n", 185 | "In [10]: %timeit?\n", 186 | "```\n", 187 | "\n", 188 | "Documentation for other functions can be accessed similarly.\n", 189 | "To access a general description of available magic functions, including some examples, you can type this:\n", 190 | "\n", 191 | "```ipython\n", 192 | "In [11]: %magic\n", 193 | "```\n", 194 | "\n", 195 | "For a quick and simple list of all available magic functions, type this:\n", 196 | "\n", 197 | "```ipython\n", 198 | "In [12]: %lsmagic\n", 199 | "```\n", 200 | "\n", 201 | "Finally, I'll mention that it is quite straightforward to define your own magic functions if you wish.\n", 202 | "We won't discuss it here, but if you are interested, see the references listed in [More IPython Resources](01.08-More-IPython-Resources.ipynb)." 
203 | ] 204 | }, 205 | { 206 | "cell_type": "markdown", 207 | "metadata": {}, 208 | "source": [ 209 | "\n", 210 | "< [Keyboard Shortcuts in the IPython Shell](01.02-Shell-Keyboard-Shortcuts.ipynb) | [Contents](Index.ipynb) | [Input and Output History](01.04-Input-Output-History.ipynb) >\n", 211 | "\n", 212 | "\"Open\n" 213 | ] 214 | } 215 | ], 216 | "metadata": { 217 | "anaconda-cloud": {}, 218 | "kernelspec": { 219 | "display_name": "Python 3", 220 | "language": "python", 221 | "name": "python3" 222 | }, 223 | "language_info": { 224 | "codemirror_mode": { 225 | "name": "ipython", 226 | "version": 3 227 | }, 228 | "file_extension": ".py", 229 | "mimetype": "text/x-python", 230 | "name": "python", 231 | "nbconvert_exporter": "python", 232 | "pygments_lexer": "ipython3", 233 | "version": "3.5.1" 234 | } 235 | }, 236 | "nbformat": 4, 237 | "nbformat_minor": 0 238 | } 239 | -------------------------------------------------------------------------------- /notebooks/01.04-Input-Output-History.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "\n", 10 | "*This notebook contains an excerpt from the [Python Data Science Handbook](http://shop.oreilly.com/product/0636920034919.do) by Jake VanderPlas; the content is available [on GitHub](https://github.com/jakevdp/PythonDataScienceHandbook).*\n", 11 | "\n", 12 | "*The text is released under the [CC-BY-NC-ND license](https://creativecommons.org/licenses/by-nc-nd/3.0/us/legalcode), and code is released under the [MIT license](https://opensource.org/licenses/MIT). 
If you find this content useful, please consider supporting the work by [buying the book](http://shop.oreilly.com/product/0636920034919.do)!*" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "\n", 20 | "< [IPython Magic Commands](01.03-Magic-Commands.ipynb) | [Contents](Index.ipynb) | [IPython and Shell Commands](01.05-IPython-And-Shell-Commands.ipynb) >\n", 21 | "\n", 22 | "\"Open\n" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "# Input and Output History" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "Previously we saw that the IPython shell allows you to access previous commands with the up and down arrow keys, or equivalently the Ctrl-p/Ctrl-n shortcuts.\n", 37 | "Additionally, in both the shell and the notebook, IPython exposes several ways to obtain the output of previous commands, as well as string versions of the commands themselves.\n", 38 | "We'll explore those here." 
39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "## IPython's ``In`` and ``Out`` Objects\n", 46 | "\n", 47 | "By now I imagine you're quite familiar with the ``In [1]:``/``Out[1]:`` style prompts used by IPython.\n", 48 | "But it turns out that these are not just pretty decoration: they give a clue as to how you can access previous inputs and outputs in your current session.\n", 49 | "Imagine you start a session that looks like this:\n", 50 | "\n", 51 | "```ipython\n", 52 | "In [1]: import math\n", 53 | "\n", 54 | "In [2]: math.sin(2)\n", 55 | "Out[2]: 0.9092974268256817\n", 56 | "\n", 57 | "In [3]: math.cos(2)\n", 58 | "Out[3]: -0.4161468365471424\n", 59 | "```" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "We've imported the built-in ``math`` package, then computed the sine and the cosine of the number 2.\n", 67 | "These inputs and outputs are displayed in the shell with ``In``/``Out`` labels, but there's more–IPython actually creates some Python variables called ``In`` and ``Out`` that are automatically updated to reflect this history:\n", 68 | "\n", 69 | "```ipython\n", 70 | "In [4]: print(In)\n", 71 | "['', 'import math', 'math.sin(2)', 'math.cos(2)', 'print(In)']\n", 72 | "\n", 73 | "In [5]: Out\n", 74 | "Out[5]: {2: 0.9092974268256817, 3: -0.4161468365471424}\n", 75 | "```" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "The ``In`` object is a list, which keeps track of the commands in order (the first item in the list is a place-holder so that ``In[1]`` can refer to the first command):\n", 83 | "\n", 84 | "```ipython\n", 85 | "In [6]: print(In[1])\n", 86 | "import math\n", 87 | "```\n", 88 | "\n", 89 | "The ``Out`` object is not a list but a dictionary mapping input numbers to their outputs (if any):\n", 90 | "\n", 91 | "```ipython\n", 92 | "In [7]: print(Out[2])\n", 93 | "0.9092974268256817\n", 94 | "```\n", 95 | 
"\n", 96 | "Note that not all operations have outputs: for example, ``import`` statements and ``print`` statements don't affect the output.\n", 97 | "The latter may be surprising, but makes sense if you consider that ``print`` is a function that returns ``None``; for brevity, any command that returns ``None`` is not added to ``Out``.\n", 98 | "\n", 99 | "Where this can be useful is if you want to interact with past results.\n", 100 | "For example, let's check the sum of ``sin(2) ** 2`` and ``cos(2) ** 2`` using the previously-computed results:\n", 101 | "\n", 102 | "```ipython\n", 103 | "In [8]: Out[2] ** 2 + Out[3] ** 2\n", 104 | "Out[8]: 1.0\n", 105 | "```\n", 106 | "\n", 107 | "The result is ``1.0`` as we'd expect from the well-known trigonometric identity.\n", 108 | "In this case, using these previous results probably is not necessary, but it can become very handy if you execute a very expensive computation and want to reuse the result!" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "## Underscore Shortcuts and Previous Outputs\n", 116 | "\n", 117 | "The standard Python shell contains just one simple shortcut for accessing previous output; the variable ``_`` (i.e., a single underscore) is kept updated with the previous output; this works in IPython as well:\n", 118 | "\n", 119 | "```ipython\n", 120 | "In [9]: print(_)\n", 121 | "1.0\n", 122 | "```\n", 123 | "\n", 124 | "But IPython takes this a bit further—you can use a double underscore to access the second-to-last output, and a triple underscore to access the third-to-last output (skipping any commands with no output):\n", 125 | "\n", 126 | "```ipython\n", 127 | "In [10]: print(__)\n", 128 | "-0.4161468365471424\n", 129 | "\n", 130 | "In [11]: print(___)\n", 131 | "0.9092974268256817\n", 132 | "```\n", 133 | "\n", 134 | "IPython stops there: more than three underscores starts to get a bit hard to count, and at that point it's easier to refer to the 
output by line number.\n", 135 | "\n", 136 | "There is one more shortcut we should mention, however–a shorthand for ``Out[X]`` is ``_X`` (i.e., a single underscore followed by the line number):\n", 137 | "\n", 138 | "```ipython\n", 139 | "In [12]: Out[2]\n", 140 | "Out[12]: 0.9092974268256817\n", 141 | "\n", 142 | "In [13]: _2\n", 143 | "Out[13]: 0.9092974268256817\n", 144 | "```" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "## Suppressing Output\n", 152 | "Sometimes you might wish to suppress the output of a statement (this is perhaps most common with the plotting commands that we'll explore in [Introduction to Matplotlib](04.00-Introduction-To-Matplotlib.ipynb)).\n", 153 | "Or maybe the command you're executing produces a result that you'd prefer not to store in your output history, perhaps so that it can be deallocated when other references are removed.\n", 154 | "The easiest way to suppress the output of a command is to add a semicolon to the end of the line:\n", 155 | "\n", 156 | "```ipython\n", 157 | "In [14]: math.sin(2) + math.cos(2);\n", 158 | "```\n", 159 | "\n", 160 | "Note that the result is computed silently, and the output is neither displayed on the screen nor stored in the ``Out`` dictionary:\n", 161 | "\n", 162 | "```ipython\n", 163 | "In [15]: 14 in Out\n", 164 | "Out[15]: False\n", 165 | "```" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "## Related Magic Commands\n", 173 | "For accessing a batch of previous inputs at once, the ``%history`` magic command is very helpful.\n", 174 | "Here is how you can print the first four inputs:\n", 175 | "\n", 176 | "```ipython\n", 177 | "In [16]: %history -n 1-4\n", 178 | " 1: import math\n", 179 | " 2: math.sin(2)\n", 180 | " 3: math.cos(2)\n", 181 | " 4: print(In)\n", 182 | "```\n", 183 | "\n", 184 | "As usual, you can type ``%history?`` for more information and a description of options 
available.\n", 185 | "Other similar magic commands are ``%rerun`` (which will re-execute some portion of the command history) and ``%save`` (which saves some set of the command history to a file).\n", 186 | "For more information, I suggest exploring these using the ``?`` help functionality discussed in [Help and Documentation in IPython](01.01-Help-And-Documentation.ipynb)." 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": {}, 192 | "source": [ 193 | "\n", 194 | "< [IPython Magic Commands](01.03-Magic-Commands.ipynb) | [Contents](Index.ipynb) | [IPython and Shell Commands](01.05-IPython-And-Shell-Commands.ipynb) >\n", 195 | "\n", 196 | "\"Open\n" 197 | ] 198 | } 199 | ], 200 | "metadata": { 201 | "anaconda-cloud": {}, 202 | "kernelspec": { 203 | "display_name": "Python 3", 204 | "language": "python", 205 | "name": "python3" 206 | }, 207 | "language_info": { 208 | "codemirror_mode": { 209 | "name": "ipython", 210 | "version": 3 211 | }, 212 | "file_extension": ".py", 213 | "mimetype": "text/x-python", 214 | "name": "python", 215 | "nbconvert_exporter": "python", 216 | "pygments_lexer": "ipython3", 217 | "version": "3.5.1" 218 | } 219 | }, 220 | "nbformat": 4, 221 | "nbformat_minor": 0 222 | } 223 | -------------------------------------------------------------------------------- /notebooks/01.05-IPython-And-Shell-Commands.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "\n", 10 | "*This notebook contains an excerpt from the [Python Data Science Handbook](http://shop.oreilly.com/product/0636920034919.do) by Jake VanderPlas; the content is available [on GitHub](https://github.com/jakevdp/PythonDataScienceHandbook).*\n", 11 | "\n", 12 | "*The text is released under the [CC-BY-NC-ND license](https://creativecommons.org/licenses/by-nc-nd/3.0/us/legalcode), and code is released under the [MIT 
license](https://opensource.org/licenses/MIT). If you find this content useful, please consider supporting the work by [buying the book](http://shop.oreilly.com/product/0636920034919.do)!*" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "\n", 20 | "< [Input and Output History](01.04-Input-Output-History.ipynb) | [Contents](Index.ipynb) | [Errors and Debugging](01.06-Errors-and-Debugging.ipynb) >\n", 21 | "\n", 22 | "\"Open\n" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "# IPython and Shell Commands" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "When working interactively with the standard Python interpreter, one of the frustrations is the need to switch between multiple windows to access Python tools and system command-line tools.\n", 37 | "IPython bridges this gap, and gives you a syntax for executing shell commands directly from within the IPython terminal.\n", 38 | "The magic happens with the exclamation point: anything appearing after ``!`` on a line will be executed not by the Python kernel, but by the system command-line.\n", 39 | "\n", 40 | "The following assumes you're on a Unix-like system, such as Linux or Mac OSX.\n", 41 | "Some of the examples that follow will fail on Windows, which uses a different type of shell by default (though with the 2016 announcement of native Bash shells on Windows, soon this may no longer be an issue!).\n", 42 | "If you're unfamiliar with shell commands, I'd suggest reviewing the [Shell Tutorial](http://swcarpentry.github.io/shell-novice/) put together by the always excellent Software Carpentry Foundation." 
43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "## Quick Introduction to the Shell\n", 50 | "\n", 51 | "A full intro to using the shell/terminal/command-line is well beyond the scope of this chapter, but for the uninitiated we will offer a quick introduction here.\n", 52 | "The shell is a way to interact textually with your computer.\n", 53 | "Ever since the mid 1980s, when Microsoft and Apple introduced the first versions of their now ubiquitous graphical operating systems, most computer users have interacted with their operating system through familiar clicking of menus and drag-and-drop movements.\n", 54 | "But operating systems existed long before these graphical user interfaces, and were primarily controlled through sequences of text input: at the prompt, the user would type a command, and the computer would do what the user told it to.\n", 55 | "Those early prompt systems are the precursors of the shells and terminals that most active data scientists still use today.\n", 56 | "\n", 57 | "Someone unfamiliar with the shell might ask why you would bother with this, when many results can be accomplished by simply clicking on icons and menus.\n", 58 | "A shell user might reply with another question: why hunt icons and click menus when you can accomplish things much more easily by typing?\n", 59 | "While it might sound like a typical tech preference impasse, when moving beyond basic tasks it quickly becomes clear that the shell offers much more control of advanced tasks, though admittedly the learning curve can intimidate the average computer user.\n", 60 | "\n", 61 | "As an example, here is a sample of a Linux/OSX shell session where a user explores, creates, and modifies directories and files on their system (``osx:~ $`` is the prompt, and everything after the ``$`` sign is the typed command; text that is preceded by a ``#`` is meant just as description, rather than something you would actually type in):\n", 62 | 
"\n", 63 | "```bash\n", 64 | "osx:~ $ echo \"hello world\" # echo is like Python's print function\n", 65 | "hello world\n", 66 | "\n", 67 | "osx:~ $ pwd # pwd = print working directory\n", 68 | "/home/jake # this is the \"path\" that we're sitting in\n", 69 | "\n", 70 | "osx:~ $ ls # ls = list working directory contents\n", 71 | "notebooks projects \n", 72 | "\n", 73 | "osx:~ $ cd projects/ # cd = change directory\n", 74 | "\n", 75 | "osx:projects $ pwd\n", 76 | "/home/jake/projects\n", 77 | "\n", 78 | "osx:projects $ ls\n", 79 | "datasci_book mpld3 myproject.txt\n", 80 | "\n", 81 | "osx:projects $ mkdir myproject # mkdir = make new directory\n", 82 | "\n", 83 | "osx:projects $ cd myproject/\n", 84 | "\n", 85 | "osx:myproject $ mv ../myproject.txt ./ # mv = move file. Here we're moving the\n", 86 | " # file myproject.txt from one directory\n", 87 | " # up (../) to the current directory (./)\n", 88 | "osx:myproject $ ls\n", 89 | "myproject.txt\n", 90 | "```\n", 91 | "\n", 92 | "Notice that all of this is just a compact way to do familiar operations (navigating a directory structure, creating a directory, moving a file, etc.) by typing commands rather than clicking icons and menus.\n", 93 | "Note that with just a few commands (``pwd``, ``ls``, ``cd``, ``mkdir``, and ``cp``) you can do many of the most common file operations.\n", 94 | "It's when you go beyond these basics that the shell approach becomes really powerful." 
95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "## Shell Commands in IPython\n", 102 | "\n", 103 | "Any command that works at the command-line can be used in IPython by prefixing it with the ``!`` character.\n", 104 | "For example, the ``ls``, ``pwd``, and ``echo`` commands can be run as follows:\n", 105 | "\n", 106 | "```ipython\n", 107 | "In [1]: !ls\n", 108 | "myproject.txt\n", 109 | "\n", 110 | "In [2]: !pwd\n", 111 | "/home/jake/projects/myproject\n", 112 | "\n", 113 | "In [3]: !echo \"printing from the shell\"\n", 114 | "printing from the shell\n", 115 | "```" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "## Passing Values to and from the Shell\n", 123 | "\n", 124 | "Shell commands can not only be called from IPython, but can also be made to interact with the IPython namespace.\n", 125 | "For example, you can save the output of any shell command to a Python list using the assignment operator:\n", 126 | "\n", 127 | "```ipython\n", 128 | "In [4]: contents = !ls\n", 129 | "\n", 130 | "In [5]: print(contents)\n", 131 | "['myproject.txt']\n", 132 | "\n", 133 | "In [6]: directory = !pwd\n", 134 | "\n", 135 | "In [7]: print(directory)\n", 136 | "['/home/jake/projects/myproject']\n", 137 | "```\n", 138 | "\n", 139 | "Note that these results are not returned as lists, but as a special shell return type defined in IPython:\n", 140 | "\n", 141 | "```ipython\n", 142 | "In [8]: type(directory)\n", 143 | "Out[8]: IPython.utils.text.SList\n", 144 | "```\n", 145 | "\n", 146 | "This looks and acts a lot like a Python list, but has additional functionality, such as\n", 147 | "the ``grep`` and ``fields`` methods and the ``s``, ``n``, and ``p`` properties that allow you to search, filter, and display the results in convenient ways.\n", 148 | "For more information on these, you can use IPython's built-in help features." 
149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "Communication in the other direction–passing Python variables into the shell–is possible using the ``{varname}`` syntax:\n", 156 | "\n", 157 | "```ipython\n", 158 | "In [9]: message = \"hello from Python\"\n", 159 | "\n", 160 | "In [10]: !echo {message}\n", 161 | "hello from Python\n", 162 | "```\n", 163 | "\n", 164 | "The curly braces contain the variable name, which is replaced by the variable's contents in the shell command." 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": {}, 170 | "source": [ 171 | "## Shell-Related Magic Commands\n", 172 | "\n", 173 | "If you play with IPython's shell commands for a while, you might notice that you cannot use ``!cd`` to navigate the filesystem:\n", 174 | "\n", 175 | "```ipython\n", 176 | "In [11]: !pwd\n", 177 | "/home/jake/projects/myproject\n", 178 | "\n", 179 | "In [12]: !cd ..\n", 180 | "\n", 181 | "In [13]: !pwd\n", 182 | "/home/jake/projects/myproject\n", 183 | "```\n", 184 | "\n", 185 | "The reason is that shell commands in the notebook are executed in a temporary subshell.\n", 186 | "If you'd like to change the working directory in a more enduring way, you can use the ``%cd`` magic command:\n", 187 | "\n", 188 | "```ipython\n", 189 | "In [14]: %cd ..\n", 190 | "/home/jake/projects\n", 191 | "```\n", 192 | "\n", 193 | "In fact, by default you can even use this without the ``%`` sign:\n", 194 | "\n", 195 | "```ipython\n", 196 | "In [15]: cd myproject\n", 197 | "/home/jake/projects/myproject\n", 198 | "```\n", 199 | "\n", 200 | "This is known as an ``automagic`` function, and this behavior can be toggled with the ``%automagic`` magic function.\n", 201 | "\n", 202 | "Besides ``%cd``, other available shell-like magic functions are ``%cat``, ``%cp``, ``%env``, ``%ls``, ``%man``, ``%mkdir``, ``%more``, ``%mv``, ``%pwd``, ``%rm``, and ``%rmdir``, any of which can be used without the ``%`` sign if 
``automagic`` is on.\n", 203 | "This makes it so that you can almost treat the IPython prompt as if it's a normal shell:\n", 204 | "\n", 205 | "```ipython\n", 206 | "In [16]: mkdir tmp\n", 207 | "\n", 208 | "In [17]: ls\n", 209 | "myproject.txt tmp/\n", 210 | "\n", 211 | "In [18]: cp myproject.txt tmp/\n", 212 | "\n", 213 | "In [19]: ls tmp\n", 214 | "myproject.txt\n", 215 | "\n", 216 | "In [20]: rm -r tmp\n", 217 | "```\n", 218 | "\n", 219 | "This access to the shell from within the same terminal window as your Python session means that there is a lot less switching back and forth between interpreter and shell as you write your Python code." 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "metadata": {}, 225 | "source": [ 226 | "\n", 227 | "< [Input and Output History](01.04-Input-Output-History.ipynb) | [Contents](Index.ipynb) | [Errors and Debugging](01.06-Errors-and-Debugging.ipynb) >\n", 228 | "\n", 229 | "\"Open\n" 230 | ] 231 | } 232 | ], 233 | "metadata": { 234 | "anaconda-cloud": {}, 235 | "kernelspec": { 236 | "display_name": "Python 3", 237 | "language": "python", 238 | "name": "python3" 239 | }, 240 | "language_info": { 241 | "codemirror_mode": { 242 | "name": "ipython", 243 | "version": 3 244 | }, 245 | "file_extension": ".py", 246 | "mimetype": "text/x-python", 247 | "name": "python", 248 | "nbconvert_exporter": "python", 249 | "pygments_lexer": "ipython3", 250 | "version": "3.5.1" 251 | } 252 | }, 253 | "nbformat": 4, 254 | "nbformat_minor": 0 255 | } 256 | -------------------------------------------------------------------------------- /notebooks/01.08-More-IPython-Resources.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "\n", 10 | "*This notebook contains an excerpt from the [Python Data Science Handbook](http://shop.oreilly.com/product/0636920034919.do) by Jake VanderPlas; the 
content is available [on GitHub](https://github.com/jakevdp/PythonDataScienceHandbook).*\n", 11 | "\n", 12 | "*The text is released under the [CC-BY-NC-ND license](https://creativecommons.org/licenses/by-nc-nd/3.0/us/legalcode), and code is released under the [MIT license](https://opensource.org/licenses/MIT). If you find this content useful, please consider supporting the work by [buying the book](http://shop.oreilly.com/product/0636920034919.do)!*" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "\n", 20 | "< [Profiling and Timing Code](01.07-Timing-and-Profiling.ipynb) | [Contents](Index.ipynb) | [Introduction to NumPy](02.00-Introduction-to-NumPy.ipynb) >\n", 21 | "\n", 22 | "\"Open\n" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "# More IPython Resources" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "In this chapter, we've just scratched the surface of using IPython to enable data science tasks.\n", 37 | "Much more information is available both in print and on the Web, and here we'll list some other resources that you may find helpful." 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "## Web Resources\n", 45 | "\n", 46 | "- [The IPython website](http://ipython.org): The IPython website links to documentation, examples, tutorials, and a variety of other resources.\n", 47 | "- [The nbviewer website](http://nbviewer.jupyter.org/): This site shows static renderings of any IPython notebook available on the internet. 
The front page features some example notebooks that you can browse to see what other folks are using IPython for!\n", 48 | "- [A gallery of interesting Jupyter Notebooks](https://github.com/jupyter/jupyter/wiki/A-gallery-of-interesting-Jupyter-Notebooks/): This ever-growing list of notebooks, powered by nbviewer, shows the depth and breadth of numerical analysis you can do with IPython. It includes everything from short examples and tutorials to full-blown courses and books composed in the notebook format!\n", 49 | "- Video Tutorials: searching the Internet, you will find many video-recorded tutorials on IPython. I'd especially recommend seeking tutorials from the PyCon, SciPy, and PyData conferences by Fernando Perez and Brian Granger, two of the primary creators and maintainers of IPython and Jupyter." 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "## Books\n", 57 | "\n", 58 | "- [*Python for Data Analysis*](http://shop.oreilly.com/product/0636920023784.do): Wes McKinney's book includes a chapter that covers using IPython as a data scientist. Although much of the material overlaps what we've discussed here, another perspective is always helpful.\n", 59 | "- [*Learning IPython for Interactive Computing and Data Visualization*](https://www.packtpub.com/big-data-and-business-intelligence/learning-ipython-interactive-computing-and-data-visualization): This short book by Cyrille Rossant offers a good introduction to using IPython for data analysis.\n", 60 | "- [*IPython Interactive Computing and Visualization Cookbook*](https://www.packtpub.com/big-data-and-business-intelligence/ipython-interactive-computing-and-visualization-cookbook): Also by Cyrille Rossant, this book is a longer and more advanced treatment of using IPython for data science. 
Despite its name, it's not just about IPython–it also goes into some depth on a broad range of data science topics.\n", 61 | "\n", 62 | "Finally, a reminder that you can find help on your own: IPython's ``?``-based help functionality (discussed in [Help and Documentation in IPython](01.01-Help-And-Documentation.ipynb)) can be very useful if you use it well and use it often.\n", 63 | "As you go through the examples here and elsewhere, this can be used to familiarize yourself with all the tools that IPython has to offer." 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "\n", 71 | "< [Profiling and Timing Code](01.07-Timing-and-Profiling.ipynb) | [Contents](Index.ipynb) | [Introduction to NumPy](02.00-Introduction-to-NumPy.ipynb) >\n", 72 | "\n", 73 | "\"Open\n" 74 | ] 75 | } 76 | ], 77 | "metadata": { 78 | "anaconda-cloud": {}, 79 | "kernelspec": { 80 | "display_name": "Python 3", 81 | "language": "python", 82 | "name": "python3" 83 | }, 84 | "language_info": { 85 | "codemirror_mode": { 86 | "name": "ipython", 87 | "version": 3 88 | }, 89 | "file_extension": ".py", 90 | "mimetype": "text/x-python", 91 | "name": "python", 92 | "nbconvert_exporter": "python", 93 | "pygments_lexer": "ipython3", 94 | "version": "3.5.1" 95 | } 96 | }, 97 | "nbformat": 4, 98 | "nbformat_minor": 0 99 | } 100 | -------------------------------------------------------------------------------- /notebooks/02.00-Introduction-to-NumPy.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "deletable": true, 7 | "editable": true 8 | }, 9 | "source": [ 10 | "\n", 11 | "\n", 12 | "\n", 13 | "*This notebook contains an excerpt from the [Python Data Science Handbook](http://shop.oreilly.com/product/0636920034919.do) by Jake VanderPlas; the content is available [on GitHub](https://github.com/jakevdp/PythonDataScienceHandbook).*\n", 14 | "\n", 15 | 
"*The text is released under the [CC-BY-NC-ND license](https://creativecommons.org/licenses/by-nc-nd/3.0/us/legalcode), and code is released under the [MIT license](https://opensource.org/licenses/MIT). If you find this content useful, please consider supporting the work by [buying the book](http://shop.oreilly.com/product/0636920034919.do)!*" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": { 21 | "deletable": true, 22 | "editable": true 23 | }, 24 | "source": [ 25 | "\n", 26 | "< [More IPython Resources](01.08-More-IPython-Resources.ipynb) | [Contents](Index.ipynb) | [Understanding Data Types in Python](02.01-Understanding-Data-Types.ipynb) >\n", 27 | "\n", 28 | "\"Open\n" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": { 34 | "deletable": true, 35 | "editable": true 36 | }, 37 | "source": [ 38 | "# Introduction to NumPy" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": { 44 | "deletable": true, 45 | "editable": true 46 | }, 47 | "source": [ 48 | "This chapter, along with Chapter 3, outlines techniques for effectively loading, storing, and manipulating in-memory data in Python.\n", 49 | "The topic is very broad: datasets can come from a wide range of sources and a wide range of formats, including collections of documents, collections of images, collections of sound clips, collections of numerical measurements, or nearly anything else.\n", 50 | "Despite this apparent heterogeneity, it will help us to think of all data fundamentally as arrays of numbers.\n", 51 | "\n", 52 | "For example, images–particularly digital images–can be thought of as simply two-dimensional arrays of numbers representing pixel brightness across the area.\n", 53 | "Sound clips can be thought of as one-dimensional arrays of intensity versus time.\n", 54 | "Text can be converted in various ways into numerical representations, perhaps binary digits representing the frequency of certain words or pairs of words.\n", 55 | "No matter 
what the data are, the first step in making them analyzable will be to transform them into arrays of numbers.\n", 56 | "(We will discuss some specific examples of this process later in [Feature Engineering](05.04-Feature-Engineering.ipynb).)\n", 57 | "\n", 58 | "For this reason, efficient storage and manipulation of numerical arrays is absolutely fundamental to the process of doing data science.\n", 59 | "We'll now take a look at the specialized tools that Python has for handling such numerical arrays: the NumPy package, and the Pandas package (discussed in Chapter 3).\n", 60 | "\n", 61 | "This chapter will cover NumPy in detail. NumPy (short for *Numerical Python*) provides an efficient interface to store and operate on dense data buffers.\n", 62 | "In some ways, NumPy arrays are like Python's built-in ``list`` type, but NumPy arrays provide much more efficient storage and data operations as the arrays grow larger in size.\n", 63 | "NumPy arrays form the core of nearly the entire ecosystem of data science tools in Python, so time spent learning to use NumPy effectively will be valuable no matter what aspect of data science interests you.\n", 64 | "\n", 65 | "If you followed the advice outlined in the Preface and installed the Anaconda stack, you already have NumPy installed and ready to go.\n", 66 | "If you're more the do-it-yourself type, you can go to http://www.numpy.org/ and follow the installation instructions found there.\n", 67 | "Once you do, you can import NumPy and double-check the version:" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 1, 73 | "metadata": { 74 | "collapsed": false, 75 | "deletable": true, 76 | "editable": true 77 | }, 78 | "outputs": [ 79 | { 80 | "data": { 81 | "text/plain": [ 82 | "'1.11.1'" 83 | ] 84 | }, 85 | "execution_count": 1, 86 | "metadata": {}, 87 | "output_type": "execute_result" 88 | } 89 | ], 90 | "source": [ 91 | "import numpy\n", 92 | "numpy.__version__" 93 | ] 94 | }, 95 | { 96 | "cell_type": 
"markdown", 97 | "metadata": { 98 | "deletable": true, 99 | "editable": true 100 | }, 101 | "source": [ 102 | "For the pieces of the package discussed here, I'd recommend NumPy version 1.8 or later.\n", 103 | "By convention, you'll find that most people in the SciPy/PyData world will import NumPy using ``np`` as an alias:" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 2, 109 | "metadata": { 110 | "collapsed": false, 111 | "deletable": true, 112 | "editable": true 113 | }, 114 | "outputs": [], 115 | "source": [ 116 | "import numpy as np" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": { 122 | "deletable": true, 123 | "editable": true 124 | }, 125 | "source": [ 126 | "Throughout this chapter, and indeed the rest of the book, you'll find that this is the way we will import and use NumPy." 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": { 132 | "deletable": true, 133 | "editable": true 134 | }, 135 | "source": [ 136 | "## Reminder about Built-In Documentation\n", 137 | "\n", 138 | "As you read through this chapter, don't forget that IPython gives you the ability to quickly explore the contents of a package (by using the tab-completion feature), as well as the documentation of various functions (using the ``?`` character – refer back to [Help and Documentation in IPython](01.01-Help-And-Documentation.ipynb)).\n", 139 | "\n", 140 | "For example, to display all the contents of the numpy namespace, you can type this:\n", 141 | "\n", 142 | "```ipython\n", 143 | "In [3]: np.<TAB>\n", 144 | "```\n", 145 | "\n", 146 | "And to display NumPy's built-in documentation, you can use this:\n", 147 | "\n", 148 | "```ipython\n", 149 | "In [4]: np?\n", 150 | "```\n", 151 | "\n", 152 | "More detailed documentation, along with tutorials and other resources, can be found at http://www.numpy.org." 
153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": { 158 | "deletable": true, 159 | "editable": true 160 | }, 161 | "source": [ 162 | "\n", 163 | "< [More IPython Resources](01.08-More-IPython-Resources.ipynb) | [Contents](Index.ipynb) | [Understanding Data Types in Python](02.01-Understanding-Data-Types.ipynb) >\n", 164 | "\n", 165 | "\"Open\n" 166 | ] 167 | } 168 | ], 169 | "metadata": { 170 | "anaconda-cloud": {}, 171 | "kernelspec": { 172 | "display_name": "Python 3", 173 | "language": "python", 174 | "name": "python3" 175 | }, 176 | "language_info": { 177 | "codemirror_mode": { 178 | "name": "ipython", 179 | "version": 3 180 | }, 181 | "file_extension": ".py", 182 | "mimetype": "text/x-python", 183 | "name": "python", 184 | "nbconvert_exporter": "python", 185 | "pygments_lexer": "ipython3", 186 | "version": "3.5.1" 187 | } 188 | }, 189 | "nbformat": 4, 190 | "nbformat_minor": 0 191 | } 192 | -------------------------------------------------------------------------------- /notebooks/03.00-Introduction-to-Pandas.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "\n", 10 | "*This notebook contains an excerpt from the [Python Data Science Handbook](http://shop.oreilly.com/product/0636920034919.do) by Jake VanderPlas; the content is available [on GitHub](https://github.com/jakevdp/PythonDataScienceHandbook).*\n", 11 | "\n", 12 | "*The text is released under the [CC-BY-NC-ND license](https://creativecommons.org/licenses/by-nc-nd/3.0/us/legalcode), and code is released under the [MIT license](https://opensource.org/licenses/MIT). 
If you find this content useful, please consider supporting the work by [buying the book](http://shop.oreilly.com/product/0636920034919.do)!*" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "\n", 20 | "< [Structured Data: NumPy's Structured Arrays](02.09-Structured-Data-NumPy.ipynb) | [Contents](Index.ipynb) | [Introducing Pandas Objects](03.01-Introducing-Pandas-Objects.ipynb) >\n", 21 | "\n", 22 | "\"Open\n" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "# Data Manipulation with Pandas" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "In the previous chapter, we dove into detail on NumPy and its ``ndarray`` object, which provides efficient storage and manipulation of dense typed arrays in Python.\n", 37 | "Here we'll build on this knowledge by looking in detail at the data structures provided by the Pandas library.\n", 38 | "Pandas is a newer package built on top of NumPy, and provides an efficient implementation of a ``DataFrame``.\n", 39 | "``DataFrame``s are essentially multidimensional arrays with attached row and column labels, and often with heterogeneous types and/or missing data.\n", 40 | "As well as offering a convenient storage interface for labeled data, Pandas implements a number of powerful data operations familiar to users of both database frameworks and spreadsheet programs.\n", 41 | "\n", 42 | "As we saw, NumPy's ``ndarray`` data structure provides essential features for the type of clean, well-organized data typically seen in numerical computing tasks.\n", 43 | "While it serves this purpose very well, its limitations become clear when we need more flexibility (e.g., attaching labels to data, working with missing data, etc.) 
and when attempting operations that do not map well to element-wise broadcasting (e.g., groupings, pivots, etc.), each of which is an important piece of analyzing the less structured data available in many forms in the world around us.\n", 44 | "Pandas, and in particular its ``Series`` and ``DataFrame`` objects, builds on the NumPy array structure and provides efficient access to these sorts of \"data munging\" tasks that occupy much of a data scientist's time.\n", 45 | "\n", 46 | "In this chapter, we will focus on the mechanics of using ``Series``, ``DataFrame``, and related structures effectively.\n", 47 | "We will use examples drawn from real datasets where appropriate, but these examples are not necessarily the focus." 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "## Installing and Using Pandas\n", 55 | "\n", 56 | "Installation of Pandas on your system requires NumPy to be installed, and if building the library from source, requires the appropriate tools to compile the C and Cython sources on which Pandas is built.\n", 57 | "Details on this installation can be found in the [Pandas documentation](http://pandas.pydata.org/).\n", 58 | "If you followed the advice outlined in the [Preface](00.00-Preface.ipynb) and used the Anaconda stack, you already have Pandas installed.\n", 59 | "\n", 60 | "Once Pandas is installed, you can import it and check the version:" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 1, 66 | "metadata": { 67 | "collapsed": false 68 | }, 69 | "outputs": [ 70 | { 71 | "data": { 72 | "text/plain": [ 73 | "'0.18.1'" 74 | ] 75 | }, 76 | "execution_count": 1, 77 | "metadata": {}, 78 | "output_type": "execute_result" 79 | } 80 | ], 81 | "source": [ 82 | "import pandas\n", 83 | "pandas.__version__" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "Just as we generally import NumPy under the alias ``np``, we will import Pandas under 
the alias ``pd``:" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 2, 96 | "metadata": { 97 | "collapsed": true 98 | }, 99 | "outputs": [], 100 | "source": [ 101 | "import pandas as pd" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "This import convention will be used throughout the remainder of this book." 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "## Reminder about Built-In Documentation\n", 116 | "\n", 117 | "As you read through this chapter, don't forget that IPython gives you the ability to quickly explore the contents of a package (by using the tab-completion feature) as well as the documentation of various functions (using the ``?`` character). (Refer back to [Help and Documentation in IPython](01.01-Help-And-Documentation.ipynb) if you need a refresher on this.)\n", 118 | "\n", 119 | "For example, to display all the contents of the pandas namespace, you can type this:\n", 120 | "\n", 121 | "```ipython\n", 122 | "In [3]: pd.<TAB>\n", 123 | "```\n", 124 | "\n", 125 | "And to display Pandas's built-in documentation, you can use this:\n", 126 | "\n", 127 | "```ipython\n", 128 | "In [4]: pd?\n", 129 | "```\n", 130 | "\n", 131 | "More detailed documentation, along with tutorials and other resources, can be found at http://pandas.pydata.org/." 
132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "\n", 139 | "< [Structured Data: NumPy's Structured Arrays](02.09-Structured-Data-NumPy.ipynb) | [Contents](Index.ipynb) | [Introducing Pandas Objects](03.01-Introducing-Pandas-Objects.ipynb) >\n", 140 | "\n", 141 | "\"Open\n" 142 | ] 143 | } 144 | ], 145 | "metadata": { 146 | "anaconda-cloud": {}, 147 | "kernelspec": { 148 | "display_name": "Python 3", 149 | "language": "python", 150 | "name": "python3" 151 | }, 152 | "language_info": { 153 | "codemirror_mode": { 154 | "name": "ipython", 155 | "version": 3 156 | }, 157 | "file_extension": ".py", 158 | "mimetype": "text/x-python", 159 | "name": "python", 160 | "nbconvert_exporter": "python", 161 | "pygments_lexer": "ipython3", 162 | "version": "3.5.1" 163 | } 164 | }, 165 | "nbformat": 4, 166 | "nbformat_minor": 0 167 | } 168 | -------------------------------------------------------------------------------- /notebooks/03.13-Further-Resources.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "deletable": true, 7 | "editable": true 8 | }, 9 | "source": [ 10 | "\n", 11 | "\n", 12 | "\n", 13 | "*This notebook contains an excerpt from the [Python Data Science Handbook](http://shop.oreilly.com/product/0636920034919.do) by Jake VanderPlas; the content is available [on GitHub](https://github.com/jakevdp/PythonDataScienceHandbook).*\n", 14 | "\n", 15 | "*The text is released under the [CC-BY-NC-ND license](https://creativecommons.org/licenses/by-nc-nd/3.0/us/legalcode), and code is released under the [MIT license](https://opensource.org/licenses/MIT). 
If you find this content useful, please consider supporting the work by [buying the book](http://shop.oreilly.com/product/0636920034919.do)!*" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": { 21 | "deletable": true, 22 | "editable": true 23 | }, 24 | "source": [ 25 | "\n", 26 | "< [High-Performance Pandas: eval() and query()](03.12-Performance-Eval-and-Query.ipynb) | [Contents](Index.ipynb) | [Visualization with Matplotlib](04.00-Introduction-To-Matplotlib.ipynb) >\n", 27 | "\n", 28 | "\"Open\n" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "# Further Resources" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": { 41 | "deletable": true, 42 | "editable": true 43 | }, 44 | "source": [ 45 | "In this chapter, we've covered many of the basics of using Pandas effectively for data analysis.\n", 46 | "Still, much has been omitted from our discussion.\n", 47 | "To learn more about Pandas, I recommend the following resources:\n", 48 | "\n", 49 | "- [Pandas online documentation](http://pandas.pydata.org/): This is the go-to source for complete documentation of the package. While the examples in the documentation tend to be small generated datasets, the description of the options is complete and generally very useful for understanding the use of various functions.\n", 50 | "\n", 51 | "- [*Python for Data Analysis*](http://shop.oreilly.com/product/0636920023784.do): Written by Wes McKinney (the original creator of Pandas), this book contains much more detail on the Pandas package than we had room for in this chapter. In particular, he takes a deep dive into tools for time series, which were his bread and butter as a financial consultant. The book also has many entertaining examples of applying Pandas to gain insight from real-world datasets. 
Keep in mind, though, that the book is now several years old, and the Pandas package has quite a few new features that this book does not cover (but be on the lookout for a new edition in 2017).\n", 52 | "\n", 53 | "- [Stack Overflow](http://stackoverflow.com/questions/tagged/pandas): Pandas has so many users that any question you have has likely been asked and answered on Stack Overflow. Using Pandas is a case where some Google-Fu is your best friend. Simply go to your favorite search engine and type in the question, problem, or error you're coming across—more than likely you'll find your answer on a Stack Overflow page.\n", 54 | "\n", 55 | "- [Pandas on PyVideo](http://pyvideo.org/search?q=pandas): From PyCon to SciPy to PyData, many conferences have featured tutorials from Pandas developers and power users. The PyCon tutorials in particular tend to be given by very well-vetted presenters.\n", 56 | "\n", 57 | "Using these resources, combined with the walk-through given in this chapter, my hope is that you'll be poised to use Pandas to tackle any data analysis problem you come across!" 
58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": { 63 | "deletable": true, 64 | "editable": true 65 | }, 66 | "source": [ 67 | "\n", 68 | "< [High-Performance Pandas: eval() and query()](03.12-Performance-Eval-and-Query.ipynb) | [Contents](Index.ipynb) | [Visualization with Matplotlib](04.00-Introduction-To-Matplotlib.ipynb) >\n", 69 | "\n", 70 | "\"Open\n" 71 | ] 72 | } 73 | ], 74 | "metadata": { 75 | "anaconda-cloud": {}, 76 | "kernelspec": { 77 | "display_name": "Python 3", 78 | "language": "python", 79 | "name": "python3" 80 | }, 81 | "language_info": { 82 | "codemirror_mode": { 83 | "name": "ipython", 84 | "version": 3 85 | }, 86 | "file_extension": ".py", 87 | "mimetype": "text/x-python", 88 | "name": "python", 89 | "nbconvert_exporter": "python", 90 | "pygments_lexer": "ipython3", 91 | "version": "3.5.1" 92 | } 93 | }, 94 | "nbformat": 4, 95 | "nbformat_minor": 0 96 | } 97 | -------------------------------------------------------------------------------- /notebooks/04.15-Further-Resources.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "\n", 10 | "*This notebook contains an excerpt from the [Python Data Science Handbook](http://shop.oreilly.com/product/0636920034919.do) by Jake VanderPlas; the content is available [on GitHub](https://github.com/jakevdp/PythonDataScienceHandbook).*\n", 11 | "\n", 12 | "*The text is released under the [CC-BY-NC-ND license](https://creativecommons.org/licenses/by-nc-nd/3.0/us/legalcode), and code is released under the [MIT license](https://opensource.org/licenses/MIT). 
If you find this content useful, please consider supporting the work by [buying the book](http://shop.oreilly.com/product/0636920034919.do)!*" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "\n", 20 | "< [Visualization with Seaborn](04.14-Visualization-With-Seaborn.ipynb) | [Contents](Index.ipynb) | [Machine Learning](05.00-Machine-Learning.ipynb) >\n", 21 | "\n", 22 | "\"Open\n" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "# Further Resources" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "## Matplotlib Resources\n", 37 | "\n", 38 | "A single chapter in a book can never hope to cover all the features and plot types available in Matplotlib.\n", 39 | "As with other packages we've seen, liberal use of IPython's tab-completion and help functions (see [Help and Documentation in IPython](01.01-Help-And-Documentation.ipynb)) can be very helpful when exploring Matplotlib's API.\n", 40 | "In addition, Matplotlib's [online documentation](http://matplotlib.org/) can be a helpful reference.\n", 41 | "See in particular the [Matplotlib gallery](http://matplotlib.org/gallery.html) linked on that page: it shows thumbnails of hundreds of different plot types, each one linked to a page with the Python code snippet used to generate it.\n", 42 | "In this way, you can visually inspect and learn about a wide range of different plotting styles and visualization techniques.\n", 43 | "\n", 44 | "For a book-length treatment of Matplotlib, I would recommend [*Interactive Applications Using Matplotlib*](https://www.packtpub.com/application-development/interactive-applications-using-matplotlib), written by Matplotlib core developer Ben Root." 
45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "## Other Python Graphics Libraries\n", 52 | "\n", 53 | "Although Matplotlib is the most prominent Python visualization library, there are other more modern tools that are worth exploring as well.\n", 54 | "I'll mention a few of them briefly here:\n", 55 | "\n", 56 | "- [Bokeh](http://bokeh.pydata.org) is a JavaScript visualization library with a Python frontend that creates highly interactive visualizations capable of handling very large and/or streaming datasets. The Python front-end outputs a JSON data structure that can be interpreted by the Bokeh JS engine.\n", 57 | "- [Plotly](http://plot.ly) is the eponymous open source product of the Plotly company, and is similar in spirit to Bokeh. Because Plotly is the main product of a startup, it is receiving a high level of development effort. Use of the library is entirely free.\n", 58 | "- [Vispy](http://vispy.org/) is an actively developed project focused on dynamic visualizations of very large datasets. Because it is built to target OpenGL and make use of efficient graphics processors in your computer, it is able to render some quite large and stunning visualizations.\n", 59 | "- [Vega](https://vega.github.io/) and [Vega-Lite](https://vega.github.io/vega-lite) are declarative graphics representations, and are the product of years of research into the fundamental language of data visualization. The reference rendering implementation is JavaScript, but the API is language agnostic. There is a Python API under development in the [Altair](https://altair-viz.github.io/) package. 
Though as of summer 2016 it's not yet fully mature, I'm quite excited for the possibilities of this project to provide a common reference point for visualization in Python and other languages.\n", 60 | "\n", 61 | "The visualization space in the Python community is very dynamic, and I fully expect this list to be out of date as soon as it is published.\n", 62 | "Keep an eye out for what's coming in the future!" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "\n", 70 | "< [Visualization with Seaborn](04.14-Visualization-With-Seaborn.ipynb) | [Contents](Index.ipynb) | [Machine Learning](05.00-Machine-Learning.ipynb) >\n", 71 | "\n", 72 | "\"Open\n" 73 | ] 74 | } 75 | ], 76 | "metadata": { 77 | "kernelspec": { 78 | "display_name": "Python 3", 79 | "language": "python", 80 | "name": "python3" 81 | }, 82 | "language_info": { 83 | "codemirror_mode": { 84 | "name": "ipython", 85 | "version": 3 86 | }, 87 | "file_extension": ".py", 88 | "mimetype": "text/x-python", 89 | "name": "python", 90 | "nbconvert_exporter": "python", 91 | "pygments_lexer": "ipython3", 92 | "version": "3.5.1" 93 | } 94 | }, 95 | "nbformat": 4, 96 | "nbformat_minor": 0 97 | } 98 | -------------------------------------------------------------------------------- /notebooks/05.00-Machine-Learning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "\n", 10 | "*This notebook contains an excerpt from the [Python Data Science Handbook](http://shop.oreilly.com/product/0636920034919.do) by Jake VanderPlas; the content is available [on GitHub](https://github.com/jakevdp/PythonDataScienceHandbook).*\n", 11 | "\n", 12 | "*The text is released under the [CC-BY-NC-ND license](https://creativecommons.org/licenses/by-nc-nd/3.0/us/legalcode), and code is released under the [MIT 
license](https://opensource.org/licenses/MIT). If you find this content useful, please consider supporting the work by [buying the book](http://shop.oreilly.com/product/0636920034919.do)!*" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "\n", 20 | "< [Further Resources](04.15-Further-Resources.ipynb) | [Contents](Index.ipynb) | [What Is Machine Learning?](05.01-What-Is-Machine-Learning.ipynb) >\n", 21 | "\n", 22 | "\"Open\n" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "# Machine Learning" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "In many ways, machine learning is the primary means by which data science manifests itself to the broader world.\n", 37 | "Machine learning is where these computational and algorithmic skills of data science meet the statistical thinking of data science, and the result is a collection of approaches to inference and data exploration that are not about effective theory so much as effective computation.\n", 38 | "\n", 39 | "The term \"machine learning\" is sometimes thrown around as if it is some kind of magic pill: *apply machine learning to your data, and all your problems will be solved!*\n", 40 | "As you might expect, the reality is rarely this simple.\n", 41 | "While these methods can be incredibly powerful, to be effective they must be approached with a firm grasp of the strengths and weaknesses of each method, as well as a grasp of general concepts such as bias and variance, overfitting and underfitting, and more.\n", 42 | "\n", 43 | "This chapter will dive into practical aspects of machine learning, primarily using Python's [Scikit-Learn](http://scikit-learn.org) package.\n", 44 | "This is not meant to be a comprehensive introduction to the field of machine learning; that is a large subject and necessitates a more technical approach than we take here.\n", 45 | "Nor is it meant to be a 
comprehensive manual for the use of the Scikit-Learn package (for this, you can refer to the resources listed in [Further Machine Learning Resources](05.15-Learning-More.ipynb)).\n", 46 | "Rather, the goals of this chapter are:\n", 47 | "\n", 48 | "- To introduce the fundamental vocabulary and concepts of machine learning.\n", 49 | "- To introduce the Scikit-Learn API and show some examples of its use.\n", 50 | "- To take a deeper dive into the details of several of the most important machine learning approaches, and develop an intuition into how they work and when and where they are applicable.\n", 51 | "\n", 52 | "Much of this material is drawn from the Scikit-Learn tutorials and workshops I have given on several occasions at PyCon, SciPy, PyData, and other conferences.\n", 53 | "Any clarity in the following pages is likely due to the many workshop participants and co-instructors who have given me valuable feedback on this material over the years!\n", 54 | "\n", 55 | "Finally, if you are seeking a more comprehensive or technical treatment of any of these subjects, I've listed several resources and references in [Further Machine Learning Resources](05.15-Learning-More.ipynb)." 
56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "\n", 63 | "< [Further Resources](04.15-Further-Resources.ipynb) | [Contents](Index.ipynb) | [What Is Machine Learning?](05.01-What-Is-Machine-Learning.ipynb) >\n", 64 | "\n", 65 | "\"Open\n" 66 | ] 67 | } 68 | ], 69 | "metadata": { 70 | "anaconda-cloud": {}, 71 | "kernelspec": { 72 | "display_name": "Python 3", 73 | "language": "python", 74 | "name": "python3" 75 | }, 76 | "language_info": { 77 | "codemirror_mode": { 78 | "name": "ipython", 79 | "version": 3 80 | }, 81 | "file_extension": ".py", 82 | "mimetype": "text/x-python", 83 | "name": "python", 84 | "nbconvert_exporter": "python", 85 | "pygments_lexer": "ipython3", 86 | "version": "3.5.1" 87 | } 88 | }, 89 | "nbformat": 4, 90 | "nbformat_minor": 0 91 | } 92 | -------------------------------------------------------------------------------- /notebooks/05.15-Learning-More.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "deletable": true, 7 | "editable": true 8 | }, 9 | "source": [ 10 | "\n", 11 | "\n", 12 | "\n", 13 | "*This notebook contains an excerpt from the [Python Data Science Handbook](http://shop.oreilly.com/product/0636920034919.do) by Jake VanderPlas; the content is available [on GitHub](https://github.com/jakevdp/PythonDataScienceHandbook).*\n", 14 | "\n", 15 | "*The text is released under the [CC-BY-NC-ND license](https://creativecommons.org/licenses/by-nc-nd/3.0/us/legalcode), and code is released under the [MIT license](https://opensource.org/licenses/MIT). 
If you find this content useful, please consider supporting the work by [buying the book](http://shop.oreilly.com/product/0636920034919.do)!*" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": { 21 | "deletable": true, 22 | "editable": true 23 | }, 24 | "source": [ 25 | "\n", 26 | "< [Application: A Face Detection Pipeline](05.14-Image-Features.ipynb) | [Contents](Index.ipynb) | [Appendix: Figure Code](06.00-Figure-Code.ipynb) >\n", 27 | "\n", 28 | "\"Open\n" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "# Further Machine Learning Resources" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": { 41 | "deletable": true, 42 | "editable": true 43 | }, 44 | "source": [ 45 | "This chapter has been a quick tour of machine learning in Python, primarily using the tools within the Scikit-Learn library.\n", 46 | "As long as the chapter is, it is still too short to cover many interesting and important algorithms, approaches, and discussions.\n", 47 | "Here I want to suggest some resources to learn more about machine learning for those who are interested." 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": { 53 | "deletable": true, 54 | "editable": true 55 | }, 56 | "source": [ 57 | "## Machine Learning in Python\n", 58 | "\n", 59 | "To learn more about machine learning in Python, I'd suggest some of the following resources:\n", 60 | "\n", 61 | "- [The Scikit-Learn website](http://scikit-learn.org): The Scikit-Learn website has an impressive breadth of documentation and examples covering some of the models discussed here, and much, much more. 
If you want a brief survey of the most important and often-used machine learning algorithms, this website is a good place to start.\n", 62 | "\n", 63 | "- *SciPy, PyCon, and PyData tutorial videos*: Scikit-Learn and other machine learning topics are perennial favorites in the tutorial tracks of many Python-focused conference series, in particular the PyCon, SciPy, and PyData conferences. You can find the most recent ones via a simple web search.\n", 64 | "\n", 65 | "- [*Introduction to Machine Learning with Python*](http://shop.oreilly.com/product/0636920030515.do): Written by Andreas C. Mueller and Sarah Guido, this book includes a fuller treatment of the topics in this chapter. If you're interested in reviewing the fundamentals of Machine Learning and pushing the Scikit-Learn toolkit to its limits, this is a great resource, written by one of the most prolific developers on the Scikit-Learn team.\n", 66 | "\n", 67 | "- [*Python Machine Learning*](https://www.packtpub.com/big-data-and-business-intelligence/python-machine-learning): Sebastian Raschka's book focuses less on Scikit-learn itself, and more on the breadth of machine learning tools available in Python. In particular, there is some very useful discussion on how to scale Python-based machine learning approaches to large and complex datasets." 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": { 73 | "deletable": true, 74 | "editable": true 75 | }, 76 | "source": [ 77 | "## General Machine Learning\n", 78 | "\n", 79 | "Of course, machine learning is much broader than just the Python world. There are many good resources to take your knowledge further, and here I will highlight a few that I have found useful:\n", 80 | "\n", 81 | "- [*Machine Learning*](https://www.coursera.org/learn/machine-learning): Taught by Andrew Ng (Coursera), this is a very clearly-taught free online course which covers the basics of machine learning from an algorithmic perspective. 
It assumes undergraduate-level understanding of mathematics and programming, and steps through detailed considerations of some of the most important machine learning algorithms. Homework assignments, which are algorithmically graded, have you actually implement some of these models yourself.\n", 82 | "\n", 83 | "- [*Pattern Recognition and Machine Learning*](http://www.springer.com/us/book/9780387310732): Written by Christopher Bishop, this classic technical text covers the concepts of machine learning discussed in this chapter in detail. If you plan to go further in this subject, you should have this book on your shelf.\n", 84 | "\n", 85 | "- [*Machine Learning: A Probabilistic Perspective*](https://mitpress.mit.edu/books/machine-learning-0): Written by Kevin Murphy, this is an excellent graduate-level text that explores nearly all important machine learning algorithms from a ground-up, unified probabilistic perspective.\n", 86 | "\n", 87 | "These resources are more technical than the material presented in this book, but to really understand the fundamentals of these methods requires a deep dive into the mathematics behind them.\n", 88 | "If you're up for the challenge and ready to bring your data science to the next level, don't hesitate to dive in!" 
89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": { 94 | "deletable": true, 95 | "editable": true 96 | }, 97 | "source": [ 98 | "\n", 99 | "< [Application: A Face Detection Pipeline](05.14-Image-Features.ipynb) | [Contents](Index.ipynb) | [Appendix: Figure Code](06.00-Figure-Code.ipynb) >\n", 100 | "\n", 101 | "\"Open\n" 102 | ] 103 | } 104 | ], 105 | "metadata": { 106 | "anaconda-cloud": {}, 107 | "kernelspec": { 108 | "display_name": "Python 3", 109 | "language": "python", 110 | "name": "python3" 111 | }, 112 | "language_info": { 113 | "codemirror_mode": { 114 | "name": "ipython", 115 | "version": 3 116 | }, 117 | "file_extension": ".py", 118 | "mimetype": "text/x-python", 119 | "name": "python", 120 | "nbconvert_exporter": "python", 121 | "pygments_lexer": "ipython3", 122 | "version": "3.5.1" 123 | } 124 | }, 125 | "nbformat": 4, 126 | "nbformat_minor": 0 127 | } 128 | -------------------------------------------------------------------------------- /notebooks/Index.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Python Data Science Handbook" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "*Jake VanderPlas*\n", 15 | "\n", 16 | "![Book Cover](figures/PDSH-cover.png)" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "This is the Jupyter notebook version of the [Python Data Science Handbook](http://shop.oreilly.com/product/0636920034919.do) by Jake VanderPlas; the content is available [on GitHub](https://github.com/jakevdp/PythonDataScienceHandbook).\n", 24 | "The text is released under the [CC-BY-NC-ND license](https://creativecommons.org/licenses/by-nc-nd/3.0/us/legalcode), and code is released under the [MIT license](https://opensource.org/licenses/MIT). 
If you find this content useful, please consider supporting the work by [buying the book](http://shop.oreilly.com/product/0636920034919.do)!" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "## Table of Contents\n", 32 | "\n", 33 | "### [Preface](00.00-Preface.ipynb)\n", 34 | "\n", 35 | "### [1. IPython: Beyond Normal Python](01.00-IPython-Beyond-Normal-Python.ipynb)\n", 36 | "- [Help and Documentation in IPython](01.01-Help-And-Documentation.ipynb)\n", 37 | "- [Keyboard Shortcuts in the IPython Shell](01.02-Shell-Keyboard-Shortcuts.ipynb)\n", 38 | "- [IPython Magic Commands](01.03-Magic-Commands.ipynb)\n", 39 | "- [Input and Output History](01.04-Input-Output-History.ipynb)\n", 40 | "- [IPython and Shell Commands](01.05-IPython-And-Shell-Commands.ipynb)\n", 41 | "- [Errors and Debugging](01.06-Errors-and-Debugging.ipynb)\n", 42 | "- [Profiling and Timing Code](01.07-Timing-and-Profiling.ipynb)\n", 43 | "- [More IPython Resources](01.08-More-IPython-Resources.ipynb)\n", 44 | "\n", 45 | "### [2. Introduction to NumPy](02.00-Introduction-to-NumPy.ipynb)\n", 46 | "- [Understanding Data Types in Python](02.01-Understanding-Data-Types.ipynb)\n", 47 | "- [The Basics of NumPy Arrays](02.02-The-Basics-Of-NumPy-Arrays.ipynb)\n", 48 | "- [Computation on NumPy Arrays: Universal Functions](02.03-Computation-on-arrays-ufuncs.ipynb)\n", 49 | "- [Aggregations: Min, Max, and Everything In Between](02.04-Computation-on-arrays-aggregates.ipynb)\n", 50 | "- [Computation on Arrays: Broadcasting](02.05-Computation-on-arrays-broadcasting.ipynb)\n", 51 | "- [Comparisons, Masks, and Boolean Logic](02.06-Boolean-Arrays-and-Masks.ipynb)\n", 52 | "- [Fancy Indexing](02.07-Fancy-Indexing.ipynb)\n", 53 | "- [Sorting Arrays](02.08-Sorting.ipynb)\n", 54 | "- [Structured Data: NumPy's Structured Arrays](02.09-Structured-Data-NumPy.ipynb)\n", 55 | "\n", 56 | "### [3. 
Data Manipulation with Pandas](03.00-Introduction-to-Pandas.ipynb)\n", 57 | "- [Introducing Pandas Objects](03.01-Introducing-Pandas-Objects.ipynb)\n", 58 | "- [Data Indexing and Selection](03.02-Data-Indexing-and-Selection.ipynb)\n", 59 | "- [Operating on Data in Pandas](03.03-Operations-in-Pandas.ipynb)\n", 60 | "- [Handling Missing Data](03.04-Missing-Values.ipynb)\n", 61 | "- [Hierarchical Indexing](03.05-Hierarchical-Indexing.ipynb)\n", 62 | "- [Combining Datasets: Concat and Append](03.06-Concat-And-Append.ipynb)\n", 63 | "- [Combining Datasets: Merge and Join](03.07-Merge-and-Join.ipynb)\n", 64 | "- [Aggregation and Grouping](03.08-Aggregation-and-Grouping.ipynb)\n", 65 | "- [Pivot Tables](03.09-Pivot-Tables.ipynb)\n", 66 | "- [Vectorized String Operations](03.10-Working-With-Strings.ipynb)\n", 67 | "- [Working with Time Series](03.11-Working-with-Time-Series.ipynb)\n", 68 | "- [High-Performance Pandas: eval() and query()](03.12-Performance-Eval-and-Query.ipynb)\n", 69 | "- [Further Resources](03.13-Further-Resources.ipynb)\n", 70 | "\n", 71 | "### [4. 
Visualization with Matplotlib](04.00-Introduction-To-Matplotlib.ipynb)\n", 72 | "- [Simple Line Plots](04.01-Simple-Line-Plots.ipynb)\n", 73 | "- [Simple Scatter Plots](04.02-Simple-Scatter-Plots.ipynb)\n", 74 | "- [Visualizing Errors](04.03-Errorbars.ipynb)\n", 75 | "- [Density and Contour Plots](04.04-Density-and-Contour-Plots.ipynb)\n", 76 | "- [Histograms, Binnings, and Density](04.05-Histograms-and-Binnings.ipynb)\n", 77 | "- [Customizing Plot Legends](04.06-Customizing-Legends.ipynb)\n", 78 | "- [Customizing Colorbars](04.07-Customizing-Colorbars.ipynb)\n", 79 | "- [Multiple Subplots](04.08-Multiple-Subplots.ipynb)\n", 80 | "- [Text and Annotation](04.09-Text-and-Annotation.ipynb)\n", 81 | "- [Customizing Ticks](04.10-Customizing-Ticks.ipynb)\n", 82 | "- [Customizing Matplotlib: Configurations and Stylesheets](04.11-Settings-and-Stylesheets.ipynb)\n", 83 | "- [Three-Dimensional Plotting in Matplotlib](04.12-Three-Dimensional-Plotting.ipynb)\n", 84 | "- [Geographic Data with Basemap](04.13-Geographic-Data-With-Basemap.ipynb)\n", 85 | "- [Visualization with Seaborn](04.14-Visualization-With-Seaborn.ipynb)\n", 86 | "- [Further Resources](04.15-Further-Resources.ipynb)\n", 87 | "\n", 88 | "### [5. 
Machine Learning](05.00-Machine-Learning.ipynb)\n", 89 | "- [What Is Machine Learning?](05.01-What-Is-Machine-Learning.ipynb)\n", 90 | "- [Introducing Scikit-Learn](05.02-Introducing-Scikit-Learn.ipynb)\n", 91 | "- [Hyperparameters and Model Validation](05.03-Hyperparameters-and-Model-Validation.ipynb)\n", 92 | "- [Feature Engineering](05.04-Feature-Engineering.ipynb)\n", 93 | "- [In Depth: Naive Bayes Classification](05.05-Naive-Bayes.ipynb)\n", 94 | "- [In Depth: Linear Regression](05.06-Linear-Regression.ipynb)\n", 95 | "- [In-Depth: Support Vector Machines](05.07-Support-Vector-Machines.ipynb)\n", 96 | "- [In-Depth: Decision Trees and Random Forests](05.08-Random-Forests.ipynb)\n", 97 | "- [In Depth: Principal Component Analysis](05.09-Principal-Component-Analysis.ipynb)\n", 98 | "- [In-Depth: Manifold Learning](05.10-Manifold-Learning.ipynb)\n", 99 | "- [In Depth: k-Means Clustering](05.11-K-Means.ipynb)\n", 100 | "- [In Depth: Gaussian Mixture Models](05.12-Gaussian-Mixtures.ipynb)\n", 101 | "- [In-Depth: Kernel Density Estimation](05.13-Kernel-Density-Estimation.ipynb)\n", 102 | "- [Application: A Face Detection Pipeline](05.14-Image-Features.ipynb)\n", 103 | "- [Further Machine Learning Resources](05.15-Learning-More.ipynb)\n", 104 | "\n", 105 | "### [Appendix: Figure Code](06.00-Figure-Code.ipynb)" 106 | ] 107 | } 108 | ], 109 | "metadata": { 110 | "anaconda-cloud": {}, 111 | "kernelspec": { 112 | "display_name": "Python 3", 113 | "language": "python", 114 | "name": "python3" 115 | }, 116 | "language_info": { 117 | "codemirror_mode": { 118 | "name": "ipython", 119 | "version": 3 120 | }, 121 | "file_extension": ".py", 122 | "mimetype": "text/x-python", 123 | "name": "python", 124 | "nbconvert_exporter": "python", 125 | "pygments_lexer": "ipython3", 126 | "version": "3.5.1" 127 | } 128 | }, 129 | "nbformat": 4, 130 | "nbformat_minor": 0 131 | } 132 | -------------------------------------------------------------------------------- 
/notebooks/data/president_heights.csv: -------------------------------------------------------------------------------- 1 | order,name,height(cm) 2 | 1,George Washington,189 3 | 2,John Adams,170 4 | 3,Thomas Jefferson,189 5 | 4,James Madison,163 6 | 5,James Monroe,183 7 | 6,John Quincy Adams,171 8 | 7,Andrew Jackson,185 9 | 8,Martin Van Buren,168 10 | 9,William Henry Harrison,173 11 | 10,John Tyler,183 12 | 11,James K. Polk,173 13 | 12,Zachary Taylor,173 14 | 13,Millard Fillmore,175 15 | 14,Franklin Pierce,178 16 | 15,James Buchanan,183 17 | 16,Abraham Lincoln,193 18 | 17,Andrew Johnson,178 19 | 18,Ulysses S. Grant,173 20 | 19,Rutherford B. Hayes,174 21 | 20,James A. Garfield,183 22 | 21,Chester A. Arthur,183 23 | 23,Benjamin Harrison,168 24 | 25,William McKinley,170 25 | 26,Theodore Roosevelt,178 26 | 27,William Howard Taft,182 27 | 28,Woodrow Wilson,180 28 | 29,Warren G. Harding,183 29 | 30,Calvin Coolidge,178 30 | 31,Herbert Hoover,182 31 | 32,Franklin D. Roosevelt,188 32 | 33,Harry S. Truman,175 33 | 34,Dwight D. Eisenhower,179 34 | 35,John F. Kennedy,183 35 | 36,Lyndon B. Johnson,193 36 | 37,Richard Nixon,182 37 | 38,Gerald Ford,183 38 | 39,Jimmy Carter,177 39 | 40,Ronald Reagan,185 40 | 41,George H. W. Bush,188 41 | 42,Bill Clinton,188 42 | 43,George W. 
Bush,182 43 | 44,Barack Obama,185 44 | -------------------------------------------------------------------------------- /notebooks/data/state-abbrevs.csv: -------------------------------------------------------------------------------- 1 | "state","abbreviation" 2 | "Alabama","AL" 3 | "Alaska","AK" 4 | "Arizona","AZ" 5 | "Arkansas","AR" 6 | "California","CA" 7 | "Colorado","CO" 8 | "Connecticut","CT" 9 | "Delaware","DE" 10 | "District of Columbia","DC" 11 | "Florida","FL" 12 | "Georgia","GA" 13 | "Hawaii","HI" 14 | "Idaho","ID" 15 | "Illinois","IL" 16 | "Indiana","IN" 17 | "Iowa","IA" 18 | "Kansas","KS" 19 | "Kentucky","KY" 20 | "Louisiana","LA" 21 | "Maine","ME" 22 | "Montana","MT" 23 | "Nebraska","NE" 24 | "Nevada","NV" 25 | "New Hampshire","NH" 26 | "New Jersey","NJ" 27 | "New Mexico","NM" 28 | "New York","NY" 29 | "North Carolina","NC" 30 | "North Dakota","ND" 31 | "Ohio","OH" 32 | "Oklahoma","OK" 33 | "Oregon","OR" 34 | "Maryland","MD" 35 | "Massachusetts","MA" 36 | "Michigan","MI" 37 | "Minnesota","MN" 38 | "Mississippi","MS" 39 | "Missouri","MO" 40 | "Pennsylvania","PA" 41 | "Rhode Island","RI" 42 | "South Carolina","SC" 43 | "South Dakota","SD" 44 | "Tennessee","TN" 45 | "Texas","TX" 46 | "Utah","UT" 47 | "Vermont","VT" 48 | "Virginia","VA" 49 | "Washington","WA" 50 | "West Virginia","WV" 51 | "Wisconsin","WI" 52 | "Wyoming","WY" -------------------------------------------------------------------------------- /notebooks/data/state-areas.csv: -------------------------------------------------------------------------------- 1 | state,area (sq. 
mi) 2 | Alabama,52423 3 | Alaska,656425 4 | Arizona,114006 5 | Arkansas,53182 6 | California,163707 7 | Colorado,104100 8 | Connecticut,5544 9 | Delaware,1954 10 | Florida,65758 11 | Georgia,59441 12 | Hawaii,10932 13 | Idaho,83574 14 | Illinois,57918 15 | Indiana,36420 16 | Iowa,56276 17 | Kansas,82282 18 | Kentucky,40411 19 | Louisiana,51843 20 | Maine,35387 21 | Maryland,12407 22 | Massachusetts,10555 23 | Michigan,96810 24 | Minnesota,86943 25 | Mississippi,48434 26 | Missouri,69709 27 | Montana,147046 28 | Nebraska,77358 29 | Nevada,110567 30 | New Hampshire,9351 31 | New Jersey,8722 32 | New Mexico,121593 33 | New York,54475 34 | North Carolina,53821 35 | North Dakota,70704 36 | Ohio,44828 37 | Oklahoma,69903 38 | Oregon,98386 39 | Pennsylvania,46058 40 | Rhode Island,1545 41 | South Carolina,32007 42 | South Dakota,77121 43 | Tennessee,42146 44 | Texas,268601 45 | Utah,84904 46 | Vermont,9615 47 | Virginia,42769 48 | Washington,71303 49 | West Virginia,24231 50 | Wisconsin,65503 51 | Wyoming,97818 52 | District of Columbia,68 53 | Puerto Rico,3515 54 | -------------------------------------------------------------------------------- /notebooks/figures/02.05-broadcasting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/notebooks/figures/02.05-broadcasting.png -------------------------------------------------------------------------------- /notebooks/figures/03.08-split-apply-combine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/notebooks/figures/03.08-split-apply-combine.png -------------------------------------------------------------------------------- /notebooks/figures/05.01-classification-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/notebooks/figures/05.01-classification-1.png -------------------------------------------------------------------------------- /notebooks/figures/05.01-classification-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/notebooks/figures/05.01-classification-2.png -------------------------------------------------------------------------------- /notebooks/figures/05.01-classification-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/notebooks/figures/05.01-classification-3.png -------------------------------------------------------------------------------- /notebooks/figures/05.01-clustering-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/notebooks/figures/05.01-clustering-1.png -------------------------------------------------------------------------------- /notebooks/figures/05.01-clustering-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/notebooks/figures/05.01-clustering-2.png -------------------------------------------------------------------------------- /notebooks/figures/05.01-dimesionality-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/notebooks/figures/05.01-dimesionality-1.png -------------------------------------------------------------------------------- /notebooks/figures/05.01-dimesionality-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/notebooks/figures/05.01-dimesionality-2.png -------------------------------------------------------------------------------- /notebooks/figures/05.01-regression-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/notebooks/figures/05.01-regression-1.png -------------------------------------------------------------------------------- /notebooks/figures/05.01-regression-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/notebooks/figures/05.01-regression-2.png -------------------------------------------------------------------------------- /notebooks/figures/05.01-regression-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/notebooks/figures/05.01-regression-3.png -------------------------------------------------------------------------------- /notebooks/figures/05.01-regression-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/notebooks/figures/05.01-regression-4.png 
-------------------------------------------------------------------------------- /notebooks/figures/05.02-samples-features.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/notebooks/figures/05.02-samples-features.png -------------------------------------------------------------------------------- /notebooks/figures/05.03-2-fold-CV.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/notebooks/figures/05.03-2-fold-CV.png -------------------------------------------------------------------------------- /notebooks/figures/05.03-5-fold-CV.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/notebooks/figures/05.03-5-fold-CV.png -------------------------------------------------------------------------------- /notebooks/figures/05.03-bias-variance-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/notebooks/figures/05.03-bias-variance-2.png -------------------------------------------------------------------------------- /notebooks/figures/05.03-bias-variance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/notebooks/figures/05.03-bias-variance.png -------------------------------------------------------------------------------- /notebooks/figures/05.03-learning-curve.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/notebooks/figures/05.03-learning-curve.png -------------------------------------------------------------------------------- /notebooks/figures/05.03-validation-curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/notebooks/figures/05.03-validation-curve.png -------------------------------------------------------------------------------- /notebooks/figures/05.05-gaussian-NB.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/notebooks/figures/05.05-gaussian-NB.png -------------------------------------------------------------------------------- /notebooks/figures/05.06-gaussian-basis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/notebooks/figures/05.06-gaussian-basis.png -------------------------------------------------------------------------------- /notebooks/figures/05.08-decision-tree-levels.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/notebooks/figures/05.08-decision-tree-levels.png -------------------------------------------------------------------------------- /notebooks/figures/05.08-decision-tree-overfitting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/notebooks/figures/05.08-decision-tree-overfitting.png 
-------------------------------------------------------------------------------- /notebooks/figures/05.08-decision-tree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/notebooks/figures/05.08-decision-tree.png -------------------------------------------------------------------------------- /notebooks/figures/05.09-PCA-rotation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/notebooks/figures/05.09-PCA-rotation.png -------------------------------------------------------------------------------- /notebooks/figures/05.09-digits-pca-components.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/notebooks/figures/05.09-digits-pca-components.png -------------------------------------------------------------------------------- /notebooks/figures/05.09-digits-pixel-components.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/notebooks/figures/05.09-digits-pixel-components.png -------------------------------------------------------------------------------- /notebooks/figures/05.10-LLE-vs-MDS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/notebooks/figures/05.10-LLE-vs-MDS.png -------------------------------------------------------------------------------- /notebooks/figures/05.11-expectation-maximization.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/notebooks/figures/05.11-expectation-maximization.png -------------------------------------------------------------------------------- /notebooks/figures/05.12-covariance-type.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/notebooks/figures/05.12-covariance-type.png -------------------------------------------------------------------------------- /notebooks/figures/Data_Science_VD.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/notebooks/figures/Data_Science_VD.png -------------------------------------------------------------------------------- /notebooks/figures/PDSH-cover-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/notebooks/figures/PDSH-cover-small.png -------------------------------------------------------------------------------- /notebooks/figures/PDSH-cover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/notebooks/figures/PDSH-cover.png -------------------------------------------------------------------------------- /notebooks/figures/array_vs_list.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/notebooks/figures/array_vs_list.png 
def visualize_tree(estimator, X, y, boundaries=True,
                   xlim=None, ylim=None, ax=None):
    """Fit ``estimator`` on ``(X, y)`` and draw its decision regions.

    The training points are scattered on the axes, the estimator is fitted,
    its predictions over a 200 x 200 grid spanning ``xlim`` x ``ylim`` are
    drawn as translucent filled contours and, optionally, the split lines
    stored in ``estimator.tree_`` are overlaid in black.

    Parameters
    ----------
    estimator : object
        Must provide ``fit(X, y)`` and ``predict(points)``. ``tree_`` (with
        ``feature``, ``threshold``, ``children_left``, ``children_right``)
        is read only when ``boundaries`` is true.
    X : array
        Indexed as ``X[:, 0]`` and ``X[:, 1]``; only these two columns are
        plotted.
    y : array
        Class labels used for colouring.
        NOTE(review): the contour levels are ``arange(n_classes + 1) - 0.5``,
        so labels are presumably the integers ``0 .. n_classes - 1`` --
        confirm with callers.
    boundaries : bool
        Draw the tree's split lines when true.
    xlim, ylim : pair of floats, optional
        Plot extent; defaults to the axes limits after the scatter is drawn.
    ax : matplotlib axes, optional
        Target axes; defaults to ``plt.gca()``.
    """
    if ax is None:
        ax = plt.gca()

    # One colour range shared by the points and the regions behind them.
    color_range = (y.min(), y.max())

    # Plot the training points
    ax.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap='viridis',
               clim=color_range, zorder=3)
    ax.axis('tight')
    ax.axis('off')
    if xlim is None:
        xlim = ax.get_xlim()
    if ylim is None:
        ylim = ax.get_ylim()

    # Fit the estimator and predict on a regular grid over the plot extent
    estimator.fit(X, y)
    xx, yy = np.meshgrid(np.linspace(*xlim, num=200),
                         np.linspace(*ylim, num=200))
    Z = estimator.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

    # Put the result into a color plot. contourf takes its colour limits
    # through vmin/vmax; ``clim`` is not one of its parameters.
    n_classes = len(np.unique(y))
    ax.contourf(xx, yy, Z, alpha=0.3,
                levels=np.arange(n_classes + 1) - 0.5,
                cmap='viridis',
                vmin=color_range[0], vmax=color_range[1],
                zorder=1)

    ax.set(xlim=xlim, ylim=ylim)

    # Plot the decision boundaries
    def plot_boundaries(i, xlim, ylim):
        # A negative node index ends the recursion (presumably how the
        # children arrays mark "no child" -- confirm against sklearn docs).
        if i >= 0:
            tree = estimator.tree_

            if tree.feature[i] == 0:
                # Split on the first feature: vertical line, then recurse
                # into the left/right halves of the current x-range.
                ax.plot([tree.threshold[i], tree.threshold[i]], ylim,
                        '-k', zorder=2)
                plot_boundaries(tree.children_left[i],
                                [xlim[0], tree.threshold[i]], ylim)
                plot_boundaries(tree.children_right[i],
                                [tree.threshold[i], xlim[1]], ylim)

            elif tree.feature[i] == 1:
                # Split on the second feature: horizontal line, then recurse
                # into the lower/upper halves of the current y-range.
                ax.plot(xlim, [tree.threshold[i], tree.threshold[i]],
                        '-k', zorder=2)
                plot_boundaries(tree.children_left[i], xlim,
                                [ylim[0], tree.threshold[i]])
                plot_boundaries(tree.children_right[i], xlim,
                                [tree.threshold[i], ylim[1]])

    if boundaries:
        plot_boundaries(0, xlim, ylim)


def plot_tree_interactive(X, y):
    """Return an ``interact`` widget that redraws a depth-limited tree.

    The ``depth`` control is built from the list ``[1, 5]``; each selection
    fits a fresh ``DecisionTreeClassifier(max_depth=depth, random_state=0)``
    and draws it with ``visualize_tree``.
    """
    def interactive_tree(depth=5):
        clf = DecisionTreeClassifier(max_depth=depth, random_state=0)
        visualize_tree(clf, X, y)

    return interact(interactive_tree, depth=[1, 5])


def randomized_tree_interactive(X, y):
    """Show an ``interact`` widget that fits a tree on a random 75% subset.

    The ``random_state`` control is built from the list ``[0, 100]``. Each
    selection shuffles the sample indices with that seed, keeps the first
    75% and draws a depth-15 tree without split lines, always using the
    extent of the full data set so that successive plots are comparable.
    """
    N = int(0.75 * X.shape[0])

    xlim = (X[:, 0].min(), X[:, 0].max())
    ylim = (X[:, 1].min(), X[:, 1].max())

    def fit_randomized_tree(random_state=0):
        clf = DecisionTreeClassifier(max_depth=15)
        i = np.arange(len(y))
        rng = np.random.RandomState(random_state)
        rng.shuffle(i)
        visualize_tree(clf, X[i[:N]], y[i[:N]], boundaries=False,
                       xlim=xlim, ylim=ylim)

    interact(fit_randomized_tree, random_state=[0, 100])
4 | 5 | - ``generate_contents.py``: this will generate a markdown table of contents for use in the README and in the Index.ipynb notebook 6 | 7 | - ``add_navigation.py``: this script adds navigation links at the top and bottom of each notebook. 8 | 9 | - ``add_book_info.py``: this script adds book information to the top of each notebook. -------------------------------------------------------------------------------- /tools/add_book_info.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import nbformat 4 | from nbformat.v4.nbbase import new_markdown_cell 5 | 6 | from generate_contents import iter_notebooks, NOTEBOOK_DIR 7 | 8 | 9 | BOOK_COMMENT = "" 10 | 11 | 12 | BOOK_INFO = BOOK_COMMENT + """ 13 | 14 | 15 | *This notebook contains an excerpt from the [Python Data Science Handbook](http://shop.oreilly.com/product/0636920034919.do) by Jake VanderPlas; the content is available [on GitHub](https://github.com/jakevdp/PythonDataScienceHandbook).* 16 | 17 | *The text is released under the [CC-BY-NC-ND license](https://creativecommons.org/licenses/by-nc-nd/3.0/us/legalcode), and code is released under the [MIT license](https://opensource.org/licenses/MIT). 
def prev_this_next(it):
    """Return an iterator of ``(previous, current, next)`` triples over *it*.

    ``previous`` is ``None`` for the first item and ``next`` is ``None`` for
    the last one. An empty iterable produces no triples.
    """
    a, b, c = itertools.tee(it, 3)
    # Move the look-ahead copy one step forward. The default stops an empty
    # input from raising StopIteration here.
    next(c, None)
    return zip(itertools.chain([None], a), b, itertools.chain(c, [None]))
def write_navbars():
    """Insert or refresh the navigation bar in every notebook, in place.

    For each ``(path, navbar)`` pair from ``iter_navbars()`` the notebook is
    read, the navbar is written as the second cell and as the last cell, and
    the notebook is saved back to the same path. A cell whose source starts
    with ``NAV_COMMENT`` counts as an existing navbar and is overwritten
    rather than duplicated.
    """
    def is_comment(cell):
        return cell.source.startswith(NAV_COMMENT)

    for nb_name, navbar in iter_navbars():
        nb = nbformat.read(nb_name, as_version=4)
        nb_file = os.path.basename(nb_name)

        # Top navbar lives at index 1. The length check keeps notebooks with
        # fewer than two cells from raising IndexError; they fall through to
        # the insert branch, which accepts any list length.
        if len(nb.cells) > 1 and is_comment(nb.cells[1]):
            print("- amending navbar for {0}".format(nb_file))
            nb.cells[1].source = navbar
        else:
            print("- inserting navbar for {0}".format(nb_file))
            nb.cells.insert(1, new_markdown_cell(source=navbar))

        # Bottom navbar. At least one cell exists by now, so [-1] is safe.
        if is_comment(nb.cells[-1]):
            nb.cells[-1].source = navbar
        else:
            nb.cells.append(new_markdown_cell(source=navbar))
        nbformat.write(nb, nb_name)
def iter_notebooks():
    """Return the sorted file names in NOTEBOOK_DIR that match REG."""
    return sorted(nb for nb in os.listdir(NOTEBOOK_DIR) if REG.match(nb))


def get_notebook_title(nb_file):
    """Return the text of the first markdown heading in a notebook.

    The first markdown cell whose source starts with ``#`` supplies the
    title: its first line with all leading ``#`` characters and surrounding
    whitespace removed. Returns ``None`` when no such cell exists.
    """
    nb = nbformat.read(os.path.join(NOTEBOOK_DIR, nb_file), as_version=4)
    for cell in nb.cells:
        # Only markdown cells can hold a heading; a code cell that opens
        # with a ``#`` comment is not a title.
        if cell.cell_type == 'markdown' and cell.source.startswith('#'):
            # lstrip('#') also handles '##'/'###' headings, and taking the
            # first line before stripping is safe for a bare '#'.
            return cell.source.splitlines()[0].lstrip('#').strip()
    return None


def gen_contents(directory=None):
    """Yield the markdown lines of the table of contents.

    Section ``00`` of a chapter becomes a ``###`` heading (numbered, except
    for chapters ``00`` and ``06``); every other notebook becomes a bullet.

    Parameters
    ----------
    directory : str, optional
        Prefix (local directory or URL) placed in front of each notebook
        file name in the generated links.
    """
    for nb in iter_notebooks():
        if directory:
            # These are link targets, so always join with '/', never with
            # the operating system's path separator.
            nb_url = directory.rstrip('/') + '/' + nb
        else:
            nb_url = nb
        chapter, section, slug = REG.match(nb).groups()
        # Fall back to the file-name slug if the notebook has no heading,
        # rather than emitting a "[None](...)" link.
        title = get_notebook_title(nb) or slug.replace('-', ' ')
        if section == '00':
            if chapter in ['00', '06']:
                yield '\n### [{0}]({1})'.format(title, nb_url)
            else:
                yield '\n### [{0}. {1}]({2})'.format(int(chapter),
                                                     title, nb_url)
        else:
            yield "- [{0}]({1})".format(title, nb_url)


def print_contents(directory=None):
    """Print the table of contents produced by ``gen_contents``."""
    print('\n'.join(gen_contents(directory)))
FTP_USER=anonymous 13 | FTP_TARGET_DIR=/ 14 | 15 | SSH_HOST=localhost 16 | SSH_PORT=22 17 | SSH_USER=root 18 | SSH_TARGET_DIR=/var/www 19 | 20 | S3_BUCKET=my_s3_bucket 21 | 22 | CLOUDFILES_USERNAME=my_rackspace_username 23 | CLOUDFILES_API_KEY=my_rackspace_api_key 24 | CLOUDFILES_CONTAINER=my_cloudfiles_container 25 | 26 | DROPBOX_DIR=~/Dropbox/Public/ 27 | 28 | GITHUB_PAGES_REMOTE=git@github.com:jakevdp/PythonDataScienceHandbook.git 29 | GITHUB_PAGES_BRANCH=gh-pages 30 | 31 | GIT_COMMIT_HASH = $(shell git rev-parse HEAD) 32 | 33 | DEBUG ?= 0 34 | ifeq ($(DEBUG), 1) 35 | PELICANOPTS += -D 36 | endif 37 | 38 | RELATIVE ?= 0 39 | ifeq ($(RELATIVE), 1) 40 | PELICANOPTS += --relative-urls 41 | endif 42 | 43 | 44 | help: 45 | @echo 'Makefile for a pelican Web site ' 46 | @echo ' ' 47 | @echo 'Usage: ' 48 | @echo ' make html (re)generate the web site ' 49 | @echo ' make clean remove the generated files ' 50 | @echo ' make regenerate regenerate files upon modification ' 51 | @echo ' make publish generate using production settings ' 52 | @echo ' make serve [PORT=8000] serve site at http://localhost:8000' 53 | @echo ' make serve-global [SERVER=0.0.0.0] serve (as root) to $(SERVER):80 ' 54 | @echo ' make devserver [PORT=8000] start/restart develop_server.sh ' 55 | @echo ' make stopserver stop local server ' 56 | @echo ' make ssh_upload upload the web site via SSH ' 57 | @echo ' make rsync_upload upload the web site via rsync+ssh ' 58 | @echo ' make dropbox_upload upload the web site via Dropbox ' 59 | @echo ' make ftp_upload upload the web site via FTP ' 60 | @echo ' make s3_upload upload the web site via S3 ' 61 | @echo ' make cf_upload upload the web site via Cloud Files' 62 | @echo ' make github upload the web site via gh-pages ' 63 | @echo ' ' 64 | @echo 'Set the DEBUG variable to 1 to enable debugging, e.g. 
make DEBUG=1 html ' 65 | @echo 'Set the RELATIVE variable to 1 to enable relative urls ' 66 | @echo ' ' 67 | 68 | html: 69 | $(PELICAN) $(INPUTDIR) -o $(OUTPUTDIR) -s $(CONFFILE) $(PELICANOPTS) 70 | 71 | clean: 72 | [ ! -d $(OUTPUTDIR) ] || rm -rf $(OUTPUTDIR) 73 | 74 | regenerate: 75 | $(PELICAN) -r $(INPUTDIR) -o $(OUTPUTDIR) -s $(CONFFILE) $(PELICANOPTS) 76 | 77 | serve: 78 | ifdef PORT 79 | cd $(OUTPUTDIR) && $(PY) -m pelican.server $(PORT) 80 | else 81 | cd $(OUTPUTDIR) && $(PY) -m pelican.server 82 | endif 83 | 84 | serve-global: 85 | ifdef SERVER 86 | cd $(OUTPUTDIR) && $(PY) -m pelican.server 80 $(SERVER) 87 | else 88 | cd $(OUTPUTDIR) && $(PY) -m pelican.server 80 0.0.0.0 89 | endif 90 | 91 | 92 | devserver: 93 | ifdef PORT 94 | $(BASEDIR)/develop_server.sh restart $(PORT) 95 | else 96 | $(BASEDIR)/develop_server.sh restart 97 | endif 98 | 99 | stopserver: 100 | $(BASEDIR)/develop_server.sh stop 101 | @echo 'Stopped Pelican and SimpleHTTPServer processes running in background.' 102 | 103 | publish: 104 | $(PELICAN) $(INPUTDIR) -o $(OUTPUTDIR) -s $(PUBLISHCONF) $(PELICANOPTS) 105 | 106 | ssh_upload: publish 107 | scp -P $(SSH_PORT) -r $(OUTPUTDIR)/* $(SSH_USER)@$(SSH_HOST):$(SSH_TARGET_DIR) 108 | 109 | rsync_upload: publish 110 | rsync -e "ssh -p $(SSH_PORT)" -P -rvzc --delete $(OUTPUTDIR)/ $(SSH_USER)@$(SSH_HOST):$(SSH_TARGET_DIR) --cvs-exclude 111 | 112 | dropbox_upload: publish 113 | cp -r $(OUTPUTDIR)/* $(DROPBOX_DIR) 114 | 115 | ftp_upload: publish 116 | lftp ftp://$(FTP_USER)@$(FTP_HOST) -e "mirror -R $(OUTPUTDIR) $(FTP_TARGET_DIR) ; quit" 117 | 118 | s3_upload: publish 119 | s3cmd sync $(OUTPUTDIR)/ s3://$(S3_BUCKET) --acl-public --delete-removed --guess-mime-type --no-mime-magic --no-preserve 120 | 121 | cf_upload: publish 122 | cd $(OUTPUTDIR) && swift -v -A https://auth.api.rackspacecloud.com/v1.0 -U $(CLOUDFILES_USERNAME) -K $(CLOUDFILES_API_KEY) upload -c $(CLOUDFILES_CONTAINER) . 
123 | 124 | publish-to-github: publish 125 | ghp-import -n -m "publish-to-github from $(GIT_COMMIT_HASH)" -b blog-build $(OUTPUTDIR) 126 | git push $(GITHUB_PAGES_REMOTE) blog-build:$(GITHUB_PAGES_BRANCH) 127 | 128 | publish-to-github-force: publish 129 | ghp-import -n -m "publish-to-github-force from $(GIT_COMMIT_HASH)" -b blog-build $(OUTPUTDIR) 130 | git push -f $(GITHUB_PAGES_REMOTE) blog-build:$(GITHUB_PAGES_BRANCH) 131 | 132 | .PHONY: html help clean regenerate serve serve-global devserver stopserver publish ssh_upload rsync_upload dropbox_upload ftp_upload s3_upload cf_upload github 133 | -------------------------------------------------------------------------------- /website/README.md: -------------------------------------------------------------------------------- 1 | # Tools for creating http://jakevdp.github.io/PythonDataScienceHandbook/ 2 | 3 | The website is generated using the [Pelican](http://docs.getpelican.com/) static site generator. 4 | The themes here are adapted from those used for my blog: https://github.com/jakevdp/jakevdp.github.io-source 5 | 6 | ## Building the Website 7 | 8 | Clone the repository & make sure submodules are included 9 | 10 | ``` 11 | $ git clone https://github.com/jakevdp/PythonDataScienceHandbook.git 12 | $ git checkout origin/website 13 | $ git submodule update --init --recursive 14 | $ cd website 15 | ``` 16 | 17 | Install the required packages: 18 | 19 | ``` 20 | $ conda create -n pelican-blog python=3.5 jupyter notebook 21 | $ source activate pelican-blog 22 | $ pip install pelican Markdown ghp-import 23 | $ mkdir plugins 24 | $ git submodule add git://github.com/danielfrg/pelican-ipynb.git plugins/ipynb 25 | $ git submodule add https://github.com/getpelican/pelican-plugins.git plugins/pelican-plugins 26 | ``` 27 | 28 | Copy the notebook content to the right location (this script also modifies some links for the HTML): 29 | ``` 30 | $ python copy_notebooks.py 31 | ``` 32 | 33 | Build the html and serve locally: 34 |
35 | ``` 36 | $ make html 37 | $ make serve 38 | $ open http://localhost:8000 39 | ``` 40 | 41 | Deploy to github pages 42 | 43 | ``` 44 | $ make publish-to-github 45 | ``` 46 | -------------------------------------------------------------------------------- /website/content/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/website/content/favicon.ico -------------------------------------------------------------------------------- /website/copy_notebooks.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script copies all notebooks from the book into the website directory, and 3 | creates pages which wrap them and link together. 4 | """ 5 | import os 6 | import nbformat 7 | import shutil 8 | 9 | PAGEFILE = """title: {title} 10 | url: 11 | save_as: {htmlfile} 12 | Template: {template} 13 | 14 | {{% notebook notebooks/{notebook_file} cells[{cells}] %}} 15 | """ 16 | 17 | INTRO_TEXT = """This website contains the full text of the [Python Data Science Handbook](http://shop.oreilly.com/product/0636920034919.do) by Jake VanderPlas; the content is available [on GitHub](https://github.com/jakevdp/PythonDataScienceHandbook) in the form of Jupyter notebooks. 18 | 19 | The text is released under the [CC-BY-NC-ND license](https://creativecommons.org/licenses/by-nc-nd/3.0/us/legalcode), and code is released under the [MIT license](https://opensource.org/licenses/MIT). 20 | 21 | If you find this content useful, please consider supporting the work by [buying the book](http://shop.oreilly.com/product/0636920034919.do)! 
def copy_notebooks():
    """Copy the book's notebooks into the website tree and write page stubs.

    Steps, in order:

    1. Create ``NB_DEST_DIR`` and ``PAGE_DEST_DIR`` if they are missing.
    2. Replace ``content/figures`` with a fresh copy of
       ``../notebooks/figures``.
    3. For every ``*.ipynb`` in ``NB_SOURCE_DIR``: read it, work out its page
       title and template, rewrite notebook and figure links inside markdown
       cells to their website equivalents, write the modified notebook to
       ``NB_DEST_DIR`` and a ``PAGEFILE`` stub to ``PAGE_DEST_DIR``.

    Raises
    ------
    ValueError
        If a notebook other than ``Index.ipynb`` does not have a single-line
        ``#`` heading as its third cell.
    """
    if not os.path.exists(NB_DEST_DIR):
        os.makedirs(NB_DEST_DIR)
    if not os.path.exists(PAGE_DEST_DIR):
        os.makedirs(PAGE_DEST_DIR)

    nblist = sorted(nb for nb in os.listdir(NB_SOURCE_DIR)
                    if nb.endswith('.ipynb'))
    name_map = {nb: nb.rsplit('.', 1)[0].lower() + '.html'
                for nb in nblist}

    figsource = abspath_from_here('..', 'notebooks', 'figures')
    figdest = abspath_from_here('content', 'figures')

    if os.path.exists(figdest):
        shutil.rmtree(figdest)
    shutil.copytree(figsource, figdest)

    # Keys and values are link text inside notebook markdown, so they are
    # joined with '/' regardless of the operating system's path separator.
    figure_map = {'figures/' + fig: '/PythonDataScienceHandbook/figures/' + fig
                  for fig in os.listdir(figdest)}

    # An empty marker would match every cell (str.startswith('') is always
    # true), so the navbar comment is spelled out.
    # NOTE(review): assumes navbar cells start with this HTML comment --
    # confirm against NAV_COMMENT in tools/add_navigation.py.
    nav_marker = "<!--NAVIGATION-->"

    for nb in nblist:
        base, ext = os.path.splitext(nb)
        print('-', nb)

        content = nbformat.read(os.path.join(NB_SOURCE_DIR, nb),
                                as_version=4)

        if nb == 'Index.ipynb':
            # content[0] is the title
            # content[1] is the cover image
            # content[2] is the license
            cells = '1:'
            template = 'page'
            title = 'Python Data Science Handbook'
            content.cells[2].source = INTRO_TEXT
        else:
            # content[0] is the book information
            # content[1] is the navigation bar
            # content[2] is the title
            cells = '2:'
            template = 'booksection'
            # Report a short notebook with the same error as a malformed
            # title instead of a bare IndexError.
            if len(content.cells) < 3:
                raise ValueError('title not found in third cell')
            title = content.cells[2].source
            if not title.startswith('#') or len(title.splitlines()) > 1:
                raise ValueError('title not found in third cell')
            title = title.lstrip('#').strip()

            # put nav below title
            content.cells.insert(0, content.cells.pop(2))

        # Replace internal URLs and figure links in notebook
        for cell in content.cells:
            if cell.cell_type != 'markdown':
                continue
            for nbname, htmlname in name_map.items():
                if nbname in cell.source:
                    cell.source = cell.source.replace(nbname, htmlname)
            for figname, newfigname in figure_map.items():
                if figname in cell.source:
                    cell.source = cell.source.replace(figname, newfigname)
            if cell.source.startswith(nav_marker):
                # Undo replacement of notebook link in the colab badge:
                # the last occurrence of this notebook's .html name is
                # turned back into the .ipynb file name.
                cell.source = nb.join(cell.source.rsplit(name_map[nb], 1))

        nbformat.write(content, os.path.join(NB_DEST_DIR, nb))

        pagefile = os.path.join(PAGE_DEST_DIR, base + '.md')
        htmlfile = base.lower() + '.html'
        with open(pagefile, 'w') as f:
            f.write(PAGEFILE.format(title=title,
                                    htmlfile=htmlfile,
                                    notebook_file=nb,
                                    template=template,
                                    cells=cells))
27 | PORT = 8000 28 | 29 | def clean(): 30 | """Remove generated files""" 31 | if os.path.isdir(DEPLOY_PATH): 32 | shutil.rmtree(DEPLOY_PATH) 33 | os.makedirs(DEPLOY_PATH) 34 | 35 | def build(): 36 | """Build local version of site""" 37 | local('pelican -s pelicanconf.py') 38 | 39 | def rebuild(): 40 | """`build` with the delete switch""" 41 | local('pelican -d -s pelicanconf.py') 42 | 43 | def regenerate(): 44 | """Automatically regenerate site upon file modification""" 45 | local('pelican -r -s pelicanconf.py') 46 | 47 | def serve(): 48 | """Serve site at http://localhost:8000/""" 49 | os.chdir(env.deploy_path) 50 | 51 | class AddressReuseTCPServer(SocketServer.TCPServer): 52 | allow_reuse_address = True 53 | 54 | server = AddressReuseTCPServer(('', PORT), ComplexHTTPRequestHandler) 55 | 56 | sys.stderr.write('Serving on port {0} ...\n'.format(PORT)) 57 | server.serve_forever() 58 | 59 | def reserve(): 60 | """`build`, then `serve`""" 61 | build() 62 | serve() 63 | 64 | def preview(): 65 | """Build production version of site""" 66 | local('pelican -s publishconf.py') 67 | 68 | def cf_upload(): 69 | """Publish to Rackspace Cloud Files""" 70 | rebuild() 71 | with lcd(DEPLOY_PATH): 72 | local('swift -v -A https://auth.api.rackspacecloud.com/v1.0 ' 73 | '-U {cloudfiles_username} ' 74 | '-K {cloudfiles_api_key} ' 75 | 'upload -c {cloudfiles_container} .'.format(**env)) 76 | 77 | @hosts(production) 78 | def publish(): 79 | """Publish to production via rsync""" 80 | local('pelican -s publishconf.py') 81 | project.rsync_project( 82 | remote_dir=dest_path, 83 | exclude=".DS_Store", 84 | local_dir=DEPLOY_PATH.rstrip('/') + '/', 85 | delete=True, 86 | extra_opts='-c', 87 | ) 88 | 89 | def gh_pages(): 90 | """Publish to GitHub Pages""" 91 | rebuild() 92 | local("ghp-import -b {github_pages_branch} {deploy_path} -p".format(**env)) 93 | -------------------------------------------------------------------------------- /website/pelicanconf.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- # 3 | from __future__ import unicode_literals 4 | 5 | AUTHOR = 'Jake VanderPlas' 6 | SITENAME = 'Python Data Science Handbook' 7 | SITESUBTITLE = u'Essential Tools for Working with Data' 8 | SITEURL = '' 9 | SITESUBURL = 'PythonDataScienceHandbook/' 10 | PATH = 'content' 11 | TIMEZONE = 'America/Los_Angeles' 12 | DEFAULT_LANG = 'en' 13 | 14 | # Feed generation is usually not desired when developing 15 | FEED_ALL_ATOM = None 16 | CATEGORY_FEED_ATOM = None 17 | TRANSLATION_FEED_ATOM = None 18 | AUTHOR_FEED_ATOM = None 19 | AUTHOR_FEED_RSS = None 20 | 21 | # Set the article URL 22 | ARTICLE_URL = 'blog/{date:%Y}/{date:%m}/{date:%d}/{slug}/' 23 | ARTICLE_SAVE_AS = 'blog/{date:%Y}/{date:%m}/{date:%d}/{slug}/index.html' 24 | 25 | DEFAULT_PAGINATION = 10 26 | 27 | # Uncomment following line if you want document-relative URLs when developing 28 | #RELATIVE_URLS = True 29 | 30 | #MARKUP = ('md', 'ipynb') 31 | #PLUGINS = ['ipynb.markup'] 32 | 33 | MARKUP = ['md'] 34 | PLUGIN_PATHS = ['./plugins', './plugins/pelican-plugins'] 35 | PLUGINS = [ 36 | 'summary', # auto-summarizing articles 37 | 'feed_summary', # use summaries for RSS, not full articles 38 | 'ipynb.liquid', # for embedding notebooks 39 | 'liquid_tags.img', # embedding images 40 | 'liquid_tags.video', # embedding videos 41 | 'liquid_tags.include_code', # including code blocks 42 | 'liquid_tags.literal' 43 | ] 44 | IGNORE_FILES = ['.ipynb_checkpoints'] 45 | 46 | # for liquid tags 47 | CODE_DIR = 'downloads/code' 48 | NOTEBOOK_DIR = 'downloads/notebooks' 49 | 50 | # THEME SETTINGS 51 | THEME = './theme/' 52 | 53 | ABOUT_PAGE = '/pages/about.html' 54 | TWITTER_USERNAME = 'jakevdp' 55 | GITHUB_USERNAME = 'jakevdp' 56 | STACKOVERFLOW_ADDRESS = 'http://stackoverflow.com/users/2937831/jakevdp' 57 | AUTHOR_WEBSITE = 'http://vanderplas.com' 58 | AUTHOR_BLOG = 'http://jakevdp.github.io' 59 | 
AUTHOR_CV = "http://staff.washington.edu/jakevdp/media/pdfs/CV.pdf" 60 | SHOW_ARCHIVES = True 61 | SHOW_FEED = False # Need to address large feeds 62 | 63 | ENABLE_MATHJAX = True 64 | 65 | STATIC_PATHS = ['images', 'figures', 'videos', 'downloads', 'favicon.ico'] 66 | 67 | # Footer info 68 | 69 | LICENSE_URL = "https://github.com/jakevdp/jakevdp.github.io-source/blob/master/LICENSE" 70 | LICENSE = "MIT" 71 | -------------------------------------------------------------------------------- /website/publishconf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- # 3 | from __future__ import unicode_literals 4 | 5 | # This file is only used if you use `make publish` or 6 | # explicitly specify it as your config file. 7 | 8 | import os 9 | import sys 10 | sys.path.append(os.curdir) 11 | from pelicanconf import * 12 | 13 | SITEURL = 'http://jakevdp.github.io/PythonDataScienceHandbook' 14 | RELATIVE_URLS = False 15 | 16 | SHOW_FEED = False 17 | FEED_ALL_ATOM = 'feeds/all.atom.xml' 18 | CATEGORY_FEED_ATOM = 'feeds/%s.atom.xml' 19 | FEED_USE_SUMMARY = True # from the feed_summary plugin 20 | 21 | DELETE_OUTPUT_DIRECTORY = True 22 | 23 | DISQUS_SITENAME = "pythonicperambulations" 24 | GOOGLE_ANALYTICS = "UA-34061646-1" 25 | -------------------------------------------------------------------------------- /website/theme/README.md: -------------------------------------------------------------------------------- 1 | # Pythonic Perambulations Theme 2 | 3 | This theme was adapted from that at https://github.com/danielfrg/danielfrg.github.io-source; the original is released under the Apache v2.0 license. 4 | Adaptations are contained in this directory. 
-------------------------------------------------------------------------------- /website/theme/static/css/icons.css: -------------------------------------------------------------------------------- 1 | /* Copied from https://github.com/porterjamesj/crowsfoot */ 2 | 3 | @font-face { 4 | font-family: 'icons'; 5 | src: url('../font/icons.eot?79801659'); 6 | src: url('../font/icons.eot?79801659#iefix') format('embedded-opentype'), 7 | url('../font/icons.woff?79801659') format('woff'), 8 | url('../font/icons.ttf?79801659') format('truetype'), 9 | url('../font/icons.svg?79801659#icons') format('svg'); 10 | font-weight: normal; 11 | font-style: normal; 12 | } 13 | /* Chrome hack: SVG is rendered more smooth in Windozze. 100% magic, uncomment if you need it. */ 14 | /* Note, that will break hinting! In other OS-es font will be not as sharp as it could be */ 15 | /* 16 | @media screen and (-webkit-min-device-pixel-ratio:0) { 17 | @font-face { 18 | font-family: 'icons'; 19 | src: url('../font/icons.svg?79801659#icons') format('svg'); 20 | } 21 | } 22 | */ 23 | 24 | [class^="icon-"]:before, [class*=" icon-"]:before { 25 | font-family: "icons"; 26 | font-style: normal; 27 | font-weight: normal; 28 | speak: none; 29 | 30 | display: inline-block; 31 | text-decoration: inherit; 32 | width: 1em; 33 | margin-right: .2em; 34 | text-align: center; 35 | /* opacity: .8; */ 36 | 37 | /* For safety - reset parent styles, that can break glyph codes*/ 38 | font-variant: normal; 39 | text-transform: none; 40 | 41 | /* fix buttons height, for twitter bootstrap */ 42 | line-height: 1em; 43 | 44 | /* Animation center compensation - margins should be symmetric */ 45 | /* remove if not needed */ 46 | margin-left: .2em; 47 | 48 | /* you can be more comfortable with increased icons size */ 49 | /* font-size: 120%; */ 50 | 51 | /* Uncomment for 3D effect */ 52 | /* text-shadow: 1px 1px 1px rgba(127, 127, 127, 0.3); */ 53 | } 54 | 55 | .icon-stackoverflow:before { content: '\e032'; } /* '' */ 56 | 
.icon-twitter:before { content: '\e801'; } /* '' */ 57 | .icon-facebook:before { content: '\e802'; } /* '' */ 58 | .icon-rss:before { content: '\e800'; } /* '' */ 59 | .icon-mail-alt:before { content: '\f0e0'; } /* '' */ 60 | .icon-github:before { content: '\f113'; } /* '' */ -------------------------------------------------------------------------------- /website/theme/static/font/icons.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/website/theme/static/font/icons.eot -------------------------------------------------------------------------------- /website/theme/static/font/icons.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Copyright (C) 2012 by original authors @ fontello.com 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /website/theme/static/font/icons.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/website/theme/static/font/icons.ttf -------------------------------------------------------------------------------- /website/theme/static/font/icons.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Supraav/PythonDataScienceHandbook/8a34a4f653bdbdc01415a94dc20d4e9b97438965/website/theme/static/font/icons.woff -------------------------------------------------------------------------------- /website/theme/templates/_includes/analytics.html: -------------------------------------------------------------------------------- 1 | {% if GOOGLE_UNIVERSAL_ANALYTICS %} 2 | 11 | {% elif GOOGLE_ANALYTICS %} 12 | 30 | {% endif %} 31 | 
-------------------------------------------------------------------------------- /website/theme/templates/_includes/disqus_thread.html: -------------------------------------------------------------------------------- 1 | {% if DISQUS_SITENAME and SITEURL and article.status != "draft" %} 2 |
3 |

Comments

4 |
5 | 16 |
17 | {% endif %} 18 | -------------------------------------------------------------------------------- /website/theme/templates/about.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% block title %}{{ page.title }}{% endblock %} 3 | {% block headerimg %}{% if page.headerimg %}{{ page.headerimg }}{% else %}{{ DEFAULT_HEADER_BG }}{% endif %}{% endblock %} 4 | 5 | {% block content %} 6 | 7 |
8 |
9 |
10 |

{{ page.title }}

11 | {% if page.date %} 12 | 13 | {% endif %} 14 |
36 | 37 |
38 | {{ page.content }} 39 |
40 | 41 |
42 |
43 | {% endblock %} 44 | -------------------------------------------------------------------------------- /website/theme/templates/archives.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% block title %}Archives{% endblock %} 3 | {% block headerimg %}{{ DEFAULT_HEADER_BG }}{% endblock %} 4 | 5 | {% block content %} 6 |
7 | 27 |
28 | {% endblock %} 29 | -------------------------------------------------------------------------------- /website/theme/templates/article.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% block title %}{{ article.title }}{% endblock %} 3 | {% block headerimg %}{% if article.headerimg %}{{ article.headerimg }}{% else %}{{ DEFAULT_HEADER_BG }}{% endif %}{% endblock %} 4 | 5 | {% block extra_head %} 6 | {% if 'angular' in article.include %} 7 | 8 | {% endif %} 9 | {% if 'jquery' in article.include %} 10 | 11 | {% endif %} 12 | {% endblock %} 13 | 14 | {% block content %} 15 |
16 |
17 |
18 |

{{ article.title }}

19 | 20 |
21 | 22 |
23 | {{ article.content }} 24 |
25 | 26 |
27 |
28 | {% for tag in article.tags %} 29 | {{ tag }} 30 | {% endfor %} 31 |
32 |
33 |
34 | 35 | {% include '_includes/disqus_thread.html' %} 36 | 37 |
38 | 39 | 42 | 43 | {% endblock %} 44 | -------------------------------------------------------------------------------- /website/theme/templates/base.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | {% block title %}{% endblock %} | {{ SITENAME }} 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | {% if ENABLE_MATHJAX %} 25 | 26 | 45 | {% endif %} 46 | 47 | {% block extra_head %}{%endblock%} 48 | 49 | 50 | 51 | 79 | 80 |
81 | {% block content %}{% endblock %} 82 |
83 | 92 | {% include '_includes/analytics.html' %} 93 | 94 | 95 | -------------------------------------------------------------------------------- /website/theme/templates/booksection.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% block title %}{{ page.title }}{% endblock %} 3 | {% block headerimg %}{% if page.headerimg %}{{ page.headerimg }}{% else %}{{ DEFAULT_HEADER_BG }}{% endif %}{% endblock %} 4 | 5 | {% block content %} 6 | 7 |
8 |

9 | 10 | This is an excerpt from the Python Data Science Handbook by Jake VanderPlas; Jupyter notebooks are available on GitHub. 11 |

12 |

13 | The text is released under the CC-BY-NC-ND license, and code is released under the MIT license. If you find this content useful, please consider supporting the work by buying the book! 14 |

15 |
16 | 17 | 18 |
19 | 20 |
21 |
22 |

{{ page.title }}

23 | {% if page.date %} 24 | 25 | {% endif %} 26 |
27 | 28 |
29 | {{ page.content }} 30 |
31 | 32 |
33 |
34 | {% endblock %} 35 | -------------------------------------------------------------------------------- /website/theme/templates/index.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% block title %}Home{% endblock %} 3 | {% block headerimg %}{{ DEFAULT_HEADER_BG }}{% endblock %} 4 | 5 | {% block content %} 6 |
7 | 8 | {% for article in articles_page.object_list %} 9 |
10 |
11 |

{{ article.title }}

12 | 13 |
14 | 15 |
16 | {{ article.summary }} 17 |
18 | 19 |
20 |
21 | Read more → 22 |
23 | 24 |
25 | {% for tag in article.tags %} 26 | {{ tag }} 27 | {% endfor %} 28 |
29 | 30 |
31 | 32 |
33 |
34 | {% endfor %} 35 | 36 | 48 | 49 |
50 | 51 | 52 | 53 | {% endblock %} 54 | -------------------------------------------------------------------------------- /website/theme/templates/ipynb.css: -------------------------------------------------------------------------------- 1 | { 2 | max-width: 700px; 3 | } 4 | 5 | .text_cell .prompt { 6 | display: none; 7 | } 8 | 9 | div.cell { 10 | padding: 0; 11 | } 12 | 13 | div.text_cell_render { 14 | padding: 0; 15 | } 16 | 17 | div.prompt { 18 | font-size: 13px; 19 | } 20 | 21 | div.input_prompt { 22 | padding: .7em 0.2em; 23 | } 24 | 25 | div.output_prompt { 26 | padding: .4em .2em; 27 | } 28 | 29 | div.input_area { 30 | margin: .2em 0.4em; 31 | max-width: 580px; 32 | } 33 | 34 | table.dataframe { 35 | font-family: Arial, sans-serif; 36 | font-size: 13px; 37 | line-height: 20px; 38 | } 39 | 40 | table.dataframe th, td { 41 | padding: 4px; 42 | text-align: left; 43 | } 44 | 45 | pre code { 46 | background-color: inherit; 47 | } 48 | -------------------------------------------------------------------------------- /website/theme/templates/main.css: -------------------------------------------------------------------------------- 1 | body { 2 | margin: 0; 3 | padding: 0; 4 | font: 15px 'Source Sans Pro', sans-serif; 5 | line-height: 1.6em; 6 | color: #222; 7 | text-rendering: optimizeLegibility; 8 | -webkit-font-smoothing: antialiased; 9 | } 10 | a { 11 | color: #007EE5; 12 | text-decoration: none; 13 | } 14 | a:hover { 15 | color: #007EE5; 16 | text-decoration: none; 17 | } 18 | header.main-header { 19 | background: none repeat scroll 0% 0% #205F29; 20 | margin-bottom: 0px; 21 | } 22 | header.main-header a { 23 | color: #fff; 24 | } 25 | header.main-header .container { 26 | max-width: 1000px; 27 | } 28 | header.main-header .container nav a:hover { 29 | background-color: #5C881C; 30 | } 31 | article { 32 | margin: 0; 33 | } 34 | article header.about { 35 | margin-bottom: 0px; 36 | padding-bottom: 0px; 37 | } 38 | article header { 39 | margin-bottom: 20px; 40 | 
padding-bottom: 20px; 41 | } 42 | article header h1 { 43 | margin-bottom: 2px; 44 | font-weight: 700; 45 | color: #000; 46 | } 47 | article header time { 48 | color: #9E9E9E; 49 | font-size: 0.85em; 50 | float: right; 51 | } 52 | article header time.left { 53 | color: #9E9E9E; 54 | font-size: 0.85em; 55 | float: left; 56 | } 57 | article div.social-links ul { 58 | padding: 0px; 59 | } 60 | article div.social-links li { 61 | display: inline; 62 | font-size: 20px; 63 | } 64 | article div.social-links li a { 65 | color: #000; 66 | padding: 10px; 67 | } 68 | article div.social-links li a:hover { 69 | color: #666; 70 | text-decoration: none; 71 | } 72 | article p { 73 | font-size: 16px; 74 | margin-bottom: 20px; 75 | line-height: 1.6em; 76 | } 77 | article p.note { 78 | background: #f5f5f5; 79 | border: 1px solid #ddd; 80 | padding: 0.533em 0.733em; 81 | } 82 | article p.update { 83 | background-color: #FEEFB3; 84 | border: 1px solid #e6e68a; 85 | padding: 0.533em 0.733em; 86 | } 87 | article p.alert { 88 | background-color: #ffe2e2; 89 | border: 1px solid #ffb2b2; 90 | padding: 0.533em 0.733em; 91 | } 92 | article ul, 93 | article ol { 94 | margin-top: 0px; 95 | margin-bottom: 25px; 96 | } 97 | article li { 98 | font-size: 16px; 99 | line-height: 1.6em; 100 | } 101 | article a:hover { 102 | text-decoration: underline; 103 | } 104 | article blockquote { 105 | border-left: 2px solid #c7c7cc; 106 | color: #666; 107 | margin: 30px 0; 108 | padding: 0 0 0 25px; 109 | } 110 | article img { 111 | max-width: 100%; 112 | } 113 | article code { 114 | color: #333; 115 | background-color: #EEE; 116 | border-radius: 0; 117 | font-size: 13px; 118 | } 119 | article .meta { 120 | font-size: 11px; 121 | } 122 | article .meta a:hover { 123 | text-decoration: none; 124 | } 125 | article .meta div { 126 | margin-bottom: 20px; 127 | display: block; 128 | } 129 | article .meta a.tag { 130 | margin: 0 10px 10px 0; 131 | padding: 1px 12px; 132 | display: inline-block; 133 | font-size: 14px; 
134 | color: rgba(0, 0, 0, 0.8); 135 | background: rgba(0, 0, 0, 0.05); 136 | } 137 | article .meta a.tag:hover { 138 | background: rgba(0, 0, 0, 0.15); 139 | } 140 | article .meta a.read_more, 141 | article .meta a.comments_btn { 142 | font-size: 14px; 143 | font-weight: 800; 144 | padding: 10px 20px; 145 | color: #205F29; 146 | background: #FFF; 147 | border: 1px solid #205F29; 148 | } 149 | article .meta a.read_more:hover, 150 | article .meta a.comments_btn:hover { 151 | color: #FFF; 152 | background: #5C881C; 153 | } 154 | .index { 155 | max-width: 700px; 156 | } 157 | .index article header h2 { 158 | font-size: 36px; 159 | margin-bottom: 2px; 160 | font-weight: 700; 161 | } 162 | .index article header h2 a { 163 | color: #000; 164 | } 165 | .index article header h2 a:hover { 166 | color: #007EE5; 167 | text-decoration: none; 168 | } 169 | .index .separator { 170 | padding: 40px 0 0 0; 171 | margin: 0 0 40px 0; 172 | height: 10px; 173 | border-bottom: solid 1px #CCC; 174 | } 175 | .index .pagination { 176 | display: block; 177 | margin-bottom: 100px; 178 | } 179 | .index .pagination .left { 180 | text-align: right; 181 | } 182 | .index .pagination .right { 183 | text-align: left; 184 | } 185 | .index .pagination a { 186 | display: inline-block; 187 | border: 2px solid #5C881C; 188 | margin: 0 5px; 189 | padding: 8px 20px; 190 | font-weight: bold; 191 | color: #5C881C; 192 | } 193 | .index .pagination a:hover { 194 | color: #FFF; 195 | background: #5C881C; 196 | } 197 | .post { 198 | max-width: 700px; 199 | } 200 | .post h2:before { 201 | content: "# "; 202 | font-weight: bold; 203 | color: #DDD; 204 | } 205 | .post h3:before { 206 | content: "## "; 207 | font-weight: bold; 208 | color: #DDD; 209 | } 210 | .post h4:before { 211 | content: "### "; 212 | font-weight: bold; 213 | color: #DDD; 214 | } 215 | .post article .meta { 216 | margin: 50px 0 100px; 217 | } 218 | .list { 219 | max-width: 700px; 220 | } 221 | .list ul.double-list { 222 | margin: 0 auto 60px; 
223 | padding: 0; 224 | list-style-type: none; 225 | } 226 | .list ul.double-list li { 227 | padding: 5px 0; 228 | } 229 | .list ul.double-list li h2 { 230 | font-size: 1em; 231 | display: inline; 232 | font-weight: normal; 233 | } 234 | .list ul.double-list li span { 235 | font-family: sans-serif; 236 | text-transform: uppercase; 237 | text-align: right; 238 | float: right; 239 | padding-top: 3px; 240 | font-size: 12px; 241 | color: #999; 242 | } 243 | .full-width-content { 244 | padding-top: 10px; 245 | padding-left: 0px; 246 | padding-right: 0px; 247 | margin-left: -20px; 248 | margin-right: -20px; 249 | } 250 | .col-xs-1, 251 | .col-sm-1, 252 | .col-md-1, 253 | .col-lg-1, 254 | .col-xs-2, 255 | .col-sm-2, 256 | .col-md-2, 257 | .col-lg-2, 258 | .col-xs-3, 259 | .col-sm-3, 260 | .col-md-3, 261 | .col-lg-3, 262 | .col-xs-4, 263 | .col-sm-4, 264 | .col-md-4, 265 | .col-lg-4, 266 | .col-xs-5, 267 | .col-sm-5, 268 | .col-md-5, 269 | .col-lg-5, 270 | .col-xs-6, 271 | .col-sm-6, 272 | .col-md-6, 273 | .col-lg-6, 274 | .col-xs-7, 275 | .col-sm-7, 276 | .col-md-7, 277 | .col-lg-7, 278 | .col-xs-8, 279 | .col-sm-8, 280 | .col-md-8, 281 | .col-lg-8, 282 | .col-xs-9, 283 | .col-sm-9, 284 | .col-md-9, 285 | .col-lg-9, 286 | .col-xs-10, 287 | .col-sm-10, 288 | .col-md-10, 289 | .col-lg-10, 290 | .col-xs-11, 291 | .col-sm-11, 292 | .col-md-11, 293 | .col-lg-11, 294 | .col-xs-12, 295 | .col-sm-12, 296 | .col-md-12, 297 | .col-lg-12 { 298 | padding-right: 0px; 299 | padding-left: 0px; 300 | } 301 | -------------------------------------------------------------------------------- /website/theme/templates/main.less: -------------------------------------------------------------------------------- 1 | // out: ./main.css, compress: true 2 | 3 | @text-color: #222; 4 | @link-color: #007EE5; 5 | 6 | body { 7 | margin: 0; 8 | padding: 0; 9 | font: 15px 'Source Sans Pro', sans-serif; 10 | line-height: 1.6em; 11 | color: @text-color; 12 | text-rendering: optimizeLegibility; 13 | 
-webkit-font-smoothing: antialiased; 14 | } 15 | 16 | a { 17 | color: @link-color; 18 | text-decoration: none; 19 | } 20 | 21 | a:hover { 22 | color: @link-color; 23 | text-decoration: none; 24 | } 25 | 26 | header.main-header { 27 | background: none repeat scroll 0% 0% #205F29; 28 | margin-bottom: 0px; 29 | 30 | a { 31 | color: #fff; 32 | } 33 | 34 | .container { 35 | max-width: 1000px; 36 | 37 | nav { 38 | a:hover { 39 | background-color: #5C881C; 40 | } 41 | } 42 | } 43 | } 44 | 45 | article { 46 | margin: 0; 47 | 48 | header.about { 49 | margin-bottom: 0px; 50 | padding-bottom: 0px; 51 | } 52 | 53 | header { 54 | margin-bottom: 20px; 55 | padding-bottom: 20px; 56 | 57 | h1 { 58 | margin-bottom: 2px; 59 | font-weight: 700; 60 | color: #000; 61 | } 62 | 63 | time { 64 | color: #9E9E9E; 65 | font-size: 0.85em; 66 | float: right; 67 | } 68 | 69 | time.left { 70 | color: #9E9E9E; 71 | font-size: 0.85em; 72 | float: left; 73 | } 74 | } 75 | 76 | div.social-links { 77 | ul { 78 | padding: 0px; 79 | } 80 | li { 81 | display: inline; 82 | font-size: 20px; 83 | a { 84 | color: #000; 85 | padding: 10px; 86 | } 87 | a:hover { 88 | color: #666; 89 | text-decoration: none; 90 | } 91 | } 92 | } 93 | 94 | p { 95 | font-size: 16px; 96 | margin-bottom: 20px; 97 | line-height: 1.6em; 98 | } 99 | 100 | p.note { 101 | background: #f5f5f5; 102 | border: 1px solid #ddd; 103 | padding: 0.533em 0.733em; 104 | } 105 | 106 | p.update { 107 | background-color: #FEEFB3; 108 | border: 1px solid #e6e68a; 109 | padding: 0.533em 0.733em; 110 | } 111 | 112 | p.alert { 113 | background-color: #ffe2e2; 114 | border: 1px solid #ffb2b2; 115 | padding: 0.533em 0.733em; 116 | } 117 | 118 | ul, ol { 119 | margin-top: 0px; 120 | margin-bottom: 25px; 121 | } 122 | 123 | li { 124 | font-size: 16px; 125 | line-height: 1.6em; 126 | } 127 | 128 | a:hover { 129 | text-decoration: underline; 130 | } 131 | 132 | blockquote { 133 | border-left: 2px solid #c7c7cc; 134 | color: #666; 135 | margin: 30px 0; 136 | 
padding: 0 0 0 25px; 137 | } 138 | 139 | img { 140 | max-width: 100%; 141 | } 142 | 143 | code { 144 | color: #333; 145 | background-color: #EEE; 146 | border-radius: 0; 147 | font-size: 13px; 148 | } 149 | 150 | .meta { 151 | font-size: 11px; 152 | 153 | a:hover { 154 | text-decoration: none; 155 | } 156 | 157 | div { 158 | margin-bottom: 20px; 159 | display: block; 160 | } 161 | 162 | a.tag { 163 | margin: 0 10px 10px 0; 164 | padding: 1px 12px; 165 | display: inline-block; 166 | font-size: 14px; 167 | color: rgba(0,0,0,0.8);; 168 | background: rgba(0,0,0,0.05); 169 | } 170 | 171 | a.tag:hover { 172 | background: rgba(0,0,0,0.15); 173 | } 174 | 175 | a.read_more, a.comments_btn { 176 | font-size: 14px; 177 | font-weight: 800; 178 | padding: 10px 20px; 179 | color: #205F29; 180 | background: #FFF; 181 | border: 1px solid #205F29; 182 | } 183 | 184 | a.read_more:hover, a.comments_btn:hover { 185 | color: #FFF; 186 | background: #5C881C; 187 | } 188 | } 189 | } 190 | 191 | .index { 192 | max-width: 700px; 193 | 194 | article { 195 | header { 196 | h2 { 197 | font-size: 36px; 198 | margin-bottom: 2px; 199 | font-weight: 700; 200 | 201 | a { 202 | color: #000; 203 | } 204 | a:hover { 205 | color: @link-color; 206 | text-decoration: none; 207 | } 208 | } 209 | } 210 | } 211 | 212 | .separator { 213 | padding: 40px 0 0 0; 214 | margin: 0 0 40px 0; 215 | height: 10px; 216 | border-bottom: solid 1px #CCC; 217 | } 218 | 219 | .pagination { 220 | display: block; 221 | margin-bottom: 100px; 222 | 223 | .left { 224 | text-align: right; 225 | } 226 | 227 | .right { 228 | text-align: left; 229 | } 230 | 231 | a { 232 | display: inline-block; 233 | border: 2px solid #5C881C; 234 | margin: 0 5px; 235 | padding: 8px 20px; 236 | font-weight: bold; 237 | color: #5C881C; 238 | } 239 | 240 | a:hover { 241 | color: #FFF; 242 | background: #5C881C; 243 | } 244 | } 245 | } 246 | 247 | .post { 248 | max-width: 700px; 249 | 250 | h2:before { 251 | content: "# "; 252 | font-weight: bold; 
253 | color: #DDD; 254 | } 255 | 256 | h3:before { 257 | content: "## "; 258 | font-weight: bold; 259 | color: #DDD; 260 | } 261 | 262 | h4:before { 263 | content: "### "; 264 | font-weight: bold; 265 | color: #DDD; 266 | } 267 | 268 | article { 269 | .meta { 270 | margin: 50px 0 100px; 271 | } 272 | } 273 | } 274 | 275 | .list { 276 | max-width: 700px; 277 | 278 | ul.double-list { 279 | margin: 0 auto 60px; 280 | padding: 0; 281 | list-style-type: none; 282 | 283 | li { 284 | padding: 5px 0; 285 | 286 | h2 { 287 | font-size: 1em; 288 | display: inline; 289 | font-weight: normal; 290 | } 291 | 292 | span { 293 | font-family: sans-serif; 294 | text-transform: uppercase; 295 | text-align: right; 296 | float: right; 297 | padding-top: 3px; 298 | font-size: 12px; 299 | color: #999; 300 | } 301 | } 302 | } 303 | } 304 | 305 | .full-width-content { 306 | padding-top: 10px; 307 | padding-left: 0px; 308 | padding-right: 0px; 309 | margin-left: -20px; 310 | margin-right: -20px; 311 | } 312 | 313 | .col-xs-1, .col-sm-1, .col-md-1, .col-lg-1, .col-xs-2, .col-sm-2, .col-md-2, .col-lg-2, .col-xs-3, .col-sm-3, .col-md-3, .col-lg-3, .col-xs-4, .col-sm-4, .col-md-4, .col-lg-4, .col-xs-5, .col-sm-5, .col-md-5, .col-lg-5, .col-xs-6, .col-sm-6, .col-md-6, .col-lg-6, .col-xs-7, .col-sm-7, .col-md-7, .col-lg-7, .col-xs-8, .col-sm-8, .col-md-8, .col-lg-8, .col-xs-9, .col-sm-9, .col-md-9, .col-lg-9, .col-xs-10, .col-sm-10, .col-md-10, .col-lg-10, .col-xs-11, .col-sm-11, .col-md-11, .col-lg-11, .col-xs-12, .col-sm-12, .col-md-12, .col-lg-12 { 314 | padding-right: 0px; 315 | padding-left: 0px; 316 | } 317 | -------------------------------------------------------------------------------- /website/theme/templates/page.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% block title %}{{ page.title }}{% endblock %} 3 | {% block headerimg %}{% if page.headerimg %}{{ page.headerimg }}{% else %}{{ DEFAULT_HEADER_BG }}{% endif 
%}{% endblock %} 4 | 5 | {% block content %} 6 |
7 | 8 |
9 |
10 |

{{ page.title }}

11 | {% if page.date %} 12 | 13 | {% endif %} 14 |
15 | 16 |
17 | {{ page.content }} 18 |
19 | 20 |
21 |
22 | {% endblock %} 23 | -------------------------------------------------------------------------------- /website/theme/templates/pygments.css: -------------------------------------------------------------------------------- 1 | .highlight .hll { background-color: #ffffcc } 2 | .highlight .c { color: #60a0b0; font-style: italic } /* Comment */ 3 | .highlight .err { border: 1px solid #FF0000 } /* Error */ 4 | .highlight .k { color: #007020; font-weight: bold } /* Keyword */ 5 | .highlight .o { color: #666666 } /* Operator */ 6 | .highlight .cm { color: #60a0b0; font-style: italic } /* Comment.Multiline */ 7 | .highlight .cp { color: #007020 } /* Comment.Preproc */ 8 | .highlight .c1 { color: #60a0b0; font-style: italic } /* Comment.Single */ 9 | .highlight .cs { color: #60a0b0; background-color: #fff0f0 } /* Comment.Special */ 10 | .highlight .gd { color: #A00000 } /* Generic.Deleted */ 11 | .highlight .ge { font-style: italic } /* Generic.Emph */ 12 | .highlight .gr { color: #FF0000 } /* Generic.Error */ 13 | .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ 14 | .highlight .gi { color: #00A000 } /* Generic.Inserted */ 15 | .highlight .go { color: #808080 } /* Generic.Output */ 16 | .highlight .gp { color: #c65d09; font-weight: bold } /* Generic.Prompt */ 17 | .highlight .gs { font-weight: bold } /* Generic.Strong */ 18 | .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ 19 | .highlight .gt { color: #0040D0 } /* Generic.Traceback */ 20 | .highlight .kc { color: #007020; font-weight: bold } /* Keyword.Constant */ 21 | .highlight .kd { color: #007020; font-weight: bold } /* Keyword.Declaration */ 22 | .highlight .kn { color: #007020; font-weight: bold } /* Keyword.Namespace */ 23 | .highlight .kp { color: #007020 } /* Keyword.Pseudo */ 24 | .highlight .kr { color: #007020; font-weight: bold } /* Keyword.Reserved */ 25 | .highlight .kt { color: #902000 } /* Keyword.Type */ 26 | .highlight .m { color: #40a070 } /* 
Literal.Number */ 27 | .highlight .s { color: #4070a0 } /* Literal.String */ 28 | .highlight .na { color: #4070a0 } /* Name.Attribute */ 29 | .highlight .nb { color: #007020 } /* Name.Builtin */ 30 | .highlight .nc { color: #0e84b5; font-weight: bold } /* Name.Class */ 31 | .highlight .no { color: #60add5 } /* Name.Constant */ 32 | .highlight .nd { color: #555555; font-weight: bold } /* Name.Decorator */ 33 | .highlight .ni { color: #d55537; font-weight: bold } /* Name.Entity */ 34 | .highlight .ne { color: #007020 } /* Name.Exception */ 35 | .highlight .nf { color: #06287e } /* Name.Function */ 36 | .highlight .nl { color: #002070; font-weight: bold } /* Name.Label */ 37 | .highlight .nn { color: #0e84b5; font-weight: bold } /* Name.Namespace */ 38 | .highlight .nt { color: #062873; font-weight: bold } /* Name.Tag */ 39 | .highlight .nv { color: #bb60d5 } /* Name.Variable */ 40 | .highlight .ow { color: #007020; font-weight: bold } /* Operator.Word */ 41 | .highlight .w { color: #bbbbbb } /* Text.Whitespace */ 42 | .highlight .mf { color: #40a070 } /* Literal.Number.Float */ 43 | .highlight .mh { color: #40a070 } /* Literal.Number.Hex */ 44 | .highlight .mi { color: #40a070 } /* Literal.Number.Integer */ 45 | .highlight .mo { color: #40a070 } /* Literal.Number.Oct */ 46 | .highlight .sb { color: #4070a0 } /* Literal.String.Backtick */ 47 | .highlight .sc { color: #4070a0 } /* Literal.String.Char */ 48 | .highlight .sd { color: #4070a0; font-style: italic } /* Literal.String.Doc */ 49 | .highlight .s2 { color: #4070a0 } /* Literal.String.Double */ 50 | .highlight .se { color: #4070a0; font-weight: bold } /* Literal.String.Escape */ 51 | .highlight .sh { color: #4070a0 } /* Literal.String.Heredoc */ 52 | .highlight .si { color: #70a0d0; font-style: italic } /* Literal.String.Interpol */ 53 | .highlight .sx { color: #c65d09 } /* Literal.String.Other */ 54 | .highlight .sr { color: #235388 } /* Literal.String.Regex */ 55 | .highlight .s1 { color: #4070a0 } /* 
Literal.String.Single */ 56 | .highlight .ss { color: #517918 } /* Literal.String.Symbol */ 57 | .highlight .bp { color: #007020 } /* Name.Builtin.Pseudo */ 58 | .highlight .vc { color: #bb60d5 } /* Name.Variable.Class */ 59 | .highlight .vg { color: #bb60d5 } /* Name.Variable.Global */ 60 | .highlight .vi { color: #bb60d5 } /* Name.Variable.Instance */ 61 | .highlight .il { color: #40a070 } /* Literal.Number.Integer.Long */ -------------------------------------------------------------------------------- /website/theme/templates/tag.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% block title %}Archives{% endblock %} 3 | {% block headerimg %}{{ DEFAULT_HEADER_BG }}{% endblock %} 4 | 5 | {% block content %} 6 |
7 | 21 |
22 | {% endblock %} 23 | --------------------------------------------------------------------------------