├── .gitignore ├── LICENSE ├── LICENSE-CODE ├── LICENSE-TEXT ├── README.md ├── notebooks ├── 00.00-Preface.ipynb ├── 01.00-IPython-Beyond-Normal-Python.ipynb ├── 01.01-Help-And-Documentation.ipynb ├── 01.02-Shell-Keyboard-Shortcuts.ipynb ├── 01.03-Magic-Commands.ipynb ├── 01.04-Input-Output-History.ipynb ├── 01.05-IPython-And-Shell-Commands.ipynb ├── 01.06-Errors-and-Debugging.ipynb ├── 01.07-Timing-and-Profiling.ipynb ├── 01.08-More-IPython-Resources.ipynb ├── 02.00-Introduction-to-NumPy.ipynb ├── 02.01-Understanding-Data-Types.ipynb ├── 02.02-The-Basics-Of-NumPy-Arrays.ipynb ├── 02.03-Computation-on-arrays-ufuncs.ipynb ├── 02.04-Computation-on-arrays-aggregates.ipynb ├── 02.05-Computation-on-arrays-broadcasting.ipynb ├── 02.06-Boolean-Arrays-and-Masks.ipynb ├── 02.07-Fancy-Indexing.ipynb ├── 02.08-Sorting.ipynb ├── 02.09-Structured-Data-NumPy.ipynb ├── 03.00-Introduction-to-Pandas.ipynb ├── 03.01-Introducing-Pandas-Objects.ipynb ├── 03.02-Data-Indexing-and-Selection.ipynb ├── 03.03-Operations-in-Pandas.ipynb ├── 03.04-Missing-Values.ipynb ├── 03.05-Hierarchical-Indexing.ipynb ├── 03.06-Concat-And-Append.ipynb ├── 03.07-Merge-and-Join.ipynb ├── 03.08-Aggregation-and-Grouping.ipynb ├── 03.09-Pivot-Tables.ipynb ├── 03.10-Working-With-Strings.ipynb ├── 03.11-Working-with-Time-Series.ipynb ├── 03.12-Performance-Eval-and-Query.ipynb ├── 03.13-Further-Resources.ipynb ├── 04.00-Introduction-To-Matplotlib.ipynb ├── 04.01-Simple-Line-Plots.ipynb ├── 04.02-Simple-Scatter-Plots.ipynb ├── 04.03-Errorbars.ipynb ├── 04.04-Density-and-Contour-Plots.ipynb ├── 04.05-Histograms-and-Binnings.ipynb ├── 04.06-Customizing-Legends.ipynb ├── 04.07-Customizing-Colorbars.ipynb ├── 04.08-Multiple-Subplots.ipynb ├── 04.09-Text-and-Annotation.ipynb ├── 04.10-Customizing-Ticks.ipynb ├── 04.11-Settings-and-Stylesheets.ipynb ├── 04.12-Three-Dimensional-Plotting.ipynb ├── 04.13-Geographic-Data-With-Basemap.ipynb ├── 04.14-Visualization-With-Seaborn.ipynb ├── 04.15-Further-Resources.ipynb ├── 
05.00-Machine-Learning.ipynb ├── 05.01-What-Is-Machine-Learning.ipynb ├── 05.02-Introducing-Scikit-Learn.ipynb ├── 05.03-Hyperparameters-and-Model-Validation.ipynb ├── 05.04-Feature-Engineering.ipynb ├── 05.05-Naive-Bayes.ipynb ├── 05.06-Linear-Regression.ipynb ├── 05.07-Support-Vector-Machines.ipynb ├── 05.08-Random-Forests.ipynb ├── 05.09-Principal-Component-Analysis.ipynb ├── 05.10-Manifold-Learning.ipynb ├── 05.11-K-Means.ipynb ├── 05.12-Gaussian-Mixtures.ipynb ├── 05.13-Kernel-Density-Estimation.ipynb ├── 05.14-Image-Features.ipynb ├── 05.15-Learning-More.ipynb ├── 06.00-Figure-Code.ipynb ├── Index.ipynb ├── data │ ├── 20170107-061401-recipeitems.json.bz2 │ ├── BicycleWeather.csv │ ├── FremontBridge.csv │ ├── Seattle2014.csv │ ├── births.csv │ ├── california_cities.csv │ ├── gistemp250.nc.bz2 │ ├── marathon-data.csv │ ├── president_heights.csv │ ├── state-abbrevs.csv │ ├── state-areas.csv │ ├── state-population.csv │ └── tips.csv ├── figures │ ├── 02.05-broadcasting.png │ ├── 03.08-split-apply-combine.png │ ├── 05.01-classification-1.png │ ├── 05.01-classification-2.png │ ├── 05.01-classification-3.png │ ├── 05.01-clustering-1.png │ ├── 05.01-clustering-2.png │ ├── 05.01-dimesionality-1.png │ ├── 05.01-dimesionality-2.png │ ├── 05.01-regression-1.png │ ├── 05.01-regression-2.png │ ├── 05.01-regression-3.png │ ├── 05.01-regression-4.png │ ├── 05.02-samples-features.png │ ├── 05.03-2-fold-CV.png │ ├── 05.03-5-fold-CV.png │ ├── 05.03-bias-variance-2.png │ ├── 05.03-bias-variance.png │ ├── 05.03-learning-curve.png │ ├── 05.03-validation-curve.png │ ├── 05.05-gaussian-NB.png │ ├── 05.06-gaussian-basis.png │ ├── 05.08-decision-tree-levels.png │ ├── 05.08-decision-tree-overfitting.png │ ├── 05.08-decision-tree.png │ ├── 05.09-PCA-rotation.png │ ├── 05.09-digits-pca-components.png │ ├── 05.09-digits-pixel-components.png │ ├── 05.10-LLE-vs-MDS.png │ ├── 05.11-expectation-maximization.png │ ├── 05.12-covariance-type.png │ ├── Data_Science_VD.png │ ├── 
PDSH-cover-small.png │ ├── PDSH-cover.png │ ├── array_vs_list.png │ └── cint_vs_pyint.png ├── hello.png ├── helpers_05_08.py └── my_figure.png └── printable ├── 00.00-Preface.pdf ├── 01.00-IPython-Beyond-Normal-Python.pdf ├── 01.01-Help-And-Documentation.pdf ├── 01.02-Shell-Keyboard-Shortcuts.pdf ├── 01.03-Magic-Commands.pdf ├── 01.04-Input-Output-History.pdf ├── 01.05-IPython-And-Shell-Commands.pdf ├── 01.06-Errors-and-Debugging.pdf ├── 01.07-Timing-and-Profiling.pdf ├── 01.08-More-IPython-Resources.pdf ├── 02.00-Introduction-to-NumPy.pdf ├── 02.01-Understanding-Data-Types.pdf ├── 02.02-The-Basics-Of-NumPy-Arrays.pdf ├── 02.03-Computation-on-arrays-ufuncs.pdf ├── 02.04-Computation-on-arrays-aggregates.pdf ├── 02.05-Computation-on-arrays-broadcasting.pdf ├── 02.06-Boolean-Arrays-and-Masks.pdf ├── 02.07-Fancy-Indexing.pdf ├── 02.08-Sorting.pdf ├── 02.09-Structured-Data-NumPy.pdf ├── 03.00-Introduction-to-Pandas.pdf ├── 03.01-Introducing-Pandas-Objects.pdf ├── 03.02-Data-Indexing-and-Selection.pdf ├── 03.03-Operations-in-Pandas.pdf ├── 03.04-Missing-Values.pdf ├── 03.05-Hierarchical-Indexing.pdf ├── 03.06-Concat-And-Append.pdf ├── 03.07-Merge-and-Join.pdf ├── 03.08-Aggregation-and-Grouping.pdf ├── 03.09-Pivot-Tables.pdf ├── 03.10-Working-With-Strings.pdf ├── 03.11-Working-with-Time-Series.pdf ├── 03.12-Performance-Eval-and-Query.pdf ├── 03.13-Further-Resources.pdf ├── 04.00-Introduction-To-Matplotlib.pdf ├── 04.01-Simple-Line-Plots.pdf ├── 04.02-Simple-Scatter-Plots.pdf ├── 04.03-Errorbars.pdf ├── 04.04-Density-and-Contour-Plots.pdf ├── 04.05-Histograms-and-Binnings.pdf ├── 04.06-Customizing-Legends.pdf ├── 04.07-Customizing-Colorbars.pdf ├── 04.08-Multiple-Subplots.pdf ├── 04.09-Text-and-Annotation.pdf ├── 04.10-Customizing-Ticks.pdf ├── 04.11-Settings-and-Stylesheets.pdf ├── 04.12-Three-Dimensional-Plotting.pdf ├── 04.13-Geographic-Data-With-Basemap.pdf ├── 04.14-Visualization-With-Seaborn.pdf ├── 04.15-Further-Resources.pdf ├── 05.00-Machine-Learning.pdf ├── 
05.01-What-Is-Machine-Learning.pdf ├── 05.02-Introducing-Scikit-Learn.pdf ├── 05.03-Hyperparameters-and-Model-Validation.pdf ├── 05.04-Feature-Engineering.pdf ├── 05.05-Naive-Bayes.pdf ├── 05.06-Linear-Regression.pdf ├── 05.07-Support-Vector-Machines.pdf ├── 05.08-Random-Forests.pdf ├── 05.09-Principal-Component-Analysis.pdf ├── 05.10-Manifold-Learning.pdf ├── 05.11-K-Means.pdf ├── 05.12-Gaussian-Mixtures.pdf ├── 05.13-Kernel-Density-Estimation.pdf ├── 05.14-Image-Features.pdf ├── 05.15-Learning-More.pdf ├── 06.00-Figure-Code.pdf ├── Index.pdf └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # mprun demo 107 | notebooks/mprun_demo.py 108 | notebooks/data/20170107-061401-recipeitems.json 109 | notebooks/data/gistemp250.nc 110 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. 
By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. 
If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. 
You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 
113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. 
You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 
165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. 
If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. 
If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 
292 | 293 | <one line to give the program's name and a brief idea of what it does.> 294 | Copyright (C) <year> <name of author> 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 
331 | 332 | <signature of Ty Coon>, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | -------------------------------------------------------------------------------- /LICENSE-CODE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Jacob VanderPlas 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python Data Science Handbook 2 | 3 | # Python数据科学手册 4 | 5 | > [![Binder](https://mybinder.org/badge.svg)](https://mybinder.org/v2/gh/jakevdp/PythonDataScienceHandbook/master?filepath=notebooks%2FIndex.ipynb) 6 | > [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/jakevdp/PythonDataScienceHandbook/blob/master/notebooks/Index.ipynb) 7 | 8 | [![在线Binder](https://mybinder.org/badge.svg)](https://mybinder.org/v2/gh/wangyingsm/Python-Data-Science-Handbook/master?filepath=notebooks%2FIndex.ipynb) 9 | [![谷歌Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/wangyingsm/Python-Data-Science-Handbook/blob/master/notebooks/Index.ipynb) 10 | 11 | **[本书Github免费notebook格式中文翻译目录](notebooks/Index.ipynb)** 12 | 13 | **[下载中文版PDF可打印格式](printable/README.md)** 14 | 15 | > This repository contains the entire [Python Data Science Handbook](http://shop.oreilly.com/product/0636920034919.do), in the form of (free!) Jupyter notebooks. 16 | 17 | 本代码仓库包含着整本[Python Data Science Handbook](http://shop.oreilly.com/product/0636920034919.do)书籍,使用免费的Jupyter notebook格式呈现。 18 | 19 | 译者注:作者英文版[原仓库地址](https://github.com/jakevdp/PythonDataScienceHandbook)。 20 | 21 | ![cover image](notebooks/figures/PDSH-cover.png) 22 | 23 | ## How to Use this Book 24 | 25 | ## 如何阅读本书 26 | 27 | > - Read the book in its entirety online at https://jakevdp.github.io/PythonDataScienceHandbook/ 28 | 29 | > - Run the code using the Jupyter notebooks available in this repository's [notebooks](notebooks) directory. 
30 | 31 | > - Launch executable versions of these notebooks using [Google Colab](http://colab.research.google.com): [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/jakevdp/PythonDataScienceHandbook/blob/master/notebooks/Index.ipynb) 32 | 33 | > - Launch a live notebook server with these notebooks using [binder](https://beta.mybinder.org/): [![Binder](https://mybinder.org/badge.svg)](https://mybinder.org/v2/gh/jakevdp/PythonDataScienceHandbook/master?filepath=notebooks%2FIndex.ipynb) 34 | 35 | > - Buy the printed book through [O'Reilly Media](http://shop.oreilly.com/product/0636920034919.do) 36 | 37 | - 在线阅读本书(英文原版):https://jakevdp.github.io/PythonDataScienceHandbook/ 。 38 | - 使用Jupyter notebook运行本仓库的代码,代码在[notebooks](notebooks)目录下。 39 | - 在[谷歌Colab](http://colab.research.google.com): [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/wangyingsm/Python-Data-Science-Handbook/blob/master/notebooks/Index.ipynb)上运行这些notebooks。 40 | - 在[在线binder](https://beta.mybinder.org/): [![Binder](https://mybinder.org/badge.svg)](https://mybinder.org/v2/gh/wangyingsm/Python-Data-Science-Handbook/master?filepath=notebooks%2FIndex.ipynb)上运行这些notebooks。 41 | - 在[O'Reilly Media](http://shop.oreilly.com/product/0636920034919.do)网站上购买本书(英文原版)纸质版。 42 | 43 | ## About 44 | 45 | ## 关于 46 | 47 | > The book was written and tested with Python 3.5, though other Python versions (including Python 2.7) should work in nearly all cases. 48 | 49 | 本书在Python 3.5环境中编写和测试,但其他的Python版本(包括Python 2.7)也应该可以通过绝大部分示例。 50 | 51 | > The book introduces the core libraries essential for working with data in Python: particularly [IPython](http://ipython.org), [NumPy](http://numpy.org), [Pandas](http://pandas.pydata.org), [Matplotlib](http://matplotlib.org), [Scikit-Learn](http://scikit-learn.org), and related packages. 
52 | Familiarity with Python as a language is assumed; if you need a quick introduction to the language itself, see the free companion project, 53 | [A Whirlwind Tour of Python](https://github.com/jakevdp/WhirlwindTourOfPython): it's a fast-paced introduction to the Python language aimed at researchers and scientists. 54 | 55 | 本书介绍了在Python中操作数据的核心库:特别包括 [IPython](http://ipython.org), [NumPy](http://numpy.org), [Pandas](http://pandas.pydata.org), [Matplotlib](http://matplotlib.org), [Scikit-Learn](http://scikit-learn.org) 和其他相关的包。预设前提是读者已经熟悉Python语言;如果你需要语言本身的一个快速入门介绍,可以去参考兄弟项目,[A Whirlwind Tour of Python](https://github.com/jakevdp/WhirlwindTourOfPython): 这是一个面向研究人员和科学家的快速Python语言入门介绍。 56 | 57 | 译者注:[Python旋风之旅](https://github.com/wangyingsm/wwtop)中文翻译已经全部完成。 58 | 59 | > See [Index.ipynb](http://nbviewer.jupyter.org/github/jakevdp/PythonDataScienceHandbook/blob/master/notebooks/Index.ipynb) for an index of the notebooks available to accompany the text. 60 | 61 | 可以参见[目录](notebooks/Index.ipynb)查看所有notebooks的列表。 62 | 63 | ## Software 64 | 65 | ## 软件 66 | 67 | > The code in the book was tested with Python 3.5, though most (but not all) will also work correctly with Python 2.7 and other older Python versions. 68 | 69 | 本书的代码在Python 3.5环境中测试通过,但绝大部分情况下,代码都能在Python 2.7和其他更早的版本下正常使用。 70 | 71 | 译者注:Python2将于2020年1月停止维护,因此强烈不建议继续使用Python2。 72 | 73 | > The packages I used to run the code in the book are listed in [requirements.txt](requirements.txt) (Note that some of these exact version numbers may not be available on your platform: you may have to tweak them for your own use). 
74 | To install the requirements using [conda](http://conda.pydata.org), run the following at the command-line: 75 | 76 | ``` 77 | $ conda install --file requirements.txt 78 | ``` 79 | 80 | 运行本书的代码需要用到的第三方包列在 [requirements.txt](requirements.txt) 文件中(请注意,部分确定版本的包可能并不适用于你的系统:你可能需要根据需要调整它们)。要使用 [conda](http://conda.pydata.org) 安装需要的依赖包,可以执行下面的命令: 81 | 82 | ```shell 83 | $ conda install --file requirements.txt 84 | ``` 85 | 86 | > To create a stand-alone environment named ``PDSH`` with Python 3.5 and all the required package versions, run the following: 87 | 88 | ``` 89 | $ conda create -n PDSH python=3.5 --file requirements.txt 90 | ``` 91 | 92 | 如果需要创建一个独立的Python环境,例如叫`PDSH`,使用Python 3.5环境和安装所需的第三方包,执行下面的命令: 93 | 94 | ```shell 95 | $ conda create -n PDSH python=3.5 --file requirements.txt 96 | ``` 97 | 98 | > You can read more about using conda environments in the [Managing Environments](http://conda.pydata.org/docs/using/envs.html) section of the conda documentation. 99 | 100 | 可以参见conda文档的[管理Python环境](http://conda.pydata.org/docs/using/envs.html)章节了解更多的内容。 101 | 102 | ## License 103 | 104 | ## 协议 105 | 106 | ### Code 107 | 108 | ### 代码 109 | 110 | 不翻译了,[MIT license](LICENSE-CODE)。 111 | 112 | > The code in this repository, including all code samples in the notebooks listed above, is released under the [MIT license](LICENSE-CODE). Read more at the [Open Source Initiative](https://opensource.org/licenses/MIT). 113 | 114 | ### Text 115 | 116 | ### 文字 117 | 118 | 不翻译了,[CC-BY-NC-ND license](LICENSE-TEXT)。 119 | 120 | > The text content of the book is released under the [CC-BY-NC-ND license](LICENSE-TEXT). Read more at [Creative Commons](https://creativecommons.org/licenses/by-nc-nd/3.0/us/legalcode). 
121 | -------------------------------------------------------------------------------- /notebooks/00.00-Preface.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "| [目录](Index.ipynb) | [IPython:超越Python解释器](01.00-IPython-Beyond-Normal-Python.ipynb) >\n", 9 | "\n", 10 | "\"Open\n" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# Preface\n", 18 | "\n", 19 | "# 序言" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "## What Is Data Science?\n", 27 | "\n", 28 | "## 什么是数据科学?\n", 29 | "\n", 30 | "> This is a book about doing data science with Python, which immediately begs the question: what is *data science*?\n", 31 | "It's a surprisingly hard definition to nail down, especially given how ubiquitous the term has become.\n", 32 | "Vocal critics have variously dismissed the term as a superfluous label (after all, what science doesn't involve data?) 
or a simple buzzword that only exists to salt resumes and catch the eye of overzealous tech recruiters.\n", 33 | "\n", 34 | "这是一本介绍使用Python完成数据科学工作的书,那么立刻就会带来一个问题:什么是*数据科学*?这是一个十分难以定义的概念,尤其是这几年这个术语几乎随处可见。批评的声音认为这是一个多余的标签(毕竟,哪样科学不包含数据呢?)或者这只是一个为了博取关注而产生的流行词汇。\n", 35 | "\n", 36 | "> In my mind, these critiques miss something important.\n", 37 | "Data science, despite its hype-laden veneer, is perhaps the best label we have for the cross-disciplinary set of skills that are becoming increasingly important in many applications across industry and academia.\n", 38 | "This cross-disciplinary piece is key: in my mind, the best existing definition of data science is illustrated by Drew Conway's Data Science Venn Diagram, first published on his blog in September 2010:\n", 39 | "\n", 40 | "在作者看来,这些批评忽略了一些重要的东西。数据科学,除了部分炒作的成分外,可能是目前我们能够找到的最合适的词汇来表达这种跨学科领域的技术了,特别是越来越多的工业和学术应用都在使用它。这里的关键是跨学科领域:作者认为,最好表达数据科学的定义的方式是2010年9月Drew Conway在他的博客里面发表的下面这张图:" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "![Data Science Venn Diagram](https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/7fac9497d573c8f3ea3545b7fcb0a98d59e1c9cb/notebooks/figures/Data_Science_VD.png)\n", 48 | "\n", 49 | "(Source: [Drew Conway](http://drewconway.com/zia/2013/3/26/the-data-science-venn-diagram). 
Used by permission.)\n", 50 | "\n", 51 | "(来源: [Drew Conway](http://drewconway.com/zia/2013/3/26/the-data-science-venn-diagram)。授权使用。)" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "> While some of the intersection labels are a bit tongue-in-cheek, this diagram captures the essence of what I think people mean when they say \"data science\": it is fundamentally an *interdisciplinary* subject.\n", 59 | "Data science comprises three distinct and overlapping areas: the skills of a *statistician* who knows how to model and summarize datasets (which are growing ever larger); the skills of a *computer scientist* who can design and use algorithms to efficiently store, process, and visualize this data; and the *domain expertise*—what we might think of as \"classical\" training in a subject—necessary both to formulate the right questions and to put their answers in context.\n", 60 | "\n", 61 | "虽然图中,圆形重叠部分的标签看起来很有些嘲讽的意味,但这张图把握了当人们使用“数据科学”这个术语时候的精髓:最根本来说,数据科学是一门交叉学科。数据科学有三个领域交叉而成:需要*统计学家*来对数据集(正在变得越来越巨大)进行建模和统计;需要*计算机科学家*来使用算法有效地存储、处理和展现这些数据;还需要*领域专家*(通常在传统意义上我们就是这么做的)来在相关垂直领域整理出正确的问题和相应的解决方法。\n", 62 | "\n", 63 | "> With this in mind, I would encourage you to think of data science not as a new domain of knowledge to learn, but a new set of skills that you can apply within your current area of expertise.\n", 64 | "Whether you are reporting election results, forecasting stock returns, optimizing online ad clicks, identifying microorganisms in microscope photos, seeking new classes of astronomical objects, or working with data in any other field, the goal of this book is to give you the ability to ask and answer new questions about your chosen subject area.\n", 65 | "\n", 66 | "根据上述解释,读者与其将数据科学当成是一个新的知识领域来学习,还不如将你已有的专业知识融会贯通,发展出新的数据科学技巧。无论你是在统计选举结果、预测股市回报、优化在线广告点击、在显微镜图像中识别微小组织、寻找一类新的天文物体、或者是其他任何与数据相关的工作,本书的目标就是为你提供一种新的能力来提出和解答该领域的相关问题。" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 
| "## Who Is This Book For?\n", 74 | "\n", 75 | "## 谁适合读这本书?\n", 76 | "\n", 77 | "> In my teaching both at the University of Washington and at various tech-focused conferences and meetups, one of the most common questions I have heard is this: \"how should I learn Python?\"\n", 78 | "The people asking are generally technically minded students, developers, or researchers, often with an already strong background in writing code and using computational and numerical tools.\n", 79 | "Most of these folks don't want to learn Python *per se*, but want to learn the language with the aim of using it as a tool for data-intensive and computational science.\n", 80 | "While a large patchwork of videos, blog posts, and tutorials for this audience is available online, I've long been frustrated by the lack of a single good answer to this question; that is what inspired this book.\n", 81 | "\n", 82 | "在作者华盛顿大学教学经历和在其他论坛会议演讲的过程中,最多被问到的问题之一就是:“我应该怎样学习Python?”\n", 83 | "提问者包括想在技术上深造的学生、开发人员或者研究人员,而且他们往往已经具备很强大的代码编写和使用数值计算工具的背景。他们其实并不是渴望学习Python语言*本身*,而只是想要学习Python语言有关数据方面或科学数值计算方面的内容。\n", 84 | "网络上已经有数之不尽的视频、博客和教程,但很难找到一个关于这个问题的唯一答案;这正是促使作者写这本书的原因。\n", 85 | "\n", 86 | "> The book is not meant to be an introduction to Python or to programming in general; I assume the reader has familiarity with the Python language, including defining functions, assigning variables, calling methods of objects, controlling the flow of a program, and other basic tasks. 
Instead, it is meant to help Python users learn to use Python's data science stack–libraries such as IPython, NumPy, Pandas, Matplotlib, Scikit-Learn, and related tools–to effectively store, manipulate, and gain insight from data.\n", 87 | "\n", 88 | "本书不会作为Python语言的通用介绍;作者假定读者对于Python语言已经比较熟悉,包括函数定义,变量赋值,对象方法调用,程序流程控制,和其他基本的任务。本书旨在帮助Python用户学会使用Python的数据科学工具栈,例如IPython、NumPy、Pandas、Matplotlib、Scikit-Learn和相关的工具,来有效地存储、处理数据并从中获得洞见。" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "## Why Python?\n", 96 | "\n", 97 | "## 为什么要用Python?\n", 98 | "\n", 99 | "> Python has emerged over the last couple decades as a first-class tool for scientific computing tasks, 
including the analysis and visualization of large datasets.\n", 100 | "This may have come as a surprise to early proponents of the Python language: the language itself was not specifically designed with data analysis or scientific computing in mind.\n", 101 | "The usefulness of Python for data science stems primarily from the large and active ecosystem of third-party packages: *NumPy* for manipulation of homogeneous array-based data, *Pandas* for manipulation of heterogeneous and labeled data, *SciPy* for common scientific computing tasks, *Matplotlib* for publication-quality visualizations, *IPython* for interactive execution and sharing of code, *Scikit-Learn* for machine learning, and many more tools that will be mentioned in the following pages.\n", 102 | "\n", 103 | "Python在最近20年已经发展成为科学计算包括分析和展示大型数据集的最流行工具。这对于Python语言的早期支持者来说是一个惊喜:因为这门语言本身并不是专门为了数据分析和科学计算来设计的。\n", 104 | "Python在数据科学中的蓬勃发展主要来源于其大量活跃的第三方包:*Numpy*用于处理同类的数组结构数据;*Pandas*用于处理不同种类和标签化的数据;*SciPy*用于通用的科学运算任务;*Matplotlib*用于可打印标准的图表展示;*IPython*用于交互式执行和共享代码;*Scikit-Learn*用于机器学习,这些工具将在后续的章节中介绍。\n", 105 | "\n", 106 | "> If you are looking for a guide to the Python language itself, I would suggest the sister project to this book, \"[A Whirlwind Tour of the Python Language](https://github.com/jakevdp/WhirlwindTourOfPython)\".\n", 107 | "This short report provides a tour of the essential features of the Python language, aimed at data scientists who already are familiar with one or more other programming languages.\n", 108 | "\n", 109 | "如果你需要的是Python语言本身的指引,作者推荐本项目的兄弟项目\"[A Whirlwind Tour of the Python Language](https://github.com/jakevdp/WhirlwindTourOfPython)\"(译者注:[Python旋风之旅](https://github.com/wangyingsm/wwtop)中文版已经全部翻译完成)。这个项目提供了Python语言最基本特性的一个简单介绍,针对已经掌握了一门或更多其他编程语言数据科学家。" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "### Python 2 vs Python 3\n", 117 | "\n", 118 | "### Python 2 还是 3\n", 119 | "\n", 120 | "> This book uses the syntax of Python 3, 
which contains language enhancements that are not compatible with the 2.x series of Python.\n", 121 | "Though Python 3.0 was first released in 2008, adoption has been relatively slow, particularly in the scientific and web development communities.\n", 122 | "This is primarily because it took some time for many of the essential third-party packages and toolkits to be made compatible with the new language internals.\n", 123 | "Since early 2014, however, stable releases of the most important tools in the data science ecosystem have been fully compatible with both Python 2 and 3, and so this book will use the newer Python 3 syntax.\n", 124 | "However, the vast majority of code snippets in this book will also work without modification in Python 2: in cases where a Py2-incompatible syntax is used, I will make every effort to note it explicitly.\n", 125 | "\n", 126 | "本书采用Python 3的语法编写,内含一些2.x版本不具备的语言增强特性。虽然Python 3.0在2008年就已经发布,但是转换并不迅速,尤其在科学和Web开发社区中。这主要是因为很多核心的第三方库和工具需要时间才能兼容新的语言版本特性。自2014年初起,大多数重要的数据科学生态工具都已经发布了兼容Python 2和3的稳定版本,因此本书采用新的Python 3语法。然而,本书很大一部分代码片段都能不需修改地运行在Python 2环境:在使用了Py2不兼容的语法的地方,作者会尽力标明。\n", 127 | "\n", 128 | "译者注:Python 2将于2020年1月1日停止维护,因此强烈建议读者不要继续使用Python 2环境编写代码。" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "## Outline of the Book\n", 136 | "\n", 137 | "## 大纲\n", 138 | "\n", 139 | "> Each chapter of this book focuses on a particular package or tool that contributes a fundamental piece of the Python Data Science story.\n", 140 | "\n", 141 | "> 1. IPython and Jupyter: these packages provide the computational environment in which many Python-using data scientists work.\n", 142 | "> 2. NumPy: this library provides the ``ndarray`` for efficient storage and manipulation of dense data arrays in Python.\n", 143 | "> 3. Pandas: this library provides the ``DataFrame`` for efficient storage and manipulation of labeled/columnar data in Python.\n", 144 | "> 4. 
Matplotlib: this library provides capabilities for a flexible range of data visualizations in Python.\n", 145 | "> 5. Scikit-Learn: this library provides efficient & clean Python implementations of the most important and established machine learning algorithms.\n", 146 | "\n", 147 | "本书的每一章都聚焦于一个特定的包或工具,它对数据科学某个方面都有重要的应用和帮助。\n", 148 | "\n", 149 | "1. IPython 和 Jupyter: 这两个包提供了使用Python的数据科学家最喜爱的计算环境。\n", 150 | "2. NumPy: 这个包提供了`ndarray`对象用于有效的存储和处理数组中的非稀疏数据。\n", 151 | "3. Pandas: 这个包提供了`DataFrame`对象用于有效的存储和处理标签化的基于列结构的数据。\n", 152 | "4. Matplotlib: 这个包提供了最灵活的数据图表展示功能。\n", 153 | "5. Scikit-Learn: 这个包提供了很多重要的机器学习算法以及有效和简洁的Python实现。\n", 154 | "\n", 155 | "> The PyData world is certainly much larger than these five packages, and is growing every day.\n", 156 | "With this in mind, I make every attempt through these pages to provide references to other interesting efforts, projects, and packages that are pushing the boundaries of what can be done in Python.\n", 157 | "Nevertheless, these five are currently fundamental to much of the work being done in the Python data science space, and I expect they will remain important even as the ecosystem continues growing around them.\n", 158 | "\n", 159 | "Python的数据科学领域肯定远远不止这5个包,而且每天都在不断增长。作者会在每个章节都尽量提供其他有趣的项目和包的推荐。无论如何,这五个包目前是Python数据科学领域最基础的内容,作者期待他们会在未来依然保持其重要性,甚至在生态持续发展的情况下。" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "## Using Code Examples\n", 167 | "\n", 168 | "## 使用代码例子\n", 169 | "\n", 170 | "> Supplemental material (code examples, figures, etc.) is available for download at http://github.com/jakevdp/PythonDataScienceHandbook/. This book is here to help you get your job done. In general, if example code is offered with this book, you may use it in your programs and documentation. You do not need to contact us for permission unless you’re reproducing a significant portion of the code. 
For example, writing a program that uses several chunks of code from this book does not require permission. Selling or distributing a CD-ROM of examples from O’Reilly books does require permission. Answering a question by citing this book and quoting example code does not require permission. Incorporating a significant amount of example code from this book into your product’s documentation does require permission.\n", 171 | "\n", 172 | "本书附带的资源(代码示例,图表等)可以在 http://github.com/wangyingsm/Python-Data-Science-Handbook/ 下载。本书的代码例子是为了帮助你理解内容。在通常意义下,本书附带的代码可以被使用在你的程序和文档中。你不需要联系作者获得授权,除非你要复制相当大一部分的代码。例如,使用本书的若干代码片段编写你的程序不需要获得授权;销售或分发包含O’Reilly图书示例的CD-ROM则需要获得授权;引用本书或书中的代码例子回答问题不需要获得授权;将本书大量的代码例子整合到你产品的文档中则需要获得授权。\n", 173 | "\n", 174 | "> We appreciate, but do not require, attribution. An attribution usually includes the title, author, publisher, and ISBN. For example:\n", 175 | "\n", 176 | "> > *The Python Data Science Handbook* by Jake VanderPlas (O’Reilly). Copyright 2016 Jake VanderPlas, 978-1-491-91205-8.\n", 177 | "\n", 178 | "虽然不是必须的,但是如果你在引用时声明了标题、作者、出版社和ISBN的话,作者表示感激。\n", 179 | "\n", 180 | "> If you feel your use of code examples falls outside fair use or the permission given above, feel free to contact us at permissions@oreilly.com.\n", 181 | "\n", 182 | "如果你认为你对于代码例子的使用超出了上述的授权范围,请联系 permissions@oreilly.com。" 183 | ] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "metadata": {}, 188 | "source": [ 189 | "## Installation Considerations\n", 190 | "\n", 191 | "## 安装\n", 192 | "\n", 193 | "> Installing Python and the suite of libraries that enable scientific computing is straightforward.
This section will outline some of the considerations when setting up your computer.\n", 194 | "\n", 195 | "安装Python和科学计算库的套件是很直接的。本节简单介绍一下配置你的计算机的方法。\n", 196 | "\n", 197 | "> Though there are various ways to install Python, the one I would suggest for use in data science is the Anaconda distribution, which works similarly whether you use Windows, Linux, or Mac OS X.\n", 198 | "The Anaconda distribution comes in two flavors:\n", 199 | "\n", 200 | "> - [Miniconda](http://conda.pydata.org/miniconda.html) gives you the Python interpreter itself, along with a command-line tool called ``conda`` which operates as a cross-platform package manager geared toward Python packages, similar in spirit to the apt or yum tools that Linux users might be familiar with.\n", 201 | "\n", 202 | "> - [Anaconda](https://www.continuum.io/downloads) includes both Python and conda, and additionally bundles a suite of other pre-installed packages geared toward scientific computing. Because of the size of this bundle, expect the installation to consume several gigabytes of disk space.\n", 203 | "\n", 204 | "虽然有很多种方式安装Python,作者推荐在数据科学中使用Anaconda发行版,无论你使用Windows、Linux还是Mac OS X,它的使用方式都基本相同。Anaconda发行版有两种模式:\n", 205 | "\n", 206 | "- [Miniconda](http://conda.pydata.org/miniconda.html) 带有Python解释器,还有一个名为`conda`的命令行工具,它是一个面向Python包的跨平台包管理器,类似于Linux用户熟悉的apt或yum工具。\n", 207 | "\n", 208 | "- [Anaconda](https://www.continuum.io/downloads) 安装Python解释器和conda,还会预装科学计算套件,因为这个发行版包括了很多的第三方库,因此可能会占用你磁盘几G的空间。\n", 209 | "\n", 210 | "> Any of the packages included with Anaconda can also be installed manually on top of Miniconda; for this reason I suggest starting with Miniconda.\n", 211 | "\n", 212 | "任何包括在Anaconda发行版中的包都可以在Miniconda的基础上安装;因此,作者建议使用Miniconda。\n", 213 | "\n", 214 | "译者注:如果磁盘空间不紧张,网络带宽也好的情况下,强烈建议使用Anaconda。\n", 215 | "\n", 216 | "> To get started, download and install the Miniconda package–make sure to choose a version with Python 3–and then install the core packages used in this book:\n", 217 | "\n", 218 | "```\n", 219 
| "[~]$ conda install numpy pandas scikit-learn matplotlib seaborn jupyter\n", 220 | "```\n", 221 | "\n", 222 | "在正式开始之前,下载和安装Miniconda,确认选择的是Python 3的版本,然后使用命令行安装本书需要用到的核心包:\n", 223 | "\n", 224 | "```shell\n", 225 | "$ conda install numpy pandas scikit-learn matplotlib seaborn jupyter\n", 226 | "```\n", 227 | "\n", 228 | "> Throughout the text, we will also make use of other more specialized tools in Python's scientific ecosystem; installation is usually as easy as typing **``conda install packagename``**.\n", 229 | "For more information on conda, including information about creating and using conda environments (which I would *highly* recommend), refer to [conda's online documentation](http://conda.pydata.org/docs/).\n", 230 | "\n", 231 | "由上我们可知,我们可以使用conda命令安装Python生态中的任何工具,只需要简单的运行**`conda install 包名称`**即可。更多关于conda的信息,包括创建和使用conda环境(作者*强烈*推荐阅读),请参见[conda在线文档](http://conda.pydata.org/docs/)。" 232 | ] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "metadata": {}, 237 | "source": [ 238 | "\n", 239 | "| [目录](Index.ipynb) | [IPython:超越Python解释器](01.00-IPython-Beyond-Normal-Python.ipynb) >\n", 240 | "\n", 241 | "\"Open" 242 | ] 243 | } 244 | ], 245 | "metadata": { 246 | "anaconda-cloud": {}, 247 | "kernelspec": { 248 | "display_name": "Python 3", 249 | "language": "python", 250 | "name": "python3" 251 | }, 252 | "language_info": { 253 | "codemirror_mode": { 254 | "name": "ipython", 255 | "version": 3 256 | }, 257 | "file_extension": ".py", 258 | "mimetype": "text/x-python", 259 | "name": "python", 260 | "nbconvert_exporter": "python", 261 | "pygments_lexer": "ipython3", 262 | "version": "3.7.6" 263 | } 264 | }, 265 | "nbformat": 4, 266 | "nbformat_minor": 1 267 | } 268 | -------------------------------------------------------------------------------- /notebooks/01.00-IPython-Beyond-Normal-Python.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 
| "source": [ 7 | "\n", 8 | "< [序言](00.00-Preface.ipynb) | [目录](Index.ipynb) | [IPython帮助和文档](01.01-Help-And-Documentation.ipynb) >\n", 9 | "\n", 10 | "\"Open\n" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# IPython: Beyond Normal Python\n", 18 | "\n", 19 | "# IPython:超越Python解释器" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "> There are many options for development environments for Python, and I'm often asked which one I use in my own work.\n", 27 | "My answer sometimes surprises people: my preferred environment is [IPython](http://ipython.org/) plus a text editor (in my case, Emacs or Atom depending on my mood).\n", 28 | "IPython (short for *Interactive Python*) was started in 2001 by Fernando Perez as an enhanced Python interpreter, and has since grown into a project aiming to provide, in Perez's words, \"Tools for the entire life cycle of research computing.\"\n", 29 | "If Python is the engine of our data science task, you might think of IPython as the interactive control panel.\n", 30 | "\n", 31 | "对于Python来说,开发环境有很多种选择,作者经常会被问到他在自己工作中使用哪一个。答案有时会让提问者感到吃惊:作者习惯的环境是[IPython](http://ipython.org/) 再加上一个文本编辑器(取决于作者的心情,可能是Emacs或者Atom)。IPython(是*交互式Python*的缩写)是Fernando Perez在2001年创建的一个增强Python解释器的项目,目前已经发展成为一个超级工具,目标是提供(用Perez自己的话来说)“研究计算领域完整生命周期的工具”。如果类比Python是我们数据科学的引擎的话,那么你可以认为IPython就是一个交互式的控制面板。\n", 32 | "\n", 33 | "> As well as being a useful interactive interface to Python, IPython also provides a number of useful syntactic additions to the language; we'll cover the most useful of these additions here.\n", 34 | "In addition, IPython is closely tied with the [Jupyter project](http://jupyter.org), which provides a browser-based notebook that is useful for development, collaboration, sharing, and even publication of data science results.\n", 35 | "The IPython notebook is actually a special case of the broader Jupyter notebook structure, which encompasses notebooks 
for Julia, R, and other programming languages.\n", 36 | "As an example of the usefulness of the notebook format, look no further than the page you are reading: the entire manuscript for this book was composed as a set of IPython notebooks.\n", 37 | "\n", 38 | "除了提供Python十分方便的交互式界面外,IPython还提供了一些语言的语法扩展;我们会在这里介绍其中最有用的扩展内容。\n", 39 | "并且,IPython紧密的绑定在[Jupyter项目](http://jupyter.org)之上,Jupyter能够提供一个浏览器界面的笔记本(译者注:后续一律称为notebook,不再翻译该名词),能够非常方便的开发、合作、分享甚至发布数据科学的结果。\n", 40 | "IPython notebook是Jupyter这个庞大项目中的Python部分,后者希望为Julia、R和其他编程语言都能提供notebook功能。\n", 41 | "本书就可以作为notebook格式的一个有力证明:所有本书的内容都是使用IPython notebook编写的。\n", 42 | "\n", 43 | "> IPython is about using Python effectively for interactive scientific and data-intensive computing.\n", 44 | "This chapter will start by stepping through some of the IPython features that are useful to the practice of data science, focusing especially on the syntax it offers beyond the standard features of Python.\n", 45 | "Next, we will go into a bit more depth on some of the more useful \"magic commands\" that can speed-up common tasks in creating and using data science code.\n", 46 | "Finally, we will touch on some of the features of the notebook that make it useful in understanding data and sharing results.\n", 47 | "\n", 48 | "IPython的目标是让科学和数据计算在Python中更加有效和具有交互性。本章将会介绍许多对于数据科学实践来说非常有用的IPython特性,尤其聚焦在它提供在Python标准之外的语法扩展。\n", 49 | "然后,我们将会进一步深入到一些有用的“魔术命令”中,这些命令能够提高你在编写和使用数据科学代码的时候的生产效率。\n", 50 | "最后,我们将接触到在notebook当中有用的数据分析和分享结果的特性。" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "## Shell or Notebook?\n", 58 | "\n", 59 | "## Shell 还是 Notebook?\n", 60 | "\n", 61 | "> There are two primary means of using IPython that we'll discuss in this chapter: the IPython shell and the IPython notebook.\n", 62 | "The bulk of the material in this chapter is relevant to both, and the examples will switch between them depending on what is most convenient.\n", 63 | "In the few sections that are relevant 
to just one or the other, we will explicitly state that fact.\n", 64 | "Before we start, some words on how to launch the IPython shell and IPython notebook.\n", 65 | "\n", 66 | "IPython提供了两种主要的使用方法,Shell和Notebook。本章将会都使用到,例子将会根据最方便的方式切换。如果某些小节只会使用到其中一个,作者会明确指出。在我们开始之前,我们先来学习如何启动shell和notebook。" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "### Launching the IPython Shell\n", 74 | "\n", 75 | "### 启动IPython Shell\n", 76 | "\n", 77 | "> This chapter, like most of this book, is not designed to be absorbed passively.\n", 78 | "I recommend that as you read through it, you follow along and experiment with the tools and syntax we cover: the muscle-memory you build through doing this will be far more useful than the simple act of reading about it.\n", 79 | "Start by launching the IPython interpreter by typing **``ipython``** on the command-line; alternatively, if you've installed a distribution like Anaconda or EPD, there may be a launcher specific to your system (we'll discuss this more fully in [Help and Documentation in IPython](01.01-Help-And-Documentation.ipynb)).\n", 80 | "\n", 81 | "本章和本书大多数章节一样,不是希望你只是被动学习。作者建议当你阅读的时候,能够跟着内容进行实践,对我们介绍的工具和语法进行试验:在此过程中你将会获得肌肉记忆,这远比简单的阅读牢靠的多。\n", 82 | "启动IPython解释器,你只需要在命令行上输入**`ipython`**即可;同样的,如果你安装的是Anaconda或者EPD这样的发行版,那么你的操作系统上可能会有相应的启动图标(我们会在[IPython帮助和文档](01.01-Help-And-Documentation.ipynb)中更详细的讨论)。\n", 83 | "\n", 84 | "> Once you do this, you should see a prompt like the following:\n", 85 | "\n", 86 | "```\n", 87 | "IPython 4.0.1 -- An enhanced Interactive Python.\n", 88 | "? -> Introduction and overview of IPython's features.\n", 89 | "%quickref -> Quick reference.\n", 90 | "help -> Python's own help system.\n", 91 | "object? -> Details about 'object', use 'object??' 
for extra details.\n", 92 | "In [1]:\n", 93 | "```\n", 94 | "\n", 95 | "> With that, you're ready to follow along.\n", 96 | "\n", 97 | "当你输入命令后,你应该会看到如下的一个提示符:\n", 98 | "\n", 99 | "```\n", 100 | "IPython 4.0.1 -- An enhanced Interactive Python.\n", 101 | "? -> Introduction and overview of IPython's features.\n", 102 | "%quickref -> Quick reference.\n", 103 | "help -> Python's own help system.\n", 104 | "object? -> Details about 'object', use 'object??' for extra details.\n", 105 | "In [1]:\n", 106 | "```\n", 107 | "\n", 108 | "然后,你就可以接着往下进行了。" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "### Launching the Jupyter Notebook\n", 116 | "\n", 117 | "### 启动Jupyter Notebook\n", 118 | "\n", 119 | "> The Jupyter notebook is a browser-based graphical interface to the IPython shell, and builds on it a rich set of dynamic display capabilities.\n", 120 | "As well as executing Python/IPython statements, the notebook allows the user to include formatted text, static and dynamic visualizations, mathematical equations, JavaScript widgets, and much more. 
Furthermore, these documents can be saved in a way that lets other people open them and execute the code on their own systems.\n", 121 | "\n", 122 | "Jupyter notebook是一个浏览器图形界面的IPython shell,上面构建了一整套丰富的动态展示功能。\n", 123 | "除了能够执行Python/IPython代码,notebook还允许用户加入格式化的文本、静态和动态的可视化图表、数学公式、JavaScript组件和其他很多内容。此外,这些文档能被保存成一种能让其他人在他们自己的系统中打开和执行的文件格式。\n", 124 | "\n", 125 | "> Though the IPython notebook is viewed and edited through your web browser window, it must connect to a running Python process in order to execute code.\n", 126 | "This process (known as a \"kernel\") can be started by running the following command in your system shell:\n", 127 | "\n", 128 | "```\n", 129 | "$ jupyter notebook\n", 130 | "```\n", 131 | "\n", 132 | "虽然IPython notebook在你的浏览器窗口中展示和编辑,但是它需要连接到一个执行中的Python进程才能真正执行代码。这个进程(被称为“kernel”)可以在命令行中使用下面的命令启动:\n", 133 | "\n", 134 | "```bash\n", 135 | "$ jupyter notebook\n", 136 | "```\n", 137 | "\n", 138 | "> This command will launch a local web server that will be visible to your browser.\n", 139 | "It immediately spits out a log showing what it is doing; that log will look something like this:\n", 140 | "\n", 141 | "```\n", 142 | "$ jupyter notebook\n", 143 | "[NotebookApp] Serving notebooks from local directory: /Users/jakevdp/PythonDataScienceHandbook\n", 144 | "[NotebookApp] 0 active kernels \n", 145 | "[NotebookApp] The IPython Notebook is running at: http://localhost:8888/\n", 146 | "[NotebookApp] Use Control-C to stop this server and shut down all kernels (twice to skip confirmation).\n", 147 | "```\n", 148 | "\n", 149 | "这个命令会启动一个web服务器让你的浏览器访问。它会立刻在你的控制台打印出日志,日志的格式类似下面:\n", 150 | "\n", 151 | "```\n", 152 | "$ jupyter notebook\n", 153 | "[NotebookApp] Serving notebooks from local directory: /Users/jakevdp/PythonDataScienceHandbook\n", 154 | "[NotebookApp] 0 active kernels \n", 155 | "[NotebookApp] The IPython Notebook is running at: http://localhost:8888/\n", 156 | "[NotebookApp] Use Control-C to stop this server and shut down all kernels (twice to 
skip confirmation).\n", 157 | "```\n", 158 | "\n", 159 | "> Upon issuing the command, your default browser should automatically open and navigate to the listed local URL;\n", 160 | "the exact address will depend on your system.\n", 161 | "If the browser does not open automatically, you can open a window and manually open this address (*http://localhost:8888/* in this example).\n", 162 | "\n", 163 | "输入上述命令之后,你系统的默认浏览器应该会自动打开然后指向本地的地址;完整的地址取决于你的系统。如果你的浏览器没有自动打开,你可以手动打开它并输入地址(本例中是*http://localhost:8888/* )。" 164 | ] 165 | }, 166 | { 167 | "cell_type": "markdown", 168 | "metadata": {}, 169 | "source": [ 170 | "\n", 171 | "< [序言](00.00-Preface.ipynb) | [目录](Index.ipynb) | [IPython帮助和文档](01.01-Help-And-Documentation.ipynb) >\n", 172 | "\n", 173 | "\"Open\n" 174 | ] 175 | } 176 | ], 177 | "metadata": { 178 | "anaconda-cloud": {}, 179 | "kernelspec": { 180 | "display_name": "Python 3", 181 | "language": "python", 182 | "name": "python3" 183 | }, 184 | "language_info": { 185 | "codemirror_mode": { 186 | "name": "ipython", 187 | "version": 3 188 | }, 189 | "file_extension": ".py", 190 | "mimetype": "text/x-python", 191 | "name": "python", 192 | "nbconvert_exporter": "python", 193 | "pygments_lexer": "ipython3", 194 | "version": "3.7.6" 195 | } 196 | }, 197 | "nbformat": 4, 198 | "nbformat_minor": 1 199 | } 200 | -------------------------------------------------------------------------------- /notebooks/01.02-Shell-Keyboard-Shortcuts.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "< [IPython帮助和文档](01.01-Help-And-Documentation.ipynb) | [目录](Index.ipynb) | [IPython魔术命令](01.03-Magic-Commands.ipynb) >\n", 9 | "\n", 10 | "\"Open\n" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# Keyboard Shortcuts in the IPython Shell\n", 18 | "\n", 19 | "# IPython Shell中的键盘快捷键" 20 | ] 21 | }, 22 | { 23 
| "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "> If you spend any amount of time on the computer, you've probably found a use for keyboard shortcuts in your workflow.\n", 27 | "Most familiar perhaps are the Cmd-C and Cmd-V (or Ctrl-C and Ctrl-V) for copying and pasting in a wide variety of programs and systems.\n", 28 | "Power-users tend to go even further: popular text editors like Emacs, Vim, and others provide users an incredible range of operations through intricate combinations of keystrokes.\n", 29 | "\n", 30 | "如果你已经使用计算机一段时间了,你会发现键盘快捷键在你的工作中经常会被用到。这里面最常用的莫过于Cmd-C和Cmd-V(或者Ctrl-C和Ctrl-V),用来复制和粘贴。熟练的用户可能走得更远:流行的文本编辑器如Emacs、Vim等会给用户提供很多的组合快捷键。\n", 31 | "\n", 32 | "> The IPython shell doesn't go this far, but does provide a number of keyboard shortcuts for fast navigation while typing commands.\n", 33 | "These shortcuts are not in fact provided by IPython itself, but through its dependency on the GNU Readline library: as such, some of the following shortcuts may differ depending on your system configuration.\n", 34 | "Also, while some of these shortcuts do work in the browser-based notebook, this section is primarily about shortcuts in the IPython shell.\n", 35 | "\n", 36 | "IPython shell没有像上述的文本编辑器那么复杂,但是也提供了不少的快捷键能让用户在输入命令的时候提高工作效率。这些快捷键实际上并不是IPython本身提供的,是基于它所依赖的GNU Readline库提供的:因此,下面介绍的某些快捷键可能会根据你的系统设置不同而发生改变。虽然本小节介绍的一些快捷键也在浏览器中的notebook应用,但是目前我们聚焦在IPython shell上。\n", 37 | "\n", 38 | "> Once you get accustomed to these, they can be very useful for quickly performing certain commands without moving your hands from the \"home\" keyboard position.\n", 39 | "If you're an Emacs user or if you have experience with Linux-style shells, the following will be very familiar.\n", 40 | "We'll group these shortcuts into a few categories: *navigation shortcuts*, *text entry shortcuts*, *command history shortcuts*, and *miscellaneous shortcuts*.\n", 41 | "\n", 42 | 
"一旦你习惯了这些快捷键,你会发现它们能大大提高你在shell中输入命令的效率,甚至在你的手指不需要离开键盘主位置的情况下。如果你是一个Emacs编辑器的用户,或者是一个Linux shell的用户,以下内容对你来说不会陌生。我们将这些快捷键分为几组:*导航快捷键*, *文字输入快捷键*, *命令历史实现快捷键*以及*杂项快捷键*。" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "## Navigation shortcuts\n", 50 | "\n", 51 | "## 导航快捷键\n", 52 | "\n", 53 | "> While the use of the left and right arrow keys to move backward and forward in the line is quite obvious, there are other options that don't require moving your hands from the \"home\" keyboard position:\n", 54 | "\n", 55 | "> | Keystroke | Action |\n", 56 | "|-----------------------------------|--------------------------------------------|\n", 57 | "| ``Ctrl-a`` | Move cursor to the beginning of the line |\n", 58 | "| ``Ctrl-e`` | Move cursor to the end of the line |\n", 59 | "| ``Ctrl-b`` or the left arrow key | Move cursor back one character |\n", 60 | "| ``Ctrl-f`` or the right arrow key | Move cursor forward one character |\n", 61 | "\n", 62 | "显然使用左右箭头键来在一行命令中前后移动是很明显的,但是也有其他的选择让你无需将手移动到主键盘位置之外:\n", 63 | "\n", 64 | "| 按键 | 动作 |\n", 65 | "|-----------------------------------|--------------------------------------------|\n", 66 | "| ``Ctrl-a`` | 将光标移动到本行开始位置 |\n", 67 | "| ``Ctrl-e`` | 将光标移动到本行结束位置 |\n", 68 | "| ``Ctrl-b`` 或者 左箭头 | 将光标向左移动一个字符 |\n", 69 | "| ``Ctrl-f`` 或者 右箭头 | 将光标向右移动一个字符 |\n", 70 | "\n", 71 | "译者注:如果你熟悉BASH,这四个快捷键一定不陌生。" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "## Text Entry Shortcuts\n", 79 | "\n", 80 | "## 文字输入快捷键\n", 81 | "\n", 82 | "> While everyone is familiar with using the Backspace key to delete the previous character, reaching for the key often requires some minor finger gymnastics, and it only deletes a single character at a time.\n", 83 | "In IPython there are several shortcuts for removing some portion of the text you're typing.\n", 84 | "The most immediately useful of these are the commands to delete entire lines of text.\n", 85 | "You'll 
know these have become second-nature if you find yourself using a combination of Ctrl-b and Ctrl-d instead of reaching for Backspace to delete the previous character!\n", 86 | "\n", 87 | "> | Keystroke | Action |\n", 88 | "|-------------------------------|--------------------------------------------------|\n", 89 | "| Backspace key | Delete previous character in line |\n", 90 | "| ``Ctrl-d`` | Delete next character in line |\n", 91 | "| ``Ctrl-k`` | Cut text from cursor to end of line |\n", 92 | "| ``Ctrl-u`` | Cut text from beginning of line to cursor |\n", 93 | "| ``Ctrl-y`` | Yank (i.e. paste) text that was previously cut |\n", 94 | "| ``Ctrl-t`` | Transpose (i.e., switch) previous two characters |\n", 95 | "\n", 96 | "我们都知道使用回退键可以删除前一个字符,去按下这个键有时也需要将手移出主键盘位置,而且这个键每次只能删除一个字符。在IPython中,有一些快捷键可以删除部分你正在输入的文字。这其中最有用的可能就是删除整行文字。当你熟练之后,你也可能本能的使用Ctrl-b和Ctrl-d来代替回退键。\n", 97 | "\n", 98 | "| 按键 | 动作 |\n", 99 | "|-------------------------------|--------------------------------------------------|\n", 100 | "| 回退键 | 删除光标前一个字符 |\n", 101 | "| ``Ctrl-d`` 或者 删除键 | 删除光标所在字符 |\n", 102 | "| ``Ctrl-k`` | 剪切光标所在位置直至末尾的字符 |\n", 103 | "| ``Ctrl-u`` | 剪切开头直至光标所在前一个位置的字符 |\n", 104 | "| ``Ctrl-y`` | 粘贴字符到光标所在位置 |\n", 105 | "| ``Ctrl-t`` | 交换光标前一位置和光标所在位置的字符 |" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "## Command History Shortcuts\n", 113 | "\n", 114 | "## 命令历史快捷键\n", 115 | "\n", 116 | "> Perhaps the most impactful shortcuts discussed here are the ones IPython provides for navigating the command history.\n", 117 | "This command history goes beyond your current IPython session: your entire command history is stored in a SQLite database in your IPython profile directory.\n", 118 | "The most straightforward way to access these is with the up and down arrow keys to step through the history, but other options exist as well:\n", 119 | "\n", 120 | "> | Keystroke | Action |\n", 121 | 
"|-------------------------------------|--------------------------------------------|\n", 122 | "| ``Ctrl-p`` (or the up arrow key) | Access previous command in history |\n", 123 | "| ``Ctrl-n`` (or the down arrow key) | Access next command in history |\n", 124 | "| ``Ctrl-r`` | Reverse-search through command history |\n", 125 | "\n", 126 | "本小节讨论的快捷键中,可能提供在命令历史中导航的快捷键最令人震撼。命令的历史不仅仅是当前的IPython会话有效,所有的命令历史都会被记录到一个SQLite的数据库中,保存在你的IPython配置目录下。最直接使用命令历史的方法就是向上的箭头和向下的箭头,下表列示了命令历史的快捷键:\n", 127 | "\n", 128 | "| 按键 | 动作 |\n", 129 | "|-------------------------------------|--------------------------------------------|\n", 130 | "| ``Ctrl-p`` 或者 上箭头 | 获取上一条命令历史 |\n", 131 | "| ``Ctrl-n`` 或者 下箭头 | 获取下一条命令历史 |\n", 132 | "| ``Ctrl-r`` | 反向搜索命令历史 |" 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": {}, 138 | "source": [ 139 | "> The reverse-search can be particularly useful.\n", 140 | "Recall that in the previous section we defined a function called ``square``.\n", 141 | "Let's reverse-search our Python history from a new IPython shell and find this definition again.\n", 142 | "When you press Ctrl-r in the IPython terminal, you'll see the following prompt:\n", 143 | "\n", 144 | "反向搜索有时会非常有用。回忆一下上一节中我们定义了一个函数名叫`square`。让我们在IPython shell中使用命令历史回查这个函数的定义。当我们在IPython终端中按下Ctrl-r时,你会看到如下的提示符:\n", 145 | "\n", 146 | "```python\n", 147 | "In [1]:\n", 148 | "(reverse-i-search)`': \n", 149 | "```\n", 150 | "\n", 151 | "> If you start typing characters at this prompt, IPython will auto-fill the most recent command, if any, that matches those characters:\n", 152 | "\n", 153 | "如果你在这个提示符下输入,IPython会根据你输入的部分内容自动补充最近使用的命令:\n", 154 | "\n", 155 | "```python\n", 156 | "In [1]: \n", 157 | "(reverse-i-search)`sqa': square??\n", 158 | "```\n", 159 | "\n", 160 | "> At any point, you can add more characters to refine the search, or press Ctrl-r again to search further for another command that matches the query. 
If you followed along in the previous section, pressing Ctrl-r twice more gives:\n", 161 | "\n", 162 | "在任何时候,你都可以输入更多的字符来精准搜索,或者继续按Ctrl-r来查找下一个(更早)能匹配的命令。如果你跟着上一节的内容操作过,再按两次Ctrl-r会得到:\n", 163 | "\n", 164 | "```python\n", 165 | "In [1]: \n", 166 | "(reverse-i-search)`sqa': def square(a):\n", 167 | " \"\"\"Return the square of a\"\"\"\n", 168 | " return a ** 2\n", 169 | "```\n", 170 | "\n", 171 | "> Once you have found the command you're looking for, press Return and the search will end.\n", 172 | "We can then use the retrieved command, and carry on with our session:\n", 173 | "\n", 174 | "一旦你找到了你需要的命令,敲击回车将结束反向搜索。然后你就能使用找到的命令继续了:\n", 175 | "\n", 176 | "\n", 177 | "```python\n", 178 | "In [1]: def square(a):\n", 179 | " \"\"\"Return the square of a\"\"\"\n", 180 | " return a ** 2\n", 181 | "\n", 182 | "In [2]: square(2)\n", 183 | "Out[2]: 4\n", 184 | "```\n", 185 | "\n", 186 | "> Note that Ctrl-p/Ctrl-n or the up/down arrow keys can also be used to search through history, but only by matching characters at the beginning of the line.\n", 187 | "That is, if you type **``def``** and then press Ctrl-p, it would find the most recent command (if any) in your history that begins with the characters ``def``.\n", 188 | "\n", 189 | "注意Ctrl-p/Ctrl-n或者上箭头/下箭头键也可以用来搜索命令历史,但是仅能匹配命令开头的那些字符。意思是,如果你输入**`def`**然后键入Ctrl-p或者向上箭头,IPython会寻找命令历史中最近的一条以`def`开头的命令(如果有的话)。" 190 | ] 191 | }, 192 | { 193 | "cell_type": "markdown", 194 | "metadata": {}, 195 | "source": [ 196 | "## Miscellaneous Shortcuts\n", 197 | "\n", 198 | "## 杂项快捷键\n", 199 | "\n", 200 | "> Finally, there are a few miscellaneous shortcuts that don't fit into any of the preceding categories, but are nevertheless useful to know:\n", 201 | "\n", 202 | "> | Keystroke | Action |\n", 203 | "|-------------------------------|--------------------------------------------|\n", 204 | "| ``Ctrl-l`` | Clear terminal screen |\n", 205 | "| ``Ctrl-c`` | Interrupt current Python command |\n", 206 | "| ``Ctrl-d`` | Exit IPython session |\n",
207 | "\n", 208 | "最后,还有一些杂项的快捷键不属于上述的组别中,但是也挺有用:\n", 209 | "\n", 210 | "| 按键 | 动作 |\n", 211 | "|-------------------------------|--------------------------------------------|\n", 212 | "| ``Ctrl-l`` | 清除终端窗口内容 |\n", 213 | "| ``Ctrl-c`` | 终止当前的Python语句执行 |\n", 214 | "| ``Ctrl-d`` | 退出IPython会话 |\n", 215 | "\n", 216 | "> The Ctrl-c in particular can be useful when you inadvertently start a very long-running job.\n", 217 | "\n", 218 | "当你不小心运行了一个非常花时间(或者无限循环)的任务时,Ctrl-c会特别有用。" 219 | ] 220 | }, 221 | { 222 | "cell_type": "markdown", 223 | "metadata": {}, 224 | "source": [ 225 | "> While some of the shortcuts discussed here may seem a bit tedious at first, they quickly become automatic with practice.\n", 226 | "Once you develop that muscle memory, I suspect you will even find yourself wishing they were available in other contexts.\n", 227 | "\n", 228 | "虽然本节列出的快捷键乍看起来有些繁琐,但是经过练习,你很快就能不假思索地使用它们。一旦你形成了肌肉记忆,你甚至会希望在其他环境中也能使用它们。" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 | "source": [ 235 | "\n", 236 | "< [IPython帮助和文档](01.01-Help-And-Documentation.ipynb) | [目录](Index.ipynb) | [IPython魔术命令](01.03-Magic-Commands.ipynb) >\n", 237 | "\n", 238 | "\"Open" 239 | ] 240 | } 241 | ], 242 | "metadata": { 243 | "anaconda-cloud": {}, 244 | "kernelspec": { 245 | "display_name": "Python 3", 246 | "language": "python", 247 | "name": "python3" 248 | }, 249 | "language_info": { 250 | "codemirror_mode": { 251 | "name": "ipython", 252 | "version": 3 253 | }, 254 | "file_extension": ".py", 255 | "mimetype": "text/x-python", 256 | "name": "python", 257 | "nbconvert_exporter": "python", 258 | "pygments_lexer": "ipython3", 259 | "version": "3.7.6" 260 | } 261 | }, 262 | "nbformat": 4, 263 | "nbformat_minor": 1 264 | } 265 | -------------------------------------------------------------------------------- /notebooks/01.03-Magic-Commands.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type":
"markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "< [IPython Shell中的键盘快捷键](01.02-Shell-Keyboard-Shortcuts.ipynb) | [目录](Index.ipynb) | [输入输出历史](01.04-Input-Output-History.ipynb) >\n", 9 | "\n", 10 | "\"Open\n" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# IPython Magic Commands\n", 18 | "\n", 19 | "# IPython魔术命令" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "> The previous two sections showed how IPython lets you use and explore Python efficiently and interactively.\n", 27 | "Here we'll begin discussing some of the enhancements that IPython adds on top of the normal Python syntax.\n", 28 | "These are known in IPython as *magic commands*, and are prefixed by the ``%`` character.\n", 29 | "These magic commands are designed to succinctly solve various common problems in standard data analysis.\n", 30 | "Magic commands come in two flavors: *line magics*, which are denoted by a single ``%`` prefix and operate on a single line of input, and *cell magics*, which are denoted by a double ``%%`` prefix and operate on multiple lines of input.\n", 31 | "We'll demonstrate and discuss a few brief examples here, and come back to more focused discussion of several useful magic commands later in the chapter.\n", 32 | "\n", 33 | "前两小节展示了怎样使用IPython,令你在其中执行Python代码更加有效和具有交互性。现在我们要开始讨论一些IPython增强的语言特性。这些特性被称为IPython的*魔术命令*,它们都是以`%`字符开头的。这些魔术命令被设计用来简洁地实现很多通用的标准数据科学问题。魔术命令分成两种模式:*行魔术*,以一个`%`开头,是对于一行的输入进行魔术处理的;另一种是*单元格魔术*,以两个`%%`开头,是对于多行的输入进行魔术处理的。本节我们会展示和讨论一些例子,然后本章后续小节会对部分有用的魔术命令进行详细的讨论。" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "## Pasting Code Blocks: ``%paste`` and ``%cpaste``\n", 41 | "\n", 42 | "## 粘贴代码块:`%paste` 和 `%cpaste`\n", 43 | "\n", 44 | "> When working in the IPython interpreter, one common gotcha is that pasting multi-line code blocks can lead to unexpected errors, especially when indentation and 
interpreter markers are involved.\n", 45 | "A common case is that you find some example code on a website and want to paste it into your interpreter.\n", 46 | "Consider the following simple function:\n", 47 | "\n", 48 | "当使用IPython解释器时,我们会遇到一个坑,就是粘贴多行代码块时会出现很多意料之外的错误,尤其是当存在缩进和提示符的情况下。其中一个常见的情况就是当你在网上找到一些示例代码,然后想将它们粘贴到你的解释器中。例如下面这个简单的函数:\n", 49 | "\n", 50 | "``` python\n", 51 | ">>> def donothing(x):\n", 52 | "... return x\n", 53 | "\n", 54 | "```\n", 55 | "\n", 56 | "> The code is formatted as it would appear in the Python interpreter, and if you copy and paste this directly into IPython you get an error:\n", 57 | "\n", 58 | "这段代码在Python解释器中就会像上面那样展示,但是如果你采用通常的复制粘贴大法将它们粘贴到IPython的时候,错误就发生了:\n", 59 | "\n", 60 | "```python\n", 61 | "In [2]: >>> def donothing(x):\n", 62 | " ...: ... return x\n", 63 | " ...: \n", 64 | " File \"\", line 2\n", 65 | " ... return x\n", 66 | " ^\n", 67 | "SyntaxError: invalid syntax\n", 68 | "```\n", 69 | "\n", 70 | "> In the direct paste, the interpreter is confused by the additional prompt characters.\n", 71 | "But never fear–IPython's ``%paste`` magic function is designed to handle this exact type of multi-line, marked-up input:\n", 72 | "\n", 73 | "在直接粘贴的情况下,解释器被额外的提示符号搞蒙了。不怕,IPython的`%paste`魔术命令是专门为了处理这种情况(多行代码块,带提示符号)设计的:\n", 74 | "\n", 75 | "```python\n", 76 | "In [3]: %paste\n", 77 | ">>> def donothing(x):\n", 78 | "...
return x\n", 79 | "\n", 80 | "## -- End pasted text --\n", 81 | "```\n", 82 | "\n", 83 | "> The ``%paste`` command both enters and executes the code, so now the function is ready to be used:\n", 84 | "\n", 85 | "`%paste`命令既输入了多行代码又执行了它们,因此`donothing`函数已经可以使用了:\n", 86 | "\n", 87 | "```python\n", 88 | "In [4]: donothing(10)\n", 89 | "Out[4]: 10\n", 90 | "```\n", 91 | "\n", 92 | "> A command with a similar intent is ``%cpaste``, which opens up an interactive multiline prompt in which you can paste one or more chunks of code to be executed in a batch:\n", 93 | "\n", 94 | "还有一个魔术命令`%cpaste`也是类似的作用,它会打开一个交互的多行提示符,允许你粘贴多个代码块然后批量执行它们:\n", 95 | "\n", 96 | "```python\n", 97 | "In [5]: %cpaste\n", 98 | "Pasting code; enter '--' alone on the line to stop or use Ctrl-D.\n", 99 | ":>>> def donothing(x):\n", 100 | ":... return x\n", 101 | ":--\n", 102 | "```\n", 103 | "\n", 104 | "> These magic commands, like others we'll see, make available functionality that would be difficult or impossible in a standard Python interpreter.\n", 105 | "\n", 106 | "这些魔术命令,还有我们马上会看到的其他命令,提供了标准Python解释器很难或无法提供的功能。" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "## Running External Code: ``%run``\n", 114 | "\n", 115 | "## 执行外部代码:`%run`\n", 116 | "\n", 117 | "> As you begin developing more extensive code, you will likely find yourself working in both IPython for interactive exploration, as well as a text editor to store code that you want to reuse.\n", 118 | "Rather than running this code in a new window, it can be convenient to run it within your IPython session.\n", 119 | "This can be done with the ``%run`` magic.\n", 120 | "\n", 121 | "当你使用Python开发更多代码之后,你会发现你可能需要两个环境,在IPython中交互式的进行探索和快速验证,使用文本编辑器保存那些以后你需要重用的代码。当你需要在IPython中运行你已经保存好的Python代码文件时,你不需要打开一个新的进程执行它们,也不需要将它们的代码粘贴进来,你可以使用`%run`魔术。\n", 122 | "\n", 123 | "> For example, imagine you've created a ``myscript.py`` file with the following contents:\n", 124 | "\n", 125 | 
"例如,你创建了一个`myscript.py`文件,里面的内容是:\n", 126 | "\n", 127 | "\n", 128 | "```python\n", 129 | "#-------------------------------------\n", 130 | "# file: myscript.py\n", 131 | "\n", 132 | "def square(x):\n", 133 | " \"\"\"square a number\"\"\"\n", 134 | " return x ** 2\n", 135 | "\n", 136 | "for N in range(1, 4):\n", 137 | " print(N, \"squared is\", square(N))\n", 138 | "```\n", 139 | "\n", 140 | "> You can execute this from your IPython session as follows:\n", 141 | "\n", 142 | "你可以在你的IPython shell中这样执行这个Python代码文件:\n", 143 | "\n", 144 | "```python\n", 145 | "In [6]: %run myscript.py\n", 146 | "1 squared is 1\n", 147 | "2 squared is 4\n", 148 | "3 squared is 9\n", 149 | "```\n", 150 | "\n", 151 | "> Note also that after you've run this script, any functions defined within it are available for use in your IPython session:\n", 152 | "\n", 153 | "你应该注意到了,当你执行完这个脚本文件之后,任何定义了的函数也可以在你当前的IPython会话中使用了。\n", 154 | "\n", 155 | "```python\n", 156 | "In [7]: square(5)\n", 157 | "Out[7]: 25\n", 158 | "```\n", 159 | "\n", 160 | "> There are several options to fine-tune how your code is run; you can see the documentation in the normal way, by typing **``%run?``** in the IPython interpreter.\n", 161 | "\n", 162 | "还有一些参数可以精细控制你的代码文件如何执行;你可以像之前介绍的那样查看它的文档,只需要在IPython shell中输入`%run?`即可。" 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "## Timing Code Execution: ``%timeit``\n", 170 | "\n", 171 | "## 代码执行计时:`%timeit`\n", 172 | "\n", 173 | "> Another example of a useful magic function is ``%timeit``, which will automatically determine the execution time of the single-line Python statement that follows it.\n", 174 | "For example, we may want to check the performance of a list comprehension:\n", 175 | "\n", 176 | "下面介绍的魔术命令是`%timeit`,它会自动测试统计紧跟之后的单行Python语句的执行性能(时间)。例如我们需要测试列表解析的性能:\n", 177 | "\n", 178 | "```python\n", 179 | "In [8]: %timeit L = [n ** 2 for n in range(1000)]\n", 180 | "1000 loops, best of 3: 325 µs per loop\n", 181 | 
"```\n", 182 | "\n", 183 | "> The benefit of ``%timeit`` is that for short commands it will automatically perform multiple runs in order to attain more robust results.\n", 184 | "For multi line statements, adding a second ``%`` sign will turn this into a cell magic that can handle multiple lines of input.\n", 185 | "For example, here's the equivalent construction with a ``for``-loop:\n", 186 | "\n", 187 | "使用`%timeit`的时候,它会自动执行多次,以获取更有效的结果。对于多行的代码来说,增加一个`%`号,会将本魔术命令变成单元格模式,因此它能测试多行输入的性能。例如,下面是一段相同功能的列表初始化,使用的`for`循环:\n", 188 | "\n", 189 | "```python\n", 190 | "In [9]: %%timeit\n", 191 | " ...: L = []\n", 192 | " ...: for n in range(1000):\n", 193 | " ...: L.append(n ** 2)\n", 194 | " ...: \n", 195 | "1000 loops, best of 3: 373 µs per loop\n", 196 | "```\n", 197 | "\n", 198 | "> We can immediately see that list comprehensions are about 10% faster than the equivalent ``for``-loop construction in this case.\n", 199 | "We'll explore ``%timeit`` and other approaches to timing and profiling code in [Profiling and Timing Code](01.07-Timing-and-Profiling.ipynb).\n", 200 | "\n", 201 | "从上面的结果可以看出来,使用列表解析能比使用`for`循环的方式提升10%的运行速度。我们将在[性能测算和计时](01.07-Timing-and-Profiling.ipynb)中更加详细的讨论它。" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "## Help on Magic Functions: ``?``, ``%magic``, and ``%lsmagic``\n", 209 | "\n", 210 | "## 魔术命令帮助:`?`、`%magic` 和 `%lsmagic`\n", 211 | "\n", 212 | "> Like normal Python functions, IPython magic functions have docstrings, and this useful\n", 213 | "documentation can be accessed in the standard manner.\n", 214 | "So, for example, to read the documentation of the ``%timeit`` magic simply type this:\n", 215 | "\n", 216 | "就像普通的Python对象,IPython魔术命令也有docstring,这些文档可以按照我们之前的方式简单的获取到。举个例子,想要查阅`%timeit`的文档,仅需输入:\n", 217 | "\n", 218 | "```python\n", 219 | "In [10]: %timeit?\n", 220 | "```\n", 221 | "\n", 222 | "> Documentation for other functions can be accessed similarly.\n", 223 | "To access a general 
description of available magic functions, including some examples, you can type this:\n", 224 | "\n", 225 | "其他魔术命令和文档也可以类似获得。要获得魔术命令的通用描述以及它们的例子,你可以输入:\n", 226 | "\n", 227 | "```python\n", 228 | "In [11]: %magic\n", 229 | "```\n", 230 | "\n", 231 | "> For a quick and simple list of all available magic functions, type this:\n", 232 | "\n", 233 | "如果想要快速简单地列出所有可用的魔术命令,输入:\n", 234 | "\n", 235 | "```python\n", 236 | "In [12]: %lsmagic\n", 237 | "```\n", 238 | "\n", 239 | "> Finally, I'll mention that it is quite straightforward to define your own magic functions if you wish.\n", 240 | "We won't discuss it here, but if you are interested, see the references listed in [More IPython Resources](01.08-More-IPython-Resources.ipynb).\n", 241 | "\n", 242 | "最后,你可以了解自定义魔术命令的有关知识。但是本书不会讨论这个方面,如果读者感兴趣,请参见[更多IPython资源](01.08-More-IPython-Resources.ipynb)。" 243 | ] 244 | }, 245 | { 246 | "cell_type": "markdown", 247 | "metadata": {}, 248 | "source": [ 249 | "\n", 250 | "< [IPython Shell中的键盘快捷键](01.02-Shell-Keyboard-Shortcuts.ipynb) | [目录](Index.ipynb) | [输入输出历史](01.04-Input-Output-History.ipynb) >\n", 251 | "\n", 252 | "\"Open\n" 253 | ] 254 | } 255 | ], 256 | "metadata": { 257 | "anaconda-cloud": {}, 258 | "kernelspec": { 259 | "display_name": "Python 3", 260 | "language": "python", 261 | "name": "python3" 262 | }, 263 | "language_info": { 264 | "codemirror_mode": { 265 | "name": "ipython", 266 | "version": 3 267 | }, 268 | "file_extension": ".py", 269 | "mimetype": "text/x-python", 270 | "name": "python", 271 | "nbconvert_exporter": "python", 272 | "pygments_lexer": "ipython3", 273 | "version": "3.7.6" 274 | } 275 | }, 276 | "nbformat": 4, 277 | "nbformat_minor": 1 278 | } 279 | -------------------------------------------------------------------------------- /notebooks/01.04-Input-Output-History.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | 
"\n", 8 | "< [IPython魔术命令](01.03-Magic-Commands.ipynb) | [目录](Index.ipynb) | [IPython和Shell命令](01.05-IPython-And-Shell-Commands.ipynb) >\n", 9 | "\n", 10 | "\"Open\n" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# Input and Output History\n", 18 | "\n", 19 | "# 输入输出历史" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "> Previously we saw that the IPython shell allows you to access previous commands with the up and down arrow keys, or equivalently the Ctrl-p/Ctrl-n shortcuts.\n", 27 | "Additionally, in both the shell and the notebook, IPython exposes several ways to obtain the output of previous commands, as well as string versions of the commands themselves.\n", 28 | "We'll explore those here.\n", 29 | "\n", 30 | "前面我们看到IPython shell能够让你获取到命令的历史,使用向上箭头或者向下箭头,或者等同的Ctrl-p/Ctrl-n快捷键。除此之外,在IPython shell和notebook中,还提供了一些方法可以获得前面命令的输出结果,或者字符串形式的命令本身。本节将讨论它们。" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "## IPython's ``In`` and ``Out`` Objects\n", 38 | "\n", 39 | "## IPython的 `In` 和 `Out` 对象\n", 40 | "\n", 41 | "> By now I imagine you're quite familiar with the ``In [1]:``/``Out[1]:`` style prompts used by IPython.\n", 42 | "But it turns out that these are not just pretty decoration: they give a clue as to how you can access previous inputs and outputs in your current session.\n", 43 | "Imagine you start a session that looks like this:\n", 44 | "\n", 45 | "阅读到这里,作者认为你已经相当熟悉IPython的`In [1]:`/`Out[1]:`风格的提示符了。但是其实这些提示符并不是为了美观而采用的装饰符号:它们会给出你提示,让你可以获取之前的输入和输出。例如你启动了一个IPython会话:\n", 46 | "\n", 47 | "```python\n", 48 | "In [1]: import math\n", 49 | "\n", 50 | "In [2]: math.sin(2)\n", 51 | "Out[2]: 0.9092974268256817\n", 52 | "\n", 53 | "In [3]: math.cos(2)\n", 54 | "Out[3]: -0.4161468365471424\n", 55 | "```" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "> We've imported the built-in 
``math`` package, then computed the sine and the cosine of the number 2.\n", 63 | "These inputs and outputs are displayed in the shell with ``In``/``Out`` labels, but there's more–IPython actually creates some Python variables called ``In`` and ``Out`` that are automatically updated to reflect this history:\n", 64 | "\n", 65 | "我们载入了内建的`math`包,然后计算了2的正弦和余弦值。这些输入和输出在IPython shell当中使用`In`/`Out`标签打印在屏幕上,但实际上这些标签的作用不限于此,IPython创建了两个Python的变量名叫`In`和`Out`,它们会自动更新以反映这些输入输出的历史:\n", 66 | "\n", 67 | "```python\n", 68 | "In [4]: print(In)\n", 69 | "['', 'import math', 'math.sin(2)', 'math.cos(2)', 'print(In)']\n", 70 | "\n", 71 | "In [5]: Out\n", 72 | "Out[5]: {2: 0.9092974268256817, 3: -0.4161468365471424}\n", 73 | "```" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "> The ``In`` object is a list, which keeps track of the commands in order (the first item in the list is a place-holder so that ``In[1]`` can refer to the first command):\n", 81 | "\n", 82 | "`In`对象是一个列表,保存着本次IPython会话的所有输入命令(列表中的第一个元素是一个占位符,因此第一条命令是`In[1]`):\n", 83 | "\n", 84 | "\n", 85 | "```python\n", 86 | "In [6]: print(In[1])\n", 87 | "import math\n", 88 | "```\n", 89 | "\n", 90 | "> The ``Out`` object is not a list but a dictionary mapping input numbers to their outputs (if any):\n", 91 | "\n", 92 | "`Out`对象不是列表而是一个字典,将输入的编号对应到它们相应的输出上面(如果有输出的话):\n", 93 | "\n", 94 | "```python\n", 95 | "In [7]: print(Out[2])\n", 96 | "0.9092974268256817\n", 97 | "```\n", 98 | "\n", 99 | "> Note that not all operations have outputs: for example, ``import`` statements and ``print`` statements don't affect the output.\n", 100 | "The latter may be surprising, but makes sense if you consider that ``print`` is a function that returns ``None``; for brevity, any command that returns ``None`` is not added to ``Out``.\n", 101 | "\n", 102 | "注意并不是所有的操作都有输出:例如,`import`和`print`语句就不会影响输出内容。后者可能有些令人意外,但只要想到`print`是一个返回值为`None`的函数,就说得通了;为了简洁起见,任何返回`None`的指令都不会加入到`Out`当中。\n", 103 | "\n", 104 | "> Where this can be
useful is if you want to interact with past results.\n", 105 | "For example, let's check the sum of ``sin(2) ** 2`` and ``cos(2) ** 2`` using the previously-computed results:\n", 106 | "\n", 107 | "当你需要用到历史结果时,上面的变量就非常有用。例如,我们检查一下``sin(2) ** 2``加上``cos(2) ** 2``的和,可以使用前面的结果:\n", 108 | "\n", 109 | "```python\n", 110 | "In [8]: Out[2] ** 2 + Out[3] ** 2\n", 111 | "Out[8]: 1.0\n", 112 | "```\n", 113 | "\n", 114 | "> The result is ``1.0`` as we'd expect from the well-known trigonometric identity.\n", 115 | "In this case, using these previous results probably is not necessary, but it can become very handy if you execute a very expensive computation and want to reuse the result!\n", 116 | "\n", 117 | "结果是`1.0`,和我们了解的三角函数运算得到的一样。在这个例子中,使用历史结果并不是特别需要,但是当你前面进行了非常耗时的运算的时候,重用这个结果是非常方便的。" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "## Underscore Shortcuts and Previous Outputs\n", 125 | "\n", 126 | "## 下划线变量和之前的输出\n", 127 | "\n", 128 | "> The standard Python shell contains just one simple shortcut for accessing previous output; the variable ``_`` (i.e., a single underscore) is kept updated with the previous output; this works in IPython as well:\n", 129 | "\n", 130 | "标准的Python shell包含着一个简单的快捷变量用来获取前一个输出结果;变量`_`(一个下划线),这个变量会更新为每次前一条语句的输出结果。IPython中也是可以使用的:\n", 131 | "\n", 132 | "```python\n", 133 | "In [9]: print(_)\n", 134 | "1.0\n", 135 | "```\n", 136 | "\n", 137 | "> But IPython takes this a bit further—you can use a double underscore to access the second-to-last output, and a triple underscore to access the third-to-last output (skipping any commands with no output):\n", 138 | "\n", 139 | "IPython扩展了这个功能,你可以使用双下划线获取倒数第二个输出结果,使用三下划线获取倒数第三个输出结果(当然会跳过无输出的命令):\n", 140 | "\n", 141 | "```python\n", 142 | "In [10]: print(__)\n", 143 | "-0.4161468365471424\n", 144 | "\n", 145 | "In [11]: print(___)\n", 146 | "0.9092974268256817\n", 147 | "```\n", 148 | "\n", 149 | "> IPython stops there: more than three underscores starts 
to get a bit hard to count, and at that point it's easier to refer to the output by line number.\n", 150 | "\n", 151 | "> There is one more shortcut we should mention, however–a shorthand for ``Out[X]`` is ``_X`` (i.e., a single underscore followed by the line number):\n", 152 | "\n", 153 | "三个就打住了,IPython也不支持更多的下划线了,因为多于三个的下划线就变得比较难以数清楚了,在这种情况下,使用输入序号会更加方便一些。\n", 154 | "\n", 155 | "这里还有一个快捷方式需要介绍,`Out[x]`的快捷写法是`_x`(一个下划线后面跟着输入序号):\n", 156 | "\n", 157 | "```python\n", 158 | "In [12]: Out[2]\n", 159 | "Out[12]: 0.9092974268256817\n", 160 | "\n", 161 | "In [13]: _2\n", 162 | "Out[13]: 0.9092974268256817\n", 163 | "```" 164 | ] 165 | }, 166 | { 167 | "cell_type": "markdown", 168 | "metadata": {}, 169 | "source": [ 170 | "## Suppressing Output\n", 171 | "\n", 172 | "## 取消输出\n", 173 | "\n", 174 | "> Sometimes you might wish to suppress the output of a statement (this is perhaps most common with the plotting commands that we'll explore in [Introduction to Matplotlib](04.00-Introduction-To-Matplotlib.ipynb)).\n", 175 | "Or maybe the command you're executing produces a result that you'd prefer not to store in your output history, perhaps so that it can be deallocated when other references are removed.\n", 176 | "The easiest way to suppress the output of a command is to add a semicolon to the end of the line:\n", 177 | "\n", 178 | "有时你可能希望取消一个语句的输出结果(这在我们使用绘图指令时很常见,我们会在[Matplotlib简介](04.00-Introduction-To-Matplotlib.ipynb)中详细讨论)。或者你正在执行的指令会产生一个结果,而你并不希望它被存储在输出历史中,这样它就能在其他引用被移除后被释放。取消一个指令的输出结果最简单的方法就是在语句最后加上一个分号:\n", 179 | "\n", 180 | "```python\n", 181 | "In [14]: math.sin(2) + math.cos(2);\n", 182 | "```\n", 183 | "\n", 184 | "> Note that the result is computed silently, and the output is neither displayed on the screen nor stored in the ``Out`` dictionary:\n", 185 | "\n", 186 | "这里结果将会静默地计算出来,输出既不会打印在屏幕上,也不会保存在输出`Out`的字典中:\n", 187 | "\n", 188 | "```python\n", 189 | "In [15]: 14 in Out\n", 190 | "Out[15]: False\n", 191 | "```" 192 | ] 193 | }, 194 | { 195 |
"cell_type": "markdown", 196 | "metadata": {}, 197 | "source": [ 198 | "## Related Magic Commands\n", 199 | "\n", 200 | "## 相关的魔术命令\n", 201 | "\n", 202 | "> For accessing a batch of previous inputs at once, the ``%history`` magic command is very helpful.\n", 203 | "Here is how you can print the first four inputs:\n", 204 | "\n", 205 | "要想一次性获得批量的输入历史,`%history`魔术命令是非常有用的。下面例子展示了如何使用它打印出输入历史中头四个指令:\n", 206 | "\n", 207 | "\n", 208 | "```python\n", 209 | "In [16]: %history -n 1-4\n", 210 | " 1: import math\n", 211 | " 2: math.sin(2)\n", 212 | " 3: math.cos(2)\n", 213 | " 4: print(In)\n", 214 | "```\n", 215 | "\n", 216 | "> As usual, you can type ``%history?`` for more information and a description of options available.\n", 217 | "Other similar magic commands are ``%rerun`` (which will re-execute some portion of the command history) and ``%save`` (which saves some set of the command history to a file).\n", 218 | "For more information, I suggest exploring these using the ``?`` help functionality discussed in [Help and Documentation in IPython](01.01-Help-And-Documentation.ipynb).\n", 219 | "\n", 220 | "当然,你也可以使用`%history?`来查阅该魔术命令的文档。其他类似的魔术命令包括`%rerun`(重新执行输入历史中的某部分指令)和`%save`(将输入历史中的某部分内容保存成文件)。需要更多的信息,推荐使用`?`魔术符号来查阅文档,有关`?`号的内容请参见[IPython帮助和文档](01.01-Help-And-Documentation.ipynb)。" 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": {}, 226 | "source": [ 227 | "\n", 228 | "< [IPython魔术命令](01.03-Magic-Commands.ipynb) | [目录](Index.ipynb) | [IPython和Shell命令](01.05-IPython-And-Shell-Commands.ipynb) >\n", 229 | "\n", 230 | "\"Open\n" 231 | ] 232 | } 233 | ], 234 | "metadata": { 235 | "anaconda-cloud": {}, 236 | "kernelspec": { 237 | "display_name": "Python 3", 238 | "language": "python", 239 | "name": "python3" 240 | }, 241 | "language_info": { 242 | "codemirror_mode": { 243 | "name": "ipython", 244 | "version": 3 245 | }, 246 | "file_extension": ".py", 247 | "mimetype": "text/x-python", 248 | "name": "python", 249 | "nbconvert_exporter": "python", 
250 | "pygments_lexer": "ipython3", 251 | "version": "3.7.6" 252 | } 253 | }, 254 | "nbformat": 4, 255 | "nbformat_minor": 1 256 | } 257 | -------------------------------------------------------------------------------- /notebooks/01.05-IPython-And-Shell-Commands.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "< [输入输出历史](01.04-Input-Output-History.ipynb) | [目录](Index.ipynb) | [错误和调试](01.06-Errors-and-Debugging.ipynb) >\n", 9 | "\n", 10 | "\"Open\n" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# IPython and Shell Commands\n", 18 | "\n", 19 | "# IPython 和 Shell命令" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "> When working interactively with the standard Python interpreter, one of the frustrations is the need to switch between multiple windows to access Python tools and system command-line tools.\n", 27 | "IPython bridges this gap, and gives you a syntax for executing shell commands directly from within the IPython terminal.\n", 28 | "The magic happens with the exclamation point: anything appearing after ``!`` on a line will be executed not by the Python kernel, but by the system command-line.\n", 29 | "\n", 30 | "当使用标准的Python解释器时,有一个让人感到沮丧的地方就是你需要在不同的窗口之间进行切换,有时你需要使用Python,有时你又需要使用系统命令行工具。IPython将两者联系起来,它允许你直接在IPython终端中直接运行shell命令。这个魔术使用的是感叹号:任何出现在`!`之后的内容将被系统shell执行,而不是Python解释器。\n", 31 | "\n", 32 | "> The following assumes you're on a Unix-like system, such as Linux or Mac OSX.\n", 33 | "Some of the examples that follow will fail on Windows, which uses a different type of shell by default (though with the 2016 announcement of native Bash shells on Windows, soon this may no longer be an issue!).\n", 34 | "If you're unfamiliar with shell commands, I'd suggest reviewing the [Shell 
Tutorial](http://swcarpentry.github.io/shell-novice/) put together by the always excellent Software Carpentry Foundation.\n", 35 | "\n", 36 | "本节内容假定你在使用一个类Unix的系统,如Linux或者Mac OS X。下面的一些例子会在Windows下面失效,因为它使用的是一种完全不同的shell(2016年Windows宣布将直接支持原生的Bash,很快这将不成为问题。译者注:目前在windows下使用bash还是会有很多问题,微软的原生实现并不理想)。如果你对于shell命令不熟悉,作者推荐你去[Shell教程](http://swcarpentry.github.io/shell-novice/)去学习一下基础的shell命令。" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "## Quick Introduction to the Shell\n", 44 | "\n", 45 | "## Shell快速介绍\n", 46 | "\n", 47 | "> A full intro to using the shell/terminal/command-line is well beyond the scope of this chapter, but for the uninitiated we will offer a quick introduction here.\n", 48 | "The shell is a way to interact textually with your computer.\n", 49 | "Ever since the mid 1980s, when Microsoft and Apple introduced the first versions of their now ubiquitous graphical operating systems, most computer users have interacted with their operating system through familiar clicking of menus and drag-and-drop movements.\n", 50 | "But operating systems existed long before these graphical user interfaces, and were primarily controlled through sequences of text input: at the prompt, the user would type a command, and the computer would do what the user told it to.\n", 51 | "Those early prompt systems are the precursors of the shells and terminals that most active data scientists still use today.\n", 52 | "\n", 53 | "如何使用shell/终端/命令行远远超出了本章的范围,但是对于初学者,作者还是准备了一个简单快速的介绍。从80年代中开始,微软和苹果向用户推出了它们的图形界面,时至今日,图形化操作系统已经是无处不在了。大部分的计算机用户都是使用他们熟悉的菜单点击和拖放操作来使用操作系统。但是实际上操作系统比这些图形用户界面出现早得多,当时都是由用户输入一系列的文本内容对操作系统进行控制:在提示符下,用户敲入一个命令,然后计算机会按照用户的指示进行工作。这种早期的提示符界面就是shell和终端的前身,也是直到今天很多数据科学家仍在使用的工具。\n", 54 | "\n", 55 | "> Someone unfamiliar with the shell might ask why you would bother with this, when many results can be accomplished by simply clicking on icons and menus.\n", 56 | "A shell user might reply with another question: why hunt 
icons and click menus when you can accomplish things much more easily by typing?\n", 57 | "While it might sound like a typical tech preference impasse, when moving beyond basic tasks it quickly becomes clear that the shell offers much more control of advanced tasks, though admittedly the learning curve can intimidate the average computer user.\n", 58 | "\n", 59 | "不熟悉shell的人可能会问,为什么你们要这么麻烦,为什么简单的通过点击图标和菜单就能实现的功能你们要敲命令。熟练使用shell的用户可能会这样回应:为什么通过简单的键盘命令就能完成的工作你们要点击鼠标呢。虽然看起来这是一个典型的技术偏好问题,但是当你需要完成的任务变得复杂的时候,shell确实能够提供更多的控制,哪怕shell的学习曲线会吓跑很多普通的计算机用户。\n", 60 | "\n", 61 | "> As an example, here is a sample of a Linux/OSX shell session where a user explores, creates, and modifies directories and files on their system (``osx:~ $`` is the prompt, and everything after the ``$`` sign is the typed command; text that is preceded by a ``#`` is meant just as description, rather than something you would actually type in):\n", 62 | "\n", 63 | "作为一个例子,这里有一个用户在Linux/OSX系统上浏览、创建和修改目录以及文件的shell会话(`osx:~ $`是提示符,所有出现在`$`后面的文本都是一条命令;以`#`开始的文本是注释作为命令的解释,而不是你需要真正输入的内容):\n", 64 | "\n", 65 | "```bash\n", 66 | "osx:~ $ echo \"hello world\" # 使用echo打印输出,类似Python中的print\n", 67 | "hello world\n", 68 | "\n", 69 | "osx:~ $ pwd # pwd = 打印当前工作目录\n", 70 | "/home/jake # 这是我们当前的工作目录\n", 71 | "\n", 72 | "osx:~ $ ls # ls = 列示目录内容\n", 73 | "notebooks projects \n", 74 | "\n", 75 | "osx:~ $ cd projects/ # cd = 改变目录位置\n", 76 | "\n", 77 | "osx:projects $ pwd\n", 78 | "/home/jake/projects\n", 79 | "\n", 80 | "osx:projects $ ls\n", 81 | "datasci_book mpld3 myproject.txt\n", 82 | "\n", 83 | "osx:projects $ mkdir myproject # mkdir = 创建新目录\n", 84 | "\n", 85 | "osx:projects $ cd myproject/\n", 86 | "\n", 87 | "osx:myproject $ mv ../myproject.txt ./ # mv = 移动文件,这里我们将父目录中的myproject.txt\n", 88 | " # 移动到当前工作目录下\n", 89 | "osx:myproject $ ls\n", 90 | "myproject.txt\n", 91 | "```\n", 92 | "\n", 93 | "> Notice that all of this is just a compact way to do familiar operations (navigating a directory structure, creating a 
directory, moving a file, etc.) by typing commands rather than clicking icons and menus.\n", 94 | "Note that with just a few commands (``pwd``, ``ls``, ``cd``, ``mkdir``, and ``cp``) you can do many of the most common file operations.\n", 95 | "It's when you go beyond these basics that the shell approach becomes really powerful.\n", 96 | "\n", 97 | "请注意,上面的命令都是使用命令输入完成我们平常使用鼠标点击操作完成的任务(浏览目录结构、创建目录、移动文件等)。只需要少量的命令输入(`pwd`、`ls`、`cd`、`mkdir`和`cp`)我们就能完成很多通用的文件操作。当你更深入学习shell之后,你就会发现它们非常强大。" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "## Shell Commands in IPython\n", 105 | "\n", 106 | "## IPython 中的 shell 命令\n", 107 | "\n", 108 | "> Any command that works at the command-line can be used in IPython by prefixing it with the ``!`` character.\n", 109 | "For example, the ``ls``, ``pwd``, and ``echo`` commands can be run as follows:\n", 110 | "\n", 111 | "任何在命令行中可以使用的命令,也都可以在IPython中使用,只需要在前面加上`!`号。例如,`ls`、`pwd`和`echo`命令:\n", 112 | "\n", 113 | "```bash\n", 114 | "In [1]: !ls\n", 115 | "myproject.txt\n", 116 | "\n", 117 | "In [2]: !pwd\n", 118 | "/home/jake/projects/myproject\n", 119 | "\n", 120 | "In [3]: !echo \"printing from the shell\"\n", 121 | "printing from the shell\n", 122 | "```" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "## Passing Values to and from the Shell\n", 130 | "\n", 131 | "## 与 shell 之间传递值\n", 132 | "\n", 133 | "> Shell commands can not only be called from IPython, but can also be made to interact with the IPython namespace.\n", 134 | "For example, you can save the output of any shell command to a Python list using the assignment operator:\n", 135 | "\n", 136 | "shell命令不但能被IPython环境中调用,还能与IPython的命名空间产生交互。例如,你可以将shell命令的输出保存成一个Python的列表:\n", 137 | "\n", 138 | "```python\n", 139 | "In [4]: contents = !ls\n", 140 | "\n", 141 | "In [5]: print(contents)\n", 142 | "['myproject.txt']\n", 143 | "\n", 144 | "In [6]: directory = !pwd\n", 145 | "\n", 
146 | "In [7]: print(directory)\n", 147 | "['/Users/jakevdp/notebooks/tmp/myproject']\n", 148 | "```\n", 149 | "\n", 150 | "> Note that these results are not returned as lists, but as a special shell return type defined in IPython:\n", 151 | "\n", 152 | "值得注意的是,这些结果并不是返回成为普通的Python列表,而是一个IPython定义的特殊shell返回值类型:\n", 153 | "\n", 154 | "```python\n", 155 | "In [8]: type(directory)\n", 156 | "IPython.utils.text.SList\n", 157 | "```\n", 158 | "\n", 159 | "> This looks and acts a lot like a Python list, but has additional functionality, such as\n", 160 | "the ``grep`` and ``fields`` methods and the ``s``, ``n``, and ``p`` properties that allow you to search, filter, and display the results in convenient ways.\n", 161 | "For more information on these, you can use IPython's built-in help features.\n", 162 | "\n", 163 | "它看起来很像一个Python列表,但是还包含额外的功能,比方说`grep`和`fields`方法,以及`s`、`n`和`p`属性,让你能够使用简单方式搜索,过滤和显示结果。如果你想获得更多信息,请使用IPython內建的帮助特性来查看。" 164 | ] 165 | }, 166 | { 167 | "cell_type": "markdown", 168 | "metadata": {}, 169 | "source": [ 170 | "> Communication in the other direction–passing Python variables into the shell–is possible using the ``{varname}`` syntax:\n", 171 | "\n", 172 | "反过来,也可以传递Python的变量给shell,通过`{变量名}`语法就可以实现:\n", 173 | "\n", 174 | "```python\n", 175 | "In [9]: message = \"hello from Python\"\n", 176 | "\n", 177 | "In [10]: !echo {message}\n", 178 | "hello from Python\n", 179 | "```\n", 180 | "\n", 181 | "> The curly braces contain the variable name, which is replaced by the variable's contents in the shell command.\n", 182 | "\n", 183 | "花括号里面是变量的名称,在执行shell命令的时候将会被变量的值替代。" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "# Shell-Related Magic Commands\n", 191 | "\n", 192 | "# Shell 相关魔术命令\n", 193 | "\n", 194 | "> If you play with IPython's shell commands for a while, you might notice that you cannot use ``!cd`` to navigate the filesystem:\n", 195 | "\n", 196 | 
"如果你已经在IPython中使用了shell命令一段时间了,你会发现你无法使用`!cd`来改变你的工作目录:\n", 197 | "\n", 198 | "```python\n", 199 | "In [11]: !pwd\n", 200 | "/home/jake/projects/myproject\n", 201 | "\n", 202 | "In [12]: !cd ..\n", 203 | "\n", 204 | "In [13]: !pwd\n", 205 | "/home/jake/projects/myproject\n", 206 | "```\n", 207 | "\n", 208 | "> The reason is that shell commands in the notebook are executed in a temporary subshell.\n", 209 | "If you'd like to change the working directory in a more enduring way, you can use the ``%cd`` magic command:\n", 210 | "\n", 211 | "这是因为在notebook里面shell是在一个子shell空间中执行的。如果你需要改变工作目录的话,你可以使用`%cd`魔术命令:\n", 212 | "\n", 213 | "```python\n", 214 | "In [14]: %cd ..\n", 215 | "/home/jake/projects\n", 216 | "```\n", 217 | "\n", 218 | "> In fact, by default you can even use this without the ``%`` sign:\n", 219 | "\n", 220 | "事实上,你甚至可以不用`%`号:\n", 221 | "\n", 222 | "```python\n", 223 | "In [15]: cd myproject\n", 224 | "/home/jake/projects/myproject\n", 225 | "```\n", 226 | "\n", 227 | "> This is known as an ``automagic`` function, and this behavior can be toggled with the ``%automagic`` magic function.\n", 228 | "\n", 229 | "这被称为`自动魔术`,你可以使用`%automagic`来切换它的开关状态。\n", 230 | "\n", 231 | "> Besides ``%cd``, other available shell-like magic functions are ``%cat``, ``%cp``, ``%env``, ``%ls``, ``%man``, ``%mkdir``, ``%more``, ``%mv``, ``%pwd``, ``%rm``, and ``%rmdir``, any of which can be used without the ``%`` sign if ``automagic`` is on.\n", 232 | "This makes it so that you can almost treat the IPython prompt as if it's a normal shell:\n", 233 | "\n", 234 | "除了`%cd`之外,其他类似shell命令的魔术命令包括`%cat`、`%cp`、`%env`、`%ls`、`%man`、`%mkdir`、`%more`、`%mv`、`%pwd`、`%rm`和`%rmdir`,这些命令在`automagic`开启时都可以不带`%`使用。这功能令你可以几乎将IPython shell当成系统的shell来使用了:\n", 235 | "\n", 236 | "```python\n", 237 | "In [16]: mkdir tmp\n", 238 | "\n", 239 | "In [17]: ls\n", 240 | "myproject.txt tmp/\n", 241 | "\n", 242 | "In [18]: cp myproject.txt tmp/\n", 243 | "\n", 244 | "In [19]: ls tmp\n", 245 | "myproject.txt\n", 
246 | "\n", 247 | "In [20]: rm -r tmp\n", 248 | "```\n", 249 | "\n", 250 | "> This access to the shell from within the same terminal window as your Python session means that there is a lot less switching back and forth between interpreter and shell as you write your Python code.\n", 251 | "\n", 252 | "能够在IPython环境中直接使用shell,意味着你可以不用来回在解释器和shell终端两个窗口之间进行切换,可以提高你写Python代码时候的效率。" 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": {}, 258 | "source": [ 259 | "\n", 260 | "< [输入输出历史](01.04-Input-Output-History.ipynb) | [目录](Index.ipynb) | [错误和调试](01.06-Errors-and-Debugging.ipynb) >\n", 261 | "\n", 262 | "\"Open\n" 263 | ] 264 | } 265 | ], 266 | "metadata": { 267 | "anaconda-cloud": {}, 268 | "kernelspec": { 269 | "display_name": "Python 3", 270 | "language": "python", 271 | "name": "python3" 272 | }, 273 | "language_info": { 274 | "codemirror_mode": { 275 | "name": "ipython", 276 | "version": 3 277 | }, 278 | "file_extension": ".py", 279 | "mimetype": "text/x-python", 280 | "name": "python", 281 | "nbconvert_exporter": "python", 282 | "pygments_lexer": "ipython3", 283 | "version": "3.7.6" 284 | } 285 | }, 286 | "nbformat": 4, 287 | "nbformat_minor": 1 288 | } 289 | -------------------------------------------------------------------------------- /notebooks/01.08-More-IPython-Resources.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "< [性能测算和计时](01.07-Timing-and-Profiling.ipynb) | [目录](Index.ipynb) | [Numpy介绍](02.00-Introduction-to-NumPy.ipynb) >\n", 9 | "\n", 10 | "\"Open\n" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# More IPython Resources\n", 18 | "\n", 19 | "# 更多IPython资源" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "> In this chapter, we've just scratched the surface of using IPython to enable data 
science tasks.\n", 27 | "Much more information is available both in print and on the Web, and here we'll list some other resources that you may find helpful.\n", 28 | "\n", 29 | "本章中我们初步讨论了使用IPython来解决数据科学任务的一些基本内容。更多的内容可以在网上或书籍中找到,最后本小节来列出其中可能对你有帮助的一些资源。" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "## Web Resources\n", 37 | "\n", 38 | "## 网络资源\n", 39 | "\n", 40 | "> - [The IPython website](http://ipython.org): The IPython website links to documentation, examples, tutorials, and a variety of other resources.\n", 41 | "> - [The nbviewer website](http://nbviewer.jupyter.org/): This site shows static renderings of any IPython notebook available on the internet. The front page features some example notebooks that you can browse to see what other folks are using IPython for!\n", 42 | "> - [A gallery of interesting Jupyter Notebooks](https://github.com/jupyter/jupyter/wiki/A-gallery-of-interesting-Jupyter-Notebooks/): This ever-growing list of notebooks, powered by nbviewer, shows the depth and breadth of numerical analysis you can do with IPython. It includes everything from short examples and tutorials to full-blown courses and books composed in the notebook format!\n", 43 | "> - Video Tutorials: searching the Internet, you will find many video-recorded tutorials on IPython. 
I'd especially recommend seeking tutorials from the PyCon, SciPy, and PyData conferences by Fernando Perez and Brian Granger, two of the primary creators and maintainers of IPython and Jupyter.\n", 44 | "\n", 45 | "\n", 46 | "- [IPython官网](http://ipython.org): 在线文档、例子、教程和其他许多资源。\n", 47 | "- [nbviewer官网](http://nbviewer.jupyter.org/): nbviewer网站能展示互联网上的IPython notebook的资源文件。首页展示了一些notebooks的例子,你可以看到其他人是怎样使用IPython的。\n", 48 | "- [有趣的Jupyter notebooks展览馆](https://github.com/jupyter/jupyter/wiki/A-gallery-of-interesting-Jupyter-Notebooks/): 这是一个不断增加的notebooks列表,由nbviewer进行维护,展示了许多既有深度又有广度的IPython在数值分析中的应用。它应有尽有,从简短的例子,到稍长的教程,直至完整的课程和书籍,都是使用notebook格式。\n", 49 | "- 视频教程:在互联网上可以搜索到很多关于IPython的视频教程。作者特别推荐PyCon,SciPy和PyData学术会上Fernando Perez 和 Brian Granger 做的报告,他们是IPython和Jupyter的主要创始人和维护者。" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "## Books\n", 57 | "\n", 58 | "## 书籍\n", 59 | "\n", 60 | "> - [*Python for Data Analysis*](http://shop.oreilly.com/product/0636920023784.do): Wes McKinney's book includes a chapter that covers using IPython as a data scientist. Although much of the material overlaps what we've discussed here, another perspective is always helpful.\n", 61 | "> - [*Learning IPython for Interactive Computing and Data Visualization*](https://www.packtpub.com/big-data-and-business-intelligence/learning-ipython-interactive-computing-and-data-visualization): This short book by Cyrille Rossant offers a good introduction to using IPython for data analysis.\n", 62 | "> - [*IPython Interactive Computing and Visualization Cookbook*](https://www.packtpub.com/big-data-and-business-intelligence/ipython-interactive-computing-and-visualization-cookbook): Also by Cyrille Rossant, this book is a longer and more advanced treatment of using IPython for data science. 
Despite its name, it's not just about IPython–it also goes into some depth on a broad range of data science topics.\n", 63 | "\n", 64 | "\n", 65 | "- [*Python for Data Analysis*](http://shop.oreilly.com/product/0636920023784.do): 作者:Wes McKinney,其中有一章专门讲述使用IPython来进行数据科学处理。虽然大部分的内容可能与本书我们将要看到的有重复,从另一个角度进行认知永远不是坏事。\n", 66 | "- [*Learning IPython for Interactive Computing and Data Visualization*](https://www.packtpub.com/big-data-and-business-intelligence/learning-ipython-interactive-computing-and-data-visualization): 作者:Cyrille Rossant,一本很简短的书籍专门介绍使用IPython进行数据分析。\n", 67 | "- [*IPython Interactive Computing and Visualization Cookbook*](https://www.packtpub.com/big-data-and-business-intelligence/ipython-interactive-computing-and-visualization-cookbook): 作者:Cyrille Rossant, 一本更加详尽的书籍,对于在数据科学领域使用IPython进行了深入的介绍。虽然名字叫做IPython,实际上内容深度涵盖了数据科学的广泛课题。\n", 68 | "\n", 69 | "> Finally, a reminder that you can find help on your own: IPython's ``?``-based help functionality (discussed in [Help and Documentation in IPython](01.01-Help-And-Documentation.ipynb)) can be very useful if you use it well and use it often.\n", 70 | "As you go through the examples here and elsewhere, this can be used to familiarize yourself with all the tools that IPython has to offer.\n", 71 | "\n", 72 | "最后还是再次提醒一下,当你在使用IPython时遇到了困难,不要忘记了IPython本身自带的帮助工具`?`(参见[IPython帮助和文档](01.01-Help-And-Documentation.ipynb)),当你经常使用它,熟练地掌握它之后,你会发现它能带给你的帮助超出你的预期。当你在本书中或其他资源处查看例子的时候,它能让你事半功倍地熟悉IPython中提供的工具和功能。" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "\n", 80 | "< [性能测算和计时](01.07-Timing-and-Profiling.ipynb) | [目录](Index.ipynb) | [Numpy介绍](02.00-Introduction-to-NumPy.ipynb) >\n", 81 | "\n", 82 | "\"Open\n" 83 | ] 84 | } 85 | ], 86 | "metadata": { 87 | "anaconda-cloud": {}, 88 | "kernelspec": { 89 | "display_name": "Python 3", 90 | "language": "python", 91 | "name": "python3" 92 | }, 93 | "language_info": { 94 | "codemirror_mode": { 95 | "name": "ipython", 96 | 
"version": 3 97 | }, 98 | "file_extension": ".py", 99 | "mimetype": "text/x-python", 100 | "name": "python", 101 | "nbconvert_exporter": "python", 102 | "pygments_lexer": "ipython3", 103 | "version": "3.7.6" 104 | } 105 | }, 106 | "nbformat": 4, 107 | "nbformat_minor": 1 108 | } 109 | -------------------------------------------------------------------------------- /notebooks/02.00-Introduction-to-NumPy.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "< [更多IPython资源](01.08-More-IPython-Resources.ipynb) | [目录](Index.ipynb) | [理解Python中的数据类型](02.01-Understanding-Data-Types.ipynb) >\n", 9 | "\n", 10 | "\"Open\n" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# Introduction to NumPy\n", 18 | "\n", 19 | "# NumPy 介绍" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "> This chapter, along with chapter 3, outlines techniques for effectively loading, storing, and manipulating in-memory data in Python.\n", 27 | "The topic is very broad: datasets can come from a wide range of sources and a wide range of formats, including collections of documents, collections of images, collections of sound clips, collections of numerical measurements, or nearly anything else.\n", 28 | "Despite this apparent heterogeneity, it will help us to think of all data fundamentally as arrays of numbers.\n", 29 | "\n", 30 | "下面我们将开启新的一章,本章连同第三章一起,会介绍和讨论高效的装载,存储和处理Python中内存数据的技巧。这个主题非常广泛:数据集可能来自非常不同的来源和非常不同的格式,包括文档的集合,图像的集合,声音片段的集合,数值测量的集合,甚至其他任何东西的集合。尽管数据集有着超出想象的异质性,我们还是可以将所有的数据抽象成为数值组成的数组。\n", 31 | "\n", 32 | "> For example, images–particularly digital images–can be thought of as simply two-dimensional arrays of numbers representing pixel brightness across the area.\n", 33 | "Sound clips can be thought of as one-dimensional arrays of intensity versus time.\n", 34 
| "Text can be converted in various ways into numerical representations, perhaps binary digits representing the frequency of certain words or pairs of words.\n", 35 | "No matter what the data are, the first step in making it analyzable will be to transform them into arrays of numbers.\n", 36 | "(We will discuss some specific examples of this process later in [Feature Engineering](05.04-Feature-Engineering.ipynb))\n", 37 | "\n", 38 | "例如图像,这里我们特指数字图像,可以被认为是简单的二维数组,包含着代表这区域内每个像素亮度的数值。声音片段可以被认为是一维的数组,包含着时间范围内声音强度的数值。文本可以使用各种方法转换成为数值方式表示,比方说使用二进制数字表示某个单词或短语的出现频率。无论数据是哪种类型,我们对它们进行处理的时候,第一步总是设计将它们转换为数值。(参见[特征工程](05.04-Feature-Engineering.ipynb))\n", 39 | "\n", 40 | "> For this reason, efficient storage and manipulation of numerical arrays is absolutely fundamental to the process of doing data science.\n", 41 | "We'll now take a look at the specialized tools that Python has for handling such numerical arrays: the NumPy package, and the Pandas package (discussed in Chapter 3).\n", 42 | "\n", 43 | "因此,有效的存储和处理数值数组对于数据科学来说是最根本的能力。我们接下来会讨论Python中具备这样强大功能的特殊工具:NumPy和Pandas(将在第三章讨论)。\n", 44 | "\n", 45 | "> This chapter will cover NumPy in detail. 
NumPy (short for *Numerical Python*) provides an efficient interface to store and operate on dense data buffers.\n", 46 | "In some ways, NumPy arrays are like Python's built-in ``list`` type, but NumPy arrays provide much more efficient storage and data operations as the arrays grow larger in size.\n", 47 | "NumPy arrays form the core of nearly the entire ecosystem of data science tools in Python, so time spent learning to use NumPy effectively will be valuable no matter what aspect of data science interests you.\n", 48 | "\n", 49 | "本章会详细介绍NumPy(*Numerical Python* 数值Python的缩写),它提供了强大的接口供我们存储和操作非稀疏数据集合。在某些情况下,NumPy的数组表现得就像Python內建的`列表`,但是NumPy数组在存储和操作大量数据集合的时候提供了有效得多的功能和性能。NumPy数组是Python的数据科学领域工具链的核心,很多其他的工具都是在它的基础上构建的,因此无论你感兴趣的是数据科学的哪个领域,NumPy都值得你花时间进行钻研。\n", 50 | "\n", 51 | "> If you followed the advice outlined in the Preface and installed the Anaconda stack, you already have NumPy installed and ready to go.\n", 52 | "If you're more the do-it-yourself type, you can go to http://www.numpy.org/ and follow the installation instructions found there.\n", 53 | "Once you do, you can import NumPy and double-check the version:\n", 54 | "\n", 55 | "如果你遵从这本书序言的内容安装的Anaconda,那么NumPy已经自动安装好了,你可以继续往下阅读。如果你喜欢DIY,你可以到[NumPy官网](http://www.numpy.org/),然后按照提示自行安装。当你完成之后,你就可以在你的脚本中载入NumPy模块了,然后输出NumPy的版本号验证安装结果:" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 1, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "data": { 65 | "text/plain": [ 66 | "'1.16.4'" 67 | ] 68 | }, 69 | "execution_count": 1, 70 | "metadata": {}, 71 | "output_type": "execute_result" 72 | } 73 | ], 74 | "source": [ 75 | "import numpy\n", 76 | "numpy.__version__" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "> For the pieces of the package discussed here, I'd recommend NumPy version 1.8 or later.\n", 84 | "By convention, you'll find that most people in the SciPy/PyData world will import NumPy using ``np`` as an alias:\n", 85 | "\n", 86 | 
"对于本书中的例子来说,作者推荐安装NumPy 1.8或以上版本。习惯上,大多数人都会使用`np`作为别名来载入NumPy模块:" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 2, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "import numpy as np" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "> Throughout this chapter, and indeed the rest of the book, you'll find that this is the way we will import and use NumPy.\n", 103 | "\n", 104 | "本章以及本书后续内容,这都是我们载入NumPy模块的标准方式。" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "## Reminder about Built In Documentation\n", 112 | "\n", 113 | "## 內建帮助和文档\n", 114 | "\n", 115 | "> As you read through this chapter, don't forget that IPython gives you the ability to quickly explore the contents of a package (by using the tab-completion feature), as well as the documentation of various functions (using the ``?`` character – Refer back to [Help and Documentation in IPython](01.01-Help-And-Documentation.ipynb)).\n", 116 | "\n", 117 | "在你阅读本章的过程中,请不要忘记了IPython提供的內建帮助工具`?`以及使用制表符自动补全的功能。(参见:[IPython帮助和文档](01.01-Help-And-Documentation.ipynb)。)\n", 118 | "\n", 119 | "> For example, to display all the contents of the numpy namespace, you can type this:\n", 120 | "\n", 121 | "例如,要查看numpy模块中的所有内容(属性和方法),你可以输入:\n", 122 | "\n", 123 | "```python\n", 124 | "In [3]: np.\n", 125 | "```\n", 126 | "\n", 127 | "> And to display NumPy's built-in documentation, you can use this:\n", 128 | "\n", 129 | "如果想查看numpy的內建文档,你可以输入:\n", 130 | "\n", 131 | "```python\n", 132 | "In [4]: np?\n", 133 | "```\n", 134 | "\n", 135 | "> More detailed documentation, along with tutorials and other resources, can be found at http://www.numpy.org.\n", 136 | "\n", 137 | "需要更加详尽的文档、教程或其他资源,你可以访问[NumPy官网](http://www.numpy.org)。" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "\n", 145 | "< [更多IPython资源](01.08-More-IPython-Resources.ipynb) | 
[目录](Index.ipynb) | [理解Python中的数据类型](02.01-Understanding-Data-Types.ipynb) >\n", 146 | "\n", 147 | "\"Open\n" 148 | ] 149 | } 150 | ], 151 | "metadata": { 152 | "anaconda-cloud": {}, 153 | "kernelspec": { 154 | "display_name": "Python 3", 155 | "language": "python", 156 | "name": "python3" 157 | }, 158 | "language_info": { 159 | "codemirror_mode": { 160 | "name": "ipython", 161 | "version": 3 162 | }, 163 | "file_extension": ".py", 164 | "mimetype": "text/x-python", 165 | "name": "python", 166 | "nbconvert_exporter": "python", 167 | "pygments_lexer": "ipython3", 168 | "version": "3.7.6" 169 | } 170 | }, 171 | "nbformat": 4, 172 | "nbformat_minor": 1 173 | } 174 | -------------------------------------------------------------------------------- /notebooks/03.00-Introduction-to-Pandas.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "< [格式化数据:NumPy里的结构化数组](02.09-Structured-Data-NumPy.ipynb) | [目录](Index.ipynb) | [Pandas对象简介](03.01-Introducing-Pandas-Objects.ipynb) >\n", 9 | "\n", 10 | "\"Open\n" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# Data Manipulation with Pandas\n", 18 | "\n", 19 | "# 使用Pandas处理数据" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "> In the previous chapter, we dove into detail on NumPy and its ``ndarray`` object, which provides efficient storage and manipulation of dense typed arrays in Python.\n", 27 | "Here we'll build on this knowledge by looking in detail at the data structures provided by the Pandas library.\n", 28 | "Pandas is a newer package built on top of NumPy, and provides an efficient implementation of a ``DataFrame``.\n", 29 | "``DataFrame``s are essentially multidimensional arrays with attached row and column labels, and often with heterogeneous types and/or missing data.\n", 30 | "As well 
as offering a convenient storage interface for labeled data, Pandas implements a number of powerful data operations familiar to users of both database frameworks and spreadsheet programs.\n", 31 | "\n", 32 | "在上一章中,我们深入介绍了NumPy和它的`ndarray`对象,它被用来在Python中存储和操作非稀疏的数组数据。以此为基础,本章将要详细介绍Pandas库为我们提供的数据结构。Pandas是一个在NumPy的基础上创建的第三方库,它提供了对于`DataFrame`对象的有效支持。`DataFrame`是一个多维的数组,其行和列都有标签,通常列之间都含有不同种类的数据类型或者有缺失的数据。除了提供了对于标签数据存储的支持之外,Pandas还实现了数量众多的数据操作方法,这些方法无论对于数据库的用户还是对于工作表单用户而言都非常熟悉。\n", 33 | "\n", 34 | "> As we saw, NumPy's ``ndarray`` data structure provides essential features for the type of clean, well-organized data typically seen in numerical computing tasks.\n", 35 | "While it serves this purpose very well, its limitations become clear when we need more flexibility (e.g., attaching labels to data, working with missing data, etc.) and when attempting operations that do not map well to element-wise broadcasting (e.g., groupings, pivots, etc.), each of which is an important piece of analyzing the less structured data available in many forms in the world around us.\n", 36 | "Pandas, and in particular its ``Series`` and ``DataFrame`` objects, builds on the NumPy array structure and provides efficient access to these sorts of \"data munging\" tasks that occupy much of a data scientist's time.\n", 37 | "\n", 38 | "正如我们前面看到的,NumPy的`ndarray`数据结构能为数值计算任务所需要的数据提供必不可少的功能。虽然`ndarray`的功能已经很强大,但是当我们需要更多的灵活性的时候,它的缺陷就体现了出来(例如,为数据提供标签,处理缺失的数据等)。而且如果当需要对数据进行超过广播能处理范畴的操作时(例如分组,数据透视等),NumPy就无能为力了。而上述提到的这些能力对于我们处理真实世界中产生的非严格格式化数据来说是非常重要的。Pandas,或者更具体的来说,它的`Series`和`DataFrame`对象,在NumPy的基础上提供了上述操作,让数据科学家能从花很多时间的这种乏味的数据处理工作中解脱出来。\n", 39 | "\n", 40 | "> In this chapter, we will focus on the mechanics of using ``Series``, ``DataFrame``, and related structures effectively.\n", 41 | "We will use examples drawn from real datasets where appropriate, but these examples are not necessarily the focus.\n", 42 | "\n", 43 | 
"我们在本章中会聚焦于了解`Series`、`DataFrame`和相关结构的机制上。例子中使用了真实的数据集进行说明,以方便理解,但是并不需要特别关注例子数据本身。" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "## Installing and Using Pandas\n", 51 | "\n", 52 | "## 安装和使用Pandas\n", 53 | "\n", 54 | "> Installation of Pandas on your system requires NumPy to be installed, and if building the library from source, requires the appropriate tools to compile the C and Cython sources on which Pandas is built.\n", 55 | "Details on this installation can be found in the [Pandas documentation](http://pandas.pydata.org/).\n", 56 | "If you followed the advice outlined in the [Preface](00.00-Preface.ipynb) and used the Anaconda stack, you already have Pandas installed.\n", 57 | "\n", 58 | "在你的系统上安装Pandas必须先安装NumPy,如果选择从源码进行安装,还需要能够编译C和Cython的工具,因为Pandas源码是使用这两种语言编写的。详细的安装文档可以访问[Pandas在线文档](http://pandas.pydata.org/)。如果你是依照[序言](00.00-Preface.ipynb)中的方法使用Anaconda安装的环境,那么Pandas已经安装好了。\n", 59 | "\n", 60 | "> Once Pandas is installed, you can import it and check the version:\n", 61 | "\n", 62 | "安装后,你可以载入包并检查版本信息,验证安装是否成功:" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 1, 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "data": { 72 | "text/plain": [ 73 | "'0.24.2'" 74 | ] 75 | }, 76 | "execution_count": 1, 77 | "metadata": {}, 78 | "output_type": "execute_result" 79 | } 80 | ], 81 | "source": [ 82 | "import pandas\n", 83 | "pandas.__version__" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "> Just as we generally import NumPy under the alias ``np``, we will import Pandas under the alias ``pd``:\n", 91 | "\n", 92 | "就像我们惯例将NumPy载入并命名为`np`一样,我们也惯例将Pandas载入并命名为`pd`:" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 2, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "import pandas as pd" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "> This import 
convention will be used throughout the remainder of this book.\n", 109 | "\n", 110 | "这个惯例会贯穿本书后续所有内容。" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "## Reminder about Built-In Documentation\n", 118 | "\n", 119 | "## 内建帮助及文档的提醒\n", 120 | "\n", 121 | "> As you read through this chapter, don't forget that IPython gives you the ability to quickly explore the contents of a package (by using the tab-completion feature) as well as the documentation of various functions (using the ``?`` character). (Refer back to [Help and Documentation in IPython](01.01-Help-And-Documentation.ipynb) if you need a refresher on this.)\n", 122 | "\n", 123 | "当你阅读本章的时候,不要忘记了IPython提供了快速查看对象内容(使用tab自动补全)和帮助文档(使用`?`语句)的工具。(参见[IPython的帮助和文档](01.01-Help-And-Documentation.ipynb))\n", 124 | "\n", 125 | "> For example, to display all the contents of the pandas namespace, you can type\n", 126 | "\n", 127 | "例如,要查看pandas命名空间中的所有内容,你可以输入\n", 128 | "\n", 129 | "```ipython\n", 130 | "In [3]: pd.<TAB>\n", 131 | "```\n", 132 | "\n", 133 | "> And to display Pandas's built-in documentation, you can use this:\n", 134 | "\n", 135 | "要显示Pandas的内建文档,你可以输入\n", 136 | "\n", 137 | "```ipython\n", 138 | "In [4]: pd?\n", 139 | "```\n", 140 | "\n", 141 | "> More detailed documentation, along with tutorials and other resources, can be found at http://pandas.pydata.org/.\n", 142 | "\n", 143 | "更详细的文档,包括教程和其他资源,可以访问[http://pandas.pydata.org/](http://pandas.pydata.org/)。" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "\n", 151 | "< [格式化数据:NumPy里的结构化数组](02.09-Structured-Data-NumPy.ipynb) | [目录](Index.ipynb) | [Pandas对象简介](03.01-Introducing-Pandas-Objects.ipynb) >\n", 152 | "\n", 153 | "\"Open\n" 154 | ] 155 | } 156 | ], 157 | "metadata": { 158 | "anaconda-cloud": {}, 159 | "kernelspec": { 160 | "display_name": "Python 3", 161 | "language": "python", 162 | "name": "python3" 163 | }, 164 | "language_info": { 165 | "codemirror_mode": { 166 | 
"name": "ipython", 167 | "version": 3 168 | }, 169 | "file_extension": ".py", 170 | "mimetype": "text/x-python", 171 | "name": "python", 172 | "nbconvert_exporter": "python", 173 | "pygments_lexer": "ipython3", 174 | "version": "3.7.6" 175 | } 176 | }, 177 | "nbformat": 4, 178 | "nbformat_minor": 1 179 | } 180 | -------------------------------------------------------------------------------- /notebooks/03.13-Further-Resources.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "< [高性能Pandas: eval() 和 query()](03.12-Performance-Eval-and-Query.ipynb) | [目录](Index.ipynb) | [使用matplotlib展示数据](04.00-Introduction-To-Matplotlib.ipynb) >\n", 9 | "\n", 10 | "\"Open\n" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# Further Resources\n", 18 | "\n", 19 | "# 更多资源" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "> In this chapter, we've covered many of the basics of using Pandas effectively for data analysis.\n", 27 | "Still, much has been omitted from our discussion.\n", 28 | "To learn more about Pandas, I recommend the following resources:\n", 29 | "\n", 30 | "> - [Pandas online documentation](http://pandas.pydata.org/): This is the go-to source for complete documentation of the package. While the examples in the documentation tend to be small generated datasets, the description of the options is complete and generally very useful for understanding the use of various functions.\n", 31 | "\n", 32 | "> - [*Python for Data Analysis*](http://shop.oreilly.com/product/0636920023784.do) Written by Wes McKinney (the original creator of Pandas), this book contains much more detail on the Pandas package than we had room for in this chapter. 
In particular, he takes a deep dive into tools for time series, which were his bread and butter as a financial consultant. The book also has many entertaining examples of applying Pandas to gain insight from real-world datasets. Keep in mind, though, that the book is now several years old, and the Pandas package has quite a few new features that this book does not cover (but be on the lookout for a new edition in 2017).\n", 33 | "\n", 34 | "> - [Stack Overflow](http://stackoverflow.com/questions/tagged/pandas): Pandas has so many users that any question you have has likely been asked and answered on Stack Overflow. Using Pandas is a case where some Google-Fu is your best friend. Simply go to your favorite search engine and type in the question, problem, or error you're coming across–more than likely you'll find your answer on a Stack Overflow page.\n", 35 | "\n", 36 | "> - [Pandas on PyVideo](http://pyvideo.org/search?q=pandas): From PyCon to SciPy to PyData, many conferences have featured tutorials from Pandas developers and power users. The PyCon tutorials in particular tend to be given by very well-vetted presenters.\n", 37 | "\n", 38 | "> Using these resources, combined with the walk-through given in this chapter, my hope is that you'll be poised to use Pandas to tackle any data analysis problem you come across!" 
39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "在本章中,我们介绍了许多使用Pandas有效进行数据分析的基础知识。但是显然还有很多内容没有讨论到,需要学习更多有关Pandas的内容,作者建议阅读下面的资源:\n", 46 | "\n", 47 | "- [Pandas在线文档](http://pandas.pydata.org/):Pandas包最完整的文档来源。虽然文档中的例子基本上都是一些生成的小数据集,但是里面的参数说明是很完整的,而且通常对于理解使用Pandas的函数和方法是非常有帮助的。\n", 48 | "- [*Python for Data Analysis*](http://shop.oreilly.com/product/0636920023784.do):作者Wes McKinney(Pandas的创始人),这本书包括了很多Pandas的详尽资料。特别是对于时间序列数据的处理,本书进行了深入的介绍,这对于经济学的分析是非常有帮助的。本书也有很多在真实世界数据中应用Pandas的有趣例子。不过此书已经出版有几年时间了,因此近几年Pandas提供的新特性都没有包括其中,不过我们可以期待本书2017年的新版。\n", 49 | "- [Stack Overflow](http://stackoverflow.com/questions/tagged/pandas):Pandas有着许多的用户,因此你遇到的问题很可能在Stack Overflow网站上已经有人问过和解答了。当然谷歌大法也是你的好朋友(译者注:度娘大法就免了)。去搜索引擎输入你的问题或错误,很可能谷歌就会将你导向到Stack Overflow的答案。\n", 50 | "- [PyVideo上的Pandas视频](http://pyvideo.org/search?q=pandas):从PyCon到SciPy再到PyData,很多研讨会都有关于Pandas的教程和专题。特别是PyCon上面有很多知名开发者做的教程和专题讨论。\n", 51 | "\n", 52 | "结合本章介绍的内容,加上上面的资源,读者应该能够获得足够的帮助用来解决使用Pandas处理数据分析的所有问题。" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "\n", 60 | "< [高性能Pandas: eval() 和 query()](03.12-Performance-Eval-and-Query.ipynb) | [目录](Index.ipynb) | [使用matplotlib展示数据](04.00-Introduction-To-Matplotlib.ipynb) >\n", 61 | "\n", 62 | "\"Open\n" 63 | ] 64 | } 65 | ], 66 | "metadata": { 67 | "anaconda-cloud": {}, 68 | "kernelspec": { 69 | "display_name": "Python 3", 70 | "language": "python", 71 | "name": "python3" 72 | }, 73 | "language_info": { 74 | "codemirror_mode": { 75 | "name": "ipython", 76 | "version": 3 77 | }, 78 | "file_extension": ".py", 79 | "mimetype": "text/x-python", 80 | "name": "python", 81 | "nbconvert_exporter": "python", 82 | "pygments_lexer": "ipython3", 83 | "version": "3.7.6" 84 | } 85 | }, 86 | "nbformat": 4, 87 | "nbformat_minor": 1 88 | } 89 | -------------------------------------------------------------------------------- /notebooks/04.15-Further-Resources.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "< [使用Seaborn进行可视化](04.14-Visualization-With-Seaborn.ipynb) | [目录](Index.ipynb) | [机器学习](05.00-Machine-Learning.ipynb) >\n", 9 | "\n", 10 | "\"Open\n" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# Further Resources\n", 18 | "\n", 19 | "# 更多资源" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "## Matplotlib Resources\n", 27 | "\n", 28 | "## Matplotlib 资源\n", 29 | "\n", 30 | "> A single chapter in a book can never hope to cover all the available features and plot types available in Matplotlib.\n", 31 | "As with other packages we've seen, liberal use of IPython's tab-completion and help functions (see [Help and Documentation in IPython](01.01-Help-And-Documentation.ipynb)) can be very helpful when exploring Matplotlib's API.\n", 32 | "In addition, Matplotlib’s [online documentation](http://matplotlib.org/) can be a helpful reference.\n", 33 | "See in particular the [Matplotlib gallery](http://matplotlib.org/gallery.html) linked on that page: it shows thumbnails of hundreds of different plot types, each one linked to a page with the Python code snippet used to generate it.\n", 34 | "In this way, you can visually inspect and learn about a wide range of different plotting styles and visualization techniques.\n", 35 | "\n", 36 | "本书中短短的一章内容不可能涵盖Matplotlib中所有的特性和图表类型。就像我们之前介绍到的其他工具一样,使用IPython的TAB自动补全以及帮助功能(参见[IPython的帮助和文档](01.01-Help-And-Documentation.ipynb))对于学习Matplotlib的API是非常有帮助的。[Matplotlib在线文档](http://matplotlib.org/)也是一个很有帮助的参考内容。建议去浏览[Matplotlib画廊](http://matplotlib.org/gallery.html):上面展示了上百个不同种类图表的缩略图,每个图都有一个超链接能导航到创建这个图表的Python代码片段页面。使用上述方法可以直观的浏览和学习许多不同类型的图表样式和可视化技巧。\n", 37 | "\n", 38 | "> For a book-length treatment of Matplotlib, I would recommend [*Interactive Applications 
Using Matplotlib*](https://www.packtpub.com/application-development/interactive-applications-using-matplotlib), written by Matplotlib core developer Ben Root.\n", 39 | "\n", 40 | "如果你需要阅读书籍,作者建议[*使用Matplotlib交互式应用*](https://www.packtpub.com/application-development/interactive-applications-using-matplotlib),作者是Matplotlib的核心开发者Ben Root。" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "## Other Python Graphics Libraries\n", 48 | "\n", 49 | "## 其他Python图像包\n", 50 | "\n", 51 | "> Although Matplotlib is the most prominent Python visualization library, there are other more modern tools that are worth exploring as well.\n", 52 | "I'll mention a few of them briefly here:\n", 53 | "\n", 54 | "> - [Bokeh](http://bokeh.pydata.org) is a JavaScript visualization library with a Python frontend that creates highly interactive visualizations capable of handling very large and/or streaming datasets. The Python front-end outputs a JSON data structure that can be interpreted by the Bokeh JS engine.\n", 55 | "> - [Plotly](http://plot.ly) is the eponymous open source product of the Plotly company, and is similar in spirit to Bokeh. Because Plotly is the main product of a startup, it is receiving a high level of development effort. Use of the library is entirely free.\n", 56 | "> - [Vispy](http://vispy.org/) is an actively developed project focused on dynamic visualizations of very large datasets. Because it is built to target OpenGL and make use of efficient graphics processors in your computer, it is able to render some quite large and stunning visualizations.\n", 57 | "> - [Vega](https://vega.github.io/) and [Vega-Lite](https://vega.github.io/vega-lite) are declarative graphics representations, and are the product of years of research into the fundamental language of data visualization. The reference rendering implementation is JavaScript, but the API is language agnostic. 
There is a Python API under development in the [Altair](https://altair-viz.github.io/) package. Though as of summer 2016 it's not yet fully mature, I'm quite excited for the possibilities of this project to provide a common reference point for visualization in Python and other languages.\n", 58 | "\n", 59 | "虽然Matplotlib在Python可视化库中是占统治地位的,但是还有很多其他更现代的工具值得了解和学习。下面简单的介绍一下它们:\n", 60 | "\n", 61 | "- [Bokeh](http://bokeh.pydata.org)是一个在前端使用Python的JavaScript可视化库,能够创建高度交互的可视化图表处理大量和流式的数据集。Python前端会产生JSON数据结构然后交给Bokeh JS引擎进行解析处理。\n", 62 | "- [Plotly](http://plot.ly)是Plotly公司开发维护的一套同名产品,它的理念与Bokeh类似。因为Plotly是这间初创公司的主要产品,所以它正处于开发密集时期,使用这个库是完全免费的。\n", 63 | "- [Vispy](http://vispy.org/)是一个聚焦于大数据动态可视化的活跃项目。因为它的目标是支持OpenGL,并且有效地使用计算机的显卡资源,所以它能渲染一些非常巨大和炫目的可视化效果。\n", 64 | "- [Vega](https://vega.github.io/)和[Vega-Lite](https://vega.github.io/vega-lite)是陈述式的图形表达,提供了一个多年研究的数据可视化基础语言产品。渲染实现的基础是JavaScript,但是它的API是语言无关的。Vega有一个正在开发中的Python API叫做[Altair](https://altair-viz.github.io/)包。虽然在2016年夏天这个包还未完全成熟,但作者对于项目在为Python和其他语言提供通用的可视化功能取得的进展感到兴奋。\n", 65 | "\n", 66 | "> The visualization space in the Python community is very dynamic, and I fully expect this list to be out of date as soon as it is published.\n", 67 | "\n", 68 | "Python社区中数据可视化部分变化很快,作者估计上述的列表可能在本书出版的时候就已经显得过时了。\n", 69 | "\n", 70 | "> Keep an eye out for what's coming in the future!\n", 71 | "\n", 72 | "希望读者能保持对这个领域未来的关注。" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "\n", 80 | "< [使用Seaborn进行可视化](04.14-Visualization-With-Seaborn.ipynb) | [目录](Index.ipynb) | [机器学习](05.00-Machine-Learning.ipynb) >\n", 81 | "\n", 82 | "\"Open\n" 83 | ] 84 | } 85 | ], 86 | "metadata": { 87 | "kernelspec": { 88 | "display_name": "Python 3", 89 | "language": "python", 90 | "name": "python3" 91 | }, 92 | "language_info": { 93 | "codemirror_mode": { 94 | "name": "ipython", 95 | "version": 3 96 | }, 97 | "file_extension": ".py", 98 | "mimetype": "text/x-python", 99 | "name": "python", 100 | 
"nbconvert_exporter": "python", 101 | "pygments_lexer": "ipython3", 102 | "version": "3.7.6" 103 | } 104 | }, 105 | "nbformat": 4, 106 | "nbformat_minor": 1 107 | } 108 | -------------------------------------------------------------------------------- /notebooks/05.00-Machine-Learning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "< [更多资源](04.15-Further-Resources.ipynb) | [目录](Index.ipynb) | [什么是机器学习?](05.01-What-Is-Machine-Learning.ipynb) >\n", 9 | "\n", 10 | "\"Open\n" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# Machine Learning\n", 18 | "\n", 19 | "# 机器学习" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "> In many ways, machine learning is the primary means by which data science manifests itself to the broader world.\n", 27 | "Machine learning is where these computational and algorithmic skills of data science meet the statistical thinking of data science, and the result is a collection of approaches to inference and data exploration that are not about effective theory so much as effective computation.\n", 28 | "\n", 29 | "在很多情况下,机器学习是数据科学本身以及更广泛领域中的主要方法。机器学习是数据科学中计算及算法和统计思维相结合的产物,结果得到的是一整套的推理方法和数据分析工具。\n", 30 | "\n", 31 | "> The term \"machine learning\" is sometimes thrown around as if it is some kind of magic pill: *apply machine learning to your data, and all your problems will be solved!*\n", 32 | "As you might expect, the reality is rarely this simple.\n", 33 | "While these methods can be incredibly powerful, to be effective they must be approached with a firm grasp of the strengths and weaknesses of each method, as well as a grasp of general concepts such as bias and variance, overfitting and underfitting, and more.\n", 34 | "\n", 35 | 
"术语“机器学习”有时候会被滥用就好像这是一剂灵丹妙药一样:*在你的数据上使用机器学习吧,你的所有问题都会得到解决*。正如你所料,现实很少如此简单。虽然这些方法可能非常强大,但是你必须掌握每种方法的优缺点才能令它们更加有效,你需要掌握偏差和方差的基本概念,以及过拟合和欠拟合等等。\n", 36 | "\n", 37 | "> This chapter will dive into practical aspects of machine learning, primarily using Python's [Scikit-Learn](http://scikit-learn.org) package.\n", 38 | "This is not meant to be a comprehensive introduction to the field of machine learning; that is a large subject and necessitates a more technical approach than we take here.\n", 39 | "Nor is it meant to be a comprehensive manual for the use of the Scikit-Learn package (for this, you can refer to the resources listed in [Further Machine Learning Resources](05.15-Learning-More.ipynb)).\n", 40 | "Rather, the goals of this chapter are:\n", 41 | "\n", 42 | "> - To introduce the fundamental vocabulary and concepts of machine learning.\n", 43 | "> - To introduce the Scikit-Learn API and show some examples of its use.\n", 44 | "> - To take a deeper dive into the details of several of the most important machine learning approaches, and develop an intuition into how they work and when and where they are applicable.\n", 45 | "\n", 46 | "本章会从实践的角度深入的介绍机器学习,主要使用Python的[Scikit-Learn](http://scikit-learn.org)包。这并不是一个机器学习领域完整的介绍;因为这一课题过于庞大复杂,需要比本书更高级的技术书籍才能阐述清楚。本章甚至不是一个使用Scikit-Learn包的完整手册(为此,你可以参考[更多机器学习资源](05.15-Learning-More.ipynb)中列出的内容)。本章的目标是:\n", 47 | "\n", 48 | "- 对机器学习基本术语和概念的介绍。\n", 49 | "- Scikit-Learn包API的简单介绍以及使用例子。\n", 50 | "- 对最重要的机器学习方法进行深入介绍,帮你建立它们工作原理的概念,并对它们的应用范围进行了解。\n", 51 | "\n", 52 | "> Much of this material is drawn from the Scikit-Learn tutorials and workshops I have given on several occasions at PyCon, SciPy, PyData, and other conferences.\n", 53 | "Any clarity in the following pages is likely due to the many workshop participants and co-instructors who have given me valuable feedback on this material over the years!\n", 54 | "\n", 55 | "本章很多的材料都是从作者多次在PyCon、SciPy、PyData和其他论坛中对Scikit-Learn的教程和工坊中精选出来的。接下来的各小节内容都得到了这些论坛参与者和同事的反馈及帮助。\n", 56 | "\n", 57 | "> Finally, 
if you are seeking a more comprehensive or technical treatment of any of these subjects, I've listed several resources and references in [Further Machine Learning Resources](05.15-Learning-More.ipynb).\n", 58 | "\n", 59 | "最后,如果你在寻找更全面和深入的资料的话,在[更多机器学习资源](05.15-Learning-More.ipynb)一节中列出了一些你需要的资源。" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "\n", 67 | "< [更多资源](04.15-Further-Resources.ipynb) | [目录](Index.ipynb) | [什么是机器学习?](05.01-What-Is-Machine-Learning.ipynb) >\n", 68 | "\n", 69 | "\"Open\n" 70 | ] 71 | } 72 | ], 73 | "metadata": { 74 | "anaconda-cloud": {}, 75 | "kernelspec": { 76 | "display_name": "Python 3", 77 | "language": "python", 78 | "name": "python3" 79 | }, 80 | "language_info": { 81 | "codemirror_mode": { 82 | "name": "ipython", 83 | "version": 3 84 | }, 85 | "file_extension": ".py", 86 | "mimetype": "text/x-python", 87 | "name": "python", 88 | "nbconvert_exporter": "python", 89 | "pygments_lexer": "ipython3", 90 | "version": "3.7.6" 91 | } 92 | }, 93 | "nbformat": 4, 94 | "nbformat_minor": 1 95 | } 96 | -------------------------------------------------------------------------------- /notebooks/05.15-Learning-More.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "< [应用:脸部识别管道](05.14-Image-Features.ipynb) | [目录](Index.ipynb) | [附录:生成图像的代码](06.00-Figure-Code.ipynb) >\n", 9 | "\n", 10 | "\"Open\n" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# Further Machine Learning Resources\n", 18 | "\n", 19 | "# 更多机器学习资源" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "> This chapter has been a quick tour of machine learning in Python, primarily using the tools within the Scikit-Learn library.\n", 27 | "As long as the chapter is, it is still too short to cover many interesting 
and important algorithms, approaches, and discussions.\n", 28 | "Here I want to suggest some resources to learn more about machine learning for those who are interested.\n", 29 | "\n", 30 | "本章对Python中的机器学习进行了一个快速简要的介绍,主要是使用Scikit-Learn库中的工具来完成机器学习任务。尽管本章内容较长,但依然不足以覆盖很多有趣及重要的算法、方法和讨论。这里作者会推荐一些额外的资源,希望对那些有兴趣了解更多机器学习知识的读者有帮助。" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "## Machine Learning in Python\n", 38 | "\n", 39 | "## Python机器学习\n", 40 | "\n", 41 | "> To learn more about machine learning in Python, I'd suggest some of the following resources:\n", 42 | "\n", 43 | ">- [The Scikit-Learn website](http://scikit-learn.org): The Scikit-Learn website has an impressive breadth of documentation and examples covering some of the models discussed here, and much, much more. If you want a brief survey of the most important and often-used machine learning algorithms, this website is a good place to start.\n", 44 | "- *SciPy, PyCon, and PyData tutorial videos*: Scikit-Learn and other machine learning topics are perennial favorites in the tutorial tracks of many Python-focused conference series, in particular the PyCon, SciPy, and PyData conferences. You can find the most recent ones via a simple web search.\n", 45 | "- [*Introduction to Machine Learning with Python*](http://shop.oreilly.com/product/0636920030515.do): Written by Andreas C. Mueller and Sarah Guido, this book includes a fuller treatment of the topics in this chapter. If you're interested in reviewing the fundamentals of Machine Learning and pushing the Scikit-Learn toolkit to its limits, this is a great resource, written by one of the most prolific developers on the Scikit-Learn team.\n", 46 | "- [*Python Machine Learning*](https://www.packtpub.com/big-data-and-business-intelligence/python-machine-learning): Sebastian Raschka's book focuses less on Scikit-learn itself, and more on the breadth of machine learning tools available in Python. 
In particular, there is some very useful discussion on how to scale Python-based machine learning approaches to large and complex datasets.\n", 47 | "\n", 48 | "要学习更多Python机器学习的内容,作者推荐下面的资源:\n", 49 | "\n", 50 | "- [Scikit-Learn官网](http://scikit-learn.org):Scikit-Learn官网有许多文档和例子,涵盖了一些我们本章介绍的内容,和更多未及阐述的知识。如果你想要获得最重要和常用的机器学习算法简介,这将是旅程的起点。\n", 51 | "- [*Python机器学习介绍*](http://shop.oreilly.com/product/0636920030515.do):作者Andreas C. Mueller和Sarah Guido,这本书包含了本章内容更加完整的解决方案。如果你想要复习机器学习的基本概念算法以及最深入地掌握Scikit-Learn工具集的话,这本书非常合适,其中一个作者是Scikit-Learn开发团队中最主要的贡献者之一。\n", 52 | "- [*Python机器学习*](https://www.packtpub.com/big-data-and-business-intelligence/python-machine-learning):作者Sebastian Raschka,本书较少聚焦在Scikit-Learn工具集,而是更多广泛地介绍使用Python的机器学习工具。确切的说,书中有着许多关于扩展Python机器学习方法适应大型复杂数据集的介绍。" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "## General Machine Learning\n", 60 | "\n", 61 | "## 通用机器学习\n", 62 | "\n", 63 | "> Of course, machine learning is much broader than just the Python world. There are many good resources to take your knowledge further, and here I will highlight a few that I have found useful:\n", 64 | "\n", 65 | "> - [*Machine Learning*](https://www.coursera.org/learn/machine-learning): Taught by Andrew Ng (Coursera), this is a very clearly-taught free online course which covers the basics of machine learning from an algorithmic perspective. It assumes undergraduate-level understanding of mathematics and programming, and steps through detailed considerations of some of the most important machine learning algorithms. Homework assignments, which are algorithmically graded, have you actually implement some of these models yourself.\n", 66 | "- [*Pattern Recognition and Machine Learning*](http://www.springer.com/us/book/9780387310732): Written by Christopher Bishop, this classic technical text covers the concepts of machine learning discussed in this chapter in detail. 
If you plan to go further in this subject, you should have this book on your shelf.\n", 67 | "- [*Machine Learning: a Probabilistic Perspective*](https://mitpress.mit.edu/books/machine-learning-0): Written by Kevin Murphy, this is an excellent graduate-level text that explores nearly all important machine learning algorithms from a ground-up, unified probabilistic perspective.\n", 68 | "\n", 69 | "当然,机器学习的范围远远超越了Python语言本身。有许多资源能让你学习到更广泛更深入的知识,作者这里推荐以下他认为非常有用的资源:\n", 70 | "\n", 71 | "- [*Machine Learning*](https://www.coursera.org/learn/machine-learning):Andrew Ng授课,这是一个非常出色在线免费课程,涵盖了从机器学习基础到算法分析。它默认听众具有大学本科的数学和编程基础,然后一步一步的对一些最重要的机器学习算法进行了详细的介绍。课程还有作业,与算法相关,让听众自己能真正实现一些模型算法。\n", 72 | "- [*模式识别与机器学习*](http://www.springer.com/us/book/9780387310732):作者Christopher Bishop,这本经典的技术书籍详细地介绍了本章讨论的机器学习概念。如果你希望深入研究本章介绍的内容,应该阅读本书。\n", 73 | "- [*机器学习:概率视角*](https://mitpress.mit.edu/books/machine-learning-0):作者Kevin Murphy,这是一本优秀的书籍,联系概率论知识从零开始探讨了几乎所有重要的机器学习算法,适合研究生以上阅读。\n", 74 | "\n", 75 | "> These resources are more technical than the material presented in this book, but to really understand the fundamentals of these methods requires a deep dive into the mathematics behind them.\n", 76 | "If you're up for the challenge and ready to bring your data science to the next level, don't hesitate to dive-in!\n", 77 | "\n", 78 | "这些资源与本书比较起来技术性更强,但是需要更加深入的掌握算法背后的数学知识才能真正理解这些方法的意义。如果你已经准备好了挑战更高层次的数据科学领域,不要犹豫,去掌握它们吧。" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "\n", 86 | "< [应用:脸部识别管道](05.14-Image-Features.ipynb) | [目录](Index.ipynb) | [附录:生成图像的代码](06.00-Figure-Code.ipynb) >\n", 87 | "\n", 88 | "\"Open\n" 89 | ] 90 | } 91 | ], 92 | "metadata": { 93 | "anaconda-cloud": {}, 94 | "kernelspec": { 95 | "display_name": "Python 3", 96 | "language": "python", 97 | "name": "python3" 98 | }, 99 | "language_info": { 100 | "codemirror_mode": { 101 | "name": "ipython", 102 | "version": 3 103 | }, 104 | "file_extension": ".py", 105 | "mimetype": 
"text/x-python", 106 | "name": "python", 107 | "nbconvert_exporter": "python", 108 | "pygments_lexer": "ipython3", 109 | "version": "3.7.6" 110 | } 111 | }, 112 | "nbformat": 4, 113 | "nbformat_minor": 1 114 | } 115 | -------------------------------------------------------------------------------- /notebooks/Index.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Python Data Science Handbook\n", 8 | "\n", 9 | "# Python数据科学手册" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "*Jake VanderPlas*\n", 17 | "\n", 18 | "原作者 *Jake VanderPlas*\n", 19 | "\n", 20 | "译者 *[wangyingsm@github.com](https://github.com/wangyingsm)*\n", 21 | "\n", 22 | "![Book Cover](https://github.com/wangyingsm/Python-Data-Science-Handbook/raw/master/notebooks/figures/PDSH-cover.png)" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "> This is the Jupyter notebook version of the [Python Data Science Handbook](http://shop.oreilly.com/product/0636920034919.do) by Jake VanderPlas; the content is available [on GitHub](https://github.com/jakevdp/PythonDataScienceHandbook).\n", 30 | "The text is released under the [CC-BY-NC-ND license](https://creativecommons.org/licenses/by-nc-nd/3.0/us/legalcode), and code is released under the [MIT license](https://opensource.org/licenses/MIT). 
If you find this content useful, please consider supporting the work by [buying the book](http://shop.oreilly.com/product/0636920034919.do)!\n", 31 | "\n", 32 | "这是Jake VanderPlas所著的[Python数据科学手册](http://shop.oreilly.com/product/0636920034919.do)的Jupyter notebook版本;本内容在[GitHub](https://github.com/wangyingsm/Python-Data-Science-Handbook)上。文字发行协议遵循[CC-BY-NC-ND license](https://creativecommons.org/licenses/by-nc-nd/3.0/us/legalcode)协议,而代码发行遵循[MIT license](https://opensource.org/licenses/MIT)。如果你认为这些内容很有用,请考虑通过[购买本书](http://shop.oreilly.com/product/0636920034919.do)支持作者。" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "## 目录\n", 40 | "\n", 41 | "译者注:原版英文目录见下方\n", 42 | "\n", 43 | "#### [序言](00.00-Preface.ipynb)\n", 44 | "\n", 45 | "#### [1. IPython:超越Python解释器](01.00-IPython-Beyond-Normal-Python.ipynb)\n", 46 | "- [IPython帮助和文档](01.01-Help-And-Documentation.ipynb)\n", 47 | "- [IPython Shell中的键盘快捷键](01.02-Shell-Keyboard-Shortcuts.ipynb)\n", 48 | "- [IPython魔术命令](01.03-Magic-Commands.ipynb)\n", 49 | "- [输入输出历史](01.04-Input-Output-History.ipynb)\n", 50 | "- [IPython和Shell命令](01.05-IPython-And-Shell-Commands.ipynb)\n", 51 | "- [错误和调试](01.06-Errors-and-Debugging.ipynb)\n", 52 | "- [性能测算和计时](01.07-Timing-and-Profiling.ipynb)\n", 53 | "- [更多IPython资源](01.08-More-IPython-Resources.ipynb)\n", 54 | "\n", 55 | "#### [2. 
NumPy介绍](02.00-Introduction-to-NumPy.ipynb)\n", 56 | "- [理解Python中的数据类型](02.01-Understanding-Data-Types.ipynb)\n", 57 | "- [NumPy数组基础](02.02-The-Basics-Of-NumPy-Arrays.ipynb)\n", 58 | "- [使用NumPy计算:通用函数](02.03-Computation-on-arrays-ufuncs.ipynb)\n", 59 | "- [聚合:Min, Max, 以及其他](02.04-Computation-on-arrays-aggregates.ipynb)\n", 60 | "- [在数组上计算:广播](02.05-Computation-on-arrays-broadcasting.ipynb)\n", 61 | "- [比较,遮盖和布尔逻辑](02.06-Boolean-Arrays-and-Masks.ipynb)\n", 62 | "- [高级索引](02.07-Fancy-Indexing.ipynb)\n", 63 | "- [数组排序](02.08-Sorting.ipynb)\n", 64 | "- [格式化数据:NumPy里的结构化数组](02.09-Structured-Data-NumPy.ipynb)\n", 65 | "\n", 66 | "#### [3. 使用Pandas进行数据处理](03.00-Introduction-to-Pandas.ipynb)\n", 67 | "- [Pandas对象简介](03.01-Introducing-Pandas-Objects.ipynb)\n", 68 | "- [数据索引和选择](03.02-Data-Indexing-and-Selection.ipynb)\n", 69 | "- [在Pandas中操作数据](03.03-Operations-in-Pandas.ipynb)\n", 70 | "- [处理空缺数据](03.04-Missing-Values.ipynb)\n", 71 | "- [层次化的索引](03.05-Hierarchical-Indexing.ipynb)\n", 72 | "- [组合数据集:Concat 和 Append](03.06-Concat-And-Append.ipynb)\n", 73 | "- [组合数据集:Merge 和 Join](03.07-Merge-and-Join.ipynb)\n", 74 | "- [聚合与分组](03.08-Aggregation-and-Grouping.ipynb)\n", 75 | "- [数据透视表](03.09-Pivot-Tables.ipynb)\n", 76 | "- [向量化的字符串操作](03.10-Working-With-Strings.ipynb)\n", 77 | "- [在时间序列上操作](03.11-Working-with-Time-Series.ipynb)\n", 78 | "- [高性能Pandas: ``eval()`` 和 ``query()``](03.12-Performance-Eval-and-Query.ipynb)\n", 79 | "- [更多资源](03.13-Further-Resources.ipynb)\n", 80 | "\n", 81 | "#### [4. 
使用matplotlib展示数据](04.00-Introduction-To-Matplotlib.ipynb)\n", 82 | "- [简单的折线图](04.01-Simple-Line-Plots.ipynb)\n", 83 | "- [简单的散点图](04.02-Simple-Scatter-Plots.ipynb)\n", 84 | "- [误差可视化](04.03-Errorbars.ipynb)\n", 85 | "- [密度和轮廓图](04.04-Density-and-Contour-Plots.ipynb)\n", 86 | "- [直方图, 分桶和密度](04.05-Histograms-and-Binnings.ipynb)\n", 87 | "- [自定义图表图例](04.06-Customizing-Legends.ipynb)\n", 88 | "- [自定义颜色条](04.07-Customizing-Colorbars.ipynb)\n", 89 | "- [多个子图表](04.08-Multiple-Subplots.ipynb)\n", 90 | "- [文本和标注](04.09-Text-and-Annotation.ipynb)\n", 91 | "- [自定义刻度](04.10-Customizing-Ticks.ipynb)\n", 92 | "- [自定义matplotlib:配置和样式单](04.11-Settings-and-Stylesheets.ipynb)\n", 93 | "- [在matplotlib中创建三维图表](04.12-Three-Dimensional-Plotting.ipynb)\n", 94 | "- [使用Basemap创建地理位置图表](04.13-Geographic-Data-With-Basemap.ipynb)\n", 95 | "- [使用Seaborn进行可视化](04.14-Visualization-With-Seaborn.ipynb)\n", 96 | "- [更多资源](04.15-Further-Resources.ipynb)\n", 97 | "\n", 98 | "#### [5. 机器学习](05.00-Machine-Learning.ipynb)\n", 99 | "- [什么是机器学习?](05.01-What-Is-Machine-Learning.ipynb)\n", 100 | "- [Scikit-Learn简介](05.02-Introducing-Scikit-Learn.ipynb)\n", 101 | "- [超参数及模型验证](05.03-Hyperparameters-and-Model-Validation.ipynb)\n", 102 | "- [特征工程](05.04-Feature-Engineering.ipynb)\n", 103 | "- [深入:朴素贝叶斯分类](05.05-Naive-Bayes.ipynb)\n", 104 | "- [深入:线性回归](05.06-Linear-Regression.ipynb)\n", 105 | "- [深入:支持向量机](05.07-Support-Vector-Machines.ipynb)\n", 106 | "- [深入:决策树和随机森林](05.08-Random-Forests.ipynb)\n", 107 | "- [深入:主成分分析](05.09-Principal-Component-Analysis.ipynb)\n", 108 | "- [深入:流形学习](05.10-Manifold-Learning.ipynb)\n", 109 | "- [深入:k-均值聚类](05.11-K-Means.ipynb)\n", 110 | "- [深入:高斯混合模型](05.12-Gaussian-Mixtures.ipynb)\n", 111 | "- [深入:核密度估计](05.13-Kernel-Density-Estimation.ipynb)\n", 112 | "- [应用:脸部识别管道](05.14-Image-Features.ipynb)\n", 113 | "- [更多机器学习资源](05.15-Learning-More.ipynb)\n", 114 | "\n", 115 | "#### [附录:生成图像的代码](06.00-Figure-Code.ipynb)" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | 
"metadata": {}, 121 | "source": [ 122 | "## Table of Contents\n", 123 | "\n", 124 | "#### [Preface](00.00-Preface.ipynb)\n", 125 | "\n", 126 | "#### [1. IPython: Beyond Normal Python](01.00-IPython-Beyond-Normal-Python.ipynb)\n", 127 | "- [Help and Documentation in IPython](01.01-Help-And-Documentation.ipynb)\n", 128 | "- [Keyboard Shortcuts in the IPython Shell](01.02-Shell-Keyboard-Shortcuts.ipynb)\n", 129 | "- [IPython Magic Commands](01.03-Magic-Commands.ipynb)\n", 130 | "- [Input and Output History](01.04-Input-Output-History.ipynb)\n", 131 | "- [IPython and Shell Commands](01.05-IPython-And-Shell-Commands.ipynb)\n", 132 | "- [Errors and Debugging](01.06-Errors-and-Debugging.ipynb)\n", 133 | "- [Profiling and Timing Code](01.07-Timing-and-Profiling.ipynb)\n", 134 | "- [More IPython Resources](01.08-More-IPython-Resources.ipynb)\n", 135 | "\n", 136 | "#### [2. Introduction to NumPy](02.00-Introduction-to-NumPy.ipynb)\n", 137 | "- [Understanding Data Types in Python](02.01-Understanding-Data-Types.ipynb)\n", 138 | "- [The Basics of NumPy Arrays](02.02-The-Basics-Of-NumPy-Arrays.ipynb)\n", 139 | "- [Computation on NumPy Arrays: Universal Functions](02.03-Computation-on-arrays-ufuncs.ipynb)\n", 140 | "- [Aggregations: Min, Max, and Everything In Between](02.04-Computation-on-arrays-aggregates.ipynb)\n", 141 | "- [Computation on Arrays: Broadcasting](02.05-Computation-on-arrays-broadcasting.ipynb)\n", 142 | "- [Comparisons, Masks, and Boolean Logic](02.06-Boolean-Arrays-and-Masks.ipynb)\n", 143 | "- [Fancy Indexing](02.07-Fancy-Indexing.ipynb)\n", 144 | "- [Sorting Arrays](02.08-Sorting.ipynb)\n", 145 | "- [Structured Data: NumPy's Structured Arrays](02.09-Structured-Data-NumPy.ipynb)\n", 146 | "\n", 147 | "#### [3. 
Data Manipulation with Pandas](03.00-Introduction-to-Pandas.ipynb)\n", 148 | "- [Introducing Pandas Objects](03.01-Introducing-Pandas-Objects.ipynb)\n", 149 | "- [Data Indexing and Selection](03.02-Data-Indexing-and-Selection.ipynb)\n", 150 | "- [Operating on Data in Pandas](03.03-Operations-in-Pandas.ipynb)\n", 151 | "- [Handling Missing Data](03.04-Missing-Values.ipynb)\n", 152 | "- [Hierarchical Indexing](03.05-Hierarchical-Indexing.ipynb)\n", 153 | "- [Combining Datasets: Concat and Append](03.06-Concat-And-Append.ipynb)\n", 154 | "- [Combining Datasets: Merge and Join](03.07-Merge-and-Join.ipynb)\n", 155 | "- [Aggregation and Grouping](03.08-Aggregation-and-Grouping.ipynb)\n", 156 | "- [Pivot Tables](03.09-Pivot-Tables.ipynb)\n", 157 | "- [Vectorized String Operations](03.10-Working-With-Strings.ipynb)\n", 158 | "- [Working with Time Series](03.11-Working-with-Time-Series.ipynb)\n", 159 | "- [High-Performance Pandas: eval() and query()](03.12-Performance-Eval-and-Query.ipynb)\n", 160 | "- [Further Resources](03.13-Further-Resources.ipynb)\n", 161 | "\n", 162 | "#### [4. 
Visualization with Matplotlib](04.00-Introduction-To-Matplotlib.ipynb)\n", 163 | "- [Simple Line Plots](04.01-Simple-Line-Plots.ipynb)\n", 164 | "- [Simple Scatter Plots](04.02-Simple-Scatter-Plots.ipynb)\n", 165 | "- [Visualizing Errors](04.03-Errorbars.ipynb)\n", 166 | "- [Density and Contour Plots](04.04-Density-and-Contour-Plots.ipynb)\n", 167 | "- [Histograms, Binnings, and Density](04.05-Histograms-and-Binnings.ipynb)\n", 168 | "- [Customizing Plot Legends](04.06-Customizing-Legends.ipynb)\n", 169 | "- [Customizing Colorbars](04.07-Customizing-Colorbars.ipynb)\n", 170 | "- [Multiple Subplots](04.08-Multiple-Subplots.ipynb)\n", 171 | "- [Text and Annotation](04.09-Text-and-Annotation.ipynb)\n", 172 | "- [Customizing Ticks](04.10-Customizing-Ticks.ipynb)\n", 173 | "- [Customizing Matplotlib: Configurations and Stylesheets](04.11-Settings-and-Stylesheets.ipynb)\n", 174 | "- [Three-Dimensional Plotting in Matplotlib](04.12-Three-Dimensional-Plotting.ipynb)\n", 175 | "- [Geographic Data with Basemap](04.13-Geographic-Data-With-Basemap.ipynb)\n", 176 | "- [Visualization with Seaborn](04.14-Visualization-With-Seaborn.ipynb)\n", 177 | "- [Further Resources](04.15-Further-Resources.ipynb)\n", 178 | "\n", 179 | "#### [5. 
Machine Learning](05.00-Machine-Learning.ipynb)\n", 180 | "- [What Is Machine Learning?](05.01-What-Is-Machine-Learning.ipynb)\n", 181 | "- [Introducing Scikit-Learn](05.02-Introducing-Scikit-Learn.ipynb)\n", 182 | "- [Hyperparameters and Model Validation](05.03-Hyperparameters-and-Model-Validation.ipynb)\n", 183 | "- [Feature Engineering](05.04-Feature-Engineering.ipynb)\n", 184 | "- [In Depth: Naive Bayes Classification](05.05-Naive-Bayes.ipynb)\n", 185 | "- [In Depth: Linear Regression](05.06-Linear-Regression.ipynb)\n", 186 | "- [In-Depth: Support Vector Machines](05.07-Support-Vector-Machines.ipynb)\n", 187 | "- [In-Depth: Decision Trees and Random Forests](05.08-Random-Forests.ipynb)\n", 188 | "- [In Depth: Principal Component Analysis](05.09-Principal-Component-Analysis.ipynb)\n", 189 | "- [In-Depth: Manifold Learning](05.10-Manifold-Learning.ipynb)\n", 190 | "- [In Depth: k-Means Clustering](05.11-K-Means.ipynb)\n", 191 | "- [In Depth: Gaussian Mixture Models](05.12-Gaussian-Mixtures.ipynb)\n", 192 | "- [In-Depth: Kernel Density Estimation](05.13-Kernel-Density-Estimation.ipynb)\n", 193 | "- [Application: A Face Detection Pipeline](05.14-Image-Features.ipynb)\n", 194 | "- [Further Machine Learning Resources](05.15-Learning-More.ipynb)\n", 195 | "\n", 196 | "#### [Appendix: Figure Code](06.00-Figure-Code.ipynb)" 197 | ] 198 | } 199 | ], 200 | "metadata": { 201 | "anaconda-cloud": {}, 202 | "kernelspec": { 203 | "display_name": "Python 3", 204 | "language": "python", 205 | "name": "python3" 206 | }, 207 | "language_info": { 208 | "codemirror_mode": { 209 | "name": "ipython", 210 | "version": 3 211 | }, 212 | "file_extension": ".py", 213 | "mimetype": "text/x-python", 214 | "name": "python", 215 | "nbconvert_exporter": "python", 216 | "pygments_lexer": "ipython3", 217 | "version": "3.7.6" 218 | } 219 | }, 220 | "nbformat": 4, 221 | "nbformat_minor": 1 222 | } 223 | -------------------------------------------------------------------------------- 
/notebooks/data/20170107-061401-recipeitems.json.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/data/20170107-061401-recipeitems.json.bz2 -------------------------------------------------------------------------------- /notebooks/data/gistemp250.nc.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/data/gistemp250.nc.bz2 -------------------------------------------------------------------------------- /notebooks/data/president_heights.csv: -------------------------------------------------------------------------------- 1 | order,name,height(cm) 2 | 1,George Washington,189 3 | 2,John Adams,170 4 | 3,Thomas Jefferson,189 5 | 4,James Madison,163 6 | 5,James Monroe,183 7 | 6,John Quincy Adams,171 8 | 7,Andrew Jackson,185 9 | 8,Martin Van Buren,168 10 | 9,William Henry Harrison,173 11 | 10,John Tyler,183 12 | 11,James K. Polk,173 13 | 12,Zachary Taylor,173 14 | 13,Millard Fillmore,175 15 | 14,Franklin Pierce,178 16 | 15,James Buchanan,183 17 | 16,Abraham Lincoln,193 18 | 17,Andrew Johnson,178 19 | 18,Ulysses S. Grant,173 20 | 19,Rutherford B. Hayes,174 21 | 20,James A. Garfield,183 22 | 21,Chester A. Arthur,183 23 | 23,Benjamin Harrison,168 24 | 25,William McKinley,170 25 | 26,Theodore Roosevelt,178 26 | 27,William Howard Taft,182 27 | 28,Woodrow Wilson,180 28 | 29,Warren G. Harding,183 29 | 30,Calvin Coolidge,178 30 | 31,Herbert Hoover,182 31 | 32,Franklin D. Roosevelt,188 32 | 33,Harry S. Truman,175 33 | 34,Dwight D. Eisenhower,179 34 | 35,John F. Kennedy,183 35 | 36,Lyndon B. Johnson,193 36 | 37,Richard Nixon,182 37 | 38,Gerald Ford,183 38 | 39,Jimmy Carter,177 39 | 40,Ronald Reagan,185 40 | 41,George H. W. 
Bush,188 41 | 42,Bill Clinton,188 42 | 43,George W. Bush,182 43 | 44,Barack Obama,185 44 | -------------------------------------------------------------------------------- /notebooks/data/state-abbrevs.csv: -------------------------------------------------------------------------------- 1 | "state","abbreviation" 2 | "Alabama","AL" 3 | "Alaska","AK" 4 | "Arizona","AZ" 5 | "Arkansas","AR" 6 | "California","CA" 7 | "Colorado","CO" 8 | "Connecticut","CT" 9 | "Delaware","DE" 10 | "District of Columbia","DC" 11 | "Florida","FL" 12 | "Georgia","GA" 13 | "Hawaii","HI" 14 | "Idaho","ID" 15 | "Illinois","IL" 16 | "Indiana","IN" 17 | "Iowa","IA" 18 | "Kansas","KS" 19 | "Kentucky","KY" 20 | "Louisiana","LA" 21 | "Maine","ME" 22 | "Montana","MT" 23 | "Nebraska","NE" 24 | "Nevada","NV" 25 | "New Hampshire","NH" 26 | "New Jersey","NJ" 27 | "New Mexico","NM" 28 | "New York","NY" 29 | "North Carolina","NC" 30 | "North Dakota","ND" 31 | "Ohio","OH" 32 | "Oklahoma","OK" 33 | "Oregon","OR" 34 | "Maryland","MD" 35 | "Massachusetts","MA" 36 | "Michigan","MI" 37 | "Minnesota","MN" 38 | "Mississippi","MS" 39 | "Missouri","MO" 40 | "Pennsylvania","PA" 41 | "Rhode Island","RI" 42 | "South Carolina","SC" 43 | "South Dakota","SD" 44 | "Tennessee","TN" 45 | "Texas","TX" 46 | "Utah","UT" 47 | "Vermont","VT" 48 | "Virginia","VA" 49 | "Washington","WA" 50 | "West Virginia","WV" 51 | "Wisconsin","WI" 52 | "Wyoming","WY" -------------------------------------------------------------------------------- /notebooks/data/state-areas.csv: -------------------------------------------------------------------------------- 1 | state,area (sq. 
mi) 2 | Alabama,52423 3 | Alaska,656425 4 | Arizona,114006 5 | Arkansas,53182 6 | California,163707 7 | Colorado,104100 8 | Connecticut,5544 9 | Delaware,1954 10 | Florida,65758 11 | Georgia,59441 12 | Hawaii,10932 13 | Idaho,83574 14 | Illinois,57918 15 | Indiana,36420 16 | Iowa,56276 17 | Kansas,82282 18 | Kentucky,40411 19 | Louisiana,51843 20 | Maine,35387 21 | Maryland,12407 22 | Massachusetts,10555 23 | Michigan,96810 24 | Minnesota,86943 25 | Mississippi,48434 26 | Missouri,69709 27 | Montana,147046 28 | Nebraska,77358 29 | Nevada,110567 30 | New Hampshire,9351 31 | New Jersey,8722 32 | New Mexico,121593 33 | New York,54475 34 | North Carolina,53821 35 | North Dakota,70704 36 | Ohio,44828 37 | Oklahoma,69903 38 | Oregon,98386 39 | Pennsylvania,46058 40 | Rhode Island,1545 41 | South Carolina,32007 42 | South Dakota,77121 43 | Tennessee,42146 44 | Texas,268601 45 | Utah,84904 46 | Vermont,9615 47 | Virginia,42769 48 | Washington,71303 49 | West Virginia,24231 50 | Wisconsin,65503 51 | Wyoming,97818 52 | District of Columbia,68 53 | Puerto Rico,3515 54 | -------------------------------------------------------------------------------- /notebooks/data/tips.csv: -------------------------------------------------------------------------------- 1 | "total_bill","tip","sex","smoker","day","time","size" 2 | 16.99,1.01,"Female","No","Sun","Dinner",2 3 | 10.34,1.66,"Male","No","Sun","Dinner",3 4 | 21.01,3.5,"Male","No","Sun","Dinner",3 5 | 23.68,3.31,"Male","No","Sun","Dinner",2 6 | 24.59,3.61,"Female","No","Sun","Dinner",4 7 | 25.29,4.71,"Male","No","Sun","Dinner",4 8 | 8.77,2,"Male","No","Sun","Dinner",2 9 | 26.88,3.12,"Male","No","Sun","Dinner",4 10 | 15.04,1.96,"Male","No","Sun","Dinner",2 11 | 14.78,3.23,"Male","No","Sun","Dinner",2 12 | 10.27,1.71,"Male","No","Sun","Dinner",2 13 | 35.26,5,"Female","No","Sun","Dinner",4 14 | 15.42,1.57,"Male","No","Sun","Dinner",2 15 | 18.43,3,"Male","No","Sun","Dinner",4 16 | 14.83,3.02,"Female","No","Sun","Dinner",2 17 | 
21.58,3.92,"Male","No","Sun","Dinner",2 18 | 10.33,1.67,"Female","No","Sun","Dinner",3 19 | 16.29,3.71,"Male","No","Sun","Dinner",3 20 | 16.97,3.5,"Female","No","Sun","Dinner",3 21 | 20.65,3.35,"Male","No","Sat","Dinner",3 22 | 17.92,4.08,"Male","No","Sat","Dinner",2 23 | 20.29,2.75,"Female","No","Sat","Dinner",2 24 | 15.77,2.23,"Female","No","Sat","Dinner",2 25 | 39.42,7.58,"Male","No","Sat","Dinner",4 26 | 19.82,3.18,"Male","No","Sat","Dinner",2 27 | 17.81,2.34,"Male","No","Sat","Dinner",4 28 | 13.37,2,"Male","No","Sat","Dinner",2 29 | 12.69,2,"Male","No","Sat","Dinner",2 30 | 21.7,4.3,"Male","No","Sat","Dinner",2 31 | 19.65,3,"Female","No","Sat","Dinner",2 32 | 9.55,1.45,"Male","No","Sat","Dinner",2 33 | 18.35,2.5,"Male","No","Sat","Dinner",4 34 | 15.06,3,"Female","No","Sat","Dinner",2 35 | 20.69,2.45,"Female","No","Sat","Dinner",4 36 | 17.78,3.27,"Male","No","Sat","Dinner",2 37 | 24.06,3.6,"Male","No","Sat","Dinner",3 38 | 16.31,2,"Male","No","Sat","Dinner",3 39 | 16.93,3.07,"Female","No","Sat","Dinner",3 40 | 18.69,2.31,"Male","No","Sat","Dinner",3 41 | 31.27,5,"Male","No","Sat","Dinner",3 42 | 16.04,2.24,"Male","No","Sat","Dinner",3 43 | 17.46,2.54,"Male","No","Sun","Dinner",2 44 | 13.94,3.06,"Male","No","Sun","Dinner",2 45 | 9.68,1.32,"Male","No","Sun","Dinner",2 46 | 30.4,5.6,"Male","No","Sun","Dinner",4 47 | 18.29,3,"Male","No","Sun","Dinner",2 48 | 22.23,5,"Male","No","Sun","Dinner",2 49 | 32.4,6,"Male","No","Sun","Dinner",4 50 | 28.55,2.05,"Male","No","Sun","Dinner",3 51 | 18.04,3,"Male","No","Sun","Dinner",2 52 | 12.54,2.5,"Male","No","Sun","Dinner",2 53 | 10.29,2.6,"Female","No","Sun","Dinner",2 54 | 34.81,5.2,"Female","No","Sun","Dinner",4 55 | 9.94,1.56,"Male","No","Sun","Dinner",2 56 | 25.56,4.34,"Male","No","Sun","Dinner",4 57 | 19.49,3.51,"Male","No","Sun","Dinner",2 58 | 38.01,3,"Male","Yes","Sat","Dinner",4 59 | 26.41,1.5,"Female","No","Sat","Dinner",2 60 | 11.24,1.76,"Male","Yes","Sat","Dinner",2 61 | 48.27,6.73,"Male","No","Sat","Dinner",4 62 
| 20.29,3.21,"Male","Yes","Sat","Dinner",2 63 | 13.81,2,"Male","Yes","Sat","Dinner",2 64 | 11.02,1.98,"Male","Yes","Sat","Dinner",2 65 | 18.29,3.76,"Male","Yes","Sat","Dinner",4 66 | 17.59,2.64,"Male","No","Sat","Dinner",3 67 | 20.08,3.15,"Male","No","Sat","Dinner",3 68 | 16.45,2.47,"Female","No","Sat","Dinner",2 69 | 3.07,1,"Female","Yes","Sat","Dinner",1 70 | 20.23,2.01,"Male","No","Sat","Dinner",2 71 | 15.01,2.09,"Male","Yes","Sat","Dinner",2 72 | 12.02,1.97,"Male","No","Sat","Dinner",2 73 | 17.07,3,"Female","No","Sat","Dinner",3 74 | 26.86,3.14,"Female","Yes","Sat","Dinner",2 75 | 25.28,5,"Female","Yes","Sat","Dinner",2 76 | 14.73,2.2,"Female","No","Sat","Dinner",2 77 | 10.51,1.25,"Male","No","Sat","Dinner",2 78 | 17.92,3.08,"Male","Yes","Sat","Dinner",2 79 | 27.2,4,"Male","No","Thur","Lunch",4 80 | 22.76,3,"Male","No","Thur","Lunch",2 81 | 17.29,2.71,"Male","No","Thur","Lunch",2 82 | 19.44,3,"Male","Yes","Thur","Lunch",2 83 | 16.66,3.4,"Male","No","Thur","Lunch",2 84 | 10.07,1.83,"Female","No","Thur","Lunch",1 85 | 32.68,5,"Male","Yes","Thur","Lunch",2 86 | 15.98,2.03,"Male","No","Thur","Lunch",2 87 | 34.83,5.17,"Female","No","Thur","Lunch",4 88 | 13.03,2,"Male","No","Thur","Lunch",2 89 | 18.28,4,"Male","No","Thur","Lunch",2 90 | 24.71,5.85,"Male","No","Thur","Lunch",2 91 | 21.16,3,"Male","No","Thur","Lunch",2 92 | 28.97,3,"Male","Yes","Fri","Dinner",2 93 | 22.49,3.5,"Male","No","Fri","Dinner",2 94 | 5.75,1,"Female","Yes","Fri","Dinner",2 95 | 16.32,4.3,"Female","Yes","Fri","Dinner",2 96 | 22.75,3.25,"Female","No","Fri","Dinner",2 97 | 40.17,4.73,"Male","Yes","Fri","Dinner",4 98 | 27.28,4,"Male","Yes","Fri","Dinner",2 99 | 12.03,1.5,"Male","Yes","Fri","Dinner",2 100 | 21.01,3,"Male","Yes","Fri","Dinner",2 101 | 12.46,1.5,"Male","No","Fri","Dinner",2 102 | 11.35,2.5,"Female","Yes","Fri","Dinner",2 103 | 15.38,3,"Female","Yes","Fri","Dinner",2 104 | 44.3,2.5,"Female","Yes","Sat","Dinner",3 105 | 22.42,3.48,"Female","Yes","Sat","Dinner",2 106 | 
20.92,4.08,"Female","No","Sat","Dinner",2 107 | 15.36,1.64,"Male","Yes","Sat","Dinner",2 108 | 20.49,4.06,"Male","Yes","Sat","Dinner",2 109 | 25.21,4.29,"Male","Yes","Sat","Dinner",2 110 | 18.24,3.76,"Male","No","Sat","Dinner",2 111 | 14.31,4,"Female","Yes","Sat","Dinner",2 112 | 14,3,"Male","No","Sat","Dinner",2 113 | 7.25,1,"Female","No","Sat","Dinner",1 114 | 38.07,4,"Male","No","Sun","Dinner",3 115 | 23.95,2.55,"Male","No","Sun","Dinner",2 116 | 25.71,4,"Female","No","Sun","Dinner",3 117 | 17.31,3.5,"Female","No","Sun","Dinner",2 118 | 29.93,5.07,"Male","No","Sun","Dinner",4 119 | 10.65,1.5,"Female","No","Thur","Lunch",2 120 | 12.43,1.8,"Female","No","Thur","Lunch",2 121 | 24.08,2.92,"Female","No","Thur","Lunch",4 122 | 11.69,2.31,"Male","No","Thur","Lunch",2 123 | 13.42,1.68,"Female","No","Thur","Lunch",2 124 | 14.26,2.5,"Male","No","Thur","Lunch",2 125 | 15.95,2,"Male","No","Thur","Lunch",2 126 | 12.48,2.52,"Female","No","Thur","Lunch",2 127 | 29.8,4.2,"Female","No","Thur","Lunch",6 128 | 8.52,1.48,"Male","No","Thur","Lunch",2 129 | 14.52,2,"Female","No","Thur","Lunch",2 130 | 11.38,2,"Female","No","Thur","Lunch",2 131 | 22.82,2.18,"Male","No","Thur","Lunch",3 132 | 19.08,1.5,"Male","No","Thur","Lunch",2 133 | 20.27,2.83,"Female","No","Thur","Lunch",2 134 | 11.17,1.5,"Female","No","Thur","Lunch",2 135 | 12.26,2,"Female","No","Thur","Lunch",2 136 | 18.26,3.25,"Female","No","Thur","Lunch",2 137 | 8.51,1.25,"Female","No","Thur","Lunch",2 138 | 10.33,2,"Female","No","Thur","Lunch",2 139 | 14.15,2,"Female","No","Thur","Lunch",2 140 | 16,2,"Male","Yes","Thur","Lunch",2 141 | 13.16,2.75,"Female","No","Thur","Lunch",2 142 | 17.47,3.5,"Female","No","Thur","Lunch",2 143 | 34.3,6.7,"Male","No","Thur","Lunch",6 144 | 41.19,5,"Male","No","Thur","Lunch",5 145 | 27.05,5,"Female","No","Thur","Lunch",6 146 | 16.43,2.3,"Female","No","Thur","Lunch",2 147 | 8.35,1.5,"Female","No","Thur","Lunch",2 148 | 18.64,1.36,"Female","No","Thur","Lunch",3 149 | 
11.87,1.63,"Female","No","Thur","Lunch",2 150 | 9.78,1.73,"Male","No","Thur","Lunch",2 151 | 7.51,2,"Male","No","Thur","Lunch",2 152 | 14.07,2.5,"Male","No","Sun","Dinner",2 153 | 13.13,2,"Male","No","Sun","Dinner",2 154 | 17.26,2.74,"Male","No","Sun","Dinner",3 155 | 24.55,2,"Male","No","Sun","Dinner",4 156 | 19.77,2,"Male","No","Sun","Dinner",4 157 | 29.85,5.14,"Female","No","Sun","Dinner",5 158 | 48.17,5,"Male","No","Sun","Dinner",6 159 | 25,3.75,"Female","No","Sun","Dinner",4 160 | 13.39,2.61,"Female","No","Sun","Dinner",2 161 | 16.49,2,"Male","No","Sun","Dinner",4 162 | 21.5,3.5,"Male","No","Sun","Dinner",4 163 | 12.66,2.5,"Male","No","Sun","Dinner",2 164 | 16.21,2,"Female","No","Sun","Dinner",3 165 | 13.81,2,"Male","No","Sun","Dinner",2 166 | 17.51,3,"Female","Yes","Sun","Dinner",2 167 | 24.52,3.48,"Male","No","Sun","Dinner",3 168 | 20.76,2.24,"Male","No","Sun","Dinner",2 169 | 31.71,4.5,"Male","No","Sun","Dinner",4 170 | 10.59,1.61,"Female","Yes","Sat","Dinner",2 171 | 10.63,2,"Female","Yes","Sat","Dinner",2 172 | 50.81,10,"Male","Yes","Sat","Dinner",3 173 | 15.81,3.16,"Male","Yes","Sat","Dinner",2 174 | 7.25,5.15,"Male","Yes","Sun","Dinner",2 175 | 31.85,3.18,"Male","Yes","Sun","Dinner",2 176 | 16.82,4,"Male","Yes","Sun","Dinner",2 177 | 32.9,3.11,"Male","Yes","Sun","Dinner",2 178 | 17.89,2,"Male","Yes","Sun","Dinner",2 179 | 14.48,2,"Male","Yes","Sun","Dinner",2 180 | 9.6,4,"Female","Yes","Sun","Dinner",2 181 | 34.63,3.55,"Male","Yes","Sun","Dinner",2 182 | 34.65,3.68,"Male","Yes","Sun","Dinner",4 183 | 23.33,5.65,"Male","Yes","Sun","Dinner",2 184 | 45.35,3.5,"Male","Yes","Sun","Dinner",3 185 | 23.17,6.5,"Male","Yes","Sun","Dinner",4 186 | 40.55,3,"Male","Yes","Sun","Dinner",2 187 | 20.69,5,"Male","No","Sun","Dinner",5 188 | 20.9,3.5,"Female","Yes","Sun","Dinner",3 189 | 30.46,2,"Male","Yes","Sun","Dinner",5 190 | 18.15,3.5,"Female","Yes","Sun","Dinner",3 191 | 23.1,4,"Male","Yes","Sun","Dinner",3 192 | 15.69,1.5,"Male","Yes","Sun","Dinner",2 193 | 
19.81,4.19,"Female","Yes","Thur","Lunch",2 194 | 28.44,2.56,"Male","Yes","Thur","Lunch",2 195 | 15.48,2.02,"Male","Yes","Thur","Lunch",2 196 | 16.58,4,"Male","Yes","Thur","Lunch",2 197 | 7.56,1.44,"Male","No","Thur","Lunch",2 198 | 10.34,2,"Male","Yes","Thur","Lunch",2 199 | 43.11,5,"Female","Yes","Thur","Lunch",4 200 | 13,2,"Female","Yes","Thur","Lunch",2 201 | 13.51,2,"Male","Yes","Thur","Lunch",2 202 | 18.71,4,"Male","Yes","Thur","Lunch",3 203 | 12.74,2.01,"Female","Yes","Thur","Lunch",2 204 | 13,2,"Female","Yes","Thur","Lunch",2 205 | 16.4,2.5,"Female","Yes","Thur","Lunch",2 206 | 20.53,4,"Male","Yes","Thur","Lunch",4 207 | 16.47,3.23,"Female","Yes","Thur","Lunch",3 208 | 26.59,3.41,"Male","Yes","Sat","Dinner",3 209 | 38.73,3,"Male","Yes","Sat","Dinner",4 210 | 24.27,2.03,"Male","Yes","Sat","Dinner",2 211 | 12.76,2.23,"Female","Yes","Sat","Dinner",2 212 | 30.06,2,"Male","Yes","Sat","Dinner",3 213 | 25.89,5.16,"Male","Yes","Sat","Dinner",4 214 | 48.33,9,"Male","No","Sat","Dinner",4 215 | 13.27,2.5,"Female","Yes","Sat","Dinner",2 216 | 28.17,6.5,"Female","Yes","Sat","Dinner",3 217 | 12.9,1.1,"Female","Yes","Sat","Dinner",2 218 | 28.15,3,"Male","Yes","Sat","Dinner",5 219 | 11.59,1.5,"Male","Yes","Sat","Dinner",2 220 | 7.74,1.44,"Male","Yes","Sat","Dinner",2 221 | 30.14,3.09,"Female","Yes","Sat","Dinner",4 222 | 12.16,2.2,"Male","Yes","Fri","Lunch",2 223 | 13.42,3.48,"Female","Yes","Fri","Lunch",2 224 | 8.58,1.92,"Male","Yes","Fri","Lunch",1 225 | 15.98,3,"Female","No","Fri","Lunch",3 226 | 13.42,1.58,"Male","Yes","Fri","Lunch",2 227 | 16.27,2.5,"Female","Yes","Fri","Lunch",2 228 | 10.09,2,"Female","Yes","Fri","Lunch",2 229 | 20.45,3,"Male","No","Sat","Dinner",4 230 | 13.28,2.72,"Male","No","Sat","Dinner",2 231 | 22.12,2.88,"Female","Yes","Sat","Dinner",2 232 | 24.01,2,"Male","Yes","Sat","Dinner",4 233 | 15.69,3,"Male","Yes","Sat","Dinner",3 234 | 11.61,3.39,"Male","No","Sat","Dinner",2 235 | 10.77,1.47,"Male","No","Sat","Dinner",2 236 | 
15.53,3,"Male","Yes","Sat","Dinner",2 237 | 10.07,1.25,"Male","No","Sat","Dinner",2 238 | 12.6,1,"Male","Yes","Sat","Dinner",2 239 | 32.83,1.17,"Male","Yes","Sat","Dinner",2 240 | 35.83,4.67,"Female","No","Sat","Dinner",3 241 | 29.03,5.92,"Male","No","Sat","Dinner",3 242 | 27.18,2,"Female","Yes","Sat","Dinner",2 243 | 22.67,2,"Male","Yes","Sat","Dinner",2 244 | 17.82,1.75,"Male","No","Sat","Dinner",2 245 | 18.78,3,"Female","No","Thur","Dinner",2 246 | -------------------------------------------------------------------------------- /notebooks/figures/02.05-broadcasting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/figures/02.05-broadcasting.png -------------------------------------------------------------------------------- /notebooks/figures/03.08-split-apply-combine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/figures/03.08-split-apply-combine.png -------------------------------------------------------------------------------- /notebooks/figures/05.01-classification-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/figures/05.01-classification-1.png -------------------------------------------------------------------------------- /notebooks/figures/05.01-classification-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/figures/05.01-classification-2.png 
-------------------------------------------------------------------------------- /notebooks/figures/05.01-classification-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/figures/05.01-classification-3.png -------------------------------------------------------------------------------- /notebooks/figures/05.01-clustering-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/figures/05.01-clustering-1.png -------------------------------------------------------------------------------- /notebooks/figures/05.01-clustering-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/figures/05.01-clustering-2.png -------------------------------------------------------------------------------- /notebooks/figures/05.01-dimesionality-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/figures/05.01-dimesionality-1.png -------------------------------------------------------------------------------- /notebooks/figures/05.01-dimesionality-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/figures/05.01-dimesionality-2.png -------------------------------------------------------------------------------- /notebooks/figures/05.01-regression-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/figures/05.01-regression-1.png -------------------------------------------------------------------------------- /notebooks/figures/05.01-regression-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/figures/05.01-regression-2.png -------------------------------------------------------------------------------- /notebooks/figures/05.01-regression-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/figures/05.01-regression-3.png -------------------------------------------------------------------------------- /notebooks/figures/05.01-regression-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/figures/05.01-regression-4.png -------------------------------------------------------------------------------- /notebooks/figures/05.02-samples-features.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/figures/05.02-samples-features.png -------------------------------------------------------------------------------- /notebooks/figures/05.03-2-fold-CV.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/figures/05.03-2-fold-CV.png -------------------------------------------------------------------------------- /notebooks/figures/05.03-5-fold-CV.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/figures/05.03-5-fold-CV.png -------------------------------------------------------------------------------- /notebooks/figures/05.03-bias-variance-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/figures/05.03-bias-variance-2.png -------------------------------------------------------------------------------- /notebooks/figures/05.03-bias-variance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/figures/05.03-bias-variance.png -------------------------------------------------------------------------------- /notebooks/figures/05.03-learning-curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/figures/05.03-learning-curve.png -------------------------------------------------------------------------------- /notebooks/figures/05.03-validation-curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/figures/05.03-validation-curve.png 
-------------------------------------------------------------------------------- /notebooks/figures/05.05-gaussian-NB.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/figures/05.05-gaussian-NB.png -------------------------------------------------------------------------------- /notebooks/figures/05.06-gaussian-basis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/figures/05.06-gaussian-basis.png -------------------------------------------------------------------------------- /notebooks/figures/05.08-decision-tree-levels.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/figures/05.08-decision-tree-levels.png -------------------------------------------------------------------------------- /notebooks/figures/05.08-decision-tree-overfitting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/figures/05.08-decision-tree-overfitting.png -------------------------------------------------------------------------------- /notebooks/figures/05.08-decision-tree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/figures/05.08-decision-tree.png -------------------------------------------------------------------------------- /notebooks/figures/05.09-PCA-rotation.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/figures/05.09-PCA-rotation.png -------------------------------------------------------------------------------- /notebooks/figures/05.09-digits-pca-components.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/figures/05.09-digits-pca-components.png -------------------------------------------------------------------------------- /notebooks/figures/05.09-digits-pixel-components.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/figures/05.09-digits-pixel-components.png -------------------------------------------------------------------------------- /notebooks/figures/05.10-LLE-vs-MDS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/figures/05.10-LLE-vs-MDS.png -------------------------------------------------------------------------------- /notebooks/figures/05.11-expectation-maximization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/figures/05.11-expectation-maximization.png -------------------------------------------------------------------------------- /notebooks/figures/05.12-covariance-type.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/figures/05.12-covariance-type.png -------------------------------------------------------------------------------- /notebooks/figures/Data_Science_VD.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/figures/Data_Science_VD.png -------------------------------------------------------------------------------- /notebooks/figures/PDSH-cover-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/figures/PDSH-cover-small.png -------------------------------------------------------------------------------- /notebooks/figures/PDSH-cover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/figures/PDSH-cover.png -------------------------------------------------------------------------------- /notebooks/figures/array_vs_list.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/figures/array_vs_list.png -------------------------------------------------------------------------------- /notebooks/figures/cint_vs_pyint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/figures/cint_vs_pyint.png -------------------------------------------------------------------------------- 
/notebooks/hello.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/hello.png -------------------------------------------------------------------------------- /notebooks/helpers_05_08.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from sklearn.tree import DecisionTreeClassifier 5 | from ipywidgets import interact 6 | 7 | 8 | def visualize_tree(estimator, X, y, boundaries=True, 9 | xlim=None, ylim=None, ax=None): 10 | ax = ax or plt.gca() 11 | 12 | # Plot the training points 13 | ax.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap='viridis', 14 | clim=(y.min(), y.max()), zorder=3) 15 | ax.axis('tight') 16 | ax.axis('off') 17 | if xlim is None: 18 | xlim = ax.get_xlim() 19 | if ylim is None: 20 | ylim = ax.get_ylim() 21 | 22 | # Fit the estimator 23 | estimator.fit(X, y) 24 | xx, yy = np.meshgrid(np.linspace(*xlim, num=200), 25 | np.linspace(*ylim, num=200)) 26 | Z = estimator.predict(np.c_[xx.ravel(), yy.ravel()]) 27 | 28 | # Put the predictions into a color plot 29 | n_classes = len(np.unique(y)) 30 | Z = Z.reshape(xx.shape) 31 | contours = ax.contourf(xx, yy, Z, alpha=0.3, 32 | levels=np.arange(n_classes + 1) - 0.5, 33 | cmap='viridis', clim=(y.min(), y.max()), 34 | zorder=1) 35 | 36 | ax.set(xlim=xlim, ylim=ylim) 37 | 38 | # Plot the decision tree boundaries 39 | def plot_boundaries(i, xlim, ylim): 40 | if i >= 0: 41 | tree = estimator.tree_ 42 | 43 | if tree.feature[i] == 0: 44 | ax.plot([tree.threshold[i], tree.threshold[i]], ylim, '-k', zorder=2) 45 | plot_boundaries(tree.children_left[i], 46 | [xlim[0], tree.threshold[i]], ylim) 47 | plot_boundaries(tree.children_right[i], 48 | [tree.threshold[i], xlim[1]], ylim) 49 | 50 | elif tree.feature[i] == 1: 51 | ax.plot(xlim, [tree.threshold[i], tree.threshold[i]], '-k', zorder=2) 52 | plot_boundaries(tree.children_left[i], xlim, 53 | [ylim[0], 
tree.threshold[i]]) 54 | plot_boundaries(tree.children_right[i], xlim, 55 | [tree.threshold[i], ylim[1]]) 56 | 57 | if boundaries: 58 | plot_boundaries(0, xlim, ylim) 59 | 60 | 61 | def plot_tree_interactive(X, y): 62 | def interactive_tree(depth=5): 63 | clf = DecisionTreeClassifier(max_depth=depth, random_state=0) 64 | visualize_tree(clf, X, y) 65 | 66 | return interact(interactive_tree, depth=[1, 5]) 67 | 68 | 69 | def randomized_tree_interactive(X, y): 70 | N = int(0.75 * X.shape[0]) 71 | 72 | xlim = (X[:, 0].min(), X[:, 0].max()) 73 | ylim = (X[:, 1].min(), X[:, 1].max()) 74 | 75 | def fit_randomized_tree(random_state=0): 76 | clf = DecisionTreeClassifier(max_depth=15) 77 | i = np.arange(len(y)) 78 | rng = np.random.RandomState(random_state) 79 | rng.shuffle(i) 80 | visualize_tree(clf, X[i[:N]], y[i[:N]], boundaries=False, 81 | xlim=xlim, ylim=ylim) 82 | 83 | interact(fit_randomized_tree, random_state=[0, 100]); 84 | -------------------------------------------------------------------------------- /notebooks/my_figure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/notebooks/my_figure.png -------------------------------------------------------------------------------- /printable/00.00-Preface.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/00.00-Preface.pdf -------------------------------------------------------------------------------- /printable/01.00-IPython-Beyond-Normal-Python.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/01.00-IPython-Beyond-Normal-Python.pdf
-------------------------------------------------------------------------------- /printable/01.01-Help-And-Documentation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/01.01-Help-And-Documentation.pdf -------------------------------------------------------------------------------- /printable/01.02-Shell-Keyboard-Shortcuts.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/01.02-Shell-Keyboard-Shortcuts.pdf -------------------------------------------------------------------------------- /printable/01.03-Magic-Commands.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/01.03-Magic-Commands.pdf -------------------------------------------------------------------------------- /printable/01.04-Input-Output-History.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/01.04-Input-Output-History.pdf -------------------------------------------------------------------------------- /printable/01.05-IPython-And-Shell-Commands.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/01.05-IPython-And-Shell-Commands.pdf -------------------------------------------------------------------------------- /printable/01.06-Errors-and-Debugging.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/01.06-Errors-and-Debugging.pdf -------------------------------------------------------------------------------- /printable/01.07-Timing-and-Profiling.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/01.07-Timing-and-Profiling.pdf -------------------------------------------------------------------------------- /printable/01.08-More-IPython-Resources.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/01.08-More-IPython-Resources.pdf -------------------------------------------------------------------------------- /printable/02.00-Introduction-to-NumPy.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/02.00-Introduction-to-NumPy.pdf -------------------------------------------------------------------------------- /printable/02.01-Understanding-Data-Types.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/02.01-Understanding-Data-Types.pdf -------------------------------------------------------------------------------- /printable/02.02-The-Basics-Of-NumPy-Arrays.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/02.02-The-Basics-Of-NumPy-Arrays.pdf -------------------------------------------------------------------------------- /printable/02.03-Computation-on-arrays-ufuncs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/02.03-Computation-on-arrays-ufuncs.pdf -------------------------------------------------------------------------------- /printable/02.04-Computation-on-arrays-aggregates.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/02.04-Computation-on-arrays-aggregates.pdf -------------------------------------------------------------------------------- /printable/02.05-Computation-on-arrays-broadcasting.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/02.05-Computation-on-arrays-broadcasting.pdf -------------------------------------------------------------------------------- /printable/02.06-Boolean-Arrays-and-Masks.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/02.06-Boolean-Arrays-and-Masks.pdf -------------------------------------------------------------------------------- /printable/02.07-Fancy-Indexing.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/02.07-Fancy-Indexing.pdf -------------------------------------------------------------------------------- /printable/02.08-Sorting.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/02.08-Sorting.pdf -------------------------------------------------------------------------------- /printable/02.09-Structured-Data-NumPy.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/02.09-Structured-Data-NumPy.pdf -------------------------------------------------------------------------------- /printable/03.00-Introduction-to-Pandas.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/03.00-Introduction-to-Pandas.pdf -------------------------------------------------------------------------------- /printable/03.01-Introducing-Pandas-Objects.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/03.01-Introducing-Pandas-Objects.pdf -------------------------------------------------------------------------------- /printable/03.02-Data-Indexing-and-Selection.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/03.02-Data-Indexing-and-Selection.pdf 
-------------------------------------------------------------------------------- /printable/03.03-Operations-in-Pandas.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/03.03-Operations-in-Pandas.pdf -------------------------------------------------------------------------------- /printable/03.04-Missing-Values.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/03.04-Missing-Values.pdf -------------------------------------------------------------------------------- /printable/03.05-Hierarchical-Indexing.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/03.05-Hierarchical-Indexing.pdf -------------------------------------------------------------------------------- /printable/03.06-Concat-And-Append.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/03.06-Concat-And-Append.pdf -------------------------------------------------------------------------------- /printable/03.07-Merge-and-Join.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/03.07-Merge-and-Join.pdf -------------------------------------------------------------------------------- /printable/03.08-Aggregation-and-Grouping.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/03.08-Aggregation-and-Grouping.pdf -------------------------------------------------------------------------------- /printable/03.09-Pivot-Tables.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/03.09-Pivot-Tables.pdf -------------------------------------------------------------------------------- /printable/03.10-Working-With-Strings.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/03.10-Working-With-Strings.pdf -------------------------------------------------------------------------------- /printable/03.11-Working-with-Time-Series.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/03.11-Working-with-Time-Series.pdf -------------------------------------------------------------------------------- /printable/03.12-Performance-Eval-and-Query.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/03.12-Performance-Eval-and-Query.pdf -------------------------------------------------------------------------------- /printable/03.13-Further-Resources.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/03.13-Further-Resources.pdf 
-------------------------------------------------------------------------------- /printable/04.00-Introduction-To-Matplotlib.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/04.00-Introduction-To-Matplotlib.pdf -------------------------------------------------------------------------------- /printable/04.01-Simple-Line-Plots.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/04.01-Simple-Line-Plots.pdf -------------------------------------------------------------------------------- /printable/04.02-Simple-Scatter-Plots.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/04.02-Simple-Scatter-Plots.pdf -------------------------------------------------------------------------------- /printable/04.03-Errorbars.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/04.03-Errorbars.pdf -------------------------------------------------------------------------------- /printable/04.04-Density-and-Contour-Plots.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/04.04-Density-and-Contour-Plots.pdf -------------------------------------------------------------------------------- /printable/04.05-Histograms-and-Binnings.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/04.05-Histograms-and-Binnings.pdf -------------------------------------------------------------------------------- /printable/04.06-Customizing-Legends.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/04.06-Customizing-Legends.pdf -------------------------------------------------------------------------------- /printable/04.07-Customizing-Colorbars.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/04.07-Customizing-Colorbars.pdf -------------------------------------------------------------------------------- /printable/04.08-Multiple-Subplots.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/04.08-Multiple-Subplots.pdf -------------------------------------------------------------------------------- /printable/04.09-Text-and-Annotation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/04.09-Text-and-Annotation.pdf -------------------------------------------------------------------------------- /printable/04.10-Customizing-Ticks.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/04.10-Customizing-Ticks.pdf -------------------------------------------------------------------------------- /printable/04.11-Settings-and-Stylesheets.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/04.11-Settings-and-Stylesheets.pdf -------------------------------------------------------------------------------- /printable/04.12-Three-Dimensional-Plotting.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/04.12-Three-Dimensional-Plotting.pdf -------------------------------------------------------------------------------- /printable/04.13-Geographic-Data-With-Basemap.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/04.13-Geographic-Data-With-Basemap.pdf -------------------------------------------------------------------------------- /printable/04.14-Visualization-With-Seaborn.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/04.14-Visualization-With-Seaborn.pdf -------------------------------------------------------------------------------- /printable/04.15-Further-Resources.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/04.15-Further-Resources.pdf 
-------------------------------------------------------------------------------- /printable/05.00-Machine-Learning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/05.00-Machine-Learning.pdf -------------------------------------------------------------------------------- /printable/05.01-What-Is-Machine-Learning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/05.01-What-Is-Machine-Learning.pdf -------------------------------------------------------------------------------- /printable/05.02-Introducing-Scikit-Learn.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/05.02-Introducing-Scikit-Learn.pdf -------------------------------------------------------------------------------- /printable/05.03-Hyperparameters-and-Model-Validation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/05.03-Hyperparameters-and-Model-Validation.pdf -------------------------------------------------------------------------------- /printable/05.04-Feature-Engineering.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/05.04-Feature-Engineering.pdf -------------------------------------------------------------------------------- /printable/05.05-Naive-Bayes.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/05.05-Naive-Bayes.pdf -------------------------------------------------------------------------------- /printable/05.06-Linear-Regression.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/05.06-Linear-Regression.pdf -------------------------------------------------------------------------------- /printable/05.07-Support-Vector-Machines.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/05.07-Support-Vector-Machines.pdf -------------------------------------------------------------------------------- /printable/05.08-Random-Forests.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/05.08-Random-Forests.pdf -------------------------------------------------------------------------------- /printable/05.09-Principal-Component-Analysis.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/05.09-Principal-Component-Analysis.pdf -------------------------------------------------------------------------------- /printable/05.10-Manifold-Learning.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/05.10-Manifold-Learning.pdf -------------------------------------------------------------------------------- /printable/05.11-K-Means.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/05.11-K-Means.pdf -------------------------------------------------------------------------------- /printable/05.12-Gaussian-Mixtures.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/05.12-Gaussian-Mixtures.pdf -------------------------------------------------------------------------------- /printable/05.13-Kernel-Density-Estimation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/05.13-Kernel-Density-Estimation.pdf -------------------------------------------------------------------------------- /printable/05.14-Image-Features.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/05.14-Image-Features.pdf -------------------------------------------------------------------------------- /printable/05.15-Learning-More.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/05.15-Learning-More.pdf -------------------------------------------------------------------------------- 
/printable/06.00-Figure-Code.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/06.00-Figure-Code.pdf -------------------------------------------------------------------------------- /printable/Index.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyingsm/Python-Data-Science-Handbook/15e7588d15d8068eb23a8dc9d37790eadf5a5db2/printable/Index.pdf -------------------------------------------------------------------------------- /printable/README.md: -------------------------------------------------------------------------------- 1 | # 下载本书中文翻译版免费PDF可打印格式 2 | 3 | [目录](Index.pdf) 4 | 5 | - [00.00-序言](00.00-Preface.pdf) 6 | 7 | - [01.00-IPython:超越Python解释器](01.00-IPython-Beyond-Normal-Python.pdf) 8 | - [01.01-IPython帮助和文档](01.01-Help-And-Documentation.pdf) 9 | - [01.02-IPython Shell中的键盘快捷键](01.02-Shell-Keyboard-Shortcuts.pdf) 10 | - [01.03-IPython魔术命令](01.03-Magic-Commands.pdf) 11 | - [01.04-输入输出历史](01.04-Input-Output-History.pdf) 12 | - [01.05-IPython和Shell命令](01.05-IPython-And-Shell-Commands.pdf) 13 | - [01.06-错误和调试](01.06-Errors-and-Debugging.pdf) 14 | - [01.07-性能测算和计时](01.07-Timing-and-Profiling.pdf) 15 | - [01.08-更多IPython资源](01.08-More-IPython-Resources.pdf) 16 | 17 | - [02.00-NumPy介绍](02.00-Introduction-to-NumPy.pdf) 18 | - [02.01-理解Python中的数据类型](02.01-Understanding-Data-Types.pdf) 19 | - [02.02-NumPy数组基础](02.02-The-Basics-Of-NumPy-Arrays.pdf) 20 | - [02.03-使用NumPy计算:通用函数](02.03-Computation-on-arrays-ufuncs.pdf) 21 | - [02.04-聚合:Min, Max, 以及其他](02.04-Computation-on-arrays-aggregates.pdf) 22 | - [02.05-在数组上计算:广播](02.05-Computation-on-arrays-broadcasting.pdf) 23 | - [02.06-比较,遮盖和布尔逻辑](02.06-Boolean-Arrays-and-Masks.pdf) 24 | - 
[02.07-高级索引](02.07-Fancy-Indexing.pdf) 25 | - [02.08-数组排序](02.08-Sorting.pdf) 26 | - [02.09-格式化数据:NumPy里的结构化数组](02.09-Structured-Data-NumPy.pdf) 27 | 28 | - [03.00-使用Pandas进行数据处理](03.00-Introduction-to-Pandas.pdf) 29 | - [03.01-Pandas对象简介](03.01-Introducing-Pandas-Objects.pdf) 30 | - [03.02-数据索引和选择](03.02-Data-Indexing-and-Selection.pdf) 31 | - [03.03-在Pandas中操作数据](03.03-Operations-in-Pandas.pdf) 32 | - [03.04-处理空缺数据](03.04-Missing-Values.pdf) 33 | - [03.05-层次化的索引](03.05-Hierarchical-Indexing.pdf) 34 | - [03.06-组合数据集:Concat 和 Append](03.06-Concat-And-Append.pdf) 35 | - [03.07-组合数据集:Merge 和 Join](03.07-Merge-and-Join.pdf) 36 | - [03.08-聚合与分组](03.08-Aggregation-and-Grouping.pdf) 37 | - [03.09-数据透视表](03.09-Pivot-Tables.pdf) 38 | - [03.10-向量化的字符串操作](03.10-Working-With-Strings.pdf) 39 | - [03.11-在时间序列上操作](03.11-Working-with-Time-Series.pdf) 40 | - [03.12-高性能Pandas: ``eval()`` 和 ``query()``](03.12-Performance-Eval-and-Query.pdf) 41 | - [03.13-更多资源](03.13-Further-Resources.pdf) 42 | 43 | - [04.00-使用matplotlib展示数据](04.00-Introduction-To-Matplotlib.pdf) 44 | - [04.01-简单的折线图](04.01-Simple-Line-Plots.pdf) 45 | - [04.02-简单的散点图](04.02-Simple-Scatter-Plots.pdf) 46 | - [04.03-误差可视化](04.03-Errorbars.pdf) 47 | - [04.04-密度和轮廓图](04.04-Density-and-Contour-Plots.pdf) 48 | - [04.05-直方图, 分桶和密度](04.05-Histograms-and-Binnings.pdf) 49 | - [04.06-自定义图表图例](04.06-Customizing-Legends.pdf) 50 | - [04.07-自定义颜色条](04.07-Customizing-Colorbars.pdf) 51 | - [04.08-多个子图表](04.08-Multiple-Subplots.pdf) 52 | - [04.09-文本和标注](04.09-Text-and-Annotation.pdf) 53 | - [04.10-自定义刻度](04.10-Customizing-Ticks.pdf) 54 | - [04.11-自定义matplotlib:配置和样式单](04.11-Settings-and-Stylesheets.pdf) 55 | - [04.12-在matplotlib中创建三维图表](04.12-Three-Dimensional-Plotting.pdf) 56 | - [04.13-使用Basemap创建地理位置图表](04.13-Geographic-Data-With-Basemap.pdf) 57 | - [04.14-使用Seaborn进行可视化](04.14-Visualization-With-Seaborn.pdf) 58 | - [04.15-更多资源](04.15-Further-Resources.pdf) 59 | 60 | - 
[05.00-机器学习](05.00-Machine-Learning.pdf) 61 | - [05.01-什么是机器学习?](05.01-What-Is-Machine-Learning.pdf) 62 | - [05.02-Scikit-Learn简介](05.02-Introducing-Scikit-Learn.pdf) 63 | - [05.03-超参数及模型验证](05.03-Hyperparameters-and-Model-Validation.pdf) 64 | - [05.04-特征工程](05.04-Feature-Engineering.pdf) 65 | - [05.05-深入:朴素贝叶斯分类](05.05-Naive-Bayes.pdf) 66 | - [05.06-深入:线性回归](05.06-Linear-Regression.pdf) 67 | - [05.07-深入:支持向量机](05.07-Support-Vector-Machines.pdf) 68 | - [05.08-深入:决策树和随机森林](05.08-Random-Forests.pdf) 69 | - [05.09-深入:主成分分析](05.09-Principal-Component-Analysis.pdf) 70 | - [05.10-深入:流形学习](05.10-Manifold-Learning.pdf) 71 | - [05.11-深入:k-均值聚类](05.11-K-Means.pdf) 72 | - [05.12-深入:高斯混合模型](05.12-Gaussian-Mixtures.pdf) 73 | - [05.13-深入:核密度估计](05.13-Kernel-Density-Estimation.pdf) 74 | - [05.14-应用:脸部识别管道](05.14-Image-Features.pdf) 75 | - [05.15-更多机器学习资源](05.15-Learning-More.pdf) 76 | 77 | - [06.00-附录:生成图像的代码](06.00-Figure-Code.pdf) 78 | 79 | --------------------------------------------------------------------------------