├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── logo.png ├── matplotlib ├── Data-Visualizations.ipynb ├── Part 01 Introduction and Line Plots.ipynb ├── Part 02 Bar Charts.ipynb ├── Part 03 Pie Charts.ipynb ├── Part 04 Stack Plots.ipynb ├── Part 05 Fill Between.ipynb ├── Part 06 Histograms.ipynb ├── Part 07 Scatter Plots.ipynb ├── Part 08 Time Series.ipynb ├── Part 09 Plotting real time data.py ├── Part 10 Subplots.ipynb ├── Plotting and Visualization.ipynb ├── Pokemon.csv ├── README.md ├── Word Cloud.ipynb ├── data_gen.py ├── examples │ ├── 2019-05-31-data.csv │ ├── age_data.csv │ ├── data.csv │ ├── data_10.csv │ ├── data_6.csv │ ├── data_8.csv │ ├── macrodata.csv │ ├── spx.csv │ └── tips.csv ├── iris.csv ├── matplotlib-seaborn-basic.ipynb ├── plots │ ├── plot1-1.png │ ├── plot1-2.png │ ├── plot10_1.png │ ├── plot10_2.png │ └── plot3-1.png ├── seaborn_tutorial_EDS.ipynb └── snippets.txt ├── numpy ├── .ipynb_checkpoints │ └── Numpy basic tutorial-checkpoint.ipynb ├── Numpy basic tutorial.ipynb └── data.txt ├── pandas ├── Data Cleaning and Preparation.ipynb ├── Data Loading, Storage, and File Formats.ipynb ├── Getting started with pandas.ipynb ├── MovieLens-1M │ └── movies.dat ├── README.md ├── examples │ ├── NA_handling_methods.csv │ ├── csv_mindex.csv │ ├── ex1.csv │ ├── ex2.csv │ ├── ex3.txt │ ├── ex4.csv │ ├── ex5.csv │ ├── ex6.csv │ ├── ex7.csv │ ├── example.json │ ├── out.csv │ ├── test.csv │ └── tseries.csv └── test.ipynb └── requirements.txt /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 
| .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # IPython 79 | profile_default/ 80 | ipython_config.py 81 | 82 | # pyenv 83 | .python-version 84 | 85 | # pipenv 86 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 87 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 88 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 89 | # install all needed dependencies. 
90 | #Pipfile.lock 91 | 92 | # celery beat schedule file 93 | celerybeat-schedule 94 | 95 | # SageMath parsed files 96 | *.sage.py 97 | 98 | # Environments 99 | .env 100 | .venv 101 | env/ 102 | venv/ 103 | ENV/ 104 | env.bak/ 105 | venv.bak/ 106 | 107 | # Spyder project settings 108 | .spyderproject 109 | .spyproject 110 | 111 | # Rope project settings 112 | .ropeproject 113 | 114 | # mkdocs documentation 115 | /site 116 | 117 | # mypy 118 | .mypy_cache/ 119 | .dmypy.json 120 | dmypy.json 121 | 122 | # Pyre type checker 123 | .pyre/ 124 | 125 | 126 | .ipynb_checkpoints 127 | */.ipynb_checkpoints/* -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to <https://unlicense.org> 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### Tutorial notebooks on numpy, pandas and matplotlib 2 | 3 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/veb-101/Numpy-Pandas-Matplotlib-Tutorial/master) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/veb-101/Numpy-Pandas-Matplotlib-Tutorial/blob/master/) [![contributions welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg?style=flat)](https://github.com/veb-101/Numpy-Pandas-Matplotlib-Tutorial/issues) [![PRs](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](https://github.com/veb-101/Numpy-Pandas-Matplotlib-Tutorial/pulls) 4 | __________ 5 | 6 | 7 | 8 | * Run online using either [binder](https://mybinder.org/v2/gh/veb-101/Numpy-Pandas-Matplotlib-Tutorial/master) or [colab](https://colab.research.google.com/github/veb-101/Numpy-Pandas-Matplotlib-Tutorial/blob/master/). 9 | * To run on your local machine, execute the following in either CMD or Terminal: 10 | 11 | ```shell 12 | git clone https://github.com/veb-101/Numpy-Pandas-Matplotlib-Tutorial.git 13 | cd Numpy-Pandas-Matplotlib-Tutorial 14 | ``` 15 | 16 | * Then install the required libraries (requires **python3**) 17 | 18 | ```shell 19 | python -m pip install -r requirements.txt (for windows) 20 | python3 -m pip install -r requirements.txt (for linux) 21 | ``` 22 | 23 | | Sr. No. 
| Topic | Notebook | 24 | | ------- | ---------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 25 | | 1 | Numpy | [Numpy notebook](https://nbviewer.jupyter.org/github/veb-101/Numpy-Pandas-Matplotlib-Tutorial/blob/master/numpy/Numpy%20basic%20tutorial.ipynb) | 26 | | | | | 27 | | 2 | Pandas | [Getting Started with Pandas](https://nbviewer.jupyter.org/github/veb-101/Numpy-Pandas-Matplotlib-Tutorial/blob/master/pandas/Getting%20started%20with%20pandas.ipynb) | 28 | | | Pandas | [Data Loading, Storage, and File Formats](https://nbviewer.jupyter.org/github/veb-101/Numpy-Pandas-Matplotlib-Tutorial/blob/master/pandas/Data%20Loading%2C%20Storage%2C%20and%20File%20Formats.ipynb) | 29 | | | Pandas | [Data Cleaning and Preparation](https://nbviewer.jupyter.org/github/veb-101/Numpy-Pandas-Matplotlib-Tutorial/blob/master/pandas/Data%20Cleaning%20and%20Preparation.ipynb) | 30 | | | Pandas | [example test](https://nbviewer.jupyter.org/github/veb-101/Numpy-Pandas-Matplotlib-Tutorial/blob/master/pandas/test.ipynb) | 31 | | | | | 32 | | 3 | Matplotlib | [Part 1: Creating and Customizing Our First Plots](https://nbviewer.jupyter.org/github/veb-101/Numpy-Pandas-Matplotlib-Tutorial/blob/master/matplotlib/Part%2001%20Introduction%20and%20Line%20Plots.ipynb) | 33 | | | Matplotlib | [Part 2: Bar Charts and Analyzing Data from CSVs](https://nbviewer.jupyter.org/github/veb-101/Numpy-Pandas-Matplotlib-Tutorial/blob/master/matplotlib/Part%2002%20Bar%20Charts.ipynb) | 34 | | | Matplotlib | [Part 3: Pie Charts](https://nbviewer.jupyter.org/github/veb-101/Numpy-Pandas-Matplotlib-Tutorial/blob/master/matplotlib/Part%2003%20Pie%20Charts.ipynb) | 35 | | | Matplotlib | [Part 4: Stack Plots](https://nbviewer.jupyter.org/github/veb-101/Numpy-Pandas-Matplotlib-Tutorial/blob/master/matplotlib/Part%2004%20Stack%20Plots.ipynb) | 36 | | | 
Matplotlib | [Part 5: Filling Area on Line Plots](https://nbviewer.jupyter.org/github/veb-101/Numpy-Pandas-Matplotlib-Tutorial/blob/master/matplotlib/Part%2005%20Fill%20Between.ipynb) | 37 | | | Matplotlib | [Part 6: Histograms](https://nbviewer.jupyter.org/github/veb-101/Numpy-Pandas-Matplotlib-Tutorial/blob/master/matplotlib/Part%2006%20Histograms.ipynb) | 38 | | | Matplotlib | [Part 7: Scatter Plots](https://nbviewer.jupyter.org/github/veb-101/Numpy-Pandas-Matplotlib-Tutorial/blob/master/matplotlib/Part%2007%20Scatter%20Plots.ipynb) | 39 | | | Matplotlib | [Part 8: Plotting Time Series Data](https://nbviewer.jupyter.org/github/veb-101/Numpy-Pandas-Matplotlib-Tutorial/blob/master/matplotlib/Part%2008%20Time%20Series.ipynb) | 40 | | | Matplotlib | [Part 10: Subplots](https://nbviewer.jupyter.org/github/veb-101/Numpy-Pandas-Matplotlib-Tutorial/blob/master/matplotlib/Part%2010%20Subplots.ipynb) | 41 | | | Matplotlib | [Plotting and Visualization](https://nbviewer.jupyter.org/github/veb-101/Numpy-Pandas-Matplotlib-Tutorial/blob/master/matplotlib/Plotting%20and%20Visualization.ipynb) | 42 | | | Matplotlib | [Word Cloud](https://nbviewer.jupyter.org/github/veb-101/Numpy-Pandas-Matplotlib-Tutorial/blob/master/matplotlib/Word%20Cloud.ipynb) | 43 | | | Matplotlib | [Seaborn Basics](https://nbviewer.jupyter.org/github/veb-101/Numpy-Pandas-Matplotlib-Tutorial/blob/master/matplotlib/matplotlib-seaborn-basic.ipynb) | 44 | | | Matplotlib | [Iris - Data Visualization](https://nbviewer.jupyter.org/github/veb-101/Numpy-Pandas-Matplotlib-Tutorial/blob/master/matplotlib/Data-Visualizations.ipynb) | 45 | | | Matplotlib | [Pokemon - Data Visualization](https://nbviewer.jupyter.org/github/veb-101/Numpy-Pandas-Matplotlib-Tutorial/blob/master/matplotlib/seaborn_tutorial_EDS.ipynb) | 46 | -------------------------------------------------------------------------------- /logo.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/veb-101/Numpy-Pandas-Matplotlib-Tutorial/c7584da8b334ed84d77ac5d4e191f21c78c0b2d4/logo.png -------------------------------------------------------------------------------- /matplotlib/Part 06 Histograms.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import matplotlib.pyplot as plt" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "plt.rc('figure', figsize=(10, 8))\n", 20 | "plt.style.use('seaborn-white')" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 3, 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "data": { 30 | "image/png": "\n", 31 | "text/plain": [ 32 | "
" 33 | ] 34 | }, 35 | "metadata": {}, 36 | "output_type": "display_data" 37 | } 38 | ], 39 | "source": [ 40 | "ages = [18, 18, 21, 25, 26, 30, 32, 38, 45, 55]\n", 41 | "bins= [10, 20, 30, 40, 50, 60]\n", 42 | "\n", 43 | "# plt.hist(ages, bins=5, edgecolor='black')\n", 44 | "# plt.hist(ages, bins=bins, edgecolor='black')\n", 45 | "\n", 46 | "# excluding data for age 10\n", 47 | "plt.hist(ages, bins=bins[1:], edgecolor='black')\n", 48 | "\n", 49 | "\n", 50 | "plt.title('Age of Respondants')\n", 51 | "plt.xlabel('Ages')\n", 52 | "plt.ylabel('Total Respondants')\n", 53 | "plt.tight_layout()\n", 54 | "plt.show()" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 4, 60 | "metadata": {}, 61 | "outputs": [ 62 | { 63 | "data": { 64 | "image/png": "\n", 65 | "text/plain": [ 66 | "
" 67 | ] 68 | }, 69 | "metadata": {}, 70 | "output_type": "display_data" 71 | } 72 | ], 73 | "source": [ 74 | "\n", 75 | "data = pd.read_csv(r'examples/data_6.csv')\n", 76 | "# data.head()\n", 77 | "\n", 78 | "ids = data['Responder_id']\n", 79 | "ages = data['Age']\n", 80 | "\n", 81 | "bins = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]\n", 82 | "\n", 83 | "median_age = 29\n", 84 | "color = '#fc4f30'\n", 85 | "\n", 86 | "\n", 87 | "# plt.hist(ages, bins=bins, edgecolor='black')\n", 88 | "plt.hist(ages, bins=bins, edgecolor='black', log=True)\n", 89 | "plt.axvline(median_age, color=color, \n", 90 | " linewidth=2, label='Age Median')\n", 91 | "\n", 92 | "plt.grid(True)\n", 93 | "plt.legend()\n", 94 | "plt.title('Age of Respondants')\n", 95 | "plt.xlabel('Ages')\n", 96 | "plt.ylabel('Total Respondants')\n", 97 | "plt.tight_layout()\n", 98 | "plt.show()" 99 | ] 100 | } 101 | ], 102 | "metadata": { 103 | "kernelspec": { 104 | "display_name": "Python 3", 105 | "language": "python", 106 | "name": "python3" 107 | }, 108 | "language_info": { 109 | "codemirror_mode": { 110 | "name": "ipython", 111 | "version": 3 112 | }, 113 | "file_extension": ".py", 114 | "mimetype": "text/x-python", 115 | "name": "python", 116 | "nbconvert_exporter": "python", 117 | "pygments_lexer": "ipython3", 118 | "version": "3.7.4" 119 | } 120 | }, 121 | "nbformat": 4, 122 | "nbformat_minor": 4 123 | } 124 | -------------------------------------------------------------------------------- /matplotlib/Part 09 Plotting real time data.py: -------------------------------------------------------------------------------- 1 | import random 2 | from itertools import count 3 | import pandas as pd 4 | import matplotlib.pyplot as plt 5 | from matplotlib.animation import FuncAnimation 6 | 7 | plt.style.use('fivethirtyeight') 8 | 9 | # x_vals = [] 10 | # y_vals = [] 11 | # index = count() 12 | 13 | # def animate(i): 14 | # x_vals.append(next(index)) 15 | # y_vals.append(random.randint(0, 5)) 16 | # plt.cla() 17 | # 
def animate(i):
    """Redraw the current axes from the latest contents of ``data.csv``.

    Used as the frame callback for ``FuncAnimation``. The frame argument
    ``i`` is accepted but not used: every call re-reads the whole CSV and
    plots the ``total_1`` and ``total_2`` columns against ``x_value``.
    """
    table = pd.read_csv('data.csv')

    # Pull every column out before clearing, so a missing column raises
    # while the previous frame is still on screen.
    x_axis = table['x_value']
    channels = (
        (table['total_1'], 'Channel 1'),
        (table['total_2'], 'Channel 2'),
    )

    # Clear the current axes; otherwise each frame would draw on top of the
    # lines left by the previous one.
    plt.cla()

    for values, name in channels:
        plt.plot(x_axis, values, label=name)

    plt.legend(loc='upper left')
    plt.tight_layout()


# Keep a reference to the animation object for as long as the window is open.
ani = FuncAnimation(plt.gcf(), animate, interval=1000)

plt.tight_layout()
plt.show()
"""Write an ever-growing CSV of two random-walk series to ``data.csv``.

The file is (re)created with a header row in the current working directory,
then one row is appended per second containing an incrementing ``x_value``
and two running totals. The loop never terminates on its own; stop the
script with Ctrl+C.
"""

import csv
import random
import time

x_value = 0
total_1 = 1000
total_2 = 1000

fieldnames = ["x_value", "total_1", "total_2"]

# Start from a fresh file holding only the header row.
# newline='' is required by the csv module for files it writes to: without it
# the writer's own '\r\n' terminator is translated again on Windows, leaving
# a blank line after every row.
with open('data.csv', 'w', newline='') as csv_file:
    csv_writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    csv_writer.writeheader()

while True:

    # Re-open in append mode on every iteration so each row is flushed and
    # closed on disk before the script sleeps.
    with open('data.csv', 'a', newline='') as csv_file:
        csv_writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

        info = {
            "x_value": x_value,
            "total_1": total_1,
            "total_2": total_2
        }

        csv_writer.writerow(info)
        print(x_value, total_1, total_2)

        # Advance the x axis and take one random step for each series.
        x_value += 1
        total_1 = total_1 + random.randint(-6, 8)
        total_2 = total_2 + random.randint(-5, 6)

    time.sleep(1)
5895810,98088,95.87 35 | 381910,45178,99.21 36 | 2468645,188315,98.73 37 | 407859,19407,98.77 38 | 846399,29308,95.93 39 | 872092,27298,94.85 40 | 1279718,98471,99.06 41 | 1068377,92634,98.89 42 | 4691951,164807,98.93 43 | 1091006,55346,98.53 44 | 891230,30612,88.39 45 | 720734,35647,98.11 46 | 1025214,19926,94.86 47 | 505146,3309,59.69 48 | 265430,2124,91.99 49 | 3651318,283911,98.64 50 | 1290212,201881,99.3 51 | 420393,5434,95.99 52 | 655107,21485,96.16 53 | 1010207,23720,95.85 54 | 777547,9167,94.46 55 | 686703,34001,98.54 56 | 1625877,62101,98.35 57 | 2107926,59334,97.3 58 | 1564214,81581,97.96 59 | 2277765,53425,89.82 60 | 1558609,95695,98.23 61 | 1689305,88050,95.43 62 | 3382856,74078,93.32 63 | 4835746,276098,94.3 64 | 248754,2041,90.75 65 | 687182,63309,97.61 66 | 751948,24359,98.3 67 | 737756,23093,82.35 68 | 964229,18898,86.34 69 | 973121,22810,97.6 70 | 575508,16975,94.75 71 | 1114419,35208,94.3 72 | 722956,21843,97.6 73 | 1560200,38185,96.52 74 | 281397,3706,91.53 75 | 1122525,28232,97.23 76 | 20650480,212862,91.88 77 | 225207,1524,84.76 78 | 598367,24260,94.51 79 | 2117363,162960,99.12 80 | 1233027,16400,88.81 81 | 2566897,112005,54.67 82 | 11907188,1234111,83.49 83 | 1477059,36018,98.75 84 | 292469,5656,92.71 85 | 466862,47754,98.96 86 | 1055798,46122,97.84 87 | 1278142,26021,97.37 88 | 1938747,16942,87.66 89 | 338563,8416,96.46 90 | 645274,17943,94.67 91 | 730110,26868,92.31 92 | 1521090,19761,86.6 93 | 1719425,79646,98.33 94 | 3028604,75484,97.22 95 | 1236239,55409,96.0 96 | 906642,14128,91.88 97 | 1257902,20899,92.93 98 | 1163635,30173,89.82 99 | 1413936,90918,97.87 100 | 709519,6013,95.14 101 | 628111,41450,97.03 102 | 2478832,143686,98.28 103 | 2524598,32486,93.66 104 | 821547,18708,97.31 105 | 3016943,38294,95.76 106 | 743575,20181,89.7 107 | 919626,22114,95.84 108 | 2536083,538376,99.6 109 | 959442,13220,95.94 110 | 2044159,41080,92.48 111 | 1554417,67165,93.0 112 | 2181022,180132,98.19 113 | 1010899,13696,97.57 114 | 2620663,72681,96.68 115 | 
5732609,189529,97.16 116 | 1187273,73120,99.24 117 | 1594532,85661,97.01 118 | 8403016,294629,96.97 119 | 5972754,133474,96.6 120 | 6189511,267690,99.03 121 | 1042734,23761,91.61 122 | 9476773,417402,97.8 123 | 8040754,789213,98.73 124 | 2724624,88968,91.74 125 | 1085592,27288,98.51 126 | 3393417,219213,95.68 127 | 16396012,208578,79.21 128 | 3226905,19814,91.77 129 | 6276301,286642,98.15 130 | 647094,19753,89.98 131 | 8081040,477122,98.81 132 | 886934,29360,98.46 133 | 1228396,29893,98.2 134 | 697471,6452,94.85 135 | 1605670,78364,96.63 136 | 2056991,121925,98.44 137 | 397981,6185,58.36 138 | 2760289,106828,97.14 139 | 3655043,54069,89.65 140 | 10662064,320959,97.89 141 | 3105500,108620,96.6 142 | 2238691,48825,96.77 143 | 1153518,25832,96.44 144 | 686228,24882,96.57 145 | 7523411,614901,98.87 146 | 2641916,49354,95.78 147 | 11657853,233343,97.82 148 | 5932061,172195,95.91 149 | 6313988,323119,98.18 150 | 2850316,218273,98.14 151 | 2620142,36637,93.99 152 | 854120,54821,98.05 153 | 13799864,317613,96.07 154 | 906841,35315,98.09 155 | 689607,20658,98.58 156 | 441729,14901,99.0 157 | 797800,14327,95.41 158 | 1682016,75706,98.17 159 | 1426251,57965,98.73 160 | 2268534,91796,97.75 161 | 750032,39406,98.19 162 | 4272799,26229,98.03 163 | 2449662,80825,97.54 164 | 5988592,512483,99.4 165 | 3662227,75552,97.46 166 | 725964,42700,98.98 167 | 1647440,111190,98.85 168 | 985104,12721,96.5 169 | 1665692,23961,92.37 170 | 2051794,81790,96.64 171 | 4112883,116481,93.46 172 | 33297045,1293427,99.07 173 | 1517628,19931,96.25 174 | 1675692,18803,72.76 175 | 3626738,173591,98.44 176 | 1169663,7766,92.99 177 | 446959,4923,89.48 178 | 6995153,195994,96.69 179 | 519706,18975,98.94 180 | 4373224,169228,93.01 181 | 4024087,73080,97.71 182 | 731349,42205,98.52 183 | 94366013,4539630,97.66 184 | 2458132,34337,95.52 185 | 1812670,17476,94.43 186 | 2028445,158178,97.94 187 | 1335703,12622,94.14 188 | 938717,17120,97.26 189 | 2926955,42554,97.73 190 | 4018930,32919,82.1 191 | 
6439402,81148,51.58 192 | 5665790,166892,96.95 193 | 899728,28115,96.49 194 | 2792057,206926,96.99 195 | 12839663,722491,97.84 196 | 5694139,146797,98.19 197 | 1069693,3970,90.66 198 | 590760,70454,99.18 199 | 319347,1208,92.5 200 | 27594927,1351963,96.4 201 | 26993425,437561,97.42 202 | -------------------------------------------------------------------------------- /matplotlib/examples/age_data.csv: -------------------------------------------------------------------------------- 1 | Age,All_Devs,Python,JavaScript 2 | 18,17784,20046,16446 3 | 19,16500,17100,16791 4 | 20,18012,20000,18942 5 | 21,20628,24744,21780 6 | 22,25206,30500,25704 7 | 23,30252,37732,29000 8 | 24,34368,41247,34372 9 | 25,38496,45372,37810 10 | 26,42000,48876,43515 11 | 27,46752,53850,46823 12 | 28,49320,57287,49293 13 | 29,53200,45000,53437 14 | 30,56000,50000,56373 15 | 31,62316,55000,62375 16 | 32,64928,70000,66674 17 | 33,67317,71496,68745 18 | 34,68748,75370,68746 19 | 35,73752,83640,74583 20 | 36,77232,84666,79000 21 | 37,78000,84392,78508 22 | 38,78508,78254,79996 23 | 39,79536,85000,80403 24 | 40,82488,87038,83820 25 | 41,88935,91991,88833 26 | 42,90000,100000,91660 27 | 43,90056,94796,87892 28 | 44,95000,97962,96243 29 | 45,90000,93302,90000 30 | 46,91633,99240,99313 31 | 47,91660,102736,91660 32 | 48,98150,112285,102264 33 | 49,98964,100771,100000 34 | 50,100000,104708,100000 35 | 51,98988,108423,91660 36 | 52,100000,101407,99240 37 | 53,108923,112542,108000 38 | 54,105000,122870,105000 39 | 55,103117,120000,104000 40 | -------------------------------------------------------------------------------- /matplotlib/examples/data_10.csv: -------------------------------------------------------------------------------- 1 | Age,All_Devs,Python,JavaScript 2 | 18,17784,20046,16446 3 | 19,16500,17100,16791 4 | 20,18012,20000,18942 5 | 21,20628,24744,21780 6 | 22,25206,30500,25704 7 | 23,30252,37732,29000 8 | 24,34368,41247,34372 9 | 25,38496,45372,37810 10 | 26,42000,48876,43515 11 | 
27,46752,53850,46823 12 | 28,49320,57287,49293 13 | 29,53200,63016,53437 14 | 30,56000,65998,56373 15 | 31,62316,70003,62375 16 | 32,64928,70000,66674 17 | 33,67317,71496,68745 18 | 34,68748,75370,68746 19 | 35,73752,83640,74583 20 | 36,77232,84666,79000 21 | 37,78000,84392,78508 22 | 38,78508,78254,79996 23 | 39,79536,85000,80403 24 | 40,82488,87038,83820 25 | 41,88935,91991,88833 26 | 42,90000,100000,91660 27 | 43,90056,94796,87892 28 | 44,95000,97962,96243 29 | 45,90000,93302,90000 30 | 46,91633,99240,99313 31 | 47,91660,102736,91660 32 | 48,98150,112285,102264 33 | 49,98964,100771,100000 34 | 50,100000,104708,100000 35 | 51,98988,108423,91660 36 | 52,100000,101407,99240 37 | 53,108923,112542,108000 38 | 54,105000,122870,105000 39 | 55,103117,120000,104000 40 | -------------------------------------------------------------------------------- /matplotlib/examples/data_8.csv: -------------------------------------------------------------------------------- 1 | Date,Open,High,Low,Close,Adj Close,Volume 2 | 2019-05-18,7266.080078,8281.660156,7257.259766,8193.139648,8193.139648,723011166 3 | 2019-05-19,8193.139648,8193.139648,7591.850098,7998.290039,7998.290039,637617163 4 | 2019-05-20,7998.290039,8102.319824,7807.770020,7947.930176,7947.930176,357803946 5 | 2019-05-21,7947.930176,8033.759766,7533.660156,7626.890137,7626.890137,424501866 6 | 2019-05-22,7626.890137,7971.259766,7478.740234,7876.500000,7876.500000,386766321 7 | 2019-05-23,7876.500000,8165.450195,7801.569824,7996.399902,7996.399902,413162746 8 | 2019-05-24,7996.399902,8140.819824,7948.680176,8059.129883,8059.129883,179206342 9 | 2019-05-25,8059.129883,8779.000000,7894.529785,8726.230469,8726.230469,483663699 10 | 2019-05-26,8726.230469,8931.530273,8668.459961,8785.169922,8785.169922,507164714 11 | 2019-05-27,8785.169922,8818.709961,8562.200195,8718.849609,8718.849609,360752199 12 | 2019-05-28,8718.849609,8760.480469,8444.099609,8664.559570,8664.559570,380343928 13 | 
2019-05-29,8664.559570,9065.889648,8027.209961,8276.250000,8276.250000,815525590 14 | 2019-05-30,8276.250000,8570.780273,8116.000000,8560.080078,8560.080078,500141087 15 | 2019-05-31,8550.629883,8576.339844,8459.650391,8504.980469,8504.980469,69915456 16 | 2019-05-18,7266.080078,8281.660156,7257.259766,8193.139648,8193.139648,723011166 -------------------------------------------------------------------------------- /matplotlib/examples/macrodata.csv: -------------------------------------------------------------------------------- 1 | year,quarter,realgdp,realcons,realinv,realgovt,realdpi,cpi,m1,tbilrate,unemp,pop,infl,realint 2 | 1959.0,1.0,2710.349,1707.4,286.898,470.045,1886.9,28.98,139.7,2.82,5.8,177.146,0.0,0.0 3 | 1959.0,2.0,2778.801,1733.7,310.859,481.301,1919.7,29.15,141.7,3.08,5.1,177.83,2.34,0.74 4 | 1959.0,3.0,2775.488,1751.8,289.226,491.26,1916.4,29.35,140.5,3.82,5.3,178.657,2.74,1.09 5 | 1959.0,4.0,2785.204,1753.7,299.356,484.052,1931.3,29.37,140.0,4.33,5.6,179.386,0.27,4.06 6 | 1960.0,1.0,2847.699,1770.5,331.722,462.199,1955.5,29.54,139.6,3.5,5.2,180.007,2.31,1.19 7 | 1960.0,2.0,2834.39,1792.9,298.152,460.4,1966.1,29.55,140.2,2.68,5.2,180.671,0.14,2.55 8 | 1960.0,3.0,2839.022,1785.8,296.375,474.676,1967.8,29.75,140.9,2.36,5.6,181.528,2.7,-0.34 9 | 1960.0,4.0,2802.616,1788.2,259.764,476.434,1966.6,29.84,141.1,2.29,6.3,182.287,1.21,1.08 10 | 1961.0,1.0,2819.264,1787.7,266.405,475.854,1984.5,29.81,142.1,2.37,6.8,182.992,-0.4,2.77 11 | 1961.0,2.0,2872.005,1814.3,286.246,480.328,2014.4,29.92,142.9,2.29,7.0,183.691,1.47,0.81 12 | 1961.0,3.0,2918.419,1823.1,310.227,493.828,2041.9,29.98,144.1,2.32,6.8,184.524,0.8,1.52 13 | 1961.0,4.0,2977.83,1859.6,315.463,502.521,2082.0,30.04,145.2,2.6,6.2,185.242,0.8,1.8 14 | 1962.0,1.0,3031.241,1879.4,334.271,520.96,2101.7,30.21,146.4,2.73,5.6,185.874,2.26,0.47 15 | 1962.0,2.0,3064.709,1902.5,331.039,523.066,2125.2,30.22,146.5,2.78,5.5,186.538,0.13,2.65 16 | 
1962.0,3.0,3093.047,1917.9,336.962,538.838,2137.0,30.38,146.7,2.78,5.6,187.323,2.11,0.67 17 | 1962.0,4.0,3100.563,1945.1,325.65,535.912,2154.6,30.44,148.3,2.87,5.5,188.013,0.79,2.08 18 | 1963.0,1.0,3141.087,1958.2,343.721,522.917,2172.5,30.48,149.7,2.9,5.8,188.58,0.53,2.38 19 | 1963.0,2.0,3180.447,1976.9,348.73,518.108,2193.1,30.69,151.3,3.03,5.7,189.242,2.75,0.29 20 | 1963.0,3.0,3240.332,2003.8,360.102,546.893,2217.9,30.75,152.6,3.38,5.5,190.028,0.78,2.6 21 | 1963.0,4.0,3264.967,2020.6,364.534,532.383,2254.6,30.94,153.7,3.52,5.6,190.668,2.46,1.06 22 | 1964.0,1.0,3338.246,2060.5,379.523,529.686,2299.6,30.95,154.8,3.51,5.5,191.245,0.13,3.38 23 | 1964.0,2.0,3376.587,2096.7,377.778,526.175,2362.1,31.02,156.8,3.47,5.2,191.889,0.9,2.57 24 | 1964.0,3.0,3422.469,2135.2,386.754,522.008,2392.7,31.12,159.2,3.53,5.0,192.631,1.29,2.25 25 | 1964.0,4.0,3431.957,2141.2,389.91,514.603,2420.4,31.28,160.7,3.76,5.0,193.223,2.05,1.71 26 | 1965.0,1.0,3516.251,2188.8,429.145,508.006,2447.4,31.38,162.0,3.93,4.9,193.709,1.28,2.65 27 | 1965.0,2.0,3563.96,2213.0,429.119,508.931,2474.5,31.58,163.1,3.84,4.7,194.303,2.54,1.3 28 | 1965.0,3.0,3636.285,2251.0,444.444,529.446,2542.6,31.65,166.0,3.93,4.4,194.997,0.89,3.04 29 | 1965.0,4.0,3724.014,2314.3,446.493,544.121,2594.1,31.88,169.1,4.35,4.1,195.539,2.9,1.46 30 | 1966.0,1.0,3815.423,2348.5,484.244,556.593,2618.4,32.28,171.8,4.62,3.9,195.999,4.99,-0.37 31 | 1966.0,2.0,3828.124,2354.5,475.408,571.371,2624.7,32.45,170.3,4.65,3.8,196.56,2.1,2.55 32 | 1966.0,3.0,3853.301,2381.5,470.697,594.514,2657.8,32.85,171.2,5.23,3.8,197.207,4.9,0.33 33 | 1966.0,4.0,3884.52,2391.4,472.957,599.528,2688.2,32.9,171.9,5.0,3.7,197.736,0.61,4.39 34 | 1967.0,1.0,3918.74,2405.3,460.007,640.682,2728.4,33.1,174.2,4.22,3.8,198.206,2.42,1.8 35 | 1967.0,2.0,3919.556,2438.1,440.393,631.43,2750.8,33.4,178.1,3.78,3.8,198.712,3.61,0.17 36 | 1967.0,3.0,3950.826,2450.6,453.033,641.504,2777.1,33.7,181.6,4.42,3.8,199.311,3.58,0.84 37 | 
1967.0,4.0,3980.97,2465.7,462.834,640.234,2797.4,34.1,184.3,4.9,3.9,199.808,4.72,0.18 38 | 1968.0,1.0,4063.013,2524.6,472.907,651.378,2846.2,34.4,186.6,5.18,3.7,200.208,3.5,1.67 39 | 1968.0,2.0,4131.998,2563.3,492.026,646.145,2893.5,34.9,190.5,5.5,3.5,200.706,5.77,-0.28 40 | 1968.0,3.0,4160.267,2611.5,476.053,640.615,2899.3,35.3,194.0,5.21,3.5,201.29,4.56,0.65 41 | 1968.0,4.0,4178.293,2623.5,480.998,636.729,2918.4,35.7,198.7,5.85,3.4,201.76,4.51,1.34 42 | 1969.0,1.0,4244.1,2652.9,512.686,633.224,2923.4,36.3,200.7,6.08,3.4,202.161,6.67,-0.58 43 | 1969.0,2.0,4256.46,2669.8,508.601,623.16,2952.9,36.8,201.7,6.49,3.4,202.677,5.47,1.02 44 | 1969.0,3.0,4283.378,2682.7,520.36,623.613,3012.9,37.3,202.9,7.02,3.6,203.302,5.4,1.63 45 | 1969.0,4.0,4263.261,2704.1,492.334,606.9,3034.9,37.9,206.2,7.64,3.6,203.849,6.38,1.26 46 | 1970.0,1.0,4256.573,2720.7,476.925,594.888,3050.1,38.5,206.7,6.76,4.2,204.401,6.28,0.47 47 | 1970.0,2.0,4264.289,2733.2,478.419,576.257,3103.5,38.9,208.0,6.66,4.8,205.052,4.13,2.52 48 | 1970.0,3.0,4302.259,2757.1,486.594,567.743,3145.4,39.4,212.9,6.15,5.2,205.788,5.11,1.04 49 | 1970.0,4.0,4256.637,2749.6,458.406,564.666,3135.1,39.9,215.5,4.86,5.8,206.466,5.04,-0.18 50 | 1971.0,1.0,4374.016,2802.2,517.935,542.709,3197.3,40.1,220.0,3.65,5.9,207.065,2.0,1.65 51 | 1971.0,2.0,4398.829,2827.9,533.986,534.905,3245.3,40.6,224.9,4.76,5.9,207.661,4.96,-0.19 52 | 1971.0,3.0,4433.943,2850.4,541.01,532.646,3259.7,40.9,227.2,4.7,6.0,208.345,2.94,1.75 53 | 1971.0,4.0,4446.264,2897.8,524.085,516.14,3294.2,41.2,230.1,3.87,6.0,208.917,2.92,0.95 54 | 1972.0,1.0,4525.769,2936.5,561.147,518.192,3314.9,41.5,235.6,3.55,5.8,209.386,2.9,0.64 55 | 1972.0,2.0,4633.101,2992.6,595.495,526.473,3346.1,41.8,238.8,3.86,5.7,209.896,2.88,0.98 56 | 1972.0,3.0,4677.503,3038.8,603.97,498.116,3414.6,42.2,245.0,4.47,5.6,210.479,3.81,0.66 57 | 1972.0,4.0,4754.546,3110.1,607.104,496.54,3550.5,42.7,251.5,5.09,5.3,210.985,4.71,0.38 58 | 
1973.0,1.0,4876.166,3167.0,645.654,504.838,3590.7,43.7,252.7,5.98,5.0,211.42,9.26,-3.28 59 | 1973.0,2.0,4932.571,3165.4,675.837,497.033,3626.2,44.2,257.5,7.19,4.9,211.909,4.55,2.64 60 | 1973.0,3.0,4906.252,3176.7,649.412,475.897,3644.4,45.6,259.0,8.06,4.8,212.475,12.47,-4.41 61 | 1973.0,4.0,4953.05,3167.4,674.253,476.174,3688.9,46.8,263.8,7.68,4.8,212.932,10.39,-2.71 62 | 1974.0,1.0,4909.617,3139.7,631.23,491.043,3632.3,48.1,267.2,7.8,5.1,213.361,10.96,-3.16 63 | 1974.0,2.0,4922.188,3150.6,628.102,490.177,3601.1,49.3,269.3,7.89,5.2,213.854,9.86,-1.96 64 | 1974.0,3.0,4873.52,3163.6,592.672,492.586,3612.4,51.0,272.3,8.16,5.6,214.451,13.56,-5.4 65 | 1974.0,4.0,4854.34,3117.3,598.306,496.176,3596.0,52.3,273.9,6.96,6.6,214.931,10.07,-3.11 66 | 1975.0,1.0,4795.295,3143.4,493.212,490.603,3581.9,53.0,276.2,5.53,8.2,215.353,5.32,0.22 67 | 1975.0,2.0,4831.942,3195.8,476.085,486.679,3749.3,54.0,283.7,5.57,8.9,215.973,7.48,-1.91 68 | 1975.0,3.0,4913.328,3241.4,516.402,498.836,3698.6,54.9,285.4,6.27,8.5,216.587,6.61,-0.34 69 | 1975.0,4.0,4977.511,3275.7,530.596,500.141,3736.0,55.8,288.4,5.26,8.3,217.095,6.5,-1.24 70 | 1976.0,1.0,5090.663,3341.2,585.541,495.568,3791.0,56.1,294.7,4.91,7.7,217.528,2.14,2.77 71 | 1976.0,2.0,5128.947,3371.8,610.513,494.532,3822.2,57.0,297.2,5.28,7.6,218.035,6.37,-1.09 72 | 1976.0,3.0,5154.072,3407.5,611.646,493.141,3856.7,57.9,302.0,5.05,7.7,218.644,6.27,-1.22 73 | 1976.0,4.0,5191.499,3451.8,615.898,494.415,3884.4,58.7,308.3,4.57,7.8,219.179,5.49,-0.92 74 | 1977.0,1.0,5251.762,3491.3,646.198,498.509,3887.5,60.0,316.0,4.6,7.5,219.684,8.76,-4.16 75 | 1977.0,2.0,5356.131,3510.6,696.141,506.695,3931.8,60.8,320.2,5.06,7.1,220.239,5.3,-0.24 76 | 1977.0,3.0,5451.921,3544.1,734.078,509.605,3990.8,61.6,326.4,5.82,6.9,220.904,5.23,0.59 77 | 1977.0,4.0,5450.793,3597.5,713.356,504.584,4071.2,62.7,334.4,6.2,6.6,221.477,7.08,-0.88 78 | 1978.0,1.0,5469.405,3618.5,727.504,506.314,4096.4,63.9,339.9,6.34,6.3,221.991,7.58,-1.24 79 | 
1978.0,2.0,5684.569,3695.9,777.454,518.366,4143.4,65.5,347.6,6.72,6.0,222.585,9.89,-3.18 80 | 1978.0,3.0,5740.3,3711.4,801.452,520.199,4177.1,67.1,353.3,7.64,6.0,223.271,9.65,-2.01 81 | 1978.0,4.0,5816.222,3741.3,819.689,524.782,4209.8,68.5,358.6,9.02,5.9,223.865,8.26,0.76 82 | 1979.0,1.0,5825.949,3760.2,819.556,525.524,4255.9,70.6,368.0,9.42,5.9,224.438,12.08,-2.66 83 | 1979.0,2.0,5831.418,3758.0,817.66,532.04,4226.1,73.0,377.2,9.3,5.7,225.055,13.37,-4.07 84 | 1979.0,3.0,5873.335,3794.9,801.742,531.232,4250.3,75.2,380.8,10.49,5.9,225.801,11.88,-1.38 85 | 1979.0,4.0,5889.495,3805.0,786.817,531.126,4284.3,78.0,385.8,11.94,5.9,226.451,14.62,-2.68 86 | 1980.0,1.0,5908.467,3798.4,781.114,548.115,4296.2,80.9,383.8,13.75,6.3,227.061,14.6,-0.85 87 | 1980.0,2.0,5787.373,3712.2,710.64,561.895,4236.1,82.6,394.0,7.9,7.3,227.726,8.32,-0.42 88 | 1980.0,3.0,5776.617,3752.0,656.477,554.292,4279.7,84.7,409.0,10.34,7.7,228.417,10.04,0.3 89 | 1980.0,4.0,5883.46,3802.0,723.22,556.13,4368.1,87.2,411.3,14.75,7.4,228.937,11.64,3.11 90 | 1981.0,1.0,6005.717,3822.8,795.091,567.618,4358.1,89.1,427.4,13.95,7.4,229.403,8.62,5.32 91 | 1981.0,2.0,5957.795,3822.8,757.24,584.54,4358.6,91.5,426.9,15.33,7.4,229.966,10.63,4.69 92 | 1981.0,3.0,6030.184,3838.3,804.242,583.89,4455.4,93.4,428.4,14.58,7.4,230.641,8.22,6.36 93 | 1981.0,4.0,5955.062,3809.3,773.053,590.125,4464.4,94.4,442.7,11.33,8.2,231.157,4.26,7.07 94 | 1982.0,1.0,5857.333,3833.9,692.514,591.043,4469.6,95.0,447.1,12.95,8.8,231.645,2.53,10.42 95 | 1982.0,2.0,5889.074,3847.7,691.9,596.403,4500.8,97.5,448.0,11.97,9.4,232.188,10.39,1.58 96 | 1982.0,3.0,5866.37,3877.2,683.825,605.37,4520.6,98.1,464.5,8.1,9.9,232.816,2.45,5.65 97 | 1982.0,4.0,5871.001,3947.9,622.93,623.307,4536.4,97.9,477.2,7.96,10.7,233.322,-0.82,8.77 98 | 1983.0,1.0,5944.02,3986.6,645.11,630.873,4572.2,98.8,493.2,8.22,10.4,233.781,3.66,4.56 99 | 1983.0,2.0,6077.619,4065.7,707.372,644.322,4605.5,99.8,507.8,8.69,10.1,234.307,4.03,4.66 100 | 
1983.0,3.0,6197.468,4137.6,754.937,662.412,4674.7,100.8,517.2,8.99,9.4,234.907,3.99,5.01 101 | 1983.0,4.0,6325.574,4203.2,834.427,639.197,4771.1,102.1,525.1,8.89,8.5,235.385,5.13,3.76 102 | 1984.0,1.0,6448.264,4239.2,921.763,644.635,4875.4,103.3,535.0,9.43,7.9,235.839,4.67,4.76 103 | 1984.0,2.0,6559.594,4299.9,952.841,664.839,4959.4,104.1,540.9,9.94,7.5,236.348,3.09,6.85 104 | 1984.0,3.0,6623.343,4333.0,974.989,662.294,5036.6,105.1,543.7,10.19,7.4,236.976,3.82,6.37 105 | 1984.0,4.0,6677.264,4390.1,958.993,684.282,5084.5,105.7,557.0,8.14,7.3,237.468,2.28,5.87 106 | 1985.0,1.0,6740.275,4464.6,927.375,691.613,5072.0,107.0,570.4,8.25,7.3,237.9,4.89,3.36 107 | 1985.0,2.0,6797.344,4505.2,943.383,708.524,5172.7,107.7,589.1,7.17,7.3,238.466,2.61,4.56 108 | 1985.0,3.0,6903.523,4590.8,932.959,732.305,5140.7,108.5,607.8,7.13,7.2,239.113,2.96,4.17 109 | 1985.0,4.0,6955.918,4600.9,969.434,732.026,5193.9,109.9,621.4,7.14,7.0,239.638,5.13,2.01 110 | 1986.0,1.0,7022.757,4639.3,967.442,728.125,5255.8,108.7,641.0,6.56,7.0,240.094,-4.39,10.95 111 | 1986.0,2.0,7050.969,4688.7,945.972,751.334,5315.5,109.5,670.3,6.06,7.2,240.651,2.93,3.13 112 | 1986.0,3.0,7118.95,4770.7,916.315,779.77,5343.3,110.2,694.9,5.31,7.0,241.274,2.55,2.76 113 | 1986.0,4.0,7153.359,4799.4,917.736,767.671,5346.5,111.4,730.2,5.44,6.8,241.784,4.33,1.1 114 | 1987.0,1.0,7193.019,4792.1,945.776,772.247,5379.4,112.7,743.9,5.61,6.6,242.252,4.64,0.97 115 | 1987.0,2.0,7269.51,4856.3,947.1,782.962,5321.0,113.8,743.0,5.67,6.3,242.804,3.89,1.79 116 | 1987.0,3.0,7332.558,4910.4,948.055,783.804,5416.2,115.0,756.2,6.19,6.0,243.446,4.2,1.99 117 | 1987.0,4.0,7458.022,4922.2,1021.98,795.467,5493.1,116.0,756.2,5.76,5.9,243.981,3.46,2.29 118 | 1988.0,1.0,7496.6,5004.4,964.398,773.851,5562.1,117.2,768.1,5.76,5.7,244.445,4.12,1.64 119 | 1988.0,2.0,7592.881,5040.8,987.858,765.98,5614.3,118.5,781.4,6.48,5.5,245.021,4.41,2.07 120 | 1988.0,3.0,7632.082,5080.6,994.204,760.245,5657.5,119.9,783.3,7.22,5.5,245.693,4.7,2.52 121 | 
1988.0,4.0,7733.991,5140.4,1007.371,783.065,5708.5,121.2,785.7,8.03,5.3,246.224,4.31,3.72 122 | 1989.0,1.0,7806.603,5159.3,1045.975,767.024,5773.4,123.1,779.2,8.67,5.2,246.721,6.22,2.44 123 | 1989.0,2.0,7865.016,5182.4,1033.753,784.275,5749.8,124.5,777.8,8.15,5.2,247.342,4.52,3.63 124 | 1989.0,3.0,7927.393,5236.1,1021.604,791.819,5787.0,125.4,786.6,7.76,5.3,248.067,2.88,4.88 125 | 1989.0,4.0,7944.697,5261.7,1011.119,787.844,5831.3,127.5,795.4,7.65,5.4,248.659,6.64,1.01 126 | 1990.0,1.0,8027.693,5303.3,1021.07,799.681,5875.1,128.9,806.2,7.8,5.3,249.306,4.37,3.44 127 | 1990.0,2.0,8059.598,5320.8,1021.36,800.639,5913.9,130.5,810.1,7.7,5.3,250.132,4.93,2.76 128 | 1990.0,3.0,8059.476,5341.0,997.319,793.513,5918.1,133.4,819.8,7.33,5.7,251.057,8.79,-1.46 129 | 1990.0,4.0,7988.864,5299.5,934.248,800.525,5878.2,134.7,827.2,6.67,6.1,251.889,3.88,2.79 130 | 1991.0,1.0,7950.164,5284.4,896.21,806.775,5896.3,135.1,843.2,5.83,6.6,252.643,1.19,4.65 131 | 1991.0,2.0,8003.822,5324.7,891.704,809.081,5941.1,136.2,861.5,5.54,6.8,253.493,3.24,2.29 132 | 1991.0,3.0,8037.538,5345.0,913.904,793.987,5953.6,137.2,878.0,5.18,6.9,254.435,2.93,2.25 133 | 1991.0,4.0,8069.046,5342.6,948.891,778.378,5992.4,138.3,910.4,4.14,7.1,255.214,3.19,0.95 134 | 1992.0,1.0,8157.616,5434.5,927.796,778.568,6082.9,139.4,943.8,3.88,7.4,255.992,3.17,0.71 135 | 1992.0,2.0,8244.294,5466.7,988.912,777.762,6129.5,140.5,963.2,3.5,7.6,256.894,3.14,0.36 136 | 1992.0,3.0,8329.361,5527.1,999.135,786.639,6160.6,141.7,1003.8,2.97,7.6,257.861,3.4,-0.44 137 | 1992.0,4.0,8417.016,5594.6,1030.758,787.064,6248.2,142.8,1030.4,3.12,7.4,258.679,3.09,0.02 138 | 1993.0,1.0,8432.485,5617.2,1054.979,762.901,6156.5,143.8,1047.6,2.92,7.2,259.414,2.79,0.13 139 | 1993.0,2.0,8486.435,5671.1,1063.263,752.158,6252.3,144.5,1084.5,3.02,7.1,260.255,1.94,1.08 140 | 1993.0,3.0,8531.108,5732.7,1062.514,744.227,6265.7,145.6,1113.0,3.0,6.8,261.163,3.03,-0.04 141 | 
1993.0,4.0,8643.769,5783.7,1118.583,748.102,6358.1,146.3,1131.6,3.05,6.6,261.919,1.92,1.13 142 | 1994.0,1.0,8727.919,5848.1,1166.845,721.288,6332.6,147.2,1141.1,3.48,6.6,262.631,2.45,1.02 143 | 1994.0,2.0,8847.303,5891.5,1234.855,717.197,6440.6,148.4,1150.5,4.2,6.2,263.436,3.25,0.96 144 | 1994.0,3.0,8904.289,5938.7,1212.655,736.89,6487.9,149.4,1150.1,4.68,6.0,264.301,2.69,2.0 145 | 1994.0,4.0,9003.18,5997.3,1269.19,716.702,6574.0,150.5,1151.4,5.53,5.6,265.044,2.93,2.6 146 | 1995.0,1.0,9025.267,6004.3,1282.09,715.326,6616.6,151.8,1149.3,5.72,5.5,265.755,3.44,2.28 147 | 1995.0,2.0,9044.668,6053.5,1247.61,712.492,6617.2,152.6,1145.4,5.52,5.7,266.557,2.1,3.42 148 | 1995.0,3.0,9120.684,6107.6,1235.601,707.649,6666.8,153.5,1137.3,5.32,5.7,267.456,2.35,2.97 149 | 1995.0,4.0,9184.275,6150.6,1270.392,681.081,6706.2,154.7,1123.5,5.17,5.6,268.151,3.11,2.05 150 | 1996.0,1.0,9247.188,6206.9,1287.128,695.265,6777.7,156.1,1124.8,4.91,5.5,268.853,3.6,1.31 151 | 1996.0,2.0,9407.052,6277.1,1353.795,705.172,6850.6,157.0,1112.4,5.09,5.5,269.667,2.3,2.79 152 | 1996.0,3.0,9488.879,6314.6,1422.059,692.741,6908.9,158.2,1086.1,5.04,5.3,270.581,3.05,2.0 153 | 1996.0,4.0,9592.458,6366.1,1418.193,690.744,6946.8,159.4,1081.5,4.99,5.3,271.36,3.02,1.97 154 | 1997.0,1.0,9666.235,6430.2,1451.304,681.445,7008.9,159.9,1063.8,5.1,5.2,272.083,1.25,3.85 155 | 1997.0,2.0,9809.551,6456.2,1543.976,693.525,7061.5,160.4,1066.2,5.01,5.0,272.912,1.25,3.76 156 | 1997.0,3.0,9932.672,6566.0,1571.426,691.261,7142.4,161.5,1065.5,5.02,4.9,273.852,2.73,2.29 157 | 1997.0,4.0,10008.874,6641.1,1596.523,690.311,7241.5,162.0,1074.4,5.11,4.7,274.626,1.24,3.88 158 | 1998.0,1.0,10103.425,6707.2,1672.732,668.783,7406.2,162.2,1076.1,5.02,4.6,275.304,0.49,4.53 159 | 1998.0,2.0,10194.277,6822.6,1652.716,687.184,7512.0,163.2,1075.0,4.98,4.4,276.115,2.46,2.52 160 | 1998.0,3.0,10328.787,6913.1,1700.071,681.472,7591.0,163.9,1086.0,4.49,4.5,277.003,1.71,2.78 161 | 
1998.0,4.0,10507.575,7019.1,1754.743,688.147,7646.5,164.7,1097.8,4.38,4.4,277.79,1.95,2.43 162 | 1999.0,1.0,10601.179,7088.3,1809.993,683.601,7698.4,165.9,1101.9,4.39,4.3,278.451,2.9,1.49 163 | 1999.0,2.0,10684.049,7199.9,1803.674,683.594,7716.0,166.7,1098.7,4.54,4.3,279.295,1.92,2.62 164 | 1999.0,3.0,10819.914,7286.4,1848.949,697.936,7765.9,168.1,1102.3,4.75,4.2,280.203,3.35,1.41 165 | 1999.0,4.0,11014.254,7389.2,1914.567,713.445,7887.7,169.3,1121.9,5.2,4.1,280.976,2.85,2.35 166 | 2000.0,1.0,11043.044,7501.3,1887.836,685.216,8053.4,170.9,1113.5,5.63,4.0,281.653,3.76,1.87 167 | 2000.0,2.0,11258.454,7571.8,2018.529,712.641,8135.9,172.7,1103.0,5.81,3.9,282.385,4.19,1.62 168 | 2000.0,3.0,11267.867,7645.9,1986.956,698.827,8222.3,173.9,1098.7,6.07,4.0,283.19,2.77,3.3 169 | 2000.0,4.0,11334.544,7713.5,1987.845,695.597,8234.6,175.6,1097.7,5.7,3.9,283.9,3.89,1.81 170 | 2001.0,1.0,11297.171,7744.3,1882.691,710.403,8296.5,176.4,1114.9,4.39,4.2,284.55,1.82,2.57 171 | 2001.0,2.0,11371.251,7773.5,1876.65,725.623,8273.7,177.4,1139.7,3.54,4.4,285.267,2.26,1.28 172 | 2001.0,3.0,11340.075,7807.7,1837.074,730.493,8484.5,177.6,1166.0,2.72,4.8,286.047,0.45,2.27 173 | 2001.0,4.0,11380.128,7930.0,1731.189,739.318,8385.5,177.7,1190.9,1.74,5.5,286.728,0.23,1.51 174 | 2002.0,1.0,11477.868,7957.3,1789.327,756.915,8611.6,179.3,1185.9,1.75,5.7,287.328,3.59,-1.84 175 | 2002.0,2.0,11538.77,7997.8,1810.779,774.408,8658.9,180.0,1199.5,1.7,5.8,288.028,1.56,0.14 176 | 2002.0,3.0,11596.43,8052.0,1814.531,786.673,8629.2,181.2,1204.0,1.61,5.7,288.783,2.66,-1.05 177 | 2002.0,4.0,11598.824,8080.6,1813.219,799.967,8649.6,182.6,1226.8,1.2,5.8,289.421,3.08,-1.88 178 | 2003.0,1.0,11645.819,8122.3,1813.141,800.196,8681.3,183.2,1248.4,1.14,5.9,290.019,1.31,-0.17 179 | 2003.0,2.0,11738.706,8197.8,1823.698,838.775,8812.5,183.7,1287.9,0.96,6.2,290.704,1.09,-0.13 180 | 2003.0,3.0,11935.461,8312.1,1889.883,839.598,8935.4,184.9,1297.3,0.94,6.1,291.449,2.6,-1.67 181 | 
2003.0,4.0,12042.817,8358.0,1959.783,845.722,8986.4,186.3,1306.1,0.9,5.8,292.057,3.02,-2.11 182 | 2004.0,1.0,12127.623,8437.6,1970.015,856.57,9025.9,187.4,1332.1,0.94,5.7,292.635,2.35,-1.42 183 | 2004.0,2.0,12213.818,8483.2,2055.58,861.44,9115.0,189.1,1340.5,1.21,5.6,293.31,3.61,-2.41 184 | 2004.0,3.0,12303.533,8555.8,2082.231,876.385,9175.9,190.8,1361.0,1.63,5.4,294.066,3.58,-1.95 185 | 2004.0,4.0,12410.282,8654.2,2125.152,865.596,9303.4,191.8,1366.6,2.2,5.4,294.741,2.09,0.11 186 | 2005.0,1.0,12534.113,8719.0,2170.299,869.204,9189.6,193.8,1357.8,2.69,5.3,295.308,4.15,-1.46 187 | 2005.0,2.0,12587.535,8802.9,2131.468,870.044,9253.0,194.7,1366.6,3.01,5.1,295.994,1.85,1.16 188 | 2005.0,3.0,12683.153,8865.6,2154.949,890.394,9308.0,199.2,1375.0,3.52,5.0,296.77,9.14,-5.62 189 | 2005.0,4.0,12748.699,8888.5,2232.193,875.557,9358.7,199.4,1380.6,4.0,4.9,297.435,0.4,3.6 190 | 2006.0,1.0,12915.938,8986.6,2264.721,900.511,9533.8,200.7,1380.5,4.51,4.7,298.061,2.6,1.91 191 | 2006.0,2.0,12962.462,9035.0,2261.247,892.839,9617.3,202.7,1369.2,4.82,4.7,298.766,3.97,0.85 192 | 2006.0,3.0,12965.916,9090.7,2229.636,892.002,9662.5,201.9,1369.4,4.9,4.7,299.593,-1.58,6.48 193 | 2006.0,4.0,13060.679,9181.6,2165.966,894.404,9788.8,203.574,1373.6,4.92,4.4,300.32,3.3,1.62 194 | 2007.0,1.0,13099.901,9265.1,2132.609,882.766,9830.2,205.92,1379.7,4.95,4.5,300.977,4.58,0.36 195 | 2007.0,2.0,13203.977,9291.5,2162.214,898.713,9842.7,207.338,1370.0,4.72,4.5,301.714,2.75,1.97 196 | 2007.0,3.0,13321.109,9335.6,2166.491,918.983,9883.9,209.133,1379.2,4.0,4.7,302.509,3.45,0.55 197 | 2007.0,4.0,13391.249,9363.6,2123.426,925.11,9886.2,212.495,1377.4,3.01,4.8,303.204,6.38,-3.37 198 | 2008.0,1.0,13366.865,9349.6,2082.886,943.372,9826.8,213.997,1384.0,1.56,4.9,303.803,2.82,-1.26 199 | 2008.0,2.0,13415.266,9351.0,2026.518,961.28,10059.0,218.61,1409.3,1.74,5.4,304.483,8.53,-6.79 200 | 2008.0,3.0,13324.6,9267.7,1990.693,991.551,9838.3,216.889,1474.7,1.17,6.0,305.27,-3.16,4.33 201 | 
2008.0,4.0,13141.92,9195.3,1857.661,1007.273,9920.4,212.174,1576.5,0.12,6.9,305.952,-8.79,8.91 202 | 2009.0,1.0,12925.41,9209.2,1558.494,996.287,9926.4,212.671,1592.8,0.22,8.1,306.547,0.94,-0.71 203 | 2009.0,2.0,12901.504,9189.0,1456.678,1023.528,10077.5,214.469,1653.6,0.18,9.2,307.226,3.37,-3.19 204 | 2009.0,3.0,12990.341,9256.0,1486.398,1044.088,10040.6,216.385,1673.9,0.12,9.6,308.013,3.56,-3.44 205 | -------------------------------------------------------------------------------- /matplotlib/examples/tips.csv: -------------------------------------------------------------------------------- 1 | total_bill,tip,smoker,day,time,size 2 | 16.99,1.01,No,Sun,Dinner,2 3 | 10.34,1.66,No,Sun,Dinner,3 4 | 21.01,3.5,No,Sun,Dinner,3 5 | 23.68,3.31,No,Sun,Dinner,2 6 | 24.59,3.61,No,Sun,Dinner,4 7 | 25.29,4.71,No,Sun,Dinner,4 8 | 8.77,2.0,No,Sun,Dinner,2 9 | 26.88,3.12,No,Sun,Dinner,4 10 | 15.04,1.96,No,Sun,Dinner,2 11 | 14.78,3.23,No,Sun,Dinner,2 12 | 10.27,1.71,No,Sun,Dinner,2 13 | 35.26,5.0,No,Sun,Dinner,4 14 | 15.42,1.57,No,Sun,Dinner,2 15 | 18.43,3.0,No,Sun,Dinner,4 16 | 14.83,3.02,No,Sun,Dinner,2 17 | 21.58,3.92,No,Sun,Dinner,2 18 | 10.33,1.67,No,Sun,Dinner,3 19 | 16.29,3.71,No,Sun,Dinner,3 20 | 16.97,3.5,No,Sun,Dinner,3 21 | 20.65,3.35,No,Sat,Dinner,3 22 | 17.92,4.08,No,Sat,Dinner,2 23 | 20.29,2.75,No,Sat,Dinner,2 24 | 15.77,2.23,No,Sat,Dinner,2 25 | 39.42,7.58,No,Sat,Dinner,4 26 | 19.82,3.18,No,Sat,Dinner,2 27 | 17.81,2.34,No,Sat,Dinner,4 28 | 13.37,2.0,No,Sat,Dinner,2 29 | 12.69,2.0,No,Sat,Dinner,2 30 | 21.7,4.3,No,Sat,Dinner,2 31 | 19.65,3.0,No,Sat,Dinner,2 32 | 9.55,1.45,No,Sat,Dinner,2 33 | 18.35,2.5,No,Sat,Dinner,4 34 | 15.06,3.0,No,Sat,Dinner,2 35 | 20.69,2.45,No,Sat,Dinner,4 36 | 17.78,3.27,No,Sat,Dinner,2 37 | 24.06,3.6,No,Sat,Dinner,3 38 | 16.31,2.0,No,Sat,Dinner,3 39 | 16.93,3.07,No,Sat,Dinner,3 40 | 18.69,2.31,No,Sat,Dinner,3 41 | 31.27,5.0,No,Sat,Dinner,3 42 | 16.04,2.24,No,Sat,Dinner,3 43 | 17.46,2.54,No,Sun,Dinner,2 44 | 13.94,3.06,No,Sun,Dinner,2 45 | 
9.68,1.32,No,Sun,Dinner,2 46 | 30.4,5.6,No,Sun,Dinner,4 47 | 18.29,3.0,No,Sun,Dinner,2 48 | 22.23,5.0,No,Sun,Dinner,2 49 | 32.4,6.0,No,Sun,Dinner,4 50 | 28.55,2.05,No,Sun,Dinner,3 51 | 18.04,3.0,No,Sun,Dinner,2 52 | 12.54,2.5,No,Sun,Dinner,2 53 | 10.29,2.6,No,Sun,Dinner,2 54 | 34.81,5.2,No,Sun,Dinner,4 55 | 9.94,1.56,No,Sun,Dinner,2 56 | 25.56,4.34,No,Sun,Dinner,4 57 | 19.49,3.51,No,Sun,Dinner,2 58 | 38.01,3.0,Yes,Sat,Dinner,4 59 | 26.41,1.5,No,Sat,Dinner,2 60 | 11.24,1.76,Yes,Sat,Dinner,2 61 | 48.27,6.73,No,Sat,Dinner,4 62 | 20.29,3.21,Yes,Sat,Dinner,2 63 | 13.81,2.0,Yes,Sat,Dinner,2 64 | 11.02,1.98,Yes,Sat,Dinner,2 65 | 18.29,3.76,Yes,Sat,Dinner,4 66 | 17.59,2.64,No,Sat,Dinner,3 67 | 20.08,3.15,No,Sat,Dinner,3 68 | 16.45,2.47,No,Sat,Dinner,2 69 | 3.07,1.0,Yes,Sat,Dinner,1 70 | 20.23,2.01,No,Sat,Dinner,2 71 | 15.01,2.09,Yes,Sat,Dinner,2 72 | 12.02,1.97,No,Sat,Dinner,2 73 | 17.07,3.0,No,Sat,Dinner,3 74 | 26.86,3.14,Yes,Sat,Dinner,2 75 | 25.28,5.0,Yes,Sat,Dinner,2 76 | 14.73,2.2,No,Sat,Dinner,2 77 | 10.51,1.25,No,Sat,Dinner,2 78 | 17.92,3.08,Yes,Sat,Dinner,2 79 | 27.2,4.0,No,Thur,Lunch,4 80 | 22.76,3.0,No,Thur,Lunch,2 81 | 17.29,2.71,No,Thur,Lunch,2 82 | 19.44,3.0,Yes,Thur,Lunch,2 83 | 16.66,3.4,No,Thur,Lunch,2 84 | 10.07,1.83,No,Thur,Lunch,1 85 | 32.68,5.0,Yes,Thur,Lunch,2 86 | 15.98,2.03,No,Thur,Lunch,2 87 | 34.83,5.17,No,Thur,Lunch,4 88 | 13.03,2.0,No,Thur,Lunch,2 89 | 18.28,4.0,No,Thur,Lunch,2 90 | 24.71,5.85,No,Thur,Lunch,2 91 | 21.16,3.0,No,Thur,Lunch,2 92 | 28.97,3.0,Yes,Fri,Dinner,2 93 | 22.49,3.5,No,Fri,Dinner,2 94 | 5.75,1.0,Yes,Fri,Dinner,2 95 | 16.32,4.3,Yes,Fri,Dinner,2 96 | 22.75,3.25,No,Fri,Dinner,2 97 | 40.17,4.73,Yes,Fri,Dinner,4 98 | 27.28,4.0,Yes,Fri,Dinner,2 99 | 12.03,1.5,Yes,Fri,Dinner,2 100 | 21.01,3.0,Yes,Fri,Dinner,2 101 | 12.46,1.5,No,Fri,Dinner,2 102 | 11.35,2.5,Yes,Fri,Dinner,2 103 | 15.38,3.0,Yes,Fri,Dinner,2 104 | 44.3,2.5,Yes,Sat,Dinner,3 105 | 22.42,3.48,Yes,Sat,Dinner,2 106 | 20.92,4.08,No,Sat,Dinner,2 107 | 
15.36,1.64,Yes,Sat,Dinner,2 108 | 20.49,4.06,Yes,Sat,Dinner,2 109 | 25.21,4.29,Yes,Sat,Dinner,2 110 | 18.24,3.76,No,Sat,Dinner,2 111 | 14.31,4.0,Yes,Sat,Dinner,2 112 | 14.0,3.0,No,Sat,Dinner,2 113 | 7.25,1.0,No,Sat,Dinner,1 114 | 38.07,4.0,No,Sun,Dinner,3 115 | 23.95,2.55,No,Sun,Dinner,2 116 | 25.71,4.0,No,Sun,Dinner,3 117 | 17.31,3.5,No,Sun,Dinner,2 118 | 29.93,5.07,No,Sun,Dinner,4 119 | 10.65,1.5,No,Thur,Lunch,2 120 | 12.43,1.8,No,Thur,Lunch,2 121 | 24.08,2.92,No,Thur,Lunch,4 122 | 11.69,2.31,No,Thur,Lunch,2 123 | 13.42,1.68,No,Thur,Lunch,2 124 | 14.26,2.5,No,Thur,Lunch,2 125 | 15.95,2.0,No,Thur,Lunch,2 126 | 12.48,2.52,No,Thur,Lunch,2 127 | 29.8,4.2,No,Thur,Lunch,6 128 | 8.52,1.48,No,Thur,Lunch,2 129 | 14.52,2.0,No,Thur,Lunch,2 130 | 11.38,2.0,No,Thur,Lunch,2 131 | 22.82,2.18,No,Thur,Lunch,3 132 | 19.08,1.5,No,Thur,Lunch,2 133 | 20.27,2.83,No,Thur,Lunch,2 134 | 11.17,1.5,No,Thur,Lunch,2 135 | 12.26,2.0,No,Thur,Lunch,2 136 | 18.26,3.25,No,Thur,Lunch,2 137 | 8.51,1.25,No,Thur,Lunch,2 138 | 10.33,2.0,No,Thur,Lunch,2 139 | 14.15,2.0,No,Thur,Lunch,2 140 | 16.0,2.0,Yes,Thur,Lunch,2 141 | 13.16,2.75,No,Thur,Lunch,2 142 | 17.47,3.5,No,Thur,Lunch,2 143 | 34.3,6.7,No,Thur,Lunch,6 144 | 41.19,5.0,No,Thur,Lunch,5 145 | 27.05,5.0,No,Thur,Lunch,6 146 | 16.43,2.3,No,Thur,Lunch,2 147 | 8.35,1.5,No,Thur,Lunch,2 148 | 18.64,1.36,No,Thur,Lunch,3 149 | 11.87,1.63,No,Thur,Lunch,2 150 | 9.78,1.73,No,Thur,Lunch,2 151 | 7.51,2.0,No,Thur,Lunch,2 152 | 14.07,2.5,No,Sun,Dinner,2 153 | 13.13,2.0,No,Sun,Dinner,2 154 | 17.26,2.74,No,Sun,Dinner,3 155 | 24.55,2.0,No,Sun,Dinner,4 156 | 19.77,2.0,No,Sun,Dinner,4 157 | 29.85,5.14,No,Sun,Dinner,5 158 | 48.17,5.0,No,Sun,Dinner,6 159 | 25.0,3.75,No,Sun,Dinner,4 160 | 13.39,2.61,No,Sun,Dinner,2 161 | 16.49,2.0,No,Sun,Dinner,4 162 | 21.5,3.5,No,Sun,Dinner,4 163 | 12.66,2.5,No,Sun,Dinner,2 164 | 16.21,2.0,No,Sun,Dinner,3 165 | 13.81,2.0,No,Sun,Dinner,2 166 | 17.51,3.0,Yes,Sun,Dinner,2 167 | 24.52,3.48,No,Sun,Dinner,3 168 | 20.76,2.24,No,Sun,Dinner,2 
169 | 31.71,4.5,No,Sun,Dinner,4 170 | 10.59,1.61,Yes,Sat,Dinner,2 171 | 10.63,2.0,Yes,Sat,Dinner,2 172 | 50.81,10.0,Yes,Sat,Dinner,3 173 | 15.81,3.16,Yes,Sat,Dinner,2 174 | 7.25,5.15,Yes,Sun,Dinner,2 175 | 31.85,3.18,Yes,Sun,Dinner,2 176 | 16.82,4.0,Yes,Sun,Dinner,2 177 | 32.9,3.11,Yes,Sun,Dinner,2 178 | 17.89,2.0,Yes,Sun,Dinner,2 179 | 14.48,2.0,Yes,Sun,Dinner,2 180 | 9.6,4.0,Yes,Sun,Dinner,2 181 | 34.63,3.55,Yes,Sun,Dinner,2 182 | 34.65,3.68,Yes,Sun,Dinner,4 183 | 23.33,5.65,Yes,Sun,Dinner,2 184 | 45.35,3.5,Yes,Sun,Dinner,3 185 | 23.17,6.5,Yes,Sun,Dinner,4 186 | 40.55,3.0,Yes,Sun,Dinner,2 187 | 20.69,5.0,No,Sun,Dinner,5 188 | 20.9,3.5,Yes,Sun,Dinner,3 189 | 30.46,2.0,Yes,Sun,Dinner,5 190 | 18.15,3.5,Yes,Sun,Dinner,3 191 | 23.1,4.0,Yes,Sun,Dinner,3 192 | 15.69,1.5,Yes,Sun,Dinner,2 193 | 19.81,4.19,Yes,Thur,Lunch,2 194 | 28.44,2.56,Yes,Thur,Lunch,2 195 | 15.48,2.02,Yes,Thur,Lunch,2 196 | 16.58,4.0,Yes,Thur,Lunch,2 197 | 7.56,1.44,No,Thur,Lunch,2 198 | 10.34,2.0,Yes,Thur,Lunch,2 199 | 43.11,5.0,Yes,Thur,Lunch,4 200 | 13.0,2.0,Yes,Thur,Lunch,2 201 | 13.51,2.0,Yes,Thur,Lunch,2 202 | 18.71,4.0,Yes,Thur,Lunch,3 203 | 12.74,2.01,Yes,Thur,Lunch,2 204 | 13.0,2.0,Yes,Thur,Lunch,2 205 | 16.4,2.5,Yes,Thur,Lunch,2 206 | 20.53,4.0,Yes,Thur,Lunch,4 207 | 16.47,3.23,Yes,Thur,Lunch,3 208 | 26.59,3.41,Yes,Sat,Dinner,3 209 | 38.73,3.0,Yes,Sat,Dinner,4 210 | 24.27,2.03,Yes,Sat,Dinner,2 211 | 12.76,2.23,Yes,Sat,Dinner,2 212 | 30.06,2.0,Yes,Sat,Dinner,3 213 | 25.89,5.16,Yes,Sat,Dinner,4 214 | 48.33,9.0,No,Sat,Dinner,4 215 | 13.27,2.5,Yes,Sat,Dinner,2 216 | 28.17,6.5,Yes,Sat,Dinner,3 217 | 12.9,1.1,Yes,Sat,Dinner,2 218 | 28.15,3.0,Yes,Sat,Dinner,5 219 | 11.59,1.5,Yes,Sat,Dinner,2 220 | 7.74,1.44,Yes,Sat,Dinner,2 221 | 30.14,3.09,Yes,Sat,Dinner,4 222 | 12.16,2.2,Yes,Fri,Lunch,2 223 | 13.42,3.48,Yes,Fri,Lunch,2 224 | 8.58,1.92,Yes,Fri,Lunch,1 225 | 15.98,3.0,No,Fri,Lunch,3 226 | 13.42,1.58,Yes,Fri,Lunch,2 227 | 16.27,2.5,Yes,Fri,Lunch,2 228 | 10.09,2.0,Yes,Fri,Lunch,2 229 | 
20.45,3.0,No,Sat,Dinner,4 230 | 13.28,2.72,No,Sat,Dinner,2 231 | 22.12,2.88,Yes,Sat,Dinner,2 232 | 24.01,2.0,Yes,Sat,Dinner,4 233 | 15.69,3.0,Yes,Sat,Dinner,3 234 | 11.61,3.39,No,Sat,Dinner,2 235 | 10.77,1.47,No,Sat,Dinner,2 236 | 15.53,3.0,Yes,Sat,Dinner,2 237 | 10.07,1.25,No,Sat,Dinner,2 238 | 12.6,1.0,Yes,Sat,Dinner,2 239 | 32.83,1.17,Yes,Sat,Dinner,2 240 | 35.83,4.67,No,Sat,Dinner,3 241 | 29.03,5.92,No,Sat,Dinner,3 242 | 27.18,2.0,Yes,Sat,Dinner,2 243 | 22.67,2.0,Yes,Sat,Dinner,2 244 | 17.82,1.75,No,Sat,Dinner,2 245 | 18.78,3.0,No,Thur,Dinner,2 246 | -------------------------------------------------------------------------------- /matplotlib/iris.csv: -------------------------------------------------------------------------------- 1 | sepal_length,sepal_width,petal_length,petal_width,species 2 | 5.1,3.5,1.4,0.2,setosa 3 | 4.9,3,1.4,0.2,setosa 4 | 4.7,3.2,1.3,0.2,setosa 5 | 4.6,3.1,1.5,0.2,setosa 6 | 5,3.6,1.4,0.2,setosa 7 | 5.4,3.9,1.7,0.4,setosa 8 | 4.6,3.4,1.4,0.3,setosa 9 | 5,3.4,1.5,0.2,setosa 10 | 4.4,2.9,1.4,0.2,setosa 11 | 4.9,3.1,1.5,0.1,setosa 12 | 5.4,3.7,1.5,0.2,setosa 13 | 4.8,3.4,1.6,0.2,setosa 14 | 4.8,3,1.4,0.1,setosa 15 | 4.3,3,1.1,0.1,setosa 16 | 5.8,4,1.2,0.2,setosa 17 | 5.7,4.4,1.5,0.4,setosa 18 | 5.4,3.9,1.3,0.4,setosa 19 | 5.1,3.5,1.4,0.3,setosa 20 | 5.7,3.8,1.7,0.3,setosa 21 | 5.1,3.8,1.5,0.3,setosa 22 | 5.4,3.4,1.7,0.2,setosa 23 | 5.1,3.7,1.5,0.4,setosa 24 | 4.6,3.6,1,0.2,setosa 25 | 5.1,3.3,1.7,0.5,setosa 26 | 4.8,3.4,1.9,0.2,setosa 27 | 5,3,1.6,0.2,setosa 28 | 5,3.4,1.6,0.4,setosa 29 | 5.2,3.5,1.5,0.2,setosa 30 | 5.2,3.4,1.4,0.2,setosa 31 | 4.7,3.2,1.6,0.2,setosa 32 | 4.8,3.1,1.6,0.2,setosa 33 | 5.4,3.4,1.5,0.4,setosa 34 | 5.2,4.1,1.5,0.1,setosa 35 | 5.5,4.2,1.4,0.2,setosa 36 | 4.9,3.1,1.5,0.1,setosa 37 | 5,3.2,1.2,0.2,setosa 38 | 5.5,3.5,1.3,0.2,setosa 39 | 4.9,3.1,1.5,0.1,setosa 40 | 4.4,3,1.3,0.2,setosa 41 | 5.1,3.4,1.5,0.2,setosa 42 | 5,3.5,1.3,0.3,setosa 43 | 4.5,2.3,1.3,0.3,setosa 44 | 4.4,3.2,1.3,0.2,setosa 45 | 
5,3.5,1.6,0.6,setosa 46 | 5.1,3.8,1.9,0.4,setosa 47 | 4.8,3,1.4,0.3,setosa 48 | 5.1,3.8,1.6,0.2,setosa 49 | 4.6,3.2,1.4,0.2,setosa 50 | 5.3,3.7,1.5,0.2,setosa 51 | 5,3.3,1.4,0.2,setosa 52 | 7,3.2,4.7,1.4,versicolor 53 | 6.4,3.2,4.5,1.5,versicolor 54 | 6.9,3.1,4.9,1.5,versicolor 55 | 5.5,2.3,4,1.3,versicolor 56 | 6.5,2.8,4.6,1.5,versicolor 57 | 5.7,2.8,4.5,1.3,versicolor 58 | 6.3,3.3,4.7,1.6,versicolor 59 | 4.9,2.4,3.3,1,versicolor 60 | 6.6,2.9,4.6,1.3,versicolor 61 | 5.2,2.7,3.9,1.4,versicolor 62 | 5,2,3.5,1,versicolor 63 | 5.9,3,4.2,1.5,versicolor 64 | 6,2.2,4,1,versicolor 65 | 6.1,2.9,4.7,1.4,versicolor 66 | 5.6,2.9,3.6,1.3,versicolor 67 | 6.7,3.1,4.4,1.4,versicolor 68 | 5.6,3,4.5,1.5,versicolor 69 | 5.8,2.7,4.1,1,versicolor 70 | 6.2,2.2,4.5,1.5,versicolor 71 | 5.6,2.5,3.9,1.1,versicolor 72 | 5.9,3.2,4.8,1.8,versicolor 73 | 6.1,2.8,4,1.3,versicolor 74 | 6.3,2.5,4.9,1.5,versicolor 75 | 6.1,2.8,4.7,1.2,versicolor 76 | 6.4,2.9,4.3,1.3,versicolor 77 | 6.6,3,4.4,1.4,versicolor 78 | 6.8,2.8,4.8,1.4,versicolor 79 | 6.7,3,5,1.7,versicolor 80 | 6,2.9,4.5,1.5,versicolor 81 | 5.7,2.6,3.5,1,versicolor 82 | 5.5,2.4,3.8,1.1,versicolor 83 | 5.5,2.4,3.7,1,versicolor 84 | 5.8,2.7,3.9,1.2,versicolor 85 | 6,2.7,5.1,1.6,versicolor 86 | 5.4,3,4.5,1.5,versicolor 87 | 6,3.4,4.5,1.6,versicolor 88 | 6.7,3.1,4.7,1.5,versicolor 89 | 6.3,2.3,4.4,1.3,versicolor 90 | 5.6,3,4.1,1.3,versicolor 91 | 5.5,2.5,4,1.3,versicolor 92 | 5.5,2.6,4.4,1.2,versicolor 93 | 6.1,3,4.6,1.4,versicolor 94 | 5.8,2.6,4,1.2,versicolor 95 | 5,2.3,3.3,1,versicolor 96 | 5.6,2.7,4.2,1.3,versicolor 97 | 5.7,3,4.2,1.2,versicolor 98 | 5.7,2.9,4.2,1.3,versicolor 99 | 6.2,2.9,4.3,1.3,versicolor 100 | 5.1,2.5,3,1.1,versicolor 101 | 5.7,2.8,4.1,1.3,versicolor 102 | 6.3,3.3,6,2.5,virginica 103 | 5.8,2.7,5.1,1.9,virginica 104 | 7.1,3,5.9,2.1,virginica 105 | 6.3,2.9,5.6,1.8,virginica 106 | 6.5,3,5.8,2.2,virginica 107 | 7.6,3,6.6,2.1,virginica 108 | 4.9,2.5,4.5,1.7,virginica 109 | 7.3,2.9,6.3,1.8,virginica 110 | 
6.7,2.5,5.8,1.8,virginica 111 | 7.2,3.6,6.1,2.5,virginica 112 | 6.5,3.2,5.1,2,virginica 113 | 6.4,2.7,5.3,1.9,virginica 114 | 6.8,3,5.5,2.1,virginica 115 | 5.7,2.5,5,2,virginica 116 | 5.8,2.8,5.1,2.4,virginica 117 | 6.4,3.2,5.3,2.3,virginica 118 | 6.5,3,5.5,1.8,virginica 119 | 7.7,3.8,6.7,2.2,virginica 120 | 7.7,2.6,6.9,2.3,virginica 121 | 6,2.2,5,1.5,virginica 122 | 6.9,3.2,5.7,2.3,virginica 123 | 5.6,2.8,4.9,2,virginica 124 | 7.7,2.8,6.7,2,virginica 125 | 6.3,2.7,4.9,1.8,virginica 126 | 6.7,3.3,5.7,2.1,virginica 127 | 7.2,3.2,6,1.8,virginica 128 | 6.2,2.8,4.8,1.8,virginica 129 | 6.1,3,4.9,1.8,virginica 130 | 6.4,2.8,5.6,2.1,virginica 131 | 7.2,3,5.8,1.6,virginica 132 | 7.4,2.8,6.1,1.9,virginica 133 | 7.9,3.8,6.4,2,virginica 134 | 6.4,2.8,5.6,2.2,virginica 135 | 6.3,2.8,5.1,1.5,virginica 136 | 6.1,2.6,5.6,1.4,virginica 137 | 7.7,3,6.1,2.3,virginica 138 | 6.3,3.4,5.6,2.4,virginica 139 | 6.4,3.1,5.5,1.8,virginica 140 | 6,3,4.8,1.8,virginica 141 | 6.9,3.1,5.4,2.1,virginica 142 | 6.7,3.1,5.6,2.4,virginica 143 | 6.9,3.1,5.1,2.3,virginica 144 | 5.8,2.7,5.1,1.9,virginica 145 | 6.8,3.2,5.9,2.3,virginica 146 | 6.7,3.3,5.7,2.5,virginica 147 | 6.7,3,5.2,2.3,virginica 148 | 6.3,2.5,5,1.9,virginica 149 | 6.5,3,5.2,2,virginica 150 | 6.2,3.4,5.4,2.3,virginica 151 | 5.9,3,5.1,1.8,virginica 152 | -------------------------------------------------------------------------------- /matplotlib/plots/plot1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/veb-101/Numpy-Pandas-Matplotlib-Tutorial/c7584da8b334ed84d77ac5d4e191f21c78c0b2d4/matplotlib/plots/plot1-1.png -------------------------------------------------------------------------------- /matplotlib/plots/plot1-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/veb-101/Numpy-Pandas-Matplotlib-Tutorial/c7584da8b334ed84d77ac5d4e191f21c78c0b2d4/matplotlib/plots/plot1-2.png 
-------------------------------------------------------------------------------- /matplotlib/plots/plot10_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/veb-101/Numpy-Pandas-Matplotlib-Tutorial/c7584da8b334ed84d77ac5d4e191f21c78c0b2d4/matplotlib/plots/plot10_1.png -------------------------------------------------------------------------------- /matplotlib/plots/plot10_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/veb-101/Numpy-Pandas-Matplotlib-Tutorial/c7584da8b334ed84d77ac5d4e191f21c78c0b2d4/matplotlib/plots/plot10_2.png -------------------------------------------------------------------------------- /matplotlib/plots/plot3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/veb-101/Numpy-Pandas-Matplotlib-Tutorial/c7584da8b334ed84d77ac5d4e191f21c78c0b2d4/matplotlib/plots/plot3-1.png -------------------------------------------------------------------------------- /matplotlib/snippets.txt: -------------------------------------------------------------------------------- 1 | # Part 9 2 | # Another way to do it without clearing the Axis 3 | from itertools import count 4 | import pandas as pd 5 | import matplotlib.pyplot as plt 6 | from matplotlib.animation import FuncAnimation 7 | 8 | plt.style.use('fivethirtyeight') 9 | 10 | x_vals = [] 11 | y_vals = [] 12 | 13 | plt.plot([], [], label='Channel 1') 14 | plt.plot([], [], label='Channel 2') 15 | 16 | 17 | def animate(i): 18 | data = pd.read_csv('data.csv') 19 | x = data['x_value'] 20 | y1 = data['total_1'] 21 | y2 = data['total_2'] 22 | 23 | ax = plt.gca() 24 | line1, line2 = ax.lines 25 | 26 | line1.set_data(x, y1) 27 | line2.set_data(x, y2) 28 | 29 | xlim_low, xlim_high = ax.get_xlim() 30 | ylim_low, ylim_high = ax.get_ylim() 31 | 32 | ax.set_xlim(xlim_low, (x.max() + 5)) 33 | 34 | y1max = y1.max() 
35 | y2max = y2.max() 36 | current_ymax = y1max if (y1max > y2max) else y2max 37 | 38 | y1min = y1.min() 39 | y2min = y2.min() 40 | current_ymin = y1min if (y1min < y2min) else y2min 41 | 42 | ax.set_ylim((current_ymin - 5), (current_ymax + 5)) 43 | 44 | 45 | ani = FuncAnimation(plt.gcf(), animate, interval=1000) 46 | 47 | plt.legend() 48 | plt.tight_layout() 49 | plt.show() 50 | -------------------------------------------------------------------------------- /numpy/data.txt: -------------------------------------------------------------------------------- 1 | 1,13,21,11,196,75,4,3,34,6,7,8,0,1,2,3,4,5 2 | 3,42,12,33,766,75,4,55,6,4,3,4,5,6,7,0,11,12 3 | 1,22,33,11,999,11,2,1,78,0,1,2,9,8,7,1,76,88 4 | -------------------------------------------------------------------------------- /pandas/Data Loading, Storage, and File Formats.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "toc": true 7 | }, 8 | "source": [ 9 | "

Table of Contents

\n", 10 | "" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": { 17 | "ExecuteTime": { 18 | "end_time": "2019-12-24T17:44:12.942927Z", 19 | "start_time": "2019-12-24T17:44:11.943916Z" 20 | } 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "import pandas as pd\n", 25 | "import numpy as np" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "# Reading and Writing Data in Text Format" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 2, 38 | "metadata": { 39 | "ExecuteTime": { 40 | "end_time": "2019-12-24T17:44:12.966381Z", 41 | "start_time": "2019-12-24T17:44:12.945392Z" 42 | } 43 | }, 44 | "outputs": [ 45 | { 46 | "name": "stdout", 47 | "output_type": "stream", 48 | "text": [ 49 | "a,b,c,d,message\n", 50 | "1,2,3,4,hello\n", 51 | "5,6,7,8,world\n", 52 | "9,10,11,12,foo\n" 53 | ] 54 | } 55 | ], 56 | "source": [ 57 | "!type examples\\ex1.csv" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 3, 63 | "metadata": { 64 | "ExecuteTime": { 65 | "end_time": "2019-12-24T17:44:13.113898Z", 66 | "start_time": "2019-12-24T17:44:12.967654Z" 67 | } 68 | }, 69 | "outputs": [ 70 | { 71 | "data": { 72 | "text/html": [ 73 | "
\n", 74 | "\n", 87 | "\n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | "
abcdmessage
01234hello
15678world
29101112foo
\n", 125 | "
" 126 | ], 127 | "text/plain": [ 128 | " a b c d message\n", 129 | "0 1 2 3 4 hello\n", 130 | "1 5 6 7 8 world\n", 131 | "2 9 10 11 12 foo" 132 | ] 133 | }, 134 | "execution_count": 3, 135 | "metadata": {}, 136 | "output_type": "execute_result" 137 | } 138 | ], 139 | "source": [ 140 | "df = pd.read_csv(r'examples\\ex1.csv')\n", 141 | "df" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 4, 147 | "metadata": { 148 | "ExecuteTime": { 149 | "end_time": "2019-12-24T17:44:13.284776Z", 150 | "start_time": "2019-12-24T17:44:13.116362Z" 151 | } 152 | }, 153 | "outputs": [ 154 | { 155 | "data": { 156 | "text/html": [ 157 | "
\n", 158 | "\n", 171 | "\n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | "
abcdmessage
01234hello
15678world
29101112foo
\n", 209 | "
" 210 | ], 211 | "text/plain": [ 212 | " a b c d message\n", 213 | "0 1 2 3 4 hello\n", 214 | "1 5 6 7 8 world\n", 215 | "2 9 10 11 12 foo" 216 | ] 217 | }, 218 | "execution_count": 4, 219 | "metadata": {}, 220 | "output_type": "execute_result" 221 | } 222 | ], 223 | "source": [ 224 | "pd.read_table('examples/ex1.csv', sep=',')" 225 | ] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": {}, 230 | "source": [ 231 | "A file will not always have a header row. Consider this file" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": 5, 237 | "metadata": { 238 | "ExecuteTime": { 239 | "end_time": "2019-12-24T17:44:13.454943Z", 240 | "start_time": "2019-12-24T17:44:13.290845Z" 241 | } 242 | }, 243 | "outputs": [ 244 | { 245 | "name": "stdout", 246 | "output_type": "stream", 247 | "text": [ 248 | "1,2,3,4,hello\n", 249 | "5,6,7,8,world\n", 250 | "9,10,11,12,foo\n" 251 | ] 252 | } 253 | ], 254 | "source": [ 255 | "!type examples\\ex2.csv" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 6, 261 | "metadata": { 262 | "ExecuteTime": { 263 | "end_time": "2019-12-24T17:44:13.597498Z", 264 | "start_time": "2019-12-24T17:44:13.458877Z" 265 | } 266 | }, 267 | "outputs": [ 268 | { 269 | "data": { 270 | "text/html": [ 271 | "
\n", 272 | "\n", 285 | "\n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | "
01234
01234hello
15678world
29101112foo
\n", 323 | "
" 324 | ], 325 | "text/plain": [ 326 | " 0 1 2 3 4\n", 327 | "0 1 2 3 4 hello\n", 328 | "1 5 6 7 8 world\n", 329 | "2 9 10 11 12 foo" 330 | ] 331 | }, 332 | "execution_count": 6, 333 | "metadata": {}, 334 | "output_type": "execute_result" 335 | } 336 | ], 337 | "source": [ 338 | "pd.read_csv(r'examples/ex2.csv', header=None)" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": 7, 344 | "metadata": { 345 | "ExecuteTime": { 346 | "end_time": "2019-12-24T17:44:13.738188Z", 347 | "start_time": "2019-12-24T17:44:13.609101Z" 348 | } 349 | }, 350 | "outputs": [ 351 | { 352 | "data": { 353 | "text/html": [ 354 | "
\n", 355 | "\n", 368 | "\n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | "
abcdmessage
01234hello
15678world
29101112foo
\n", 406 | "
" 407 | ], 408 | "text/plain": [ 409 | " a b c d message\n", 410 | "0 1 2 3 4 hello\n", 411 | "1 5 6 7 8 world\n", 412 | "2 9 10 11 12 foo" 413 | ] 414 | }, 415 | "execution_count": 7, 416 | "metadata": {}, 417 | "output_type": "execute_result" 418 | } 419 | ], 420 | "source": [ 421 | "pd.read_csv(r'examples/ex2.csv', names=['a', 'b', 'c', 'd', 'message'])" 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": 8, 427 | "metadata": { 428 | "ExecuteTime": { 429 | "end_time": "2019-12-24T17:44:13.885401Z", 430 | "start_time": "2019-12-24T17:44:13.741950Z" 431 | } 432 | }, 433 | "outputs": [], 434 | "source": [ 435 | "names=['a', 'b', 'c', 'd', 'message']" 436 | ] 437 | }, 438 | { 439 | "cell_type": "code", 440 | "execution_count": 9, 441 | "metadata": { 442 | "ExecuteTime": { 443 | "end_time": "2019-12-24T17:44:14.053188Z", 444 | "start_time": "2019-12-24T17:44:13.885401Z" 445 | } 446 | }, 447 | "outputs": [ 448 | { 449 | "data": { 450 | "text/html": [ 451 | "
\n", 452 | "\n", 465 | "\n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | "
abcd
message
hello1234
world5678
foo9101112
\n", 506 | "
" 507 | ], 508 | "text/plain": [ 509 | " a b c d\n", 510 | "message \n", 511 | "hello 1 2 3 4\n", 512 | "world 5 6 7 8\n", 513 | "foo 9 10 11 12" 514 | ] 515 | }, 516 | "execution_count": 9, 517 | "metadata": {}, 518 | "output_type": "execute_result" 519 | } 520 | ], 521 | "source": [ 522 | "# make message column to be the index of the returned DataFrame\n", 523 | "\n", 524 | "pd.read_csv(r'examples/ex2.csv', names=names, index_col='message')" 525 | ] 526 | }, 527 | { 528 | "cell_type": "markdown", 529 | "metadata": {}, 530 | "source": [ 531 | "In the event that you want to form a hierarchical index from multiple columns, pass a list of column numbers or names" 532 | ] 533 | }, 534 | { 535 | "cell_type": "code", 536 | "execution_count": 10, 537 | "metadata": { 538 | "ExecuteTime": { 539 | "end_time": "2019-12-24T17:44:14.240514Z", 540 | "start_time": "2019-12-24T17:44:14.054189Z" 541 | } 542 | }, 543 | "outputs": [ 544 | { 545 | "name": "stdout", 546 | "output_type": "stream", 547 | "text": [ 548 | "key1,key2,value1,value2\n", 549 | "one,a,1,2\n", 550 | "one,b,3,4\n", 551 | "one,c,5,6\n", 552 | "one,d,7,8\n", 553 | "two,a,9,10\n", 554 | "two,b,11,12\n", 555 | "two,c,13,14\n", 556 | "two,d,15,16\n" 557 | ] 558 | } 559 | ], 560 | "source": [ 561 | "!type examples\\csv_mindex.csv" 562 | ] 563 | }, 564 | { 565 | "cell_type": "code", 566 | "execution_count": 11, 567 | "metadata": { 568 | "ExecuteTime": { 569 | "end_time": "2019-12-24T17:44:14.396063Z", 570 | "start_time": "2019-12-24T17:44:14.242881Z" 571 | } 572 | }, 573 | "outputs": [ 574 | { 575 | "data": { 576 | "text/html": [ 577 | "
\n", 578 | "\n", 591 | "\n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | "
value1value2
key1key2
onea12
b34
c56
d78
twoa910
b1112
c1314
d1516
\n", 651 | "
" 652 | ], 653 | "text/plain": [ 654 | " value1 value2\n", 655 | "key1 key2 \n", 656 | "one a 1 2\n", 657 | " b 3 4\n", 658 | " c 5 6\n", 659 | " d 7 8\n", 660 | "two a 9 10\n", 661 | " b 11 12\n", 662 | " c 13 14\n", 663 | " d 15 16" 664 | ] 665 | }, 666 | "execution_count": 11, 667 | "metadata": {}, 668 | "output_type": "execute_result" 669 | } 670 | ], 671 | "source": [ 672 | "parsed = pd.read_csv(r'examples/csv_mindex.csv', index_col=['key1', 'key2'])\n", 673 | "parsed" 674 | ] 675 | }, 676 | { 677 | "cell_type": "markdown", 678 | "metadata": {}, 679 | "source": [ 680 | "In some cases, a table might not have a fixed delimiter, using whitespace or some other pattern to separate fields." 681 | ] 682 | }, 683 | { 684 | "cell_type": "code", 685 | "execution_count": 12, 686 | "metadata": { 687 | "ExecuteTime": { 688 | "end_time": "2019-12-24T17:44:14.540609Z", 689 | "start_time": "2019-12-24T17:44:14.397062Z" 690 | } 691 | }, 692 | "outputs": [ 693 | { 694 | "data": { 695 | "text/plain": [ 696 | "[' A B C\\n',\n", 697 | " 'aaa -0.264438 -1.026059 -0.619500\\n',\n", 698 | " 'bbb 0.927272 0.302904 -0.032399\\n',\n", 699 | " 'ccc -0.264273 -0.386314 -0.217601\\n',\n", 700 | " 'ddd -0.871858 -0.348382 1.100491\\n']" 701 | ] 702 | }, 703 | "execution_count": 12, 704 | "metadata": {}, 705 | "output_type": "execute_result" 706 | } 707 | ], 708 | "source": [ 709 | "list(open(r'examples/ex3.txt'))" 710 | ] 711 | }, 712 | { 713 | "cell_type": "code", 714 | "execution_count": 13, 715 | "metadata": { 716 | "ExecuteTime": { 717 | "end_time": "2019-12-24T17:44:14.697406Z", 718 | "start_time": "2019-12-24T17:44:14.546683Z" 719 | } 720 | }, 721 | "outputs": [ 722 | { 723 | "data": { 724 | "text/html": [ 725 | "
\n", 726 | "\n", 739 | "\n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | "
ABC
aaa-0.264438-1.026059-0.619500
bbb0.9272720.302904-0.032399
ccc-0.264273-0.386314-0.217601
ddd-0.871858-0.3483821.100491
\n", 775 | "
" 776 | ], 777 | "text/plain": [ 778 | " A B C\n", 779 | "aaa -0.264438 -1.026059 -0.619500\n", 780 | "bbb 0.927272 0.302904 -0.032399\n", 781 | "ccc -0.264273 -0.386314 -0.217601\n", 782 | "ddd -0.871858 -0.348382 1.100491" 783 | ] 784 | }, 785 | "execution_count": 13, 786 | "metadata": {}, 787 | "output_type": "execute_result" 788 | } 789 | ], 790 | "source": [ 791 | "result = pd.read_table(r'examples/ex3.txt', sep='\\s+')\n", 792 | "result" 793 | ] 794 | }, 795 | { 796 | "cell_type": "code", 797 | "execution_count": 14, 798 | "metadata": { 799 | "ExecuteTime": { 800 | "end_time": "2019-12-24T17:44:14.864981Z", 801 | "start_time": "2019-12-24T17:44:14.700337Z" 802 | } 803 | }, 804 | "outputs": [ 805 | { 806 | "name": "stdout", 807 | "output_type": "stream", 808 | "text": [ 809 | "# hey!\n", 810 | "a,b,c,d,message\n", 811 | "# just wanted to make things more difficult for you\n", 812 | "# who reads CSV files with computers, anyway?\n", 813 | "1,2,3,4,hello\n", 814 | "5,6,7,8,world\n", 815 | "9,10,11,12,foo\n" 816 | ] 817 | } 818 | ], 819 | "source": [ 820 | "!type examples\\ex4.csv" 821 | ] 822 | }, 823 | { 824 | "cell_type": "code", 825 | "execution_count": 15, 826 | "metadata": { 827 | "ExecuteTime": { 828 | "end_time": "2019-12-24T17:44:14.990299Z", 829 | "start_time": "2019-12-24T17:44:14.867993Z" 830 | } 831 | }, 832 | "outputs": [ 833 | { 834 | "data": { 835 | "text/html": [ 836 | "
\n", 837 | "\n", 850 | "\n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | "
abcdmessage
01234hello
15678world
29101112foo
\n", 888 | "
" 889 | ], 890 | "text/plain": [ 891 | " a b c d message\n", 892 | "0 1 2 3 4 hello\n", 893 | "1 5 6 7 8 world\n", 894 | "2 9 10 11 12 foo" 895 | ] 896 | }, 897 | "execution_count": 15, 898 | "metadata": {}, 899 | "output_type": "execute_result" 900 | } 901 | ], 902 | "source": [ 903 | "pd.read_csv(r'examples/ex4.csv', skiprows=[0, 2,3])" 904 | ] 905 | }, 906 | { 907 | "cell_type": "markdown", 908 | "metadata": {}, 909 | "source": [ 910 | "Handling missing values is an important and frequently nuanced part of the file parsing process. Missing data is usually either not present (empty string) or marked by some sentinel value. By default, pandas uses a set of commonly occurring sentinels, such as **NA** and **NULL**" 911 | ] 912 | }, 913 | { 914 | "cell_type": "code", 915 | "execution_count": 16, 916 | "metadata": { 917 | "ExecuteTime": { 918 | "end_time": "2019-12-24T17:44:15.168025Z", 919 | "start_time": "2019-12-24T17:44:14.994393Z" 920 | } 921 | }, 922 | "outputs": [ 923 | { 924 | "name": "stdout", 925 | "output_type": "stream", 926 | "text": [ 927 | "something,a,b,c,d,message\n", 928 | "one,1,2,3,4,NA\n", 929 | "two,5,6,,8,world\n", 930 | "three,9,10,11,12,foo\n" 931 | ] 932 | } 933 | ], 934 | "source": [ 935 | "!type examples\\ex5.csv" 936 | ] 937 | }, 938 | { 939 | "cell_type": "code", 940 | "execution_count": 17, 941 | "metadata": { 942 | "ExecuteTime": { 943 | "end_time": "2019-12-24T17:44:15.304991Z", 944 | "start_time": "2019-12-24T17:44:15.169022Z" 945 | } 946 | }, 947 | "outputs": [ 948 | { 949 | "data": { 950 | "text/html": [ 951 | "
\n", 952 | "\n", 965 | "\n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | "
somethingabcdmessage
0one123.04NaN
1two56NaN8world
2three91011.012foo
\n", 1007 | "
" 1008 | ], 1009 | "text/plain": [ 1010 | " something a b c d message\n", 1011 | "0 one 1 2 3.0 4 NaN\n", 1012 | "1 two 5 6 NaN 8 world\n", 1013 | "2 three 9 10 11.0 12 foo" 1014 | ] 1015 | }, 1016 | "execution_count": 17, 1017 | "metadata": {}, 1018 | "output_type": "execute_result" 1019 | } 1020 | ], 1021 | "source": [ 1022 | "result = pd.read_csv(r'examples/ex5.csv')\n", 1023 | "result" 1024 | ] 1025 | }, 1026 | { 1027 | "cell_type": "code", 1028 | "execution_count": 18, 1029 | "metadata": { 1030 | "ExecuteTime": { 1031 | "end_time": "2019-12-24T17:44:15.550424Z", 1032 | "start_time": "2019-12-24T17:44:15.307795Z" 1033 | } 1034 | }, 1035 | "outputs": [ 1036 | { 1037 | "data": { 1038 | "text/html": [ 1039 | "
\n", 1040 | "\n", 1053 | "\n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | " \n", 1092 | " \n", 1093 | " \n", 1094 | "
somethingabcdmessage
0FalseFalseFalseFalseFalseTrue
1FalseFalseFalseTrueFalseFalse
2FalseFalseFalseFalseFalseFalse
\n", 1095 | "
" 1096 | ], 1097 | "text/plain": [ 1098 | " something a b c d message\n", 1099 | "0 False False False False False True\n", 1100 | "1 False False False True False False\n", 1101 | "2 False False False False False False" 1102 | ] 1103 | }, 1104 | "execution_count": 18, 1105 | "metadata": {}, 1106 | "output_type": "execute_result" 1107 | } 1108 | ], 1109 | "source": [ 1110 | "pd.isnull(result)" 1111 | ] 1112 | }, 1113 | { 1114 | "cell_type": "markdown", 1115 | "metadata": {}, 1116 | "source": [ 1117 | "The na_values option can take either a list or set of strings to consider missing values" 1118 | ] 1119 | }, 1120 | { 1121 | "cell_type": "code", 1122 | "execution_count": 19, 1123 | "metadata": { 1124 | "ExecuteTime": { 1125 | "end_time": "2019-12-24T17:44:15.711268Z", 1126 | "start_time": "2019-12-24T17:44:15.554376Z" 1127 | } 1128 | }, 1129 | "outputs": [ 1130 | { 1131 | "data": { 1132 | "text/html": [ 1133 | "
\n", 1134 | "\n", 1147 | "\n", 1148 | " \n", 1149 | " \n", 1150 | " \n", 1151 | " \n", 1152 | " \n", 1153 | " \n", 1154 | " \n", 1155 | " \n", 1156 | " \n", 1157 | " \n", 1158 | " \n", 1159 | " \n", 1160 | " \n", 1161 | " \n", 1162 | " \n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | " \n", 1186 | " \n", 1187 | " \n", 1188 | "
somethingabcdmessage
0one123.04NaN
1two56NaN8world
2three91011.012foo
\n", 1189 | "
" 1190 | ], 1191 | "text/plain": [ 1192 | " something a b c d message\n", 1193 | "0 one 1 2 3.0 4 NaN\n", 1194 | "1 two 5 6 NaN 8 world\n", 1195 | "2 three 9 10 11.0 12 foo" 1196 | ] 1197 | }, 1198 | "execution_count": 19, 1199 | "metadata": {}, 1200 | "output_type": "execute_result" 1201 | } 1202 | ], 1203 | "source": [ 1204 | "result = pd.read_csv(r'examples/ex5.csv', na_values=['NULL'])\n", 1205 | "result" 1206 | ] 1207 | }, 1208 | { 1209 | "cell_type": "code", 1210 | "execution_count": 20, 1211 | "metadata": { 1212 | "ExecuteTime": { 1213 | "end_time": "2019-12-24T17:44:15.847179Z", 1214 | "start_time": "2019-12-24T17:44:15.711268Z" 1215 | } 1216 | }, 1217 | "outputs": [], 1218 | "source": [ 1219 | "# Different NA sentinels can be specified for each column in a dict\n", 1220 | "\n", 1221 | "sentinels = {'message': ['foo', 'NA'], 'something':['two']}" 1222 | ] 1223 | }, 1224 | { 1225 | "cell_type": "code", 1226 | "execution_count": 21, 1227 | "metadata": { 1228 | "ExecuteTime": { 1229 | "end_time": "2019-12-24T17:44:16.005092Z", 1230 | "start_time": "2019-12-24T17:44:15.851139Z" 1231 | } 1232 | }, 1233 | "outputs": [ 1234 | { 1235 | "data": { 1236 | "text/html": [ 1237 | "
\n", 1238 | "\n", 1251 | "\n", 1252 | " \n", 1253 | " \n", 1254 | " \n", 1255 | " \n", 1256 | " \n", 1257 | " \n", 1258 | " \n", 1259 | " \n", 1260 | " \n", 1261 | " \n", 1262 | " \n", 1263 | " \n", 1264 | " \n", 1265 | " \n", 1266 | " \n", 1267 | " \n", 1268 | " \n", 1269 | " \n", 1270 | " \n", 1271 | " \n", 1272 | " \n", 1273 | " \n", 1274 | " \n", 1275 | " \n", 1276 | " \n", 1277 | " \n", 1278 | " \n", 1279 | " \n", 1280 | " \n", 1281 | " \n", 1282 | " \n", 1283 | " \n", 1284 | " \n", 1285 | " \n", 1286 | " \n", 1287 | " \n", 1288 | " \n", 1289 | " \n", 1290 | " \n", 1291 | " \n", 1292 | "
somethingabcdmessage
0one123.04NaN
1NaN56NaN8world
2three91011.012NaN
\n", 1293 | "
" 1294 | ], 1295 | "text/plain": [ 1296 | " something a b c d message\n", 1297 | "0 one 1 2 3.0 4 NaN\n", 1298 | "1 NaN 5 6 NaN 8 world\n", 1299 | "2 three 9 10 11.0 12 NaN" 1300 | ] 1301 | }, 1302 | "execution_count": 21, 1303 | "metadata": {}, 1304 | "output_type": "execute_result" 1305 | } 1306 | ], 1307 | "source": [ 1308 | "pd.read_csv(r'examples/ex5.csv', na_values=sentinels)" 1309 | ] 1310 | }, 1311 | { 1312 | "cell_type": "markdown", 1313 | "metadata": { 1314 | "ExecuteTime": { 1315 | "end_time": "2019-12-23T12:59:01.714350Z", 1316 | "start_time": "2019-12-23T12:59:01.697710Z" 1317 | } 1318 | }, 1319 | "source": [ 1320 | "## Reading Text Files in Pieces" 1321 | ] 1322 | }, 1323 | { 1324 | "cell_type": "markdown", 1325 | "metadata": {}, 1326 | "source": [ 1327 | "When processing very large files or figuring out the right set of arguments to correctly process a large file, you may only want to read in a small piece of a file or iterate\n", 1328 | "through smaller chunks of the file. Before we look at a large file, we make the pandas display settings more compact" 1329 | ] 1330 | }, 1331 | { 1332 | "cell_type": "code", 1333 | "execution_count": 22, 1334 | "metadata": { 1335 | "ExecuteTime": { 1336 | "end_time": "2019-12-24T17:44:16.156659Z", 1337 | "start_time": "2019-12-24T17:44:16.007763Z" 1338 | } 1339 | }, 1340 | "outputs": [], 1341 | "source": [ 1342 | "pd.options.display.max_rows = 10" 1343 | ] 1344 | }, 1345 | { 1346 | "cell_type": "code", 1347 | "execution_count": 23, 1348 | "metadata": { 1349 | "ExecuteTime": { 1350 | "end_time": "2019-12-24T17:44:16.353691Z", 1351 | "start_time": "2019-12-24T17:44:16.161827Z" 1352 | } 1353 | }, 1354 | "outputs": [ 1355 | { 1356 | "data": { 1357 | "text/html": [ 1358 | "
\n", 1359 | "\n", 1372 | "\n", 1373 | " \n", 1374 | " \n", 1375 | " \n", 1376 | " \n", 1377 | " \n", 1378 | " \n", 1379 | " \n", 1380 | " \n", 1381 | " \n", 1382 | " \n", 1383 | " \n", 1384 | " \n", 1385 | " \n", 1386 | " \n", 1387 | " \n", 1388 | " \n", 1389 | " \n", 1390 | " \n", 1391 | " \n", 1392 | " \n", 1393 | " \n", 1394 | " \n", 1395 | " \n", 1396 | " \n", 1397 | " \n", 1398 | " \n", 1399 | " \n", 1400 | " \n", 1401 | " \n", 1402 | " \n", 1403 | " \n", 1404 | " \n", 1405 | " \n", 1406 | " \n", 1407 | " \n", 1408 | " \n", 1409 | " \n", 1410 | " \n", 1411 | " \n", 1412 | " \n", 1413 | " \n", 1414 | " \n", 1415 | " \n", 1416 | " \n", 1417 | " \n", 1418 | " \n", 1419 | " \n", 1420 | " \n", 1421 | " \n", 1422 | " \n", 1423 | " \n", 1424 | " \n", 1425 | " \n", 1426 | " \n", 1427 | " \n", 1428 | " \n", 1429 | " \n", 1430 | " \n", 1431 | " \n", 1432 | " \n", 1433 | " \n", 1434 | " \n", 1435 | " \n", 1436 | " \n", 1437 | " \n", 1438 | " \n", 1439 | " \n", 1440 | " \n", 1441 | " \n", 1442 | " \n", 1443 | " \n", 1444 | " \n", 1445 | " \n", 1446 | " \n", 1447 | " \n", 1448 | " \n", 1449 | " \n", 1450 | " \n", 1451 | " \n", 1452 | " \n", 1453 | " \n", 1454 | " \n", 1455 | " \n", 1456 | " \n", 1457 | " \n", 1458 | " \n", 1459 | " \n", 1460 | " \n", 1461 | " \n", 1462 | " \n", 1463 | " \n", 1464 | " \n", 1465 | " \n", 1466 | " \n", 1467 | " \n", 1468 | " \n", 1469 | " \n", 1470 | " \n", 1471 | " \n", 1472 | " \n", 1473 | "
onetwothreefourkey
00.467976-0.038649-0.295344-1.824726L
1-0.3588931.4044530.704965-0.200638B
2-0.5018400.659254-0.421691-0.057688G
30.2048861.0741341.388361-0.982404R
40.354628-0.1331160.283763-0.837063Q
..................
99952.311896-0.417070-1.409599-0.515821L
9996-0.479893-0.6504190.745152-0.646038E
99970.5233310.7871120.4860661.093156K
9998-0.3625590.598894-1.8432010.887292G
9999-0.096376-1.012999-0.657431-0.5733150
\n", 1474 | "

10000 rows × 5 columns

\n", 1475 | "
" 1476 | ], 1477 | "text/plain": [ 1478 | " one two three four key\n", 1479 | "0 0.467976 -0.038649 -0.295344 -1.824726 L\n", 1480 | "1 -0.358893 1.404453 0.704965 -0.200638 B\n", 1481 | "2 -0.501840 0.659254 -0.421691 -0.057688 G\n", 1482 | "3 0.204886 1.074134 1.388361 -0.982404 R\n", 1483 | "4 0.354628 -0.133116 0.283763 -0.837063 Q\n", 1484 | "... ... ... ... ... ..\n", 1485 | "9995 2.311896 -0.417070 -1.409599 -0.515821 L\n", 1486 | "9996 -0.479893 -0.650419 0.745152 -0.646038 E\n", 1487 | "9997 0.523331 0.787112 0.486066 1.093156 K\n", 1488 | "9998 -0.362559 0.598894 -1.843201 0.887292 G\n", 1489 | "9999 -0.096376 -1.012999 -0.657431 -0.573315 0\n", 1490 | "\n", 1491 | "[10000 rows x 5 columns]" 1492 | ] 1493 | }, 1494 | "execution_count": 23, 1495 | "metadata": {}, 1496 | "output_type": "execute_result" 1497 | } 1498 | ], 1499 | "source": [ 1500 | "result = pd.read_csv(r'examples/ex6.csv')\n", 1501 | "result" 1502 | ] 1503 | }, 1504 | { 1505 | "cell_type": "code", 1506 | "execution_count": 24, 1507 | "metadata": { 1508 | "ExecuteTime": { 1509 | "end_time": "2019-12-24T17:44:16.491655Z", 1510 | "start_time": "2019-12-24T17:44:16.356466Z" 1511 | } 1512 | }, 1513 | "outputs": [ 1514 | { 1515 | "data": { 1516 | "text/html": [ 1517 | "
\n", 1518 | "\n", 1531 | "\n", 1532 | " \n", 1533 | " \n", 1534 | " \n", 1535 | " \n", 1536 | " \n", 1537 | " \n", 1538 | " \n", 1539 | " \n", 1540 | " \n", 1541 | " \n", 1542 | " \n", 1543 | " \n", 1544 | " \n", 1545 | " \n", 1546 | " \n", 1547 | " \n", 1548 | " \n", 1549 | " \n", 1550 | " \n", 1551 | " \n", 1552 | " \n", 1553 | " \n", 1554 | " \n", 1555 | " \n", 1556 | " \n", 1557 | " \n", 1558 | " \n", 1559 | " \n", 1560 | " \n", 1561 | " \n", 1562 | " \n", 1563 | " \n", 1564 | " \n", 1565 | " \n", 1566 | " \n", 1567 | " \n", 1568 | " \n", 1569 | " \n", 1570 | " \n", 1571 | " \n", 1572 | " \n", 1573 | " \n", 1574 | " \n", 1575 | " \n", 1576 | " \n", 1577 | " \n", 1578 | " \n", 1579 | " \n", 1580 | " \n", 1581 | " \n", 1582 | " \n", 1583 | " \n", 1584 | "
onetwothreefourkey
00.467976-0.038649-0.295344-1.824726L
1-0.3588931.4044530.704965-0.200638B
2-0.5018400.659254-0.421691-0.057688G
30.2048861.0741341.388361-0.982404R
40.354628-0.1331160.283763-0.837063Q
\n", 1585 | "
" 1586 | ], 1587 | "text/plain": [ 1588 | " one two three four key\n", 1589 | "0 0.467976 -0.038649 -0.295344 -1.824726 L\n", 1590 | "1 -0.358893 1.404453 0.704965 -0.200638 B\n", 1591 | "2 -0.501840 0.659254 -0.421691 -0.057688 G\n", 1592 | "3 0.204886 1.074134 1.388361 -0.982404 R\n", 1593 | "4 0.354628 -0.133116 0.283763 -0.837063 Q" 1594 | ] 1595 | }, 1596 | "execution_count": 24, 1597 | "metadata": {}, 1598 | "output_type": "execute_result" 1599 | } 1600 | ], 1601 | "source": [ 1602 | "# nrows - read a small number of rows \n", 1603 | "\n", 1604 | "pd.read_csv(r'examples/ex6.csv', nrows=5)" 1605 | ] 1606 | }, 1607 | { 1608 | "cell_type": "markdown", 1609 | "metadata": {}, 1610 | "source": [ 1611 | "To read a file in pieces, specify a **chunksize** as a number of rows\n", 1612 | "\n", 1613 | "The TextParser object returned by read_csv allows you to iterate over the parts of the file according to the chunksize." 1614 | ] 1615 | }, 1616 | { 1617 | "cell_type": "code", 1618 | "execution_count": 25, 1619 | "metadata": { 1620 | "ExecuteTime": { 1621 | "end_time": "2019-12-24T17:44:16.639722Z", 1622 | "start_time": "2019-12-24T17:44:16.497681Z" 1623 | } 1624 | }, 1625 | "outputs": [ 1626 | { 1627 | "data": { 1628 | "text/plain": [ 1629 | "" 1630 | ] 1631 | }, 1632 | "execution_count": 25, 1633 | "metadata": {}, 1634 | "output_type": "execute_result" 1635 | } 1636 | ], 1637 | "source": [ 1638 | "chunker = pd.read_csv(r'examples/ex6.csv', chunksize=1000)\n", 1639 | "chunker" 1640 | ] 1641 | }, 1642 | { 1643 | "cell_type": "code", 1644 | "execution_count": 26, 1645 | "metadata": { 1646 | "ExecuteTime": { 1647 | "end_time": "2019-12-24T17:44:16.931145Z", 1648 | "start_time": "2019-12-24T17:44:16.643350Z" 1649 | } 1650 | }, 1651 | "outputs": [], 1652 | "source": [ 1653 | "chunker = pd.read_csv(r'examples/ex6.csv', chunksize=1000)\n", 1654 | "tot = pd.Series([])\n", 1655 | "for piece in chunker:\n", 1656 | " tot = tot.add(piece['key'].value_counts(), fill_value=0)\n", 1657 
| "tot = tot.sort_values(ascending=False)" 1658 | ] 1659 | }, 1660 | { 1661 | "cell_type": "code", 1662 | "execution_count": 27, 1663 | "metadata": { 1664 | "ExecuteTime": { 1665 | "end_time": "2019-12-24T17:44:16.948713Z", 1666 | "start_time": "2019-12-24T17:44:16.933227Z" 1667 | } 1668 | }, 1669 | "outputs": [ 1670 | { 1671 | "data": { 1672 | "text/plain": [ 1673 | "E 368.0\n", 1674 | "X 364.0\n", 1675 | "L 346.0\n", 1676 | "O 343.0\n", 1677 | "Q 340.0\n", 1678 | "M 338.0\n", 1679 | "J 337.0\n", 1680 | "F 335.0\n", 1681 | "K 334.0\n", 1682 | "H 330.0\n", 1683 | "dtype: float64" 1684 | ] 1685 | }, 1686 | "execution_count": 27, 1687 | "metadata": {}, 1688 | "output_type": "execute_result" 1689 | } 1690 | ], 1691 | "source": [ 1692 | "tot[:10]" 1693 | ] 1694 | }, 1695 | { 1696 | "cell_type": "markdown", 1697 | "metadata": {}, 1698 | "source": [ 1699 | "## Writing Data to Text Format" 1700 | ] 1701 | }, 1702 | { 1703 | "cell_type": "code", 1704 | "execution_count": 28, 1705 | "metadata": { 1706 | "ExecuteTime": { 1707 | "end_time": "2019-12-24T17:44:17.102861Z", 1708 | "start_time": "2019-12-24T17:44:16.948713Z" 1709 | } 1710 | }, 1711 | "outputs": [ 1712 | { 1713 | "data": { 1714 | "text/html": [ 1715 | "
\n", 1716 | "\n", 1729 | "\n", 1730 | " \n", 1731 | " \n", 1732 | " \n", 1733 | " \n", 1734 | " \n", 1735 | " \n", 1736 | " \n", 1737 | " \n", 1738 | " \n", 1739 | " \n", 1740 | " \n", 1741 | " \n", 1742 | " \n", 1743 | " \n", 1744 | " \n", 1745 | " \n", 1746 | " \n", 1747 | " \n", 1748 | " \n", 1749 | " \n", 1750 | " \n", 1751 | " \n", 1752 | " \n", 1753 | " \n", 1754 | " \n", 1755 | " \n", 1756 | " \n", 1757 | " \n", 1758 | " \n", 1759 | " \n", 1760 | " \n", 1761 | " \n", 1762 | " \n", 1763 | " \n", 1764 | " \n", 1765 | " \n", 1766 | " \n", 1767 | " \n", 1768 | " \n", 1769 | " \n", 1770 | "
somethingabcdmessage
0one123.04NaN
1two56NaN8world
2three91011.012foo
\n", 1771 | "
" 1772 | ], 1773 | "text/plain": [ 1774 | " something a b c d message\n", 1775 | "0 one 1 2 3.0 4 NaN\n", 1776 | "1 two 5 6 NaN 8 world\n", 1777 | "2 three 9 10 11.0 12 foo" 1778 | ] 1779 | }, 1780 | "execution_count": 28, 1781 | "metadata": {}, 1782 | "output_type": "execute_result" 1783 | } 1784 | ], 1785 | "source": [ 1786 | "data = pd.read_csv(r'examples/ex5.csv')\n", 1787 | "data" 1788 | ] 1789 | }, 1790 | { 1791 | "cell_type": "markdown", 1792 | "metadata": {}, 1793 | "source": [ 1794 | "Using DataFrame’s **to_csv** method, we can write the data out to a comma separated file" 1795 | ] 1796 | }, 1797 | { 1798 | "cell_type": "code", 1799 | "execution_count": 29, 1800 | "metadata": { 1801 | "ExecuteTime": { 1802 | "end_time": "2019-12-24T17:44:17.249514Z", 1803 | "start_time": "2019-12-24T17:44:17.105944Z" 1804 | } 1805 | }, 1806 | "outputs": [], 1807 | "source": [ 1808 | "data.to_csv(r'examples/out.csv')" 1809 | ] 1810 | }, 1811 | { 1812 | "cell_type": "code", 1813 | "execution_count": 30, 1814 | "metadata": { 1815 | "ExecuteTime": { 1816 | "end_time": "2019-12-24T17:44:17.428423Z", 1817 | "start_time": "2019-12-24T17:44:17.257017Z" 1818 | } 1819 | }, 1820 | "outputs": [ 1821 | { 1822 | "name": "stdout", 1823 | "output_type": "stream", 1824 | "text": [ 1825 | ",something,a,b,c,d,message\n", 1826 | "0,one,1,2,3.0,4,\n", 1827 | "1,two,5,6,,8,world\n", 1828 | "2,three,9,10,11.0,12,foo\n" 1829 | ] 1830 | } 1831 | ], 1832 | "source": [ 1833 | "!type examples\\out.csv" 1834 | ] 1835 | }, 1836 | { 1837 | "cell_type": "code", 1838 | "execution_count": 31, 1839 | "metadata": { 1840 | "ExecuteTime": { 1841 | "end_time": "2019-12-24T17:44:17.544879Z", 1842 | "start_time": "2019-12-24T17:44:17.431586Z" 1843 | } 1844 | }, 1845 | "outputs": [ 1846 | { 1847 | "name": "stdout", 1848 | "output_type": "stream", 1849 | "text": [ 1850 | "|something|a|b|c|d|message\r\n", 1851 | "0|one|1|2|3.0|4|\r\n", 1852 | "1|two|5|6||8|world\r\n", 1853 | "2|three|9|10|11.0|12|foo\r\n" 1854 | ] 
1855 | } 1856 | ], 1857 | "source": [ 1858 | "import sys\n", 1859 | "\n", 1860 | "data.to_csv(sys.stdout, sep='|')" 1861 | ] 1862 | }, 1863 | { 1864 | "cell_type": "markdown", 1865 | "metadata": {}, 1866 | "source": [ 1867 | "Missing values appear as empty strings in the output. You might want to denote them by some other sentinel value" 1868 | ] 1869 | }, 1870 | { 1871 | "cell_type": "code", 1872 | "execution_count": 32, 1873 | "metadata": { 1874 | "ExecuteTime": { 1875 | "end_time": "2019-12-24T17:44:17.693974Z", 1876 | "start_time": "2019-12-24T17:44:17.552304Z" 1877 | } 1878 | }, 1879 | "outputs": [ 1880 | { 1881 | "name": "stdout", 1882 | "output_type": "stream", 1883 | "text": [ 1884 | ",something,a,b,c,d,message\r\n", 1885 | "0,one,1,2,3.0,4,NULL\r\n", 1886 | "1,two,5,6,NULL,8,world\r\n", 1887 | "2,three,9,10,11.0,12,foo\r\n" 1888 | ] 1889 | } 1890 | ], 1891 | "source": [ 1892 | "data.to_csv(sys.stdout, na_rep='NULL')" 1893 | ] 1894 | }, 1895 | { 1896 | "cell_type": "code", 1897 | "execution_count": 33, 1898 | "metadata": { 1899 | "ExecuteTime": { 1900 | "end_time": "2019-12-24T17:44:17.831547Z", 1901 | "start_time": "2019-12-24T17:44:17.699052Z" 1902 | } 1903 | }, 1904 | "outputs": [ 1905 | { 1906 | "name": "stdout", 1907 | "output_type": "stream", 1908 | "text": [ 1909 | "one,1,2,3.0,4,\r\n", 1910 | "two,5,6,,8,world\r\n", 1911 | "three,9,10,11.0,12,foo\r\n" 1912 | ] 1913 | } 1914 | ], 1915 | "source": [ 1916 | "# With no other options specified, both the row and column labels \n", 1917 | "# are written. 
Both of these can be disabled\n", 1918 | "\n", 1919 | "data.to_csv(sys.stdout, index=False, header=False)" 1920 | ] 1921 | }, 1922 | { 1923 | "cell_type": "code", 1924 | "execution_count": 34, 1925 | "metadata": { 1926 | "ExecuteTime": { 1927 | "end_time": "2019-12-24T17:44:17.980296Z", 1928 | "start_time": "2019-12-24T17:44:17.836819Z" 1929 | } 1930 | }, 1931 | "outputs": [ 1932 | { 1933 | "name": "stdout", 1934 | "output_type": "stream", 1935 | "text": [ 1936 | "a,b,c\r\n", 1937 | "1,2,3.0\r\n", 1938 | "5,6,\r\n", 1939 | "9,10,11.0\r\n" 1940 | ] 1941 | } 1942 | ], 1943 | "source": [ 1944 | "# write only a subset of the columns, and in an order of your choosing\n", 1945 | "\n", 1946 | "data.to_csv(sys.stdout ,index=False, columns=['a', 'b', 'c'])" 1947 | ] 1948 | }, 1949 | { 1950 | "cell_type": "code", 1951 | "execution_count": 35, 1952 | "metadata": { 1953 | "ExecuteTime": { 1954 | "end_time": "2019-12-24T17:44:18.437609Z", 1955 | "start_time": "2019-12-24T17:44:17.984237Z" 1956 | } 1957 | }, 1958 | "outputs": [], 1959 | "source": [ 1960 | "#Series\n", 1961 | "\n", 1962 | "dates = pd.date_range('1/1/2020', periods=10)\n", 1963 | "ts = pd.Series(np.arange(10), index=dates)\n", 1964 | "ts.to_csv(r'examples/tseries.csv', header=False)" 1965 | ] 1966 | }, 1967 | { 1968 | "cell_type": "code", 1969 | "execution_count": 36, 1970 | "metadata": { 1971 | "ExecuteTime": { 1972 | "end_time": "2019-12-24T17:44:18.595013Z", 1973 | "start_time": "2019-12-24T17:44:18.443009Z" 1974 | } 1975 | }, 1976 | "outputs": [ 1977 | { 1978 | "name": "stdout", 1979 | "output_type": "stream", 1980 | "text": [ 1981 | "2020-01-01,0\n", 1982 | "2020-01-02,1\n", 1983 | "2020-01-03,2\n", 1984 | "2020-01-04,3\n", 1985 | "2020-01-05,4\n", 1986 | "2020-01-06,5\n", 1987 | "2020-01-07,6\n", 1988 | "2020-01-08,7\n", 1989 | "2020-01-09,8\n", 1990 | "2020-01-10,9\n" 1991 | ] 1992 | } 1993 | ], 1994 | "source": [ 1995 | "!type examples\\tseries.csv" 1996 | ] 1997 | }, 1998 | { 1999 | "cell_type": 
"markdown", 2000 | "metadata": {}, 2001 | "source": [ 2002 | "## Working with Delimited Formats\n" 2003 | ] 2004 | }, 2005 | { 2006 | "cell_type": "markdown", 2007 | "metadata": {}, 2008 | "source": [ 2009 | "It’s possible to load most forms of tabular data from disk using functions like pandas.read_table. In some cases, however, some manual processing may be necessary.\n", 2010 | "It’s not uncommon to receive a file with one or more malformed lines that trip up read_table" 2011 | ] 2012 | }, 2013 | { 2014 | "cell_type": "code", 2015 | "execution_count": 37, 2016 | "metadata": { 2017 | "ExecuteTime": { 2018 | "end_time": "2019-12-24T17:44:18.746774Z", 2019 | "start_time": "2019-12-24T17:44:18.596249Z" 2020 | } 2021 | }, 2022 | "outputs": [ 2023 | { 2024 | "name": "stdout", 2025 | "output_type": "stream", 2026 | "text": [ 2027 | "\"a\",\"b\",\"c\"\n", 2028 | "\"1\",\"2\",\"3\"\n", 2029 | "\"1\",\"2\",\"3\"\n" 2030 | ] 2031 | } 2032 | ], 2033 | "source": [ 2034 | "!type examples\\ex7.csv" 2035 | ] 2036 | }, 2037 | { 2038 | "cell_type": "markdown", 2039 | "metadata": {}, 2040 | "source": [ 2041 | "For any file with a single-character delimiter, you can use Python’s built-in csv module. 
To use it, pass any open file or file-like object to csv.reader" 2042 | ] 2043 | }, 2044 | { 2045 | "cell_type": "code", 2046 | "execution_count": 38, 2047 | "metadata": { 2048 | "ExecuteTime": { 2049 | "end_time": "2019-12-24T17:44:18.872970Z", 2050 | "start_time": "2019-12-24T17:44:18.749959Z" 2051 | } 2052 | }, 2053 | "outputs": [], 2054 | "source": [ 2055 | "import csv\n", 2056 | "\n", 2057 | "f = open(r'examples/ex7.csv')\n", 2058 | "reader = csv.reader(f)" 2059 | ] 2060 | }, 2061 | { 2062 | "cell_type": "code", 2063 | "execution_count": 39, 2064 | "metadata": { 2065 | "ExecuteTime": { 2066 | "end_time": "2019-12-24T17:44:19.022732Z", 2067 | "start_time": "2019-12-24T17:44:18.878009Z" 2068 | } 2069 | }, 2070 | "outputs": [ 2071 | { 2072 | "name": "stdout", 2073 | "output_type": "stream", 2074 | "text": [ 2075 | "['a', 'b', 'c']\n", 2076 | "['1', '2', '3']\n", 2077 | "['1', '2', '3']\n" 2078 | ] 2079 | } 2080 | ], 2081 | "source": [ 2082 | "for line in reader:\n", 2083 | " print(line)" 2084 | ] 2085 | }, 2086 | { 2087 | "cell_type": "code", 2088 | "execution_count": 40, 2089 | "metadata": { 2090 | "ExecuteTime": { 2091 | "end_time": "2019-12-24T17:44:19.162044Z", 2092 | "start_time": "2019-12-24T17:44:19.026915Z" 2093 | } 2094 | }, 2095 | "outputs": [], 2096 | "source": [ 2097 | "with open(r'examples/ex7.csv') as f:\n", 2098 | " lines = list(csv.reader(f))" 2099 | ] 2100 | }, 2101 | { 2102 | "cell_type": "code", 2103 | "execution_count": 41, 2104 | "metadata": { 2105 | "ExecuteTime": { 2106 | "end_time": "2019-12-24T17:44:19.313160Z", 2107 | "start_time": "2019-12-24T17:44:19.163107Z" 2108 | }, 2109 | "scrolled": true 2110 | }, 2111 | "outputs": [ 2112 | { 2113 | "name": "stdout", 2114 | "output_type": "stream", 2115 | "text": [ 2116 | "['a', 'b', 'c']\n", 2117 | "[['1', '2', '3'], ['1', '2', '3']]\n" 2118 | ] 2119 | } 2120 | ], 2121 | "source": [ 2122 | "header, values = lines[0], lines[1:]\n", 2123 | "print(header)\n", 2124 | "print(values)" 2125 | ] 2126 | 
}, 2127 | { 2128 | "cell_type": "code", 2129 | "execution_count": 42, 2130 | "metadata": { 2131 | "ExecuteTime": { 2132 | "end_time": "2019-12-24T17:44:19.469890Z", 2133 | "start_time": "2019-12-24T17:44:19.318193Z" 2134 | }, 2135 | "scrolled": false 2136 | }, 2137 | "outputs": [ 2138 | { 2139 | "name": "stdout", 2140 | "output_type": "stream", 2141 | "text": [ 2142 | "1 1\n", 2143 | "2 2\n", 2144 | "3 3\n" 2145 | ] 2146 | } 2147 | ], 2148 | "source": [ 2149 | "for i, j in zip(*values):\n", 2150 | " print(i, j)" 2151 | ] 2152 | }, 2153 | { 2154 | "cell_type": "code", 2155 | "execution_count": 43, 2156 | "metadata": { 2157 | "ExecuteTime": { 2158 | "end_time": "2019-12-24T17:44:19.610273Z", 2159 | "start_time": "2019-12-24T17:44:19.473506Z" 2160 | } 2161 | }, 2162 | "outputs": [], 2163 | "source": [ 2164 | "data_dict = {h: v for h, v in zip(header, zip(*values))}" 2165 | ] 2166 | }, 2167 | { 2168 | "cell_type": "code", 2169 | "execution_count": 44, 2170 | "metadata": { 2171 | "ExecuteTime": { 2172 | "end_time": "2019-12-24T17:44:19.754072Z", 2173 | "start_time": "2019-12-24T17:44:19.618224Z" 2174 | } 2175 | }, 2176 | "outputs": [ 2177 | { 2178 | "data": { 2179 | "text/plain": [ 2180 | "{'a': ('1', '1'), 'b': ('2', '2'), 'c': ('3', '3')}" 2181 | ] 2182 | }, 2183 | "execution_count": 44, 2184 | "metadata": {}, 2185 | "output_type": "execute_result" 2186 | } 2187 | ], 2188 | "source": [ 2189 | "data_dict" 2190 | ] 2191 | }, 2192 | { 2193 | "cell_type": "markdown", 2194 | "metadata": { 2195 | "ExecuteTime": { 2196 | "end_time": "2019-12-23T16:17:13.529427Z", 2197 | "start_time": "2019-12-23T16:17:13.524002Z" 2198 | } 2199 | }, 2200 | "source": [ 2201 | "## JSON Data" 2202 | ] 2203 | }, 2204 | { 2205 | "cell_type": "code", 2206 | "execution_count": 45, 2207 | "metadata": { 2208 | "ExecuteTime": { 2209 | "end_time": "2019-12-24T17:44:19.901997Z", 2210 | "start_time": "2019-12-24T17:44:19.761474Z" 2211 | } 2212 | }, 2213 | "outputs": [], 2214 | "source": [ 2215 | "obj = 
'''\n", 2216 | "{\"name\": \"Wes\",\n", 2217 | " \"places_lived\": [\"United States\", \"Spain\", \"Germany\"],\n", 2218 | " \"pet\": null,\n", 2219 | " \"siblings\": [{\"name\": \"Scott\", \"age\": 30, \"pets\": [\"Zeus\", \"Zuko\"]},\n", 2220 | " {\"name\": \"Katie\", \"age\": 38,\n", 2221 | " \"pets\": [\"Sixes\", \"Stache\", \"Cisco\"]}]\n", 2222 | "}\n", 2223 | "'''" 2224 | ] 2225 | }, 2226 | { 2227 | "cell_type": "code", 2228 | "execution_count": 46, 2229 | "metadata": { 2230 | "ExecuteTime": { 2231 | "end_time": "2019-12-24T17:44:20.068177Z", 2232 | "start_time": "2019-12-24T17:44:19.907459Z" 2233 | } 2234 | }, 2235 | "outputs": [], 2236 | "source": [ 2237 | "import json" 2238 | ] 2239 | }, 2240 | { 2241 | "cell_type": "code", 2242 | "execution_count": 47, 2243 | "metadata": { 2244 | "ExecuteTime": { 2245 | "end_time": "2019-12-24T17:44:20.222470Z", 2246 | "start_time": "2019-12-24T17:44:20.072556Z" 2247 | } 2248 | }, 2249 | "outputs": [ 2250 | { 2251 | "data": { 2252 | "text/plain": [ 2253 | "{'name': 'Wes',\n", 2254 | " 'places_lived': ['United States', 'Spain', 'Germany'],\n", 2255 | " 'pet': None,\n", 2256 | " 'siblings': [{'name': 'Scott', 'age': 30, 'pets': ['Zeus', 'Zuko']},\n", 2257 | " {'name': 'Katie', 'age': 38, 'pets': ['Sixes', 'Stache', 'Cisco']}]}" 2258 | ] 2259 | }, 2260 | "execution_count": 47, 2261 | "metadata": {}, 2262 | "output_type": "execute_result" 2263 | } 2264 | ], 2265 | "source": [ 2266 | "result = json.loads(obj)\n", 2267 | "result" 2268 | ] 2269 | }, 2270 | { 2271 | "cell_type": "code", 2272 | "execution_count": 48, 2273 | "metadata": { 2274 | "ExecuteTime": { 2275 | "end_time": "2019-12-24T17:44:20.371155Z", 2276 | "start_time": "2019-12-24T17:44:20.228491Z" 2277 | } 2278 | }, 2279 | "outputs": [], 2280 | "source": [ 2281 | "# json.dumps, on the other hand, converts a Python object back to JSON:\n", 2282 | "\n", 2283 | "asjson = json.dumps(result)" 2284 | ] 2285 | }, 2286 | { 2287 | "cell_type": "code", 2288 | 
"execution_count": 49, 2289 | "metadata": { 2290 | "ExecuteTime": { 2291 | "end_time": "2019-12-24T17:44:20.540849Z", 2292 | "start_time": "2019-12-24T17:44:20.377740Z" 2293 | } 2294 | }, 2295 | "outputs": [ 2296 | { 2297 | "data": { 2298 | "text/html": [ 2299 | "
\n", 2300 | "\n", 2313 | "\n", 2314 | " \n", 2315 | " \n", 2316 | " \n", 2317 | " \n", 2318 | " \n", 2319 | " \n", 2320 | " \n", 2321 | " \n", 2322 | " \n", 2323 | " \n", 2324 | " \n", 2325 | " \n", 2326 | " \n", 2327 | " \n", 2328 | " \n", 2329 | " \n", 2330 | " \n", 2331 | " \n", 2332 | " \n", 2333 | " \n", 2334 | " \n", 2335 | " \n", 2336 | "
nameagepets
0Scott30[Zeus, Zuko]
1Katie38[Sixes, Stache, Cisco]
\n", 2337 | "
" 2338 | ], 2339 | "text/plain": [ 2340 | " name age pets\n", 2341 | "0 Scott 30 [Zeus, Zuko]\n", 2342 | "1 Katie 38 [Sixes, Stache, Cisco]" 2343 | ] 2344 | }, 2345 | "execution_count": 49, 2346 | "metadata": {}, 2347 | "output_type": "execute_result" 2348 | } 2349 | ], 2350 | "source": [ 2351 | "siblings = pd.DataFrame(result['siblings'], columns=['name', 'age', 'pets'])\n", 2352 | "siblings" 2353 | ] 2354 | }, 2355 | { 2356 | "cell_type": "markdown", 2357 | "metadata": {}, 2358 | "source": [ 2359 | "The **pandas.read_json** can automatically convert JSON datasets in specific arrangements into a Series or DataFrame." 2360 | ] 2361 | }, 2362 | { 2363 | "cell_type": "code", 2364 | "execution_count": 50, 2365 | "metadata": { 2366 | "ExecuteTime": { 2367 | "end_time": "2019-12-24T17:44:20.729151Z", 2368 | "start_time": "2019-12-24T17:44:20.544383Z" 2369 | } 2370 | }, 2371 | "outputs": [ 2372 | { 2373 | "name": "stdout", 2374 | "output_type": "stream", 2375 | "text": [ 2376 | "[{\"a\": 1, \"b\": 2, \"c\": 3},\n", 2377 | " {\"a\": 4, \"b\": 5, \"c\": 6},\n", 2378 | " {\"a\": 7, \"b\": 8, \"c\": 9}]\n" 2379 | ] 2380 | } 2381 | ], 2382 | "source": [ 2383 | "!type examples\\example.json" 2384 | ] 2385 | }, 2386 | { 2387 | "cell_type": "code", 2388 | "execution_count": 51, 2389 | "metadata": { 2390 | "ExecuteTime": { 2391 | "end_time": "2019-12-24T17:44:20.868493Z", 2392 | "start_time": "2019-12-24T17:44:20.731146Z" 2393 | } 2394 | }, 2395 | "outputs": [ 2396 | { 2397 | "data": { 2398 | "text/html": [ 2399 | "
\n", 2400 | "\n", 2413 | "\n", 2414 | " \n", 2415 | " \n", 2416 | " \n", 2417 | " \n", 2418 | " \n", 2419 | " \n", 2420 | " \n", 2421 | " \n", 2422 | " \n", 2423 | " \n", 2424 | " \n", 2425 | " \n", 2426 | " \n", 2427 | " \n", 2428 | " \n", 2429 | " \n", 2430 | " \n", 2431 | " \n", 2432 | " \n", 2433 | " \n", 2434 | " \n", 2435 | " \n", 2436 | " \n", 2437 | " \n", 2438 | " \n", 2439 | " \n", 2440 | " \n", 2441 | " \n", 2442 | "
abc
0123
1456
2789
\n", 2443 | "
" 2444 | ], 2445 | "text/plain": [ 2446 | " a b c\n", 2447 | "0 1 2 3\n", 2448 | "1 4 5 6\n", 2449 | "2 7 8 9" 2450 | ] 2451 | }, 2452 | "execution_count": 51, 2453 | "metadata": {}, 2454 | "output_type": "execute_result" 2455 | } 2456 | ], 2457 | "source": [ 2458 | "data = pd.read_json(r'examples/example.json')\n", 2459 | "data" 2460 | ] 2461 | }, 2462 | { 2463 | "cell_type": "code", 2464 | "execution_count": 52, 2465 | "metadata": { 2466 | "ExecuteTime": { 2467 | "end_time": "2019-12-24T17:44:21.008429Z", 2468 | "start_time": "2019-12-24T17:44:20.873164Z" 2469 | } 2470 | }, 2471 | "outputs": [ 2472 | { 2473 | "name": "stdout", 2474 | "output_type": "stream", 2475 | "text": [ 2476 | "{\"a\":{\"0\":1,\"1\":4,\"2\":7},\"b\":{\"0\":2,\"1\":5,\"2\":8},\"c\":{\"0\":3,\"1\":6,\"2\":9}}\n", 2477 | "[{\"a\":1,\"b\":2,\"c\":3},{\"a\":4,\"b\":5,\"c\":6},{\"a\":7,\"b\":8,\"c\":9}]\n" 2478 | ] 2479 | } 2480 | ], 2481 | "source": [ 2482 | "# pandas -> json\n", 2483 | "\n", 2484 | "print(data.to_json())\n", 2485 | "print(data.to_json(orient='records'))" 2486 | ] 2487 | }, 2488 | { 2489 | "cell_type": "code", 2490 | "execution_count": 53, 2491 | "metadata": { 2492 | "ExecuteTime": { 2493 | "end_time": "2019-12-24T17:44:21.143500Z", 2494 | "start_time": "2019-12-24T17:44:21.013899Z" 2495 | } 2496 | }, 2497 | "outputs": [], 2498 | "source": [ 2499 | "# **ToDo:**\n", 2500 | "\n", 2501 | "# 1.5 - XML and HTML: Web Scraping\n", 2502 | "# 2 - Binary Data Formats\n", 2503 | "# 2.1 - Using HDF5 Format\n", 2504 | "# 2.2 - Reading Microsoft Excel Files\n", 2505 | "# 3 - Interacting with Web APIs\n", 2506 | "# 4 - Interacting with Databases" 2507 | ] 2508 | } 2509 | ], 2510 | "metadata": { 2511 | "kernelspec": { 2512 | "display_name": "Python 3", 2513 | "language": "python", 2514 | "name": "python3" 2515 | }, 2516 | "language_info": { 2517 | "codemirror_mode": { 2518 | "name": "ipython", 2519 | "version": 3 2520 | }, 2521 | "file_extension": ".py", 2522 | "mimetype": "text/x-python", 2523 
| "name": "python", 2524 | "nbconvert_exporter": "python", 2525 | "pygments_lexer": "ipython3", 2526 | "version": "3.7.4" 2527 | }, 2528 | "toc": { 2529 | "base_numbering": 1, 2530 | "nav_menu": {}, 2531 | "number_sections": true, 2532 | "sideBar": true, 2533 | "skip_h1_title": false, 2534 | "title_cell": "Table of Contents", 2535 | "title_sidebar": "Contents", 2536 | "toc_cell": true, 2537 | "toc_position": {}, 2538 | "toc_section_display": true, 2539 | "toc_window_display": false 2540 | }, 2541 | "varInspector": { 2542 | "cols": { 2543 | "lenName": 16, 2544 | "lenType": 16, 2545 | "lenVar": 40 2546 | }, 2547 | "kernels_config": { 2548 | "python": { 2549 | "delete_cmd_postfix": "", 2550 | "delete_cmd_prefix": "del ", 2551 | "library": "var_list.py", 2552 | "varRefreshCmd": "print(var_dic_list())" 2553 | }, 2554 | "r": { 2555 | "delete_cmd_postfix": ") ", 2556 | "delete_cmd_prefix": "rm(", 2557 | "library": "var_list.r", 2558 | "varRefreshCmd": "cat(var_dic_list()) " 2559 | } 2560 | }, 2561 | "types_to_exclude": [ 2562 | "module", 2563 | "function", 2564 | "builtin_function_or_method", 2565 | "instance", 2566 | "_Feature" 2567 | ], 2568 | "window_display": false 2569 | } 2570 | }, 2571 | "nbformat": 4, 2572 | "nbformat_minor": 2 2573 | } 2574 | -------------------------------------------------------------------------------- /pandas/MovieLens-1M/movies.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/veb-101/Numpy-Pandas-Matplotlib-Tutorial/c7584da8b334ed84d77ac5d4e191f21c78c0b2d4/pandas/MovieLens-1M/movies.dat -------------------------------------------------------------------------------- /pandas/README.md: -------------------------------------------------------------------------------- 1 | ### Pandas Chapters 2 | ____________ 3 | 4 | * Following **Python for Data Analysis** By **Wes McKinney** (*Creator of pandas*) 5 | * Book link 
[Amazon](https://www.amazon.com/Python-Data-Analysis-Wrangling-IPython/dp/1491957662/ref=sr_1_1?keywords=python+for+data+analysis+wes+mckinney&link_code=qs&qid=1577210221&sr=8-1) 6 | * GitHub link for the book: [pydata-book](https://github.com/wesm/pydata-book) 7 | 8 | * To Do: 9 | * Chp 6: Data Loading, Storage and File Formats 10 | * Binary Data Formats 11 | * Using HDF5 Format 12 | * Reading Microsoft Excel Files 13 | * Interacting with Web APIs 14 | * Interacting with Databases 15 | 16 | * Chp 7: Data Cleaning and Preparation 17 | * Discretization and Binning 18 | 19 | * Chp 8: Data Wrangling: Join, Combine, and Reshape 20 | * Chp 10: Data Aggregation and Group Operations 21 | * Chp 11: Time Series 22 | * Chp 12: Advanced pandas 23 | -------------------------------------------------------------------------------- /pandas/examples/NA_handling_methods.csv: -------------------------------------------------------------------------------- 1 | dropna Filter axis labels based on whether values for each label have missing data, with varying thresholds for how much missing data to tolerate. 2 | fillna Fill in missing data with some value or using an interpolation method such as 'ffill' or 'bfill'. 3 | isnull Return boolean values indicating which values are missing/NA. 4 | notnull Negation of isnull. 
-------------------------------------------------------------------------------- /pandas/examples/csv_mindex.csv: -------------------------------------------------------------------------------- 1 | key1,key2,value1,value2 2 | one,a,1,2 3 | one,b,3,4 4 | one,c,5,6 5 | one,d,7,8 6 | two,a,9,10 7 | two,b,11,12 8 | two,c,13,14 9 | two,d,15,16 10 | -------------------------------------------------------------------------------- /pandas/examples/ex1.csv: -------------------------------------------------------------------------------- 1 | a,b,c,d,message 2 | 1,2,3,4,hello 3 | 5,6,7,8,world 4 | 9,10,11,12,foo -------------------------------------------------------------------------------- /pandas/examples/ex2.csv: -------------------------------------------------------------------------------- 1 | 1,2,3,4,hello 2 | 5,6,7,8,world 3 | 9,10,11,12,foo 4 | -------------------------------------------------------------------------------- /pandas/examples/ex3.txt: -------------------------------------------------------------------------------- 1 | A B C 2 | aaa -0.264438 -1.026059 -0.619500 3 | bbb 0.927272 0.302904 -0.032399 4 | ccc -0.264273 -0.386314 -0.217601 5 | ddd -0.871858 -0.348382 1.100491 6 | -------------------------------------------------------------------------------- /pandas/examples/ex4.csv: -------------------------------------------------------------------------------- 1 | # hey! 2 | a,b,c,d,message 3 | # just wanted to make things more difficult for you 4 | # who reads CSV files with computers, anyway? 
5 | 1,2,3,4,hello 6 | 5,6,7,8,world 7 | 9,10,11,12,foo -------------------------------------------------------------------------------- /pandas/examples/ex5.csv: -------------------------------------------------------------------------------- 1 | something,a,b,c,d,message 2 | one,1,2,3,4,NA 3 | two,5,6,,8,world 4 | three,9,10,11,12,foo -------------------------------------------------------------------------------- /pandas/examples/ex7.csv: -------------------------------------------------------------------------------- 1 | "a","b","c" 2 | "1","2","3" 3 | "1","2","3" 4 | -------------------------------------------------------------------------------- /pandas/examples/example.json: -------------------------------------------------------------------------------- 1 | [{"a": 1, "b": 2, "c": 3}, 2 | {"a": 4, "b": 5, "c": 6}, 3 | {"a": 7, "b": 8, "c": 9}] 4 | -------------------------------------------------------------------------------- /pandas/examples/out.csv: -------------------------------------------------------------------------------- 1 | ,something,a,b,c,d,message 2 | 0,one,1,2,3.0,4, 3 | 1,two,5,6,,8,world 4 | 2,three,9,10,11.0,12,foo 5 | -------------------------------------------------------------------------------- /pandas/examples/test.csv: -------------------------------------------------------------------------------- 1 | Argument Description 2 | path String indicating filesystem location, URL, or file-like object 3 | sep Character sequence or regular expression to use to split fields in each row 4 | header Row number to use as column names; defaults to 0 (first row), but should be None if there is no header row 5 | index_col Column numbers or names to use as the row index in the result; can be a single name/number or a list of them for a hierarchical index 6 | names List of column names for result, combine with header=None 7 | skiprows Number of rows at beginning of file to ignore or list of row numbers (starting from 0) to skip. 
8 | na_values Sequence of values to replace with NA. 9 | comment Character(s) to split comments off the end of lines. 10 | parse_dates Attempt to parse data to datetime; False by default. If True, will attempt to parse all columns. Otherwise can specify a list of column numbers or names to parse. If element of list is tuple or list, will combine multiple columns together and parse to date (e.g., if date/time split across two columns). 11 | keep_date_col If joining columns to parse date, keep the joined columns; False by default. 12 | converters Dict containing column number or name mapping to functions (e.g., {'foo': f} would apply the function f to all values in the 'foo' column). 13 | dayfirst When parsing potentially ambiguous dates, treat as international format (e.g., 7/6/2012 -> June 7, 2012); False by default. 14 | date_parser Function to use to parse dates. 15 | nrows Number of rows to read from beginning of file. 16 | iterator Return a TextParser object for reading file piecemeal. 17 | chunksize For iteration, size of file chunks. 18 | skip_footer Number of lines to ignore at end of file. 19 | verbose Print various parser output information, like the number of missing values placed in non-numeric columns. 20 | encoding Text encoding for Unicode (e.g., 'utf-8' for UTF-8 encoded text). 21 | squeeze If the parsed data only contains one column, return a Series. 22 | thousands Separator for thousands (e.g., ',' or '.'). 
-------------------------------------------------------------------------------- /pandas/examples/tseries.csv: -------------------------------------------------------------------------------- 1 | 2020-01-01,0 2 | 2020-01-02,1 3 | 2020-01-03,2 4 | 2020-01-04,3 5 | 2020-01-05,4 6 | 2020-01-06,5 7 | 2020-01-07,6 8 | 2020-01-08,7 9 | 2020-01-09,8 10 | 2020-01-10,9 11 | -------------------------------------------------------------------------------- /pandas/test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2019-12-24T17:41:41.902299Z", 9 | "start_time": "2019-12-24T17:41:40.774410Z" 10 | } 11 | }, 12 | "outputs": [], 13 | "source": [ 14 | "import pandas as pd\n", 15 | "pd.options.display.max_colwidth = 500" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 2, 21 | "metadata": { 22 | "ExecuteTime": { 23 | "end_time": "2019-12-24T17:41:41.913664Z", 24 | "start_time": "2019-12-24T17:41:41.904256Z" 25 | } 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "df = pd.read_csv(r'examples/test.csv', sep='\\n', skiprows=[0], names=['Description'])\n", 30 | "\n", 31 | "df.index = list(range(1, 22))" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 3, 37 | "metadata": { 38 | "ExecuteTime": { 39 | "end_time": "2019-12-24T17:41:42.060477Z", 40 | "start_time": "2019-12-24T17:41:41.915659Z" 41 | } 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "def f1(x):\n", 46 | " x = x.split(' ')\n", 47 | " return x.pop(0)\n", 48 | "\n", 49 | "\n", 50 | "df['Argument'] = df['Description'].map(f1) # get the first word" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 4, 56 | "metadata": { 57 | "ExecuteTime": { 58 | "end_time": "2019-12-24T17:41:42.218867Z", 59 | "start_time": "2019-12-24T17:41:42.066314Z" 60 | } 61 | }, 62 | "outputs": [], 63 | "source": [ 
64 | "def f2(x):\n", 65 | " x = x.split(' ')\n", 66 | " return \" \".join(x[1:])\n", 67 | "\n", 68 | "df['Description'] = df['Description'].map(f2) # remove the first word" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 5, 74 | "metadata": { 75 | "ExecuteTime": { 76 | "end_time": "2019-12-24T17:41:42.377774Z", 77 | "start_time": "2019-12-24T17:41:42.225841Z" 78 | } 79 | }, 80 | "outputs": [], 81 | "source": [ 82 | "df = df.reindex(columns=['Argument', 'Description'])" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 6, 88 | "metadata": { 89 | "ExecuteTime": { 90 | "end_time": "2019-12-24T17:41:42.589564Z", 91 | "start_time": "2019-12-24T17:41:42.381000Z" 92 | } 93 | }, 94 | "outputs": [ 95 | { 96 | "data": { 97 | "text/html": [ 98 | "
\n", 99 | "\n", 112 | "\n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | "
ArgumentDescription
1pathString indicating filesystem location, URL, or file-like object
2sepCharacter sequence or regular expression to use to split fields in each row
3headerRow number to use as column names; defaults to 0 (first row), but should be None if there is no header row
4index_colColumn numbers or names to use as the row index in the result; can be a single name/number or a list of them for a hierarchical index
5namesList of column names for result, combine with header=None
6skiprowsNumber of rows at beginning of file to ignore or list of row numbers (starting from 0) to skip.
7na_valuesSequence of values to replace with NA.
8commentCharacter(s) to split comments off the end of lines.
9parse_datesAttempt to parse data to datetime; False by default. If True, will attempt to parse all columns. Otherwise can specify a list of column numbers or name to parse. If element of list is tuple or list, will combine multiple columns together and parse to date (e.g., if date/time split across two columns).
10keep_date_colIf joining columns to parse date, keep the joined columns; False by default.
11convertersDict containing column number of name mapping to functions (e.g., {'foo': f} would apply the function f to all values in the 'foo' column).
12dayfirstWhen parsing potentially ambiguous dates, treat as international format (e.g., 7/6/2012 -> June 7, 2012); False by default.
13date_parserFunction to use to parse dates.
14nrowsNumber of rows to read from beginning of file.
15iteratorReturn a TextParser object for reading file piecemeal.
16chunksizeFor iteration, size of file chunks.
17skip_footerNumber of lines to ignore at end of file.
18verbosePrint various parser output information, like the number of missing values placed in non-numeric columns.
19encodingText encoding for Unicode (e.g., 'utf-8' for UTF-8 encoded text).
20squeezeIf the parsed data only contains one column, return a Series.
21thousandsSeparator for thousands (e.g., ',' or '.').
\n", 228 | "
" 229 | ], 230 | "text/plain": [ 231 | " Argument \\\n", 232 | "1 path \n", 233 | "2 sep \n", 234 | "3 header \n", 235 | "4 index_col \n", 236 | "5 names \n", 237 | "6 skiprows \n", 238 | "7 na_values \n", 239 | "8 comment \n", 240 | "9 parse_dates \n", 241 | "10 keep_date_col \n", 242 | "11 converters \n", 243 | "12 dayfirst \n", 244 | "13 date_parser \n", 245 | "14 nrows \n", 246 | "15 iterator \n", 247 | "16 chunksize \n", 248 | "17 skip_footer \n", 249 | "18 verbose \n", 250 | "19 encoding \n", 251 | "20 squeeze \n", 252 | "21 thousands \n", 253 | "\n", 254 | " Description \n", 255 | "1 String indicating filesystem location, URL, or file-like object \n", 256 | "2 Character sequence or regular expression to use to split fields in each row \n", 257 | "3 Row number to use as column names; defaults to 0 (first row), but should be None if there is no header row \n", 258 | "4 Column numbers or names to use as the row index in the result; can be a single name/number or a list of them for a hierarchical index \n", 259 | "5 List of column names for result, combine with header=None \n", 260 | "6 Number of rows at beginning of file to ignore or list of row numbers (starting from 0) to skip. \n", 261 | "7 Sequence of values to replace with NA. \n", 262 | "8 Character(s) to split comments off the end of lines. \n", 263 | "9 Attempt to parse data to datetime; False by default. If True, will attempt to parse all columns. Otherwise can specify a list of column numbers or name to parse. If element of list is tuple or list, will combine multiple columns together and parse to date (e.g., if date/time split across two columns). \n", 264 | "10 If joining columns to parse date, keep the joined columns; False by default. \n", 265 | "11 Dict containing column number of name mapping to functions (e.g., {'foo': f} would apply the function f to all values in the 'foo' column). 
\n", 266 | "12 When parsing potentially ambiguous dates, treat as international format (e.g., 7/6/2012 -> June 7, 2012); False by default. \n", 267 | "13 Function to use to parse dates. \n", 268 | "14 Number of rows to read from beginning of file. \n", 269 | "15 Return a TextParser object for reading file piecemeal. \n", 270 | "16 For iteration, size of file chunks. \n", 271 | "17 Number of lines to ignore at end of file. \n", 272 | "18 Print various parser output information, like the number of missing values placed in non-numeric columns. \n", 273 | "19 Text encoding for Unicode (e.g., 'utf-8' for UTF-8 encoded text). \n", 274 | "20 If the parsed data only contains one column, return a Series. \n", 275 | "21 Separator for thousands (e.g., ',' or '.'). " 276 | ] 277 | }, 278 | "execution_count": 6, 279 | "metadata": {}, 280 | "output_type": "execute_result" 281 | } 282 | ], 283 | "source": [ 284 | "df" 285 | ] 286 | } 287 | ], 288 | "metadata": { 289 | "kernelspec": { 290 | "display_name": "Python 3", 291 | "language": "python", 292 | "name": "python3" 293 | }, 294 | "language_info": { 295 | "codemirror_mode": { 296 | "name": "ipython", 297 | "version": 3 298 | }, 299 | "file_extension": ".py", 300 | "mimetype": "text/x-python", 301 | "name": "python", 302 | "nbconvert_exporter": "python", 303 | "pygments_lexer": "ipython3", 304 | "version": "3.7.4" 305 | }, 306 | "toc": { 307 | "base_numbering": 1, 308 | "nav_menu": {}, 309 | "number_sections": true, 310 | "sideBar": true, 311 | "skip_h1_title": false, 312 | "title_cell": "Table of Contents", 313 | "title_sidebar": "Contents", 314 | "toc_cell": false, 315 | "toc_position": {}, 316 | "toc_section_display": true, 317 | "toc_window_display": false 318 | }, 319 | "varInspector": { 320 | "cols": { 321 | "lenName": 16, 322 | "lenType": 16, 323 | "lenVar": 40 324 | }, 325 | "kernels_config": { 326 | "python": { 327 | "delete_cmd_postfix": "", 328 | "delete_cmd_prefix": "del ", 329 | "library": "var_list.py", 330 | 
"varRefreshCmd": "print(var_dic_list())" 331 | }, 332 | "r": { 333 | "delete_cmd_postfix": ") ", 334 | "delete_cmd_prefix": "rm(", 335 | "library": "var_list.r", 336 | "varRefreshCmd": "cat(var_dic_list()) " 337 | } 338 | }, 339 | "types_to_exclude": [ 340 | "module", 341 | "function", 342 | "builtin_function_or_method", 343 | "instance", 344 | "_Feature" 345 | ], 346 | "window_display": false 347 | } 348 | }, 349 | "nbformat": 4, 350 | "nbformat_minor": 2 351 | } 352 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pandas 3 | matplotlib 4 | seaborn 5 | jupyter --------------------------------------------------------------------------------