├── .gitignore
├── LICENSE
├── README.md
├── data.zip
├── data
    ├── billboard.csv
    ├── country_timeseries.csv
    ├── gapminder.tsv
    ├── pew.csv
    ├── table1.csv
    ├── table2.csv
    ├── table3.csv
    ├── table4a.csv
    ├── table4b.csv
    └── weather.csv
├── exercises
    └── exercises.ipynb
├── notebooks
    ├── .gitkeep
    ├── 01-intro.ipynb
    ├── 02-tidy.ipynb
    ├── 03-apply.ipynb
    ├── 04-plots.ipynb
    └── 05-model.ipynb
├── notes
    ├── 01-intro.ipynb
    ├── 02-tidy.ipynb
    ├── 03-apply.ipynb
    ├── 04-plots.ipynb
    └── 05-models.ipynb
└── test_installation.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | .pytest_cache/
 49 | 
 50 | # Translations
 51 | *.mo
 52 | *.pot
 53 | 
 54 | # Django stuff:
 55 | *.log
 56 | local_settings.py
 57 | db.sqlite3
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | .webassets-cache
 62 | 
 63 | # Scrapy stuff:
 64 | .scrapy
 65 | 
 66 | # Sphinx documentation
 67 | docs/_build/
 68 | 
 69 | # PyBuilder
 70 | target/
 71 | 
 72 | # Jupyter Notebook
 73 | .ipynb_checkpoints
 74 | 
 75 | # pyenv
 76 | .python-version
 77 | 
 78 | # celery beat schedule file
 79 | celerybeat-schedule
 80 | 
 81 | # SageMath parsed files
 82 | *.sage.py
 83 | 
 84 | # Environments
 85 | .env
 86 | .venv
 87 | env/
 88 | venv/
 89 | ENV/
 90 | env.bak/
 91 | venv.bak/
 92 | 
 93 | # Spyder project settings
 94 | .spyderproject
 95 | .spyproject
 96 | 
 97 | # Rope project settings
 98 | .ropeproject
 99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Daniel Chen
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # scipy-2019-pandas
 2 | Pandas tutorial for SciPy 2019
 3 | 
 4 | 
 5 | # Installation
 6 | 
 7 | 1. Install anaconda (use the Python 3 version): https://www.anaconda.com/distribution/
 8 | 2. See the Software-Carpentry Installations for `bash`, `git`, `python`, and `text editor`:   https://carpentries.github.io/workshop-template/
 9 | 
10 | # Testing your installation
11 | 
 12 | 1. Run the `test_installation.py` script (or copy/paste the import statements into a Python interpreter)
13 | 
14 | ## How to run the Jupyter Notebook
15 | 
16 | #### Windows/Mac
17 | 
18 | There will be an [Anaconda Navigator](https://docs.continuum.io/anaconda/navigator/) application that installs to your system.
19 | You can launch the Jupyter notebook from there to run your python code.
20 | 
21 | #### Linux
22 | 
23 | Anaconda's Python installation should be your system's default python.
24 | Make sure you open a new terminal window for this to take effect.
 25 | You can launch the Jupyter notebook by typing `jupyter notebook` in the terminal.
26 | 
27 | ## Creating a Notebook
28 | 
29 | Once you have the Jupyter notebook launched, there's a button towards the top right called `new`.
30 | Click this and select `Python 3`.
31 | 
32 | # Get Data
33 | 
 34 | 1. Download or clone this repository.
35 |     - Press the green button towards the top right
36 |     - click download zip
37 |     - extract
38 |     - celebrate
39 | 


--------------------------------------------------------------------------------
/data.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chendaniely/scipy-2019-pandas/7e515b7561fd5076ee462dd75af9488beadf6148/data.zip


--------------------------------------------------------------------------------
/data/country_timeseries.csv:
--------------------------------------------------------------------------------
  1 | Date,Day,Cases_Guinea,Cases_Liberia,Cases_SierraLeone,Cases_Nigeria,Cases_Senegal,Cases_UnitedStates,Cases_Spain,Cases_Mali,Deaths_Guinea,Deaths_Liberia,Deaths_SierraLeone,Deaths_Nigeria,Deaths_Senegal,Deaths_UnitedStates,Deaths_Spain,Deaths_Mali
  2 | 1/5/2015,289,2776,,10030,,,,,,1786,,2977,,,,,
  3 | 1/4/2015,288,2775,,9780,,,,,,1781,,2943,,,,,
  4 | 1/3/2015,287,2769,8166,9722,,,,,,1767,3496,2915,,,,,
  5 | 1/2/2015,286,,8157,,,,,,,,3496,,,,,,
  6 | 12/31/2014,284,2730,8115,9633,,,,,,1739,3471,2827,,,,,
  7 | 12/28/2014,281,2706,8018,9446,,,,,,1708,3423,2758,,,,,
  8 | 12/27/2014,280,2695,,9409,,,,,,1697,,2732,,,,,
  9 | 12/24/2014,277,2630,7977,9203,,,,,,,3413,2655,,,,,
 10 | 12/21/2014,273,2597,,9004,,,,,,1607,,2582,,,,,
 11 | 12/20/2014,272,2571,7862,8939,,,,,,1586,3384,2556,,,,,
 12 | 12/18/2014,271,,7830,,,,,,,,3376,,,,,,
 13 | 12/14/2014,267,2416,,8356,,,,,,1525,,2085,,,,,
 14 | 12/9/2014,262,,7797,,,,,,,,3290,,,,,,
 15 | 12/7/2014,260,2292,,7897,20,1,4,1,7,1428,,1768,8,0,1,0,6
 16 | 12/3/2014,256,,7719,,,,,,,,3177,,,,,,
 17 | 11/30/2014,253,2164,,7312,20,1,4,1,7,1327,,1583,8,0,1,0,6
 18 | 11/28/2014,251,,7635,,,,,,,,3145,,,,,,
 19 | 11/23/2014,246,2134,,6599,20,1,4,1,7,1260,,1398,8,0,1,0,6
 20 | 11/22/2014,245,,7168,,,,,,,,3016,,,,,,
 21 | 11/18/2014,241,2047,7082,6190,20,1,4,1,6,1214,2963,1267,8,0,1,0,6
 22 | 11/16/2014,239,1971,,6073,20,1,4,1,5,1192,,1250,8,0,1,0,5
 23 | 11/15/2014,238,,7069,,,,,,,,2964,,,,,,
 24 | 11/11/2014,234,1919,,5586,20,1,4,1,4,1166,,1187,8,0,1,0,3
 25 | 11/10/2014,233,,6878,,,,,,,,2812,,,,,,
 26 | 11/9/2014,232,1878,,5368,20,1,4,1,1,1142,,1169,8,0,1,0,1
 27 | 11/8/2014,231,,6822,,,,,,,,2836,,,,,,
 28 | 11/4/2014,227,,6619,4862,20,1,4,1,1,,2766,1130,8,0,1,0,1
 29 | 11/3/2014,226,1760,,,,,,,,1054,,,,,,,
 30 | 11/2/2014,225,1731,,4759,20,1,4,1,1,1041,,1070,8,0,1,0,1
 31 | 10/31/2014,222,,6525,,,,,,,,2697,,,,,,
 32 | 10/29/2014,220,1667,,5338,20,1,4,1,1,1018,,1510,8,0,1,0,1
 33 | 10/27/2014,218,1906,,5235,20,1,4,1,1,997,,1500,8,0,1,0,1
 34 | 10/25/2014,216,,6535,,,,,,,,2413,,,,,,
 35 | 10/22/2014,214,,,3896,,,4,1,1,,,1281,,,1,0,1
 36 | 10/21/2014,213,1553,,,,,,,,926,,,,,,,
 37 | 10/19/2014,211,1540,,3706,20,1,3,1,,904,,1259,8,0,1,0,
 38 | 10/18/2014,210,,4665,,,,,,,,2705,,,,,,
 39 | 10/14/2014,206,1519,,3410,20,1,3,1,,862,,1200,8,0,0,1,
 40 | 10/13/2014,205,,4262,,,,,,,,2484,,,,,,
 41 | 10/12/2014,204,1472,,3252,20,1,2,1,,843,,1183,8,0,1,1,
 42 | 10/11/2014,203,,4249,,,,,,,,2458,,,,,,
 43 | 10/8/2014,200,,,2950,20,1,1,1,,,,930,8,0,1,1,
 44 | 10/7/2014,199,1350,4076,,,,,,,778,2316,,,,,,
 45 | 10/5/2014,197,1298,,2789,20,1,1,,,768,,879,8,0,0,,
 46 | 10/4/2014,196,,3924,,,,,,,,2210,,,,,,
 47 | 10/1/2014,193,1199,3834,2437,20,1,1,,,739,2069,623,8,0,0,,
 48 | 9/28/2014,190,1157,3696,2304,20,1,,,,710,1998,622,8,0,,,
 49 | 9/23/2014,185,1074,3458,2021,20,1,,,,648,1830,605,8,0,,,
 50 | 9/21/2014,183,1022,3280,1940,20,1,,,,635,1677,597,8,0,,,
 51 | 9/20/2014,182,,,1813,,,,,,,,593,,,,,
 52 | 9/19/2014,181,1008,,,,,,,,632,,,,,,,
 53 | 9/17/2014,179,,3022,,,,,,,,1578,,,,,,
 54 | 9/14/2014,176,942,2710,1673,,,,,,601,1459,562,,,,,
 55 | 9/13/2014,175,936,,1620,21,1,,,,595,1296,562,8,0,,,
 56 | 9/10/2014,172,899,,1478,21,1,,,,568,,536,8,,,,
 57 | 9/9/2014,171,,2407,,,,,,,,,,,,,,
 58 | 9/7/2014,169,861,2081,1424,21,3,,,,557,1137,524,8,0,,,
 59 | 9/5/2014,167,812,1871,1261,22,1,,,,517,1089,491,8,,,,
 60 | 8/31/2014,162,771,1698,1216,21,1,,,,494,871,476,7,,,,
 61 | 8/26/2014,157,648,1378,1026,17,,,,,430,694,422,6,,,,
 62 | 8/20/2014,151,607,1082,910,16,,,,,406,624,392,5,,,,
 63 | 8/18/2014,149,579,972,907,15,,,,,396,576,374,4,,,,
 64 | 8/16/2014,147,543,834,848,15,,,,,394,466,365,4,,,,
 65 | 8/13/2014,144,519,786,810,12,,,,,380,413,348,4,,,,
 66 | 8/11/2014,142,510,670,783,12,,,,,377,355,334,3,,,,
 67 | 8/9/2014,140,506,599,730,13,,,,,373,323,315,2,,,,
 68 | 8/6/2014,137,495,554,717,13,,,,,367,294,298,2,,,,
 69 | 8/4/2014,135,495,516,691,9,,,,,363,282,286,1,,,,
 70 | 8/1/2014,132,485,468,646,4,,,,,358,255,273,1,,,,
 71 | 7/30/2014,129,472,391,574,3,,,,,346,227,252,1,,,,
 72 | 7/27/2014,126,460,329,533,1,,,,,339,156,233,1,,,,
 73 | 7/23/2014,123,427,249,525,0,,,,,319,129,224,0,,,,
 74 | 7/20/2014,120,415,224,454,,,,,,314,127,219,,,,,
 75 | 7/17/2014,117,410,196,442,,,,,,310,116,206,,,,,
 76 | 7/14/2014,114,411,174,397,,,,,,310,106,197,,,,,
 77 | 7/12/2014,112,406,172,386,,,,,,304,105,194,,,,,
 78 | 7/8/2014,108,409,142,337,,,,,,309,88,142,,,,,
 79 | 7/6/2014,106,408,131,305,,,,,,307,84,127,,,,,
 80 | 7/2/2014,102,412,115,252,,,,,,305,75,101,,,,,
 81 | 6/30/2014,100,413,107,239,,,,,,303,65,99,,,,,
 82 | 6/22/2014,92,,51,,,,,,,,34,,,,,,
 83 | 6/20/2014,90,390,,158,,,,,,270,,34,,,,,
 84 | 6/19/2014,89,,41,,,,,,,,25,,,,,,
 85 | 6/18/2014,88,390,,136,,,,,,267,,28,,,,,
 86 | 6/17/2014,87,,,97,,,,,,,,49,,,,,
 87 | 6/16/2014,86,398,33,,,,,,,264,24,,,,,,
 88 | 6/10/2014,80,351,13,89,,,,,,226,24,7,,,,,
 89 | 6/5/2014,75,,13,81,,,,,,,,6,,,,,
 90 | 6/3/2014,73,344,13,,,,,,,215,12,6,,,,,
 91 | 6/1/2014,71,328,13,79,,,,,,208,12,6,,,,,
 92 | 5/28/2014,67,291,13,50,,,,,,193,12,6,,,,,
 93 | 5/27/2014,66,281,12,16,,,,,,186,11,5,,,,,
 94 | 5/23/2014,62,258,12,0,,,,,,174,11,0,,,,,
 95 | 5/12/2014,51,248,12,0,,,,,,171,11,0,,,,,
 96 | 5/10/2014,49,233,12,0,,,,,,157,11,0,,,,,
 97 | 5/7/2014,46,236,13,0,,,,,,158,11,0,,,,,
 98 | 5/5/2014,44,235,13,0,,,,,,157,11,0,,,,,
 99 | 5/3/2014,42,231,13,0,,,,,,155,11,0,,,,,
100 | 5/1/2014,40,226,13,0,,,,,,149,11,0,,,,,
101 | 4/26/2014,35,224,,0,,,,,,143,,0,,,,,
102 | 4/24/2014,33,,35,0,,,,,,,,0,,,,,
103 | 4/23/2014,32,218,,0,,,,,,141,,0,,,,,
104 | 4/22/2014,31,,,0,,,,,,,,0,,,,,
105 | 4/21/2014,30,,34,,,,,,,,11,,,,,,
106 | 4/20/2014,29,208,,,,,,,,136,6,,,,,,
107 | 4/17/2014,26,203,27,,,,,,,129,,,,,,,
108 | 4/16/2014,25,197,27,,,,,,,122,13,,,,,,
109 | 4/15/2014,24,,,12,,,,,,,,,,,,,
110 | 4/14/2014,23,168,,,,,,,,108,,,,,,,
111 | 4/11/2014,20,159,26,2,,,,,,106,13,2,,,,,
112 | 4/9/2014,18,158,25,2,,,,,,101,12,2,,,,,
113 | 4/7/2014,16,151,21,2,,,,,,95,10,2,,,,,
114 | 4/4/2014,13,143,18,2,,,,,,86,7,2,,,,,
115 | 4/1/2014,10,127,8,2,,,,,,83,5,2,,,,,
116 | 3/31/2014,9,122,8,2,,,,,,80,4,2,,,,,
117 | 3/29/2014,7,112,7,,,,,,,70,2,,,,,,
118 | 3/28/2014,6,112,3,2,,,,,,70,3,2,,,,,
119 | 3/27/2014,5,103,8,6,,,,,,66,6,5,,,,,
120 | 3/26/2014,4,86,,,,,,,,62,,,,,,,
121 | 3/25/2014,3,86,,,,,,,,60,,,,,,,
122 | 3/24/2014,2,86,,,,,,,,59,,,,,,,
123 | 3/22/2014,0,49,,,,,,,,29,,,,,,,


--------------------------------------------------------------------------------
/data/pew.csv:
--------------------------------------------------------------------------------
 1 | "religion","<$10k","$10-20k","$20-30k","$30-40k","$40-50k","$50-75k","$75-100k","$100-150k",">150k","Don't know/refused"
 2 | "Agnostic",27,34,60,81,76,137,122,109,84,96
 3 | "Atheist",12,27,37,52,35,70,73,59,74,76
 4 | "Buddhist",27,21,30,34,33,58,62,39,53,54
 5 | "Catholic",418,617,732,670,638,1116,949,792,633,1489
 6 | "Don’t know/refused",15,14,15,11,10,35,21,17,18,116
 7 | "Evangelical Prot",575,869,1064,982,881,1486,949,723,414,1529
 8 | "Hindu",1,9,7,9,11,34,47,48,54,37
 9 | "Historically Black Prot",228,244,236,238,197,223,131,81,78,339
10 | "Jehovah's Witness",20,27,24,24,21,30,15,11,6,37
11 | "Jewish",19,19,25,25,30,95,69,87,151,162
12 | "Mainline Prot",289,495,619,655,651,1107,939,753,634,1328
13 | "Mormon",29,40,48,51,56,112,85,49,42,69
14 | "Muslim",6,7,9,10,9,23,16,8,6,22
15 | "Orthodox",13,17,23,32,32,47,38,42,46,73
16 | "Other Christian",9,7,11,13,13,14,18,14,12,18
17 | "Other Faiths",20,33,40,46,49,63,46,40,41,71
18 | "Other World Religions",5,2,3,4,2,7,3,4,4,8
19 | "Unaffiliated",217,299,374,365,341,528,407,321,258,597
20 | 


--------------------------------------------------------------------------------
/data/table1.csv:
--------------------------------------------------------------------------------
1 | "country","year","cases","population"
2 | "Afghanistan",1999,745,19987071
3 | "Afghanistan",2000,2666,20595360
4 | "Brazil",1999,37737,172006362
5 | "Brazil",2000,80488,174504898
6 | "China",1999,212258,1272915272
7 | "China",2000,213766,1280428583
8 | 


--------------------------------------------------------------------------------
/data/table2.csv:
--------------------------------------------------------------------------------
 1 | "country","year","type","count"
 2 | "Afghanistan",1999,"cases",745
 3 | "Afghanistan",1999,"population",19987071
 4 | "Afghanistan",2000,"cases",2666
 5 | "Afghanistan",2000,"population",20595360
 6 | "Brazil",1999,"cases",37737
 7 | "Brazil",1999,"population",172006362
 8 | "Brazil",2000,"cases",80488
 9 | "Brazil",2000,"population",174504898
10 | "China",1999,"cases",212258
11 | "China",1999,"population",1272915272
12 | "China",2000,"cases",213766
13 | "China",2000,"population",1280428583
14 | 


--------------------------------------------------------------------------------
/data/table3.csv:
--------------------------------------------------------------------------------
1 | "country","year","rate"
2 | "Afghanistan",1999,"745/19987071"
3 | "Afghanistan",2000,"2666/20595360"
4 | "Brazil",1999,"37737/172006362"
5 | "Brazil",2000,"80488/174504898"
6 | "China",1999,"212258/1272915272"
7 | "China",2000,"213766/1280428583"
8 | 


--------------------------------------------------------------------------------
/data/table4a.csv:
--------------------------------------------------------------------------------
1 | "country","1999","2000"
2 | "Afghanistan",745,2666
3 | "Brazil",37737,80488
4 | "China",212258,213766
5 | 


--------------------------------------------------------------------------------
/data/table4b.csv:
--------------------------------------------------------------------------------
1 | "country","1999","2000"
2 | "Afghanistan",19987071,20595360
3 | "Brazil",172006362,174504898
4 | "China",1272915272,1280428583
5 | 


--------------------------------------------------------------------------------
/data/weather.csv:
--------------------------------------------------------------------------------
 1 | "id","year","month","element","d1","d2","d3","d4","d5","d6","d7","d8","d9","d10","d11","d12","d13","d14","d15","d16","d17","d18","d19","d20","d21","d22","d23","d24","d25","d26","d27","d28","d29","d30","d31"
 2 | "MX17004",2010,1,"tmax",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,27.8,NA
 3 | "MX17004",2010,1,"tmin",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,14.5,NA
 4 | "MX17004",2010,2,"tmax",NA,27.3,24.1,NA,NA,NA,NA,NA,NA,NA,29.7,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,29.9,NA,NA,NA,NA,NA,NA,NA,NA
 5 | "MX17004",2010,2,"tmin",NA,14.4,14.4,NA,NA,NA,NA,NA,NA,NA,13.4,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,10.7,NA,NA,NA,NA,NA,NA,NA,NA
 6 | "MX17004",2010,3,"tmax",NA,NA,NA,NA,32.1,NA,NA,NA,NA,34.5,NA,NA,NA,NA,NA,31.1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 7 | "MX17004",2010,3,"tmin",NA,NA,NA,NA,14.2,NA,NA,NA,NA,16.8,NA,NA,NA,NA,NA,17.6,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 8 | "MX17004",2010,4,"tmax",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,36.3,NA,NA,NA,NA
 9 | "MX17004",2010,4,"tmin",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,16.7,NA,NA,NA,NA
10 | "MX17004",2010,5,"tmax",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,33.2,NA,NA,NA,NA
11 | "MX17004",2010,5,"tmin",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,18.2,NA,NA,NA,NA
12 | "MX17004",2010,6,"tmax",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,28,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,30.1,NA,NA
13 | "MX17004",2010,6,"tmin",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,17.5,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,18,NA,NA
14 | "MX17004",2010,7,"tmax",NA,NA,28.6,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,29.9,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
15 | "MX17004",2010,7,"tmin",NA,NA,17.5,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,16.5,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
16 | "MX17004",2010,8,"tmax",NA,NA,NA,NA,29.6,NA,NA,29,NA,NA,NA,NA,29.8,NA,NA,NA,NA,NA,NA,NA,NA,NA,26.4,NA,29.7,NA,NA,NA,28,NA,25.4
17 | "MX17004",2010,8,"tmin",NA,NA,NA,NA,15.8,NA,NA,17.3,NA,NA,NA,NA,16.5,NA,NA,NA,NA,NA,NA,NA,NA,NA,15,NA,15.6,NA,NA,NA,15.3,NA,15.4
18 | "MX17004",2010,10,"tmax",NA,NA,NA,NA,27,NA,28.1,NA,NA,NA,NA,NA,NA,29.5,28.7,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,31.2,NA,NA,NA
19 | "MX17004",2010,10,"tmin",NA,NA,NA,NA,14,NA,12.9,NA,NA,NA,NA,NA,NA,13,10.5,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,15,NA,NA,NA
20 | "MX17004",2010,11,"tmax",NA,31.3,NA,27.2,26.3,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,28.1,27.7,NA,NA,NA,NA
21 | "MX17004",2010,11,"tmin",NA,16.3,NA,12,7.9,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,12.1,14.2,NA,NA,NA,NA
22 | "MX17004",2010,12,"tmax",29.9,NA,NA,NA,NA,27.8,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
23 | "MX17004",2010,12,"tmin",13.8,NA,NA,NA,NA,10.5,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
24 | 


--------------------------------------------------------------------------------
/notebooks/.gitkeep:
--------------------------------------------------------------------------------
1 |  
2 | 


--------------------------------------------------------------------------------
/notebooks/03-apply.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "code",
   5 |    "execution_count": 3,
   6 |    "metadata": {},
   7 |    "outputs": [],
   8 |    "source": [
   9 |     "def my_function(x, y):\n",
  10 |     "    pass"
  11 |    ]
  12 |   },
  13 |   {
  14 |    "cell_type": "code",
  15 |    "execution_count": 4,
  16 |    "metadata": {},
  17 |    "outputs": [],
  18 |    "source": [
  19 |     "def my_sq(x):\n",
  20 |     "    return x ** 2"
  21 |    ]
  22 |   },
  23 |   {
  24 |    "cell_type": "code",
  25 |    "execution_count": 5,
  26 |    "metadata": {},
  27 |    "outputs": [
  28 |     {
  29 |      "data": {
  30 |       "text/plain": [
  31 |        "4"
  32 |       ]
  33 |      },
  34 |      "execution_count": 5,
  35 |      "metadata": {},
  36 |      "output_type": "execute_result"
  37 |     }
  38 |    ],
  39 |    "source": [
  40 |     "my_sq(2)"
  41 |    ]
  42 |   },
  43 |   {
  44 |    "cell_type": "code",
  45 |    "execution_count": 6,
  46 |    "metadata": {},
  47 |    "outputs": [
  48 |     {
  49 |      "data": {
  50 |       "text/plain": [
  51 |        "16"
  52 |       ]
  53 |      },
  54 |      "execution_count": 6,
  55 |      "metadata": {},
  56 |      "output_type": "execute_result"
  57 |     }
  58 |    ],
  59 |    "source": [
  60 |     "my_sq(4)"
  61 |    ]
  62 |   },
  63 |   {
  64 |    "cell_type": "code",
  65 |    "execution_count": 11,
  66 |    "metadata": {},
  67 |    "outputs": [],
  68 |    "source": [
  69 |     "assert my_sq(4) == 16"
  70 |    ]
  71 |   },
  72 |   {
  73 |    "cell_type": "code",
  74 |    "execution_count": 12,
  75 |    "metadata": {},
  76 |    "outputs": [],
  77 |    "source": [
  78 |     "def avg_2(x, y):\n",
  79 |     "    return (x + y) / 2"
  80 |    ]
  81 |   },
  82 |   {
  83 |    "cell_type": "code",
  84 |    "execution_count": 13,
  85 |    "metadata": {},
  86 |    "outputs": [
  87 |     {
  88 |      "data": {
  89 |       "text/plain": [
  90 |        "15.0"
  91 |       ]
  92 |      },
  93 |      "execution_count": 13,
  94 |      "metadata": {},
  95 |      "output_type": "execute_result"
  96 |     }
  97 |    ],
  98 |    "source": [
  99 |     "avg_2(10, 20)"
 100 |    ]
 101 |   },
 102 |   {
 103 |    "cell_type": "code",
 104 |    "execution_count": 18,
 105 |    "metadata": {},
 106 |    "outputs": [],
 107 |    "source": [
 108 |     "import pandas as pd"
 109 |    ]
 110 |   },
 111 |   {
 112 |    "cell_type": "code",
 113 |    "execution_count": 19,
 114 |    "metadata": {},
 115 |    "outputs": [],
 116 |    "source": [
 117 |     "df = pd.DataFrame({\n",
 118 |     "    'a': [10, 20, 30],\n",
 119 |     "    'b': [20, 30, 40]\n",
 120 |     "})"
 121 |    ]
 122 |   },
 123 |   {
 124 |    "cell_type": "code",
 125 |    "execution_count": 20,
 126 |    "metadata": {},
 127 |    "outputs": [
 128 |     {
 129 |      "data": {
 130 |       "text/html": [
 131 |        "<div>\n",
 132 |        "<style scoped>\n",
 133 |        "    .dataframe tbody tr th:only-of-type {\n",
 134 |        "        vertical-align: middle;\n",
 135 |        "    }\n",
 136 |        "\n",
 137 |        "    .dataframe tbody tr th {\n",
 138 |        "        vertical-align: top;\n",
 139 |        "    }\n",
 140 |        "\n",
 141 |        "    .dataframe thead th {\n",
 142 |        "        text-align: right;\n",
 143 |        "    }\n",
 144 |        "</style>\n",
 145 |        "<table border=\"1\" class=\"dataframe\">\n",
 146 |        "  <thead>\n",
 147 |        "    <tr style=\"text-align: right;\">\n",
 148 |        "      <th></th>\n",
 149 |        "      <th>a</th>\n",
 150 |        "      <th>b</th>\n",
 151 |        "    </tr>\n",
 152 |        "  </thead>\n",
 153 |        "  <tbody>\n",
 154 |        "    <tr>\n",
 155 |        "      <th>0</th>\n",
 156 |        "      <td>10</td>\n",
 157 |        "      <td>20</td>\n",
 158 |        "    </tr>\n",
 159 |        "    <tr>\n",
 160 |        "      <th>1</th>\n",
 161 |        "      <td>20</td>\n",
 162 |        "      <td>30</td>\n",
 163 |        "    </tr>\n",
 164 |        "    <tr>\n",
 165 |        "      <th>2</th>\n",
 166 |        "      <td>30</td>\n",
 167 |        "      <td>40</td>\n",
 168 |        "    </tr>\n",
 169 |        "  </tbody>\n",
 170 |        "</table>\n",
 171 |        "</div>"
 172 |       ],
 173 |       "text/plain": [
 174 |        "    a   b\n",
 175 |        "0  10  20\n",
 176 |        "1  20  30\n",
 177 |        "2  30  40"
 178 |       ]
 179 |      },
 180 |      "execution_count": 20,
 181 |      "metadata": {},
 182 |      "output_type": "execute_result"
 183 |     }
 184 |    ],
 185 |    "source": [
 186 |     "df"
 187 |    ]
 188 |   },
 189 |   {
 190 |    "cell_type": "code",
 191 |    "execution_count": 22,
 192 |    "metadata": {},
 193 |    "outputs": [
 194 |     {
 195 |      "data": {
 196 |       "text/plain": [
 197 |        "0    100\n",
 198 |        "1    400\n",
 199 |        "2    900\n",
 200 |        "Name: a, dtype: int64"
 201 |       ]
 202 |      },
 203 |      "execution_count": 22,
 204 |      "metadata": {},
 205 |      "output_type": "execute_result"
 206 |     }
 207 |    ],
 208 |    "source": [
 209 |     "df['a'] ** 2"
 210 |    ]
 211 |   },
 212 |   {
 213 |    "cell_type": "code",
 214 |    "execution_count": 23,
 215 |    "metadata": {},
 216 |    "outputs": [
 217 |     {
 218 |      "data": {
 219 |       "text/plain": [
 220 |        "<function __main__.my_sq(x)>"
 221 |       ]
 222 |      },
 223 |      "execution_count": 23,
 224 |      "metadata": {},
 225 |      "output_type": "execute_result"
 226 |     }
 227 |    ],
 228 |    "source": [
 229 |     "my_sq"
 230 |    ]
 231 |   },
 232 |   {
 233 |    "cell_type": "code",
 234 |    "execution_count": 24,
 235 |    "metadata": {},
 236 |    "outputs": [
 237 |     {
 238 |      "data": {
 239 |       "text/plain": [
 240 |        "0    100\n",
 241 |        "1    400\n",
 242 |        "2    900\n",
 243 |        "Name: a, dtype: int64"
 244 |       ]
 245 |      },
 246 |      "execution_count": 24,
 247 |      "metadata": {},
 248 |      "output_type": "execute_result"
 249 |     }
 250 |    ],
 251 |    "source": [
 252 |     "df['a'].apply(my_sq)"
 253 |    ]
 254 |   },
 255 |   {
 256 |    "cell_type": "code",
 257 |    "execution_count": 25,
 258 |    "metadata": {},
 259 |    "outputs": [],
 260 |    "source": [
 261 |     "def my_exp(x, e):\n",
 262 |     "    return x ** e"
 263 |    ]
 264 |   },
 265 |   {
 266 |    "cell_type": "code",
 267 |    "execution_count": 26,
 268 |    "metadata": {},
 269 |    "outputs": [
 270 |     {
 271 |      "data": {
 272 |       "text/plain": [
 273 |        "1024"
 274 |       ]
 275 |      },
 276 |      "execution_count": 26,
 277 |      "metadata": {},
 278 |      "output_type": "execute_result"
 279 |     }
 280 |    ],
 281 |    "source": [
 282 |     "my_exp(2, 10)"
 283 |    ]
 284 |   },
 285 |   {
 286 |    "cell_type": "code",
 287 |    "execution_count": 27,
 288 |    "metadata": {},
 289 |    "outputs": [
 290 |     {
 291 |      "data": {
 292 |       "text/plain": [
 293 |        "0     10000\n",
 294 |        "1    160000\n",
 295 |        "2    810000\n",
 296 |        "Name: a, dtype: int64"
 297 |       ]
 298 |      },
 299 |      "execution_count": 27,
 300 |      "metadata": {},
 301 |      "output_type": "execute_result"
 302 |     }
 303 |    ],
 304 |    "source": [
 305 |     "df['a'].apply(my_exp, e=4)"
 306 |    ]
 307 |   },
 308 |   {
 309 |    "cell_type": "code",
 310 |    "execution_count": 28,
 311 |    "metadata": {},
 312 |    "outputs": [],
 313 |    "source": [
 314 |     "def print_me(x):\n",
 315 |     "    print(x)"
 316 |    ]
 317 |   },
 318 |   {
 319 |    "cell_type": "code",
 320 |    "execution_count": 29,
 321 |    "metadata": {},
 322 |    "outputs": [
 323 |     {
 324 |      "name": "stdout",
 325 |      "output_type": "stream",
 326 |      "text": [
 327 |       "0    10\n",
 328 |       "1    20\n",
 329 |       "2    30\n",
 330 |       "Name: a, dtype: int64\n",
 331 |       "0    20\n",
 332 |       "1    30\n",
 333 |       "2    40\n",
 334 |       "Name: b, dtype: int64\n"
 335 |      ]
 336 |     },
 337 |     {
 338 |      "data": {
 339 |       "text/plain": [
 340 |        "a    None\n",
 341 |        "b    None\n",
 342 |        "dtype: object"
 343 |       ]
 344 |      },
 345 |      "execution_count": 29,
 346 |      "metadata": {},
 347 |      "output_type": "execute_result"
 348 |     }
 349 |    ],
 350 |    "source": [
 351 |     "df.apply(print_me)"
 352 |    ]
 353 |   },
 354 |   {
 355 |    "cell_type": "code",
 356 |    "execution_count": 32,
 357 |    "metadata": {},
 358 |    "outputs": [
 359 |     {
 360 |      "data": {
 361 |       "text/html": [
 362 |        "<div>\n",
 363 |        "<style scoped>\n",
 364 |        "    .dataframe tbody tr th:only-of-type {\n",
 365 |        "        vertical-align: middle;\n",
 366 |        "    }\n",
 367 |        "\n",
 368 |        "    .dataframe tbody tr th {\n",
 369 |        "        vertical-align: top;\n",
 370 |        "    }\n",
 371 |        "\n",
 372 |        "    .dataframe thead th {\n",
 373 |        "        text-align: right;\n",
 374 |        "    }\n",
 375 |        "</style>\n",
 376 |        "<table border=\"1\" class=\"dataframe\">\n",
 377 |        "  <thead>\n",
 378 |        "    <tr style=\"text-align: right;\">\n",
 379 |        "      <th></th>\n",
 380 |        "      <th>a</th>\n",
 381 |        "      <th>b</th>\n",
 382 |        "    </tr>\n",
 383 |        "  </thead>\n",
 384 |        "  <tbody>\n",
 385 |        "    <tr>\n",
 386 |        "      <th>0</th>\n",
 387 |        "      <td>10</td>\n",
 388 |        "      <td>20</td>\n",
 389 |        "    </tr>\n",
 390 |        "    <tr>\n",
 391 |        "      <th>1</th>\n",
 392 |        "      <td>20</td>\n",
 393 |        "      <td>30</td>\n",
 394 |        "    </tr>\n",
 395 |        "    <tr>\n",
 396 |        "      <th>2</th>\n",
 397 |        "      <td>30</td>\n",
 398 |        "      <td>40</td>\n",
 399 |        "    </tr>\n",
 400 |        "  </tbody>\n",
 401 |        "</table>\n",
 402 |        "</div>"
 403 |       ],
 404 |       "text/plain": [
 405 |        "    a   b\n",
 406 |        "0  10  20\n",
 407 |        "1  20  30\n",
 408 |        "2  30  40"
 409 |       ]
 410 |      },
 411 |      "execution_count": 32,
 412 |      "metadata": {},
 413 |      "output_type": "execute_result"
 414 |     }
 415 |    ],
 416 |    "source": [
 417 |     "df"
 418 |    ]
 419 |   },
 420 |   {
 421 |    "cell_type": "code",
 422 |    "execution_count": 30,
 423 |    "metadata": {},
 424 |    "outputs": [],
 425 |    "source": [
 426 |     "def avg_3(x, y, z):\n",
 427 |     "    return (x + y + z) / 3"
 428 |    ]
 429 |   },
 430 |   {
 431 |    "cell_type": "code",
 432 |    "execution_count": 31,
 433 |    "metadata": {},
 434 |    "outputs": [
 435 |     {
 436 |      "ename": "TypeError",
 437 |      "evalue": "(\"avg_3() missing 2 required positional arguments: 'y' and 'z'\", 'occurred at index a')",
 438 |      "output_type": "error",
 439 |      "traceback": [
 440 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
 441 |       "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
 442 |       "\u001b[0;32m<ipython-input-31-c00f379f766c>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mavg_3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
 443 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)\u001b[0m\n\u001b[1;32m   6485\u001b[0m                          \u001b[0margs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   6486\u001b[0m                          kwds=kwds)\n\u001b[0;32m-> 6487\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mop\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   6488\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   6489\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mapplymap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 444 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mget_result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    149\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_raw\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    150\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 151\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_standard\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    152\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    153\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mapply_empty_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 445 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mapply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    255\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    256\u001b[0m         \u001b[0;31m# compute the result using the series generator\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 257\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_series_generator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    258\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    259\u001b[0m         \u001b[0;31m# wrap results\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 446 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mapply_series_generator\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    284\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    285\u001b[0m                 \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mseries_gen\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 286\u001b[0;31m                     \u001b[0mresults\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    287\u001b[0m                     \u001b[0mkeys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    288\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 447 |       "\u001b[0;31mTypeError\u001b[0m: (\"avg_3() missing 2 required positional arguments: 'y' and 'z'\", 'occurred at index a')"
 448 |      ]
 449 |     }
 450 |    ],
 451 |    "source": [
 452 |     "df.apply(avg_3)"
 453 |    ]
 454 |   },
 455 |   {
 456 |    "cell_type": "code",
 457 |    "execution_count": 33,
 458 |    "metadata": {},
 459 |    "outputs": [],
 460 |    "source": [
 461 |     "import numpy as np"
 462 |    ]
 463 |   },
 464 |   {
 465 |    "cell_type": "code",
 466 |    "execution_count": 34,
 467 |    "metadata": {},
 468 |    "outputs": [],
 469 |    "source": [
 470 |     "def avg_3_apply(col):\n",
 471 |     "    return np.mean(col)"
 472 |    ]
 473 |   },
 474 |   {
 475 |    "cell_type": "code",
 476 |    "execution_count": 35,
 477 |    "metadata": {},
 478 |    "outputs": [
 479 |     {
 480 |      "data": {
 481 |       "text/plain": [
 482 |        "a    20.0\n",
 483 |        "b    30.0\n",
 484 |        "dtype: float64"
 485 |       ]
 486 |      },
 487 |      "execution_count": 35,
 488 |      "metadata": {},
 489 |      "output_type": "execute_result"
 490 |     }
 491 |    ],
 492 |    "source": [
 493 |     "df.apply(avg_3_apply)"
 494 |    ]
 495 |   },
 496 |   {
 497 |    "cell_type": "code",
 498 |    "execution_count": 40,
 499 |    "metadata": {},
 500 |    "outputs": [
 501 |     {
 502 |      "data": {
 503 |       "text/html": [
 504 |        "<div>\n",
 505 |        "<style scoped>\n",
 506 |        "    .dataframe tbody tr th:only-of-type {\n",
 507 |        "        vertical-align: middle;\n",
 508 |        "    }\n",
 509 |        "\n",
 510 |        "    .dataframe tbody tr th {\n",
 511 |        "        vertical-align: top;\n",
 512 |        "    }\n",
 513 |        "\n",
 514 |        "    .dataframe thead th {\n",
 515 |        "        text-align: right;\n",
 516 |        "    }\n",
 517 |        "</style>\n",
 518 |        "<table border=\"1\" class=\"dataframe\">\n",
 519 |        "  <thead>\n",
 520 |        "    <tr style=\"text-align: right;\">\n",
 521 |        "      <th></th>\n",
 522 |        "      <th>a</th>\n",
 523 |        "      <th>b</th>\n",
 524 |        "    </tr>\n",
 525 |        "  </thead>\n",
 526 |        "  <tbody>\n",
 527 |        "    <tr>\n",
 528 |        "      <th>0</th>\n",
 529 |        "      <td>10</td>\n",
 530 |        "      <td>20</td>\n",
 531 |        "    </tr>\n",
 532 |        "    <tr>\n",
 533 |        "      <th>1</th>\n",
 534 |        "      <td>20</td>\n",
 535 |        "      <td>30</td>\n",
 536 |        "    </tr>\n",
 537 |        "    <tr>\n",
 538 |        "      <th>2</th>\n",
 539 |        "      <td>30</td>\n",
 540 |        "      <td>40</td>\n",
 541 |        "    </tr>\n",
 542 |        "  </tbody>\n",
 543 |        "</table>\n",
 544 |        "</div>"
 545 |       ],
 546 |       "text/plain": [
 547 |        "    a   b\n",
 548 |        "0  10  20\n",
 549 |        "1  20  30\n",
 550 |        "2  30  40"
 551 |       ]
 552 |      },
 553 |      "execution_count": 40,
 554 |      "metadata": {},
 555 |      "output_type": "execute_result"
 556 |     }
 557 |    ],
 558 |    "source": [
 559 |     "df"
 560 |    ]
 561 |   },
 562 |   {
 563 |    "cell_type": "code",
 564 |    "execution_count": 37,
 565 |    "metadata": {},
 566 |    "outputs": [],
 567 |    "source": [
 568 |     "def avg_3_apply(col):\n",
 569 |     "    x = col[0]\n",
 570 |     "    y = col[1]\n",
 571 |     "    z = col[2]\n",
 572 |     "    return (x + y + z) / 3"
 573 |    ]
 574 |   },
 575 |   {
 576 |    "cell_type": "code",
 577 |    "execution_count": 38,
 578 |    "metadata": {},
 579 |    "outputs": [
 580 |     {
 581 |      "data": {
 582 |       "text/plain": [
 583 |        "a    20.0\n",
 584 |        "b    30.0\n",
 585 |        "dtype: float64"
 586 |       ]
 587 |      },
 588 |      "execution_count": 38,
 589 |      "metadata": {},
 590 |      "output_type": "execute_result"
 591 |     }
 592 |    ],
 593 |    "source": [
 594 |     "df.apply(avg_3_apply)"
 595 |    ]
 596 |   },
 597 |   {
 598 |    "cell_type": "code",
 599 |    "execution_count": 39,
 600 |    "metadata": {},
 601 |    "outputs": [
 602 |     {
 603 |      "ename": "IndexError",
 604 |      "evalue": "('index out of bounds', 'occurred at index 0')",
 605 |      "output_type": "error",
 606 |      "traceback": [
 607 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
 608 |       "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
 609 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_value\u001b[0;34m(self, series, key)\u001b[0m\n\u001b[1;32m   4374\u001b[0m             return self._engine.get_value(s, k,\n\u001b[0;32m-> 4375\u001b[0;31m                                           tz=getattr(series.dtype, 'tz', None))\n\u001b[0m\u001b[1;32m   4376\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 610 |       "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_value\u001b[0;34m()\u001b[0m\n",
 611 |       "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_value\u001b[0;34m()\u001b[0m\n",
 612 |       "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
 613 |       "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
 614 |       "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
 615 |       "\u001b[0;31mKeyError\u001b[0m: 2",
 616 |       "\nDuring handling of the above exception, another exception occurred:\n",
 617 |       "\u001b[0;31mIndexError\u001b[0m                                Traceback (most recent call last)",
 618 |       "\u001b[0;32m<ipython-input-39-6adb37282b5f>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mavg_3_apply\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'columns'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
 619 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)\u001b[0m\n\u001b[1;32m   6485\u001b[0m                          \u001b[0margs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   6486\u001b[0m                          kwds=kwds)\n\u001b[0;32m-> 6487\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mop\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   6488\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   6489\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mapplymap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 620 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mget_result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    149\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_raw\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    150\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 151\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_standard\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    152\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    153\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mapply_empty_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 621 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mapply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    255\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    256\u001b[0m         \u001b[0;31m# compute the result using the series generator\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 257\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_series_generator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    258\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    259\u001b[0m         \u001b[0;31m# wrap results\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 622 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mapply_series_generator\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    284\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    285\u001b[0m                 \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mseries_gen\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 286\u001b[0;31m                     \u001b[0mresults\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    287\u001b[0m                     \u001b[0mkeys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    288\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 623 |       "\u001b[0;32m<ipython-input-37-dc85aaf18e74>\u001b[0m in \u001b[0;36mavg_3_apply\u001b[0;34m(col)\u001b[0m\n\u001b[1;32m      2\u001b[0m     \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcol\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m     \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcol\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m     \u001b[0mz\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcol\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      5\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mz\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 624 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m    866\u001b[0m         \u001b[0mkey\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_if_callable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    867\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 868\u001b[0;31m             \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_value\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    869\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    870\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mis_scalar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 625 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_value\u001b[0;34m(self, series, key)\u001b[0m\n\u001b[1;32m   4379\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   4380\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4381\u001b[0;31m                 \u001b[0;32mreturn\u001b[0m \u001b[0mlibindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_value_box\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   4382\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mIndexError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   4383\u001b[0m                 \u001b[0;32mraise\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 626 |       "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.get_value_box\u001b[0;34m()\u001b[0m\n",
 627 |       "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.get_value_at\u001b[0;34m()\u001b[0m\n",
 628 |       "\u001b[0;32mpandas/_libs/util.pxd\u001b[0m in \u001b[0;36mpandas._libs.util.get_value_at\u001b[0;34m()\u001b[0m\n",
 629 |       "\u001b[0;32mpandas/_libs/util.pxd\u001b[0m in \u001b[0;36mpandas._libs.util.validate_indexer\u001b[0;34m()\u001b[0m\n",
 630 |       "\u001b[0;31mIndexError\u001b[0m: ('index out of bounds', 'occurred at index 0')"
 631 |      ]
 632 |     }
 633 |    ],
 634 |    "source": [
 635 |     "df.apply(avg_3_apply, axis='columns')"
 636 |    ]
 637 |   },
 638 |   {
 639 |    "cell_type": "code",
 640 |    "execution_count": 41,
 641 |    "metadata": {},
 642 |    "outputs": [
 643 |     {
 644 |      "data": {
 645 |       "text/plain": [
 646 |        "20.0"
 647 |       ]
 648 |      },
 649 |      "execution_count": 41,
 650 |      "metadata": {},
 651 |      "output_type": "execute_result"
 652 |     }
 653 |    ],
 654 |    "source": [
 655 |     "df['a'].mean()"
 656 |    ]
 657 |   },
 658 |   {
 659 |    "cell_type": "code",
 660 |    "execution_count": 42,
 661 |    "metadata": {},
 662 |    "outputs": [
 663 |     {
 664 |      "data": {
 665 |       "text/plain": [
 666 |        "0    30\n",
 667 |        "1    50\n",
 668 |        "2    70\n",
 669 |        "dtype: int64"
 670 |       ]
 671 |      },
 672 |      "execution_count": 42,
 673 |      "metadata": {},
 674 |      "output_type": "execute_result"
 675 |     }
 676 |    ],
 677 |    "source": [
 678 |     "df['a'] + df['b']"
 679 |    ]
 680 |   },
 681 |   {
 682 |    "cell_type": "code",
 683 |    "execution_count": 45,
 684 |    "metadata": {},
 685 |    "outputs": [],
 686 |    "source": [
 687 |     "def avg_2_mod(x, y):\n",
 688 |     "    if (x == 20):\n",
 689 |     "        return np.NaN  # np.NAN and np.nan are the same value; only np.nan still exists in NumPy 2.0\n",
 690 |     "    else:\n",
 691 |     "        return(x + y) / 2"
 692 |    ]
 693 |   },
 694 |   {
 695 |    "cell_type": "code",
 696 |    "execution_count": 46,
 697 |    "metadata": {},
 698 |    "outputs": [
 699 |     {
 700 |      "data": {
 701 |       "text/html": [
 702 |        "<div>\n",
 703 |        "<style scoped>\n",
 704 |        "    .dataframe tbody tr th:only-of-type {\n",
 705 |        "        vertical-align: middle;\n",
 706 |        "    }\n",
 707 |        "\n",
 708 |        "    .dataframe tbody tr th {\n",
 709 |        "        vertical-align: top;\n",
 710 |        "    }\n",
 711 |        "\n",
 712 |        "    .dataframe thead th {\n",
 713 |        "        text-align: right;\n",
 714 |        "    }\n",
 715 |        "</style>\n",
 716 |        "<table border=\"1\" class=\"dataframe\">\n",
 717 |        "  <thead>\n",
 718 |        "    <tr style=\"text-align: right;\">\n",
 719 |        "      <th></th>\n",
 720 |        "      <th>a</th>\n",
 721 |        "      <th>b</th>\n",
 722 |        "    </tr>\n",
 723 |        "  </thead>\n",
 724 |        "  <tbody>\n",
 725 |        "    <tr>\n",
 726 |        "      <th>0</th>\n",
 727 |        "      <td>10</td>\n",
 728 |        "      <td>20</td>\n",
 729 |        "    </tr>\n",
 730 |        "    <tr>\n",
 731 |        "      <th>1</th>\n",
 732 |        "      <td>20</td>\n",
 733 |        "      <td>30</td>\n",
 734 |        "    </tr>\n",
 735 |        "    <tr>\n",
 736 |        "      <th>2</th>\n",
 737 |        "      <td>30</td>\n",
 738 |        "      <td>40</td>\n",
 739 |        "    </tr>\n",
 740 |        "  </tbody>\n",
 741 |        "</table>\n",
 742 |        "</div>"
 743 |       ],
 744 |       "text/plain": [
 745 |        "    a   b\n",
 746 |        "0  10  20\n",
 747 |        "1  20  30\n",
 748 |        "2  30  40"
 749 |       ]
 750 |      },
 751 |      "execution_count": 46,
 752 |      "metadata": {},
 753 |      "output_type": "execute_result"
 754 |     }
 755 |    ],
 756 |    "source": [
 757 |     "df"
 758 |    ]
 759 |   },
 760 |   {
 761 |    "cell_type": "code",
 762 |    "execution_count": 47,
 763 |    "metadata": {},
 764 |    "outputs": [
 765 |     {
 766 |      "ename": "ValueError",
 767 |      "evalue": "The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().",
 768 |      "output_type": "error",
 769 |      "traceback": [
 770 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
 771 |       "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
 772 |       "\u001b[0;32m<ipython-input-47-07c16a2d06e5>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mavg_2_mod\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'a'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'b'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
 773 |       "\u001b[0;32m<ipython-input-45-73c4e761e491>\u001b[0m in \u001b[0;36mavg_2_mod\u001b[0;34m(x, y)\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mavg_2_mod\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m     \u001b[0;32mif\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m20\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      3\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mNaN\u001b[0m \u001b[0;31m#np.NAN np.nan\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m     \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m         \u001b[0;32mreturn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 774 |       "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m__nonzero__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m   1476\u001b[0m         raise ValueError(\"The truth value of a {0} is ambiguous. \"\n\u001b[1;32m   1477\u001b[0m                          \u001b[0;34m\"Use a.empty, a.bool(), a.item(), a.any() or a.all().\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1478\u001b[0;31m                          .format(self.__class__.__name__))\n\u001b[0m\u001b[1;32m   1479\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1480\u001b[0m     \u001b[0m__bool__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m__nonzero__\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 775 |       "\u001b[0;31mValueError\u001b[0m: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all()."
 776 |      ]
 777 |     }
 778 |    ],
 779 |    "source": [
 780 |     "avg_2_mod(df['a'], df['b'])"
 781 |    ]
 782 |   },
 783 |   {
 784 |    "cell_type": "code",
 785 |    "execution_count": 48,
 786 |    "metadata": {},
 787 |    "outputs": [],
 788 |    "source": [
 789 |     "import numpy as np"
 790 |    ]
 791 |   },
 792 |   {
 793 |    "cell_type": "code",
 794 |    "execution_count": 49,
 795 |    "metadata": {},
 796 |    "outputs": [],
 797 |    "source": [
 798 |     "avg_2_mod_vec = np.vectorize(avg_2_mod)"
 799 |    ]
 800 |   },
 801 |   {
 802 |    "cell_type": "code",
 803 |    "execution_count": 51,
 804 |    "metadata": {},
 805 |    "outputs": [
 806 |     {
 807 |      "data": {
 808 |       "text/html": [
 809 |        "<div>\n",
 810 |        "<style scoped>\n",
 811 |        "    .dataframe tbody tr th:only-of-type {\n",
 812 |        "        vertical-align: middle;\n",
 813 |        "    }\n",
 814 |        "\n",
 815 |        "    .dataframe tbody tr th {\n",
 816 |        "        vertical-align: top;\n",
 817 |        "    }\n",
 818 |        "\n",
 819 |        "    .dataframe thead th {\n",
 820 |        "        text-align: right;\n",
 821 |        "    }\n",
 822 |        "</style>\n",
 823 |        "<table border=\"1\" class=\"dataframe\">\n",
 824 |        "  <thead>\n",
 825 |        "    <tr style=\"text-align: right;\">\n",
 826 |        "      <th></th>\n",
 827 |        "      <th>a</th>\n",
 828 |        "      <th>b</th>\n",
 829 |        "    </tr>\n",
 830 |        "  </thead>\n",
 831 |        "  <tbody>\n",
 832 |        "    <tr>\n",
 833 |        "      <th>0</th>\n",
 834 |        "      <td>10</td>\n",
 835 |        "      <td>20</td>\n",
 836 |        "    </tr>\n",
 837 |        "    <tr>\n",
 838 |        "      <th>1</th>\n",
 839 |        "      <td>20</td>\n",
 840 |        "      <td>30</td>\n",
 841 |        "    </tr>\n",
 842 |        "    <tr>\n",
 843 |        "      <th>2</th>\n",
 844 |        "      <td>30</td>\n",
 845 |        "      <td>40</td>\n",
 846 |        "    </tr>\n",
 847 |        "  </tbody>\n",
 848 |        "</table>\n",
 849 |        "</div>"
 850 |       ],
 851 |       "text/plain": [
 852 |        "    a   b\n",
 853 |        "0  10  20\n",
 854 |        "1  20  30\n",
 855 |        "2  30  40"
 856 |       ]
 857 |      },
 858 |      "execution_count": 51,
 859 |      "metadata": {},
 860 |      "output_type": "execute_result"
 861 |     }
 862 |    ],
 863 |    "source": [
 864 |     "df"
 865 |    ]
 866 |   },
 867 |   {
 868 |    "cell_type": "code",
 869 |    "execution_count": 50,
 870 |    "metadata": {},
 871 |    "outputs": [
 872 |     {
 873 |      "data": {
 874 |       "text/plain": [
 875 |        "array([15., nan, 35.])"
 876 |       ]
 877 |      },
 878 |      "execution_count": 50,
 879 |      "metadata": {},
 880 |      "output_type": "execute_result"
 881 |     }
 882 |    ],
 883 |    "source": [
 884 |     "avg_2_mod_vec(df['a'], df['b'])"
 885 |    ]
 886 |   },
 887 |   {
 888 |    "cell_type": "code",
 889 |    "execution_count": 53,
 890 |    "metadata": {},
 891 |    "outputs": [],
 892 |    "source": [
 893 |     "@np.vectorize\n",
 894 |     "def avg_2_mod(x, y):\n",
 895 |     "    if (x == 20):\n",
 896 |     "        return np.NaN  # np.NAN and np.nan are the same value; only np.nan still exists in NumPy 2.0\n",
 897 |     "    else:\n",
 898 |     "        return(x + y) / 2"
 899 |    ]
 900 |   },
 901 |   {
 902 |    "cell_type": "code",
 903 |    "execution_count": 54,
 904 |    "metadata": {},
 905 |    "outputs": [
 906 |     {
 907 |      "data": {
 908 |       "text/plain": [
 909 |        "array([15., nan, 35.])"
 910 |       ]
 911 |      },
 912 |      "execution_count": 54,
 913 |      "metadata": {},
 914 |      "output_type": "execute_result"
 915 |     }
 916 |    ],
 917 |    "source": [
 918 |     "avg_2_mod(df['a'], df['b'])"
 919 |    ]
 920 |   },
 921 |   {
 922 |    "cell_type": "code",
 923 |    "execution_count": 55,
 924 |    "metadata": {},
 925 |    "outputs": [],
 926 |    "source": [
 927 |     "import numba"
 928 |    ]
 929 |   },
 930 |   {
 931 |    "cell_type": "code",
 932 |    "execution_count": 59,
 933 |    "metadata": {},
 934 |    "outputs": [],
 935 |    "source": [
 936 |     "@numba.vectorize\n",
 937 |     "def avg_2_mod_numba(x, y):\n",
 938 |     "    if (x == 20):\n",
 939 |     "        return np.NaN\n",
 940 |     "    else:\n",
 941 |     "        return(x + y) / 2"
 942 |    ]
 943 |   },
 944 |   {
 945 |    "cell_type": "code",
 946 |    "execution_count": 60,
 947 |    "metadata": {},
 948 |    "outputs": [
 949 |     {
 950 |      "data": {
 951 |       "text/plain": [
 952 |        "array([15., nan, 35.])"
 953 |       ]
 954 |      },
 955 |      "execution_count": 60,
 956 |      "metadata": {},
 957 |      "output_type": "execute_result"
 958 |     }
 959 |    ],
 960 |    "source": [
 961 |     "avg_2_mod_numba(df['a'].values, df['b'].values)"
 962 |    ]
 963 |   },
 964 |   {
 965 |    "cell_type": "code",
 966 |    "execution_count": 62,
 967 |    "metadata": {},
 968 |    "outputs": [
 969 |     {
 970 |      "name": "stdout",
 971 |      "output_type": "stream",
 972 |      "text": [
 973 |       "445 µs ± 7.79 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
 974 |      ]
 975 |     }
 976 |    ],
 977 |    "source": [
 978 |     "%%timeit\n",
 979 |     "avg_2(df['a'], df['b'])"
 980 |    ]
 981 |   },
 982 |   {
 983 |    "cell_type": "code",
 984 |    "execution_count": 63,
 985 |    "metadata": {},
 986 |    "outputs": [
 987 |     {
 988 |      "name": "stdout",
 989 |      "output_type": "stream",
 990 |      "text": [
 991 |       "211 µs ± 7.73 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
 992 |      ]
 993 |     }
 994 |    ],
 995 |    "source": [
 996 |     "%%timeit\n",
 997 |     "avg_2_mod(df['a'], df['b'])"
 998 |    ]
 999 |   },
1000 |   {
1001 |    "cell_type": "code",
1002 |    "execution_count": 64,
1003 |    "metadata": {},
1004 |    "outputs": [
1005 |     {
1006 |      "name": "stdout",
1007 |      "output_type": "stream",
1008 |      "text": [
1009 |       "8.01 µs ± 226 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)\n"
1010 |      ]
1011 |     }
1012 |    ],
1013 |    "source": [
1014 |     "%%timeit\n",
1015 |     "avg_2_mod_numba(df['a'].values, df['b'].values)"
1016 |    ]
1017 |   },
1018 |   {
1019 |    "cell_type": "code",
1020 |    "execution_count": null,
1021 |    "metadata": {},
1022 |    "outputs": [],
1023 |    "source": []
1024 |   }
1025 |  ],
1026 |  "metadata": {
1027 |   "kernelspec": {
1028 |    "display_name": "Python 3",
1029 |    "language": "python",
1030 |    "name": "python3"
1031 |   },
1032 |   "language_info": {
1033 |    "codemirror_mode": {
1034 |     "name": "ipython",
1035 |     "version": 3
1036 |    },
1037 |    "file_extension": ".py",
1038 |    "mimetype": "text/x-python",
1039 |    "name": "python",
1040 |    "nbconvert_exporter": "python",
1041 |    "pygments_lexer": "ipython3",
1042 |    "version": "3.7.3"
1043 |   }
1044 |  },
1045 |  "nbformat": 4,
1046 |  "nbformat_minor": 2
1047 | }
1048 | 


--------------------------------------------------------------------------------
/notebooks/05-model.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 3,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import pandas as pd\n",
 10 |     "import seaborn as sns\n",
 11 |     "from sklearn import linear_model"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": 4,
 17 |    "metadata": {},
 18 |    "outputs": [],
 19 |    "source": [
 20 |     "tips = sns.load_dataset('tips')"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "code",
 25 |    "execution_count": 5,
 26 |    "metadata": {},
 27 |    "outputs": [
 28 |     {
 29 |      "data": {
 30 |       "text/html": [
 31 |        "<div>\n",
 32 |        "<style scoped>\n",
 33 |        "    .dataframe tbody tr th:only-of-type {\n",
 34 |        "        vertical-align: middle;\n",
 35 |        "    }\n",
 36 |        "\n",
 37 |        "    .dataframe tbody tr th {\n",
 38 |        "        vertical-align: top;\n",
 39 |        "    }\n",
 40 |        "\n",
 41 |        "    .dataframe thead th {\n",
 42 |        "        text-align: right;\n",
 43 |        "    }\n",
 44 |        "</style>\n",
 45 |        "<table border=\"1\" class=\"dataframe\">\n",
 46 |        "  <thead>\n",
 47 |        "    <tr style=\"text-align: right;\">\n",
 48 |        "      <th></th>\n",
 49 |        "      <th>total_bill</th>\n",
 50 |        "      <th>tip</th>\n",
 51 |        "      <th>sex</th>\n",
 52 |        "      <th>smoker</th>\n",
 53 |        "      <th>day</th>\n",
 54 |        "      <th>time</th>\n",
 55 |        "      <th>size</th>\n",
 56 |        "    </tr>\n",
 57 |        "  </thead>\n",
 58 |        "  <tbody>\n",
 59 |        "    <tr>\n",
 60 |        "      <th>0</th>\n",
 61 |        "      <td>16.99</td>\n",
 62 |        "      <td>1.01</td>\n",
 63 |        "      <td>Female</td>\n",
 64 |        "      <td>No</td>\n",
 65 |        "      <td>Sun</td>\n",
 66 |        "      <td>Dinner</td>\n",
 67 |        "      <td>2</td>\n",
 68 |        "    </tr>\n",
 69 |        "    <tr>\n",
 70 |        "      <th>1</th>\n",
 71 |        "      <td>10.34</td>\n",
 72 |        "      <td>1.66</td>\n",
 73 |        "      <td>Male</td>\n",
 74 |        "      <td>No</td>\n",
 75 |        "      <td>Sun</td>\n",
 76 |        "      <td>Dinner</td>\n",
 77 |        "      <td>3</td>\n",
 78 |        "    </tr>\n",
 79 |        "    <tr>\n",
 80 |        "      <th>2</th>\n",
 81 |        "      <td>21.01</td>\n",
 82 |        "      <td>3.50</td>\n",
 83 |        "      <td>Male</td>\n",
 84 |        "      <td>No</td>\n",
 85 |        "      <td>Sun</td>\n",
 86 |        "      <td>Dinner</td>\n",
 87 |        "      <td>3</td>\n",
 88 |        "    </tr>\n",
 89 |        "    <tr>\n",
 90 |        "      <th>3</th>\n",
 91 |        "      <td>23.68</td>\n",
 92 |        "      <td>3.31</td>\n",
 93 |        "      <td>Male</td>\n",
 94 |        "      <td>No</td>\n",
 95 |        "      <td>Sun</td>\n",
 96 |        "      <td>Dinner</td>\n",
 97 |        "      <td>2</td>\n",
 98 |        "    </tr>\n",
 99 |        "    <tr>\n",
100 |        "      <th>4</th>\n",
101 |        "      <td>24.59</td>\n",
102 |        "      <td>3.61</td>\n",
103 |        "      <td>Female</td>\n",
104 |        "      <td>No</td>\n",
105 |        "      <td>Sun</td>\n",
106 |        "      <td>Dinner</td>\n",
107 |        "      <td>4</td>\n",
108 |        "    </tr>\n",
109 |        "  </tbody>\n",
110 |        "</table>\n",
111 |        "</div>"
112 |       ],
113 |       "text/plain": [
114 |        "   total_bill   tip     sex smoker  day    time  size\n",
115 |        "0       16.99  1.01  Female     No  Sun  Dinner     2\n",
116 |        "1       10.34  1.66    Male     No  Sun  Dinner     3\n",
117 |        "2       21.01  3.50    Male     No  Sun  Dinner     3\n",
118 |        "3       23.68  3.31    Male     No  Sun  Dinner     2\n",
119 |        "4       24.59  3.61  Female     No  Sun  Dinner     4"
120 |       ]
121 |      },
122 |      "execution_count": 5,
123 |      "metadata": {},
124 |      "output_type": "execute_result"
125 |     }
126 |    ],
127 |    "source": [
128 |     "tips.head()"
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "code",
133 |    "execution_count": 6,
134 |    "metadata": {},
135 |    "outputs": [],
136 |    "source": [
137 |     "lr = linear_model.LinearRegression()"
138 |    ]
139 |   },
140 |   {
141 |    "cell_type": "code",
142 |    "execution_count": 11,
143 |    "metadata": {},
144 |    "outputs": [
145 |     {
146 |      "data": {
147 |       "text/plain": [
148 |        "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n",
149 |        "         normalize=False)"
150 |       ]
151 |      },
152 |      "execution_count": 11,
153 |      "metadata": {},
154 |      "output_type": "execute_result"
155 |     }
156 |    ],
157 |    "source": [
158 |     "lr.fit(X=tips[['total_bill', 'size']], y=tips['tip'])"
159 |    ]
160 |   },
161 |   {
162 |    "cell_type": "code",
163 |    "execution_count": 8,
164 |    "metadata": {},
165 |    "outputs": [
166 |     {
167 |      "data": {
168 |       "text/plain": [
169 |        "array([0.09271334, 0.19259779])"
170 |       ]
171 |      },
172 |      "execution_count": 8,
173 |      "metadata": {},
174 |      "output_type": "execute_result"
175 |     }
176 |    ],
177 |    "source": [
178 |     "lr.coef_"
179 |    ]
180 |   },
181 |   {
182 |    "cell_type": "code",
183 |    "execution_count": 9,
184 |    "metadata": {},
185 |    "outputs": [
186 |     {
187 |      "data": {
188 |       "text/plain": [
189 |        "0.6689447408125027"
190 |       ]
191 |      },
192 |      "execution_count": 9,
193 |      "metadata": {},
194 |      "output_type": "execute_result"
195 |     }
196 |    ],
197 |    "source": [
198 |     "lr.intercept_"
199 |    ]
200 |   },
201 |   {
202 |    "cell_type": "code",
203 |    "execution_count": 12,
204 |    "metadata": {},
205 |    "outputs": [],
206 |    "source": [
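207 |     "# dummy encoding: one indicator column per category level, minus one reference level (drop_first=True)\n",
208 |     "# one-hot encoding: one indicator column for every category level"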
209 |    ]
210 |   },
211 |   {
212 |    "cell_type": "code",
213 |    "execution_count": 16,
214 |    "metadata": {},
215 |    "outputs": [
216 |     {
217 |      "data": {
218 |       "text/html": [
219 |        "<div>\n",
220 |        "<style scoped>\n",
221 |        "    .dataframe tbody tr th:only-of-type {\n",
222 |        "        vertical-align: middle;\n",
223 |        "    }\n",
224 |        "\n",
225 |        "    .dataframe tbody tr th {\n",
226 |        "        vertical-align: top;\n",
227 |        "    }\n",
228 |        "\n",
229 |        "    .dataframe thead th {\n",
230 |        "        text-align: right;\n",
231 |        "    }\n",
232 |        "</style>\n",
233 |        "<table border=\"1\" class=\"dataframe\">\n",
234 |        "  <thead>\n",
235 |        "    <tr style=\"text-align: right;\">\n",
236 |        "      <th></th>\n",
237 |        "      <th>total_bill</th>\n",
238 |        "      <th>tip</th>\n",
239 |        "      <th>size</th>\n",
240 |        "      <th>sex_Female</th>\n",
241 |        "      <th>smoker_No</th>\n",
242 |        "      <th>day_Fri</th>\n",
243 |        "      <th>day_Sat</th>\n",
244 |        "      <th>day_Sun</th>\n",
245 |        "      <th>time_Dinner</th>\n",
246 |        "    </tr>\n",
247 |        "  </thead>\n",
248 |        "  <tbody>\n",
249 |        "    <tr>\n",
250 |        "      <th>0</th>\n",
251 |        "      <td>16.99</td>\n",
252 |        "      <td>1.01</td>\n",
253 |        "      <td>2</td>\n",
254 |        "      <td>1</td>\n",
255 |        "      <td>1</td>\n",
256 |        "      <td>0</td>\n",
257 |        "      <td>0</td>\n",
258 |        "      <td>1</td>\n",
259 |        "      <td>1</td>\n",
260 |        "    </tr>\n",
261 |        "    <tr>\n",
262 |        "      <th>1</th>\n",
263 |        "      <td>10.34</td>\n",
264 |        "      <td>1.66</td>\n",
265 |        "      <td>3</td>\n",
266 |        "      <td>0</td>\n",
267 |        "      <td>1</td>\n",
268 |        "      <td>0</td>\n",
269 |        "      <td>0</td>\n",
270 |        "      <td>1</td>\n",
271 |        "      <td>1</td>\n",
272 |        "    </tr>\n",
273 |        "    <tr>\n",
274 |        "      <th>2</th>\n",
275 |        "      <td>21.01</td>\n",
276 |        "      <td>3.50</td>\n",
277 |        "      <td>3</td>\n",
278 |        "      <td>0</td>\n",
279 |        "      <td>1</td>\n",
280 |        "      <td>0</td>\n",
281 |        "      <td>0</td>\n",
282 |        "      <td>1</td>\n",
283 |        "      <td>1</td>\n",
284 |        "    </tr>\n",
285 |        "    <tr>\n",
286 |        "      <th>3</th>\n",
287 |        "      <td>23.68</td>\n",
288 |        "      <td>3.31</td>\n",
289 |        "      <td>2</td>\n",
290 |        "      <td>0</td>\n",
291 |        "      <td>1</td>\n",
292 |        "      <td>0</td>\n",
293 |        "      <td>0</td>\n",
294 |        "      <td>1</td>\n",
295 |        "      <td>1</td>\n",
296 |        "    </tr>\n",
297 |        "    <tr>\n",
298 |        "      <th>4</th>\n",
299 |        "      <td>24.59</td>\n",
300 |        "      <td>3.61</td>\n",
301 |        "      <td>4</td>\n",
302 |        "      <td>1</td>\n",
303 |        "      <td>1</td>\n",
304 |        "      <td>0</td>\n",
305 |        "      <td>0</td>\n",
306 |        "      <td>1</td>\n",
307 |        "      <td>1</td>\n",
308 |        "    </tr>\n",
309 |        "  </tbody>\n",
310 |        "</table>\n",
311 |        "</div>"
312 |       ],
313 |       "text/plain": [
314 |        "   total_bill   tip  size  sex_Female  smoker_No  day_Fri  day_Sat  day_Sun  \\\n",
315 |        "0       16.99  1.01     2           1          1        0        0        1   \n",
316 |        "1       10.34  1.66     3           0          1        0        0        1   \n",
317 |        "2       21.01  3.50     3           0          1        0        0        1   \n",
318 |        "3       23.68  3.31     2           0          1        0        0        1   \n",
319 |        "4       24.59  3.61     4           1          1        0        0        1   \n",
320 |        "\n",
321 |        "   time_Dinner  \n",
322 |        "0            1  \n",
323 |        "1            1  \n",
324 |        "2            1  \n",
325 |        "3            1  \n",
326 |        "4            1  "
327 |       ]
328 |      },
329 |      "execution_count": 16,
330 |      "metadata": {},
331 |      "output_type": "execute_result"
332 |     }
333 |    ],
334 |    "source": [
335 |     "tips_dummy = pd.get_dummies(tips, drop_first=True)\n",
336 |     "tips_dummy.head()"
337 |    ]
338 |   },
339 |   {
340 |    "cell_type": "code",
341 |    "execution_count": 19,
342 |    "metadata": {},
343 |    "outputs": [
344 |     {
345 |      "data": {
346 |       "text/plain": [
347 |        "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n",
348 |        "         normalize=False)"
349 |       ]
350 |      },
351 |      "execution_count": 19,
352 |      "metadata": {},
353 |      "output_type": "execute_result"
354 |     }
355 |    ],
356 |    "source": [
357 |     "lr = linear_model.LinearRegression()\n",
358 |     "lr.fit(X=tips_dummy.iloc[:, 2:], y=tips_dummy['tip'])"
359 |    ]
360 |   },
361 |   {
362 |    "cell_type": "code",
363 |    "execution_count": 20,
364 |    "metadata": {},
365 |    "outputs": [
366 |     {
367 |      "data": {
368 |       "text/plain": [
369 |        "array([ 0.71001644, -0.10057881, -0.20916402, -0.20180568, -0.36603136,\n",
370 |        "       -0.29452609,  0.48575489])"
371 |       ]
372 |      },
373 |      "execution_count": 20,
374 |      "metadata": {},
375 |      "output_type": "execute_result"
376 |     }
377 |    ],
378 |    "source": [
379 |     "lr.coef_"
380 |    ]
381 |   },
382 |   {
383 |    "cell_type": "code",
384 |    "execution_count": null,
385 |    "metadata": {},
386 |    "outputs": [],
387 |    "source": []
388 |   }
389 |  ],
390 |  "metadata": {
391 |   "kernelspec": {
392 |    "display_name": "Python 3",
393 |    "language": "python",
394 |    "name": "python3"
395 |   },
396 |   "language_info": {
397 |    "codemirror_mode": {
398 |     "name": "ipython",
399 |     "version": 3
400 |    },
401 |    "file_extension": ".py",
402 |    "mimetype": "text/x-python",
403 |    "name": "python",
404 |    "nbconvert_exporter": "python",
405 |    "pygments_lexer": "ipython3",
406 |    "version": "3.7.3"
407 |   }
408 |  },
409 |  "nbformat": 4,
410 |  "nbformat_minor": 2
411 | }
412 | 


--------------------------------------------------------------------------------
/notes/03-apply.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "cell_type": "code",
  5 |       "source": [
  6 |         "# Writing a Python function"
  7 |       ],
  8 |       "outputs": [],
  9 |       "execution_count": 1,
 10 |       "metadata": {}
 11 |     },
 12 |     {
 13 |       "cell_type": "code",
 14 |       "source": [
 15 |         "def my_function():\n",
 16 |         "    pass"
 17 |       ],
 18 |       "outputs": [],
 19 |       "execution_count": 2,
 20 |       "metadata": {}
 21 |     },
 22 |     {
 23 |       "cell_type": "code",
 24 |       "source": [
 25 |         "def my_sq(x):\n",
 26 |         "    return x ** 2"
 27 |       ],
 28 |       "outputs": [],
 29 |       "execution_count": 3,
 30 |       "metadata": {}
 31 |     },
 32 |     {
 33 |       "cell_type": "code",
 34 |       "source": [
 35 |         "my_sq(4)"
 36 |       ],
 37 |       "outputs": [
 38 |         {
 39 |           "output_type": "execute_result",
 40 |           "execution_count": 4,
 41 |           "data": {
 42 |             "text/plain": [
 43 |               "16"
 44 |             ]
 45 |           },
 46 |           "metadata": {}
 47 |         }
 48 |       ],
 49 |       "execution_count": 4,
 50 |       "metadata": {}
 51 |     },
 52 |     {
 53 |       "cell_type": "code",
 54 |       "source": [
 55 |         "assert my_sq(4) == 16"
 56 |       ],
 57 |       "outputs": [],
 58 |       "execution_count": 5,
 59 |       "metadata": {}
 60 |     },
 61 |     {
 62 |       "cell_type": "code",
 63 |       "source": [
 64 |         "def avg_2(x, y):\n",
 65 |         "    return (x + y) / 2"
 66 |       ],
 67 |       "outputs": [],
 68 |       "execution_count": 6,
 69 |       "metadata": {}
 70 |     },
 71 |     {
 72 |       "cell_type": "code",
 73 |       "source": [
 74 |         "avg_2(10, 20)"
 75 |       ],
 76 |       "outputs": [
 77 |         {
 78 |           "output_type": "execute_result",
 79 |           "execution_count": 7,
 80 |           "data": {
 81 |             "text/plain": [
 82 |               "15.0"
 83 |             ]
 84 |           },
 85 |           "metadata": {}
 86 |         }
 87 |       ],
 88 |       "execution_count": 7,
 89 |       "metadata": {}
 90 |     },
 91 |     {
 92 |       "cell_type": "code",
 93 |       "source": [
 94 |         "import pandas as pd"
 95 |       ],
 96 |       "outputs": [],
 97 |       "execution_count": 8,
 98 |       "metadata": {}
 99 |     },
100 |     {
101 |       "cell_type": "code",
102 |       "source": [
103 |         "df = pd.DataFrame({\n",
104 |         "    'a': [10, 20, 30],\n",
105 |         "    'b': [20, 30, 40]\n",
106 |         "})"
107 |       ],
108 |       "outputs": [],
109 |       "execution_count": 9,
110 |       "metadata": {}
111 |     },
112 |     {
113 |       "cell_type": "code",
114 |       "source": [
115 |         "df['a'] ** 2"
116 |       ],
117 |       "outputs": [
118 |         {
119 |           "output_type": "execute_result",
120 |           "execution_count": 10,
121 |           "data": {
122 |             "text/plain": [
123 |               "0    100\n",
124 |               "1    400\n",
125 |               "2    900\n",
126 |               "Name: a, dtype: int64"
127 |             ]
128 |           },
129 |           "metadata": {}
130 |         }
131 |       ],
132 |       "execution_count": 10,
133 |       "metadata": {}
134 |     },
135 |     {
136 |       "cell_type": "code",
137 |       "source": [
138 |         "df['a'].apply(my_sq)"
139 |       ],
140 |       "outputs": [
141 |         {
142 |           "output_type": "execute_result",
143 |           "execution_count": 11,
144 |           "data": {
145 |             "text/plain": [
146 |               "0    100\n",
147 |               "1    400\n",
148 |               "2    900\n",
149 |               "Name: a, dtype: int64"
150 |             ]
151 |           },
152 |           "metadata": {}
153 |         }
154 |       ],
155 |       "execution_count": 11,
156 |       "metadata": {}
157 |     },
158 |     {
159 |       "cell_type": "code",
160 |       "source": [
161 |         "def my_exp(x, e):\n",
162 |         "    return x ** e"
163 |       ],
164 |       "outputs": [],
165 |       "execution_count": 12,
166 |       "metadata": {}
167 |     },
168 |     {
169 |       "cell_type": "code",
170 |       "source": [
171 |         "my_exp(4, 2)"
172 |       ],
173 |       "outputs": [
174 |         {
175 |           "output_type": "execute_result",
176 |           "execution_count": 13,
177 |           "data": {
178 |             "text/plain": [
179 |               "16"
180 |             ]
181 |           },
182 |           "metadata": {}
183 |         }
184 |       ],
185 |       "execution_count": 13,
186 |       "metadata": {}
187 |     },
188 |     {
189 |       "cell_type": "code",
190 |       "source": [
191 |         "my_exp(4, 3)"
192 |       ],
193 |       "outputs": [
194 |         {
195 |           "output_type": "execute_result",
196 |           "execution_count": 14,
197 |           "data": {
198 |             "text/plain": [
199 |               "64"
200 |             ]
201 |           },
202 |           "metadata": {}
203 |         }
204 |       ],
205 |       "execution_count": 14,
206 |       "metadata": {}
207 |     },
208 |     {
209 |       "cell_type": "code",
210 |       "source": [
211 |         "df['a'].apply(my_exp, e=4)"
212 |       ],
213 |       "outputs": [
214 |         {
215 |           "output_type": "execute_result",
216 |           "execution_count": 15,
217 |           "data": {
218 |             "text/plain": [
219 |               "0     10000\n",
220 |               "1    160000\n",
221 |               "2    810000\n",
222 |               "Name: a, dtype: int64"
223 |             ]
224 |           },
225 |           "metadata": {}
226 |         }
227 |       ],
228 |       "execution_count": 15,
229 |       "metadata": {}
230 |     },
231 |     {
232 |       "cell_type": "code",
233 |       "source": [
234 |         "def print_me(x):\n",
235 |         "    print(x)"
236 |       ],
237 |       "outputs": [],
238 |       "execution_count": 16,
239 |       "metadata": {}
240 |     },
241 |     {
242 |       "cell_type": "code",
243 |       "source": [
244 |         "df.apply(print_me)"
245 |       ],
246 |       "outputs": [
247 |         {
248 |           "output_type": "stream",
249 |           "name": "stdout",
250 |           "text": [
251 |             "0    10\n",
252 |             "1    20\n",
253 |             "2    30\n",
254 |             "Name: a, dtype: int64\n",
255 |             "0    20\n",
256 |             "1    30\n",
257 |             "2    40\n",
258 |             "Name: b, dtype: int64\n"
259 |           ]
260 |         },
261 |         {
262 |           "output_type": "execute_result",
263 |           "execution_count": 17,
264 |           "data": {
265 |             "text/plain": [
266 |               "a    None\n",
267 |               "b    None\n",
268 |               "dtype: object"
269 |             ]
270 |           },
271 |           "metadata": {}
272 |         }
273 |       ],
274 |       "execution_count": 17,
275 |       "metadata": {}
276 |     },
277 |     {
278 |       "cell_type": "code",
279 |       "source": [
280 |         "def avg_3(x, y, z):\n",
281 |         "    return (x + y + z) / 3"
282 |       ],
283 |       "outputs": [],
284 |       "execution_count": 18,
285 |       "metadata": {}
286 |     },
287 |     {
288 |       "cell_type": "code",
289 |       "source": [
290 |         "df.apply(avg_3)"
291 |       ],
292 |       "outputs": [
293 |         {
294 |           "output_type": "error",
295 |           "ename": "TypeError",
296 |           "evalue": "(\"avg_3() missing 2 required positional arguments: 'y' and 'z'\", 'occurred at index a')",
297 |           "traceback": [
298 |             "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
299 |             "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
300 |             "\u001b[0;32m<ipython-input-19-c00f379f766c>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mavg_3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
301 |             "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)\u001b[0m\n\u001b[1;32m   6485\u001b[0m                          \u001b[0margs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   6486\u001b[0m                          kwds=kwds)\n\u001b[0;32m-> 6487\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mop\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   6488\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   6489\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mapplymap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
302 |             "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mget_result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    149\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_raw\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    150\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 151\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_standard\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    152\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    153\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mapply_empty_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
303 |             "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mapply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    255\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    256\u001b[0m         \u001b[0;31m# compute the result using the series generator\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 257\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_series_generator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    258\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    259\u001b[0m         \u001b[0;31m# wrap results\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
304 |             "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mapply_series_generator\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    284\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    285\u001b[0m                 \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mseries_gen\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 286\u001b[0;31m                     \u001b[0mresults\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    287\u001b[0m                     \u001b[0mkeys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    288\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
305 |             "\u001b[0;31mTypeError\u001b[0m: (\"avg_3() missing 2 required positional arguments: 'y' and 'z'\", 'occurred at index a')"
306 |           ]
307 |         }
308 |       ],
309 |       "execution_count": 19,
310 |       "metadata": {}
311 |     },
312 |     {
313 |       "cell_type": "code",
314 |       "source": [
315 |         "import numpy as np"
316 |       ],
317 |       "outputs": [],
318 |       "execution_count": 20,
319 |       "metadata": {}
320 |     },
321 |     {
322 |       "cell_type": "code",
323 |       "source": [
324 |         "def avg_3_apply(col):\n",
325 |         "    return np.mean(col)"
326 |       ],
327 |       "outputs": [],
328 |       "execution_count": 21,
329 |       "metadata": {}
330 |     },
331 |     {
332 |       "cell_type": "code",
333 |       "source": [
334 |         "df.apply(avg_3_apply)"
335 |       ],
336 |       "outputs": [
337 |         {
338 |           "output_type": "execute_result",
339 |           "execution_count": 22,
340 |           "data": {
341 |             "text/plain": [
342 |               "a    20.0\n",
343 |               "b    30.0\n",
344 |               "dtype: float64"
345 |             ]
346 |           },
347 |           "metadata": {}
348 |         }
349 |       ],
350 |       "execution_count": 22,
351 |       "metadata": {}
352 |     },
353 |     {
354 |       "cell_type": "code",
355 |       "source": [
356 |         "def avg_3_apply(col):\n",
357 |         "    x = col[0]\n",
358 |         "    y = col[1]\n",
359 |         "    z = col[2]\n",
360 |         "    return (x + y + z) / 3"
361 |       ],
362 |       "outputs": [],
363 |       "execution_count": 23,
364 |       "metadata": {}
365 |     },
366 |     {
367 |       "cell_type": "code",
368 |       "source": [
369 |         "df.apply(avg_3_apply)"
370 |       ],
371 |       "outputs": [
372 |         {
373 |           "output_type": "execute_result",
374 |           "execution_count": 24,
375 |           "data": {
376 |             "text/plain": [
377 |               "a    20.0\n",
378 |               "b    30.0\n",
379 |               "dtype: float64"
380 |             ]
381 |           },
382 |           "metadata": {}
383 |         }
384 |       ],
385 |       "execution_count": 24,
386 |       "metadata": {}
387 |     },
388 |     {
389 |       "cell_type": "code",
390 |       "source": [
391 |         "df.apply(avg_3_apply, axis='columns')"
392 |       ],
393 |       "outputs": [
394 |         {
395 |           "output_type": "error",
396 |           "ename": "IndexError",
397 |           "evalue": "('index out of bounds', 'occurred at index 0')",
398 |           "traceback": [
399 |             "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
400 |             "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
401 |             "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_value\u001b[0;34m(self, series, key)\u001b[0m\n\u001b[1;32m   4374\u001b[0m             return self._engine.get_value(s, k,\n\u001b[0;32m-> 4375\u001b[0;31m                                           tz=getattr(series.dtype, 'tz', None))\n\u001b[0m\u001b[1;32m   4376\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
402 |             "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_value\u001b[0;34m()\u001b[0m\n",
403 |             "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_value\u001b[0;34m()\u001b[0m\n",
404 |             "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
405 |             "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
406 |             "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
407 |             "\u001b[0;31mKeyError\u001b[0m: 2",
408 |             "\nDuring handling of the above exception, another exception occurred:\n",
409 |             "\u001b[0;31mIndexError\u001b[0m                                Traceback (most recent call last)",
410 |             "\u001b[0;32m<ipython-input-25-6adb37282b5f>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mavg_3_apply\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'columns'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
411 |             "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)\u001b[0m\n\u001b[1;32m   6485\u001b[0m                          \u001b[0margs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   6486\u001b[0m                          kwds=kwds)\n\u001b[0;32m-> 6487\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mop\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   6488\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   6489\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mapplymap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
412 |             "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mget_result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    149\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_raw\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    150\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 151\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_standard\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    152\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    153\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mapply_empty_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
413 |             "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mapply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    255\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    256\u001b[0m         \u001b[0;31m# compute the result using the series generator\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 257\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_series_generator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    258\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    259\u001b[0m         \u001b[0;31m# wrap results\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
414 |             "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mapply_series_generator\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    284\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    285\u001b[0m                 \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mseries_gen\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 286\u001b[0;31m                     \u001b[0mresults\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    287\u001b[0m                     \u001b[0mkeys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    288\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
415 |             "\u001b[0;32m<ipython-input-23-dc85aaf18e74>\u001b[0m in \u001b[0;36mavg_3_apply\u001b[0;34m(col)\u001b[0m\n\u001b[1;32m      2\u001b[0m     \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcol\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m     \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcol\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m     \u001b[0mz\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcol\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      5\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mz\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
416 |             "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m    866\u001b[0m         \u001b[0mkey\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_if_callable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    867\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 868\u001b[0;31m             \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_value\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    869\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    870\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mis_scalar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
417 |             "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_value\u001b[0;34m(self, series, key)\u001b[0m\n\u001b[1;32m   4379\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   4380\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4381\u001b[0;31m                 \u001b[0;32mreturn\u001b[0m \u001b[0mlibindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_value_box\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   4382\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mIndexError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   4383\u001b[0m                 \u001b[0;32mraise\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
418 |             "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.get_value_box\u001b[0;34m()\u001b[0m\n",
419 |             "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.get_value_at\u001b[0;34m()\u001b[0m\n",
420 |             "\u001b[0;32mpandas/_libs/util.pxd\u001b[0m in \u001b[0;36mpandas._libs.util.get_value_at\u001b[0;34m()\u001b[0m\n",
421 |             "\u001b[0;32mpandas/_libs/util.pxd\u001b[0m in \u001b[0;36mpandas._libs.util.validate_indexer\u001b[0;34m()\u001b[0m\n",
422 |             "\u001b[0;31mIndexError\u001b[0m: ('index out of bounds', 'occurred at index 0')"
423 |           ]
424 |         }
425 |       ],
426 |       "execution_count": 25,
427 |       "metadata": {}
428 |     },
429 |     {
430 |       "cell_type": "code",
431 |       "source": [
432 |         "df['a'].mean()"
433 |       ],
434 |       "outputs": [
435 |         {
436 |           "output_type": "execute_result",
437 |           "execution_count": 26,
438 |           "data": {
439 |             "text/plain": [
440 |               "20.0"
441 |             ]
442 |           },
443 |           "metadata": {}
444 |         }
445 |       ],
446 |       "execution_count": 26,
447 |       "metadata": {}
448 |     },
449 |     {
450 |       "cell_type": "code",
451 |       "source": [
452 |         "df['a'] + df['b']"
453 |       ],
454 |       "outputs": [
455 |         {
456 |           "output_type": "execute_result",
457 |           "execution_count": 27,
458 |           "data": {
459 |             "text/plain": [
460 |               "0    30\n",
461 |               "1    50\n",
462 |               "2    70\n",
463 |               "dtype: int64"
464 |             ]
465 |           },
466 |           "metadata": {}
467 |         }
468 |       ],
469 |       "execution_count": 27,
470 |       "metadata": {}
471 |     },
472 |     {
473 |       "cell_type": "code",
474 |       "source": [
475 |         "def avg_2_mod(x, y):\n",
476 |         "    if (x == 20):\n",
477 |         "        return np.nan\n",
478 |         "    else:\n",
479 |         "        return (x + y) / 2"
480 |       ],
481 |       "outputs": [],
482 |       "execution_count": 28,
483 |       "metadata": {}
484 |     },
485 |     {
486 |       "cell_type": "code",
487 |       "source": [
488 |         "avg_2(df['a'], df['b'])"
489 |       ],
490 |       "outputs": [
491 |         {
492 |           "output_type": "execute_result",
493 |           "execution_count": 29,
494 |           "data": {
495 |             "text/plain": [
496 |               "0    15.0\n",
497 |               "1    25.0\n",
498 |               "2    35.0\n",
499 |               "dtype: float64"
500 |             ]
501 |           },
502 |           "metadata": {}
503 |         }
504 |       ],
505 |       "execution_count": 29,
506 |       "metadata": {}
507 |     },
508 |     {
509 |       "cell_type": "code",
510 |       "source": [
511 |         "avg_2_mod(df['a'], df['b'])"
512 |       ],
513 |       "outputs": [
514 |         {
515 |           "output_type": "error",
516 |           "ename": "ValueError",
517 |           "evalue": "The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().",
518 |           "traceback": [
519 |             "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
520 |             "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
521 |             "\u001b[0;32m<ipython-input-30-07c16a2d06e5>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mavg_2_mod\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'a'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'b'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
522 |             "\u001b[0;32m<ipython-input-28-dad2352c143f>\u001b[0m in \u001b[0;36mavg_2_mod\u001b[0;34m(x, y)\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mavg_2_mod\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m     \u001b[0;32mif\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m20\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      3\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mNaN\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m     \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
523 |             "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m__nonzero__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m   1476\u001b[0m         raise ValueError(\"The truth value of a {0} is ambiguous. \"\n\u001b[1;32m   1477\u001b[0m                          \u001b[0;34m\"Use a.empty, a.bool(), a.item(), a.any() or a.all().\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1478\u001b[0;31m                          .format(self.__class__.__name__))\n\u001b[0m\u001b[1;32m   1479\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1480\u001b[0m     \u001b[0m__bool__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m__nonzero__\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
524 |             "\u001b[0;31mValueError\u001b[0m: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all()."
525 |           ]
526 |         }
527 |       ],
528 |       "execution_count": 30,
529 |       "metadata": {}
530 |     },
531 |     {
532 |       "cell_type": "code",
533 |       "source": [
534 |         "import numpy as np"
535 |       ],
536 |       "outputs": [],
537 |       "execution_count": 31,
538 |       "metadata": {}
539 |     },
540 |     {
541 |       "cell_type": "code",
542 |       "source": [
543 |         "avg_2_mod_vec = np.vectorize(avg_2_mod)"
544 |       ],
545 |       "outputs": [],
546 |       "execution_count": 32,
547 |       "metadata": {}
548 |     },
549 |     {
550 |       "cell_type": "code",
551 |       "source": [
552 |         "avg_2_mod_vec(df['a'], df['b'])"
553 |       ],
554 |       "outputs": [
555 |         {
556 |           "output_type": "execute_result",
557 |           "execution_count": 33,
558 |           "data": {
559 |             "text/plain": [
560 |               "array([15., nan, 35.])"
561 |             ]
562 |           },
563 |           "metadata": {}
564 |         }
565 |       ],
566 |       "execution_count": 33,
567 |       "metadata": {}
568 |     },
569 |     {
570 |       "cell_type": "code",
571 |       "source": [
572 |         "@np.vectorize\n",
573 |         "def v_avg_2_mod(x, y):\n",
574 |         "    if (x == 20):\n",
575 |         "        return np.nan\n",
576 |         "    else:\n",
577 |         "        return (x + y) / 2"
578 |       ],
579 |       "outputs": [],
580 |       "execution_count": 34,
581 |       "metadata": {}
582 |     },
583 |     {
584 |       "cell_type": "code",
585 |       "source": [
586 |         "v_avg_2_mod(df['a'], df['b'])"
587 |       ],
588 |       "outputs": [
589 |         {
590 |           "output_type": "execute_result",
591 |           "execution_count": 35,
592 |           "data": {
593 |             "text/plain": [
594 |               "array([15., nan, 35.])"
595 |             ]
596 |           },
597 |           "metadata": {}
598 |         }
599 |       ],
600 |       "execution_count": 35,
601 |       "metadata": {}
602 |     },
603 |     {
604 |       "cell_type": "code",
605 |       "source": [
606 |         "import numba"
607 |       ],
608 |       "outputs": [],
609 |       "execution_count": 36,
610 |       "metadata": {}
611 |     },
612 |     {
613 |       "cell_type": "code",
614 |       "source": [
615 |         "@numba.vectorize\n",
616 |         "def v_avg_2_mod_numba(x, y):\n",
617 |         "    if (x == 20):\n",
618 |         "        return np.nan\n",
619 |         "    else:\n",
620 |         "        return (x + y) / 2"
621 |       ],
622 |       "outputs": [],
623 |       "execution_count": 37,
624 |       "metadata": {}
625 |     },
626 |     {
627 |       "cell_type": "code",
628 |       "source": [
629 |         "v_avg_2_mod_numba(df['a'].values, df['b'].values)"
630 |       ],
631 |       "outputs": [
632 |         {
633 |           "output_type": "execute_result",
634 |           "execution_count": 38,
635 |           "data": {
636 |             "text/plain": [
637 |               "array([15., nan, 35.])"
638 |             ]
639 |           },
640 |           "metadata": {}
641 |         }
642 |       ],
643 |       "execution_count": 38,
644 |       "metadata": {}
645 |     },
646 |     {
647 |       "cell_type": "code",
648 |       "source": [
649 |         "%%timeit\n",
650 |         "avg_2(df['a'], df['b'])"
651 |       ],
652 |       "outputs": [
653 |         {
654 |           "output_type": "stream",
655 |           "name": "stdout",
656 |           "text": [
657 |             "519 µs ± 34.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
658 |           ]
659 |         }
660 |       ],
661 |       "execution_count": 39,
662 |       "metadata": {}
663 |     },
664 |     {
665 |       "cell_type": "code",
666 |       "source": [
667 |         "%%timeit\n",
668 |         "v_avg_2_mod(df['a'], df['b'])"
669 |       ],
670 |       "outputs": [
671 |         {
672 |           "output_type": "stream",
673 |           "name": "stdout",
674 |           "text": [
675 |             "236 µs ± 33.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
676 |           ]
677 |         }
678 |       ],
679 |       "execution_count": 40,
680 |       "metadata": {}
681 |     },
682 |     {
683 |       "cell_type": "code",
684 |       "source": [
685 |         "%%timeit\n",
686 |         "v_avg_2_mod_numba(df['a'].values, df['b'].values)"
687 |       ],
688 |       "outputs": [
689 |         {
690 |           "output_type": "stream",
691 |           "name": "stdout",
692 |           "text": [
693 |             "8.28 µs ± 550 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)\n"
694 |           ]
695 |         }
696 |       ],
697 |       "execution_count": 41,
698 |       "metadata": {}
699 |     },
700 |     {
701 |       "cell_type": "code",
702 |       "source": [],
703 |       "outputs": [],
704 |       "execution_count": 42,
705 |       "metadata": {}
706 |     }
707 |   ],
708 |   "metadata": {
709 |     "kernelspec": {
710 |       "name": "python3",
711 |       "language": "python",
712 |       "display_name": "Python 3"
713 |     },
714 |     "language_info": {
715 |       "name": "python",
716 |       "version": "3.7.3",
717 |       "mimetype": "text/x-python",
718 |       "codemirror_mode": {
719 |         "name": "ipython",
720 |         "version": 3
721 |       },
722 |       "pygments_lexer": "ipython3",
723 |       "nbconvert_exporter": "python",
724 |       "file_extension": ".py"
725 |     },
726 |     "kernel_info": {
727 |       "name": "python3"
728 |     },
729 |     "nteract": {
730 |       "version": "0.14.3"
731 |     }
732 |   },
733 |   "nbformat": 4,
734 |   "nbformat_minor": 2
735 | }


--------------------------------------------------------------------------------
/notes/05-models.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |   "cells": [
   3 |     {
   4 |       "cell_type": "code",
   5 |       "source": [
   6 |         "import pandas as pd"
   7 |       ],
   8 |       "outputs": [],
   9 |       "execution_count": 1,
  10 |       "metadata": {}
  11 |     },
  12 |     {
  13 |       "cell_type": "code",
  14 |       "source": [
  15 |         "import seaborn as sns"
  16 |       ],
  17 |       "outputs": [],
  18 |       "execution_count": 2,
  19 |       "metadata": {}
  20 |     },
  21 |     {
  22 |       "cell_type": "code",
  23 |       "source": [
  24 |         "tips = sns.load_dataset('tips')"
  25 |       ],
  26 |       "outputs": [],
  27 |       "execution_count": 3,
  28 |       "metadata": {}
  29 |     },
  30 |     {
  31 |       "cell_type": "code",
  32 |       "source": [
  33 |         "tips.head()"
  34 |       ],
  35 |       "outputs": [
  36 |         {
  37 |           "output_type": "execute_result",
  38 |           "execution_count": 4,
  39 |           "data": {
  40 |             "text/plain": [
  41 |               "   total_bill   tip     sex smoker  day    time  size\n",
  42 |               "0       16.99  1.01  Female     No  Sun  Dinner     2\n",
  43 |               "1       10.34  1.66    Male     No  Sun  Dinner     3\n",
  44 |               "2       21.01  3.50    Male     No  Sun  Dinner     3\n",
  45 |               "3       23.68  3.31    Male     No  Sun  Dinner     2\n",
  46 |               "4       24.59  3.61  Female     No  Sun  Dinner     4"
  47 |             ],
  48 |             "text/html": [
  49 |               "<div>\n",
  50 |               "<style scoped>\n",
  51 |               "    .dataframe tbody tr th:only-of-type {\n",
  52 |               "        vertical-align: middle;\n",
  53 |               "    }\n",
  54 |               "\n",
  55 |               "    .dataframe tbody tr th {\n",
  56 |               "        vertical-align: top;\n",
  57 |               "    }\n",
  58 |               "\n",
  59 |               "    .dataframe thead th {\n",
  60 |               "        text-align: right;\n",
  61 |               "    }\n",
  62 |               "</style>\n",
  63 |               "<table border=\"1\" class=\"dataframe\">\n",
  64 |               "  <thead>\n",
  65 |               "    <tr style=\"text-align: right;\">\n",
  66 |               "      <th></th>\n",
  67 |               "      <th>total_bill</th>\n",
  68 |               "      <th>tip</th>\n",
  69 |               "      <th>sex</th>\n",
  70 |               "      <th>smoker</th>\n",
  71 |               "      <th>day</th>\n",
  72 |               "      <th>time</th>\n",
  73 |               "      <th>size</th>\n",
  74 |               "    </tr>\n",
  75 |               "  </thead>\n",
  76 |               "  <tbody>\n",
  77 |               "    <tr>\n",
  78 |               "      <th>0</th>\n",
  79 |               "      <td>16.99</td>\n",
  80 |               "      <td>1.01</td>\n",
  81 |               "      <td>Female</td>\n",
  82 |               "      <td>No</td>\n",
  83 |               "      <td>Sun</td>\n",
  84 |               "      <td>Dinner</td>\n",
  85 |               "      <td>2</td>\n",
  86 |               "    </tr>\n",
  87 |               "    <tr>\n",
  88 |               "      <th>1</th>\n",
  89 |               "      <td>10.34</td>\n",
  90 |               "      <td>1.66</td>\n",
  91 |               "      <td>Male</td>\n",
  92 |               "      <td>No</td>\n",
  93 |               "      <td>Sun</td>\n",
  94 |               "      <td>Dinner</td>\n",
  95 |               "      <td>3</td>\n",
  96 |               "    </tr>\n",
  97 |               "    <tr>\n",
  98 |               "      <th>2</th>\n",
  99 |               "      <td>21.01</td>\n",
 100 |               "      <td>3.50</td>\n",
 101 |               "      <td>Male</td>\n",
 102 |               "      <td>No</td>\n",
 103 |               "      <td>Sun</td>\n",
 104 |               "      <td>Dinner</td>\n",
 105 |               "      <td>3</td>\n",
 106 |               "    </tr>\n",
 107 |               "    <tr>\n",
 108 |               "      <th>3</th>\n",
 109 |               "      <td>23.68</td>\n",
 110 |               "      <td>3.31</td>\n",
 111 |               "      <td>Male</td>\n",
 112 |               "      <td>No</td>\n",
 113 |               "      <td>Sun</td>\n",
 114 |               "      <td>Dinner</td>\n",
 115 |               "      <td>2</td>\n",
 116 |               "    </tr>\n",
 117 |               "    <tr>\n",
 118 |               "      <th>4</th>\n",
 119 |               "      <td>24.59</td>\n",
 120 |               "      <td>3.61</td>\n",
 121 |               "      <td>Female</td>\n",
 122 |               "      <td>No</td>\n",
 123 |               "      <td>Sun</td>\n",
 124 |               "      <td>Dinner</td>\n",
 125 |               "      <td>4</td>\n",
 126 |               "    </tr>\n",
 127 |               "  </tbody>\n",
 128 |               "</table>\n",
 129 |               "</div>"
 130 |             ]
 131 |           },
 132 |           "metadata": {}
 133 |         }
 134 |       ],
 135 |       "execution_count": 4,
 136 |       "metadata": {}
 137 |     },
 138 |     {
 139 |       "cell_type": "code",
 140 |       "source": [
 141 |         "from sklearn import linear_model"
 142 |       ],
 143 |       "outputs": [],
 144 |       "execution_count": 5,
 145 |       "metadata": {}
 146 |     },
 147 |     {
 148 |       "cell_type": "code",
 149 |       "source": [
 150 |         "lr = linear_model.LinearRegression()"
 151 |       ],
 152 |       "outputs": [],
 153 |       "execution_count": 6,
 154 |       "metadata": {}
 155 |     },
 156 |     {
 157 |       "cell_type": "code",
 158 |       "source": [
 159 |         "lr.fit(X=tips['total_bill'], y=tips['tip'])"
 160 |       ],
 161 |       "outputs": [
 162 |         {
 163 |           "output_type": "error",
 164 |           "ename": "ValueError",
 165 |           "evalue": "Expected 2D array, got 1D array instead:\narray=[16.99 10.34 21.01 23.68 24.59 25.29  8.77 26.88 15.04 14.78 10.27 35.26\n 15.42 18.43 14.83 21.58 10.33 16.29 16.97 20.65 17.92 20.29 15.77 39.42\n 19.82 17.81 13.37 12.69 21.7  19.65  9.55 18.35 15.06 20.69 17.78 24.06\n 16.31 16.93 18.69 31.27 16.04 17.46 13.94  9.68 30.4  18.29 22.23 32.4\n 28.55 18.04 12.54 10.29 34.81  9.94 25.56 19.49 38.01 26.41 11.24 48.27\n 20.29 13.81 11.02 18.29 17.59 20.08 16.45  3.07 20.23 15.01 12.02 17.07\n 26.86 25.28 14.73 10.51 17.92 27.2  22.76 17.29 19.44 16.66 10.07 32.68\n 15.98 34.83 13.03 18.28 24.71 21.16 28.97 22.49  5.75 16.32 22.75 40.17\n 27.28 12.03 21.01 12.46 11.35 15.38 44.3  22.42 20.92 15.36 20.49 25.21\n 18.24 14.31 14.    7.25 38.07 23.95 25.71 17.31 29.93 10.65 12.43 24.08\n 11.69 13.42 14.26 15.95 12.48 29.8   8.52 14.52 11.38 22.82 19.08 20.27\n 11.17 12.26 18.26  8.51 10.33 14.15 16.   13.16 17.47 34.3  41.19 27.05\n 16.43  8.35 18.64 11.87  9.78  7.51 14.07 13.13 17.26 24.55 19.77 29.85\n 48.17 25.   13.39 16.49 21.5  12.66 16.21 13.81 17.51 24.52 20.76 31.71\n 10.59 10.63 50.81 15.81  7.25 31.85 16.82 32.9  17.89 14.48  9.6  34.63\n 34.65 23.33 45.35 23.17 40.55 20.69 20.9  30.46 18.15 23.1  15.69 19.81\n 28.44 15.48 16.58  7.56 10.34 43.11 13.   13.51 18.71 12.74 13.   16.4\n 20.53 16.47 26.59 38.73 24.27 12.76 30.06 25.89 48.33 13.27 28.17 12.9\n 28.15 11.59  7.74 30.14 12.16 13.42  8.58 15.98 13.42 16.27 10.09 20.45\n 13.28 22.12 24.01 15.69 11.61 10.77 15.53 10.07 12.6  32.83 35.83 29.03\n 27.18 22.67 17.82 18.78].\nReshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.",
 166 |           "traceback": [
 167 |             "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
 168 |             "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
 169 |             "\u001b[0;32m<ipython-input-7-6744e5443be8>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mlr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtips\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'total_bill'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtips\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'tip'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
 170 |             "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/base.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[1;32m    456\u001b[0m         \u001b[0mn_jobs_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_jobs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    457\u001b[0m         X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],\n\u001b[0;32m--> 458\u001b[0;31m                          y_numeric=True, multi_output=True)\n\u001b[0m\u001b[1;32m    459\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    460\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0msample_weight\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0matleast_1d\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msample_weight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 171 |             "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_X_y\u001b[0;34m(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)\u001b[0m\n\u001b[1;32m    754\u001b[0m                     \u001b[0mensure_min_features\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mensure_min_features\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    755\u001b[0m                     \u001b[0mwarn_on_dtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mwarn_on_dtype\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 756\u001b[0;31m                     estimator=estimator)\n\u001b[0m\u001b[1;32m    757\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mmulti_output\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    758\u001b[0m         y = check_array(y, 'csr', force_all_finite=True, ensure_2d=False,\n",
 172 |             "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_array\u001b[0;34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)\u001b[0m\n\u001b[1;32m    550\u001b[0m                     \u001b[0;34m\"Reshape your data either using array.reshape(-1, 1) if \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    551\u001b[0m                     \u001b[0;34m\"your data has a single feature or array.reshape(1, -1) \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 552\u001b[0;31m                     \"if it contains a single sample.\".format(array))\n\u001b[0m\u001b[1;32m    553\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    554\u001b[0m         \u001b[0;31m# in the future np.flexible dtypes will be handled like object dtypes\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 173 |             "\u001b[0;31mValueError\u001b[0m: Expected 2D array, got 1D array instead:\narray=[16.99 10.34 21.01 23.68 24.59 25.29  8.77 26.88 15.04 14.78 10.27 35.26\n 15.42 18.43 14.83 21.58 10.33 16.29 16.97 20.65 17.92 20.29 15.77 39.42\n 19.82 17.81 13.37 12.69 21.7  19.65  9.55 18.35 15.06 20.69 17.78 24.06\n 16.31 16.93 18.69 31.27 16.04 17.46 13.94  9.68 30.4  18.29 22.23 32.4\n 28.55 18.04 12.54 10.29 34.81  9.94 25.56 19.49 38.01 26.41 11.24 48.27\n 20.29 13.81 11.02 18.29 17.59 20.08 16.45  3.07 20.23 15.01 12.02 17.07\n 26.86 25.28 14.73 10.51 17.92 27.2  22.76 17.29 19.44 16.66 10.07 32.68\n 15.98 34.83 13.03 18.28 24.71 21.16 28.97 22.49  5.75 16.32 22.75 40.17\n 27.28 12.03 21.01 12.46 11.35 15.38 44.3  22.42 20.92 15.36 20.49 25.21\n 18.24 14.31 14.    7.25 38.07 23.95 25.71 17.31 29.93 10.65 12.43 24.08\n 11.69 13.42 14.26 15.95 12.48 29.8   8.52 14.52 11.38 22.82 19.08 20.27\n 11.17 12.26 18.26  8.51 10.33 14.15 16.   13.16 17.47 34.3  41.19 27.05\n 16.43  8.35 18.64 11.87  9.78  7.51 14.07 13.13 17.26 24.55 19.77 29.85\n 48.17 25.   13.39 16.49 21.5  12.66 16.21 13.81 17.51 24.52 20.76 31.71\n 10.59 10.63 50.81 15.81  7.25 31.85 16.82 32.9  17.89 14.48  9.6  34.63\n 34.65 23.33 45.35 23.17 40.55 20.69 20.9  30.46 18.15 23.1  15.69 19.81\n 28.44 15.48 16.58  7.56 10.34 43.11 13.   13.51 18.71 12.74 13.   16.4\n 20.53 16.47 26.59 38.73 24.27 12.76 30.06 25.89 48.33 13.27 28.17 12.9\n 28.15 11.59  7.74 30.14 12.16 13.42  8.58 15.98 13.42 16.27 10.09 20.45\n 13.28 22.12 24.01 15.69 11.61 10.77 15.53 10.07 12.6  32.83 35.83 29.03\n 27.18 22.67 17.82 18.78].\nReshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample."
 174 |           ]
 175 |         }
 176 |       ],
 177 |       "execution_count": 7,
 178 |       "metadata": {}
 179 |     },
 180 |     {
 181 |       "cell_type": "code",
 182 |       "source": [
 183 |         "lr.fit(X=tips['total_bill'].values.reshape(-1, 1), y=tips['tip'])"
 184 |       ],
 185 |       "outputs": [
 186 |         {
 187 |           "output_type": "execute_result",
 188 |           "execution_count": 8,
 189 |           "data": {
 190 |             "text/plain": [
 191 |               "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n",
 192 |               "         normalize=False)"
 193 |             ]
 194 |           },
 195 |           "metadata": {}
 196 |         }
 197 |       ],
 198 |       "execution_count": 8,
 199 |       "metadata": {}
 200 |     },
 201 |     {
 202 |       "cell_type": "code",
 203 |       "source": [
 204 |         "lr.coef_"
 205 |       ],
 206 |       "outputs": [
 207 |         {
 208 |           "output_type": "execute_result",
 209 |           "execution_count": 9,
 210 |           "data": {
 211 |             "text/plain": [
 212 |               "array([0.10502452])"
 213 |             ]
 214 |           },
 215 |           "metadata": {}
 216 |         }
 217 |       ],
 218 |       "execution_count": 9,
 219 |       "metadata": {}
 220 |     },
 221 |     {
 222 |       "cell_type": "code",
 223 |       "source": [
 224 |         "lr.intercept_"
 225 |       ],
 226 |       "outputs": [
 227 |         {
 228 |           "output_type": "execute_result",
 229 |           "execution_count": 10,
 230 |           "data": {
 231 |             "text/plain": [
 232 |               "0.9202696135546731"
 233 |             ]
 234 |           },
 235 |           "metadata": {}
 236 |         }
 237 |       ],
 238 |       "execution_count": 10,
 239 |       "metadata": {}
 240 |     },
 241 |     {
 242 |       "cell_type": "code",
 243 |       "source": [
 244 |         "lr.fit(X=tips[['total_bill', 'size']], y=tips['tip'])"
 245 |       ],
 246 |       "outputs": [
 247 |         {
 248 |           "output_type": "execute_result",
 249 |           "execution_count": 11,
 250 |           "data": {
 251 |             "text/plain": [
 252 |               "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n",
 253 |               "         normalize=False)"
 254 |             ]
 255 |           },
 256 |           "metadata": {}
 257 |         }
 258 |       ],
 259 |       "execution_count": 11,
 260 |       "metadata": {}
 261 |     },
 262 |     {
 263 |       "cell_type": "code",
 264 |       "source": [
 265 |         "lr.coef_"
 266 |       ],
 267 |       "outputs": [
 268 |         {
 269 |           "output_type": "execute_result",
 270 |           "execution_count": 12,
 271 |           "data": {
 272 |             "text/plain": [
 273 |               "array([0.09271334, 0.19259779])"
 274 |             ]
 275 |           },
 276 |           "metadata": {}
 277 |         }
 278 |       ],
 279 |       "execution_count": 12,
 280 |       "metadata": {}
 281 |     },
 282 |     {
 283 |       "cell_type": "code",
 284 |       "source": [
 285 |         "tips"
 286 |       ],
 287 |       "outputs": [
 288 |         {
 289 |           "output_type": "execute_result",
 290 |           "execution_count": 13,
 291 |           "data": {
 292 |             "text/plain": [
 293 |               "     total_bill   tip     sex smoker   day    time  size\n",
 294 |               "0         16.99  1.01  Female     No   Sun  Dinner     2\n",
 295 |               "1         10.34  1.66    Male     No   Sun  Dinner     3\n",
 296 |               "2         21.01  3.50    Male     No   Sun  Dinner     3\n",
 297 |               "3         23.68  3.31    Male     No   Sun  Dinner     2\n",
 298 |               "4         24.59  3.61  Female     No   Sun  Dinner     4\n",
 299 |               "5         25.29  4.71    Male     No   Sun  Dinner     4\n",
 300 |               "6          8.77  2.00    Male     No   Sun  Dinner     2\n",
 301 |               "7         26.88  3.12    Male     No   Sun  Dinner     4\n",
 302 |               "8         15.04  1.96    Male     No   Sun  Dinner     2\n",
 303 |               "9         14.78  3.23    Male     No   Sun  Dinner     2\n",
 304 |               "10        10.27  1.71    Male     No   Sun  Dinner     2\n",
 305 |               "11        35.26  5.00  Female     No   Sun  Dinner     4\n",
 306 |               "12        15.42  1.57    Male     No   Sun  Dinner     2\n",
 307 |               "13        18.43  3.00    Male     No   Sun  Dinner     4\n",
 308 |               "14        14.83  3.02  Female     No   Sun  Dinner     2\n",
 309 |               "15        21.58  3.92    Male     No   Sun  Dinner     2\n",
 310 |               "16        10.33  1.67  Female     No   Sun  Dinner     3\n",
 311 |               "17        16.29  3.71    Male     No   Sun  Dinner     3\n",
 312 |               "18        16.97  3.50  Female     No   Sun  Dinner     3\n",
 313 |               "19        20.65  3.35    Male     No   Sat  Dinner     3\n",
 314 |               "20        17.92  4.08    Male     No   Sat  Dinner     2\n",
 315 |               "21        20.29  2.75  Female     No   Sat  Dinner     2\n",
 316 |               "22        15.77  2.23  Female     No   Sat  Dinner     2\n",
 317 |               "23        39.42  7.58    Male     No   Sat  Dinner     4\n",
 318 |               "24        19.82  3.18    Male     No   Sat  Dinner     2\n",
 319 |               "25        17.81  2.34    Male     No   Sat  Dinner     4\n",
 320 |               "26        13.37  2.00    Male     No   Sat  Dinner     2\n",
 321 |               "27        12.69  2.00    Male     No   Sat  Dinner     2\n",
 322 |               "28        21.70  4.30    Male     No   Sat  Dinner     2\n",
 323 |               "29        19.65  3.00  Female     No   Sat  Dinner     2\n",
 324 |               "..          ...   ...     ...    ...   ...     ...   ...\n",
 325 |               "214       28.17  6.50  Female    Yes   Sat  Dinner     3\n",
 326 |               "215       12.90  1.10  Female    Yes   Sat  Dinner     2\n",
 327 |               "216       28.15  3.00    Male    Yes   Sat  Dinner     5\n",
 328 |               "217       11.59  1.50    Male    Yes   Sat  Dinner     2\n",
 329 |               "218        7.74  1.44    Male    Yes   Sat  Dinner     2\n",
 330 |               "219       30.14  3.09  Female    Yes   Sat  Dinner     4\n",
 331 |               "220       12.16  2.20    Male    Yes   Fri   Lunch     2\n",
 332 |               "221       13.42  3.48  Female    Yes   Fri   Lunch     2\n",
 333 |               "222        8.58  1.92    Male    Yes   Fri   Lunch     1\n",
 334 |               "223       15.98  3.00  Female     No   Fri   Lunch     3\n",
 335 |               "224       13.42  1.58    Male    Yes   Fri   Lunch     2\n",
 336 |               "225       16.27  2.50  Female    Yes   Fri   Lunch     2\n",
 337 |               "226       10.09  2.00  Female    Yes   Fri   Lunch     2\n",
 338 |               "227       20.45  3.00    Male     No   Sat  Dinner     4\n",
 339 |               "228       13.28  2.72    Male     No   Sat  Dinner     2\n",
 340 |               "229       22.12  2.88  Female    Yes   Sat  Dinner     2\n",
 341 |               "230       24.01  2.00    Male    Yes   Sat  Dinner     4\n",
 342 |               "231       15.69  3.00    Male    Yes   Sat  Dinner     3\n",
 343 |               "232       11.61  3.39    Male     No   Sat  Dinner     2\n",
 344 |               "233       10.77  1.47    Male     No   Sat  Dinner     2\n",
 345 |               "234       15.53  3.00    Male    Yes   Sat  Dinner     2\n",
 346 |               "235       10.07  1.25    Male     No   Sat  Dinner     2\n",
 347 |               "236       12.60  1.00    Male    Yes   Sat  Dinner     2\n",
 348 |               "237       32.83  1.17    Male    Yes   Sat  Dinner     2\n",
 349 |               "238       35.83  4.67  Female     No   Sat  Dinner     3\n",
 350 |               "239       29.03  5.92    Male     No   Sat  Dinner     3\n",
 351 |               "240       27.18  2.00  Female    Yes   Sat  Dinner     2\n",
 352 |               "241       22.67  2.00    Male    Yes   Sat  Dinner     2\n",
 353 |               "242       17.82  1.75    Male     No   Sat  Dinner     2\n",
 354 |               "243       18.78  3.00  Female     No  Thur  Dinner     2\n",
 355 |               "\n",
 356 |               "[244 rows x 7 columns]"
 357 |             ],
 358 |             "text/html": [
 359 |               "<div>\n",
 360 |               "<style scoped>\n",
 361 |               "    .dataframe tbody tr th:only-of-type {\n",
 362 |               "        vertical-align: middle;\n",
 363 |               "    }\n",
 364 |               "\n",
 365 |               "    .dataframe tbody tr th {\n",
 366 |               "        vertical-align: top;\n",
 367 |               "    }\n",
 368 |               "\n",
 369 |               "    .dataframe thead th {\n",
 370 |               "        text-align: right;\n",
 371 |               "    }\n",
 372 |               "</style>\n",
 373 |               "<table border=\"1\" class=\"dataframe\">\n",
 374 |               "  <thead>\n",
 375 |               "    <tr style=\"text-align: right;\">\n",
 376 |               "      <th></th>\n",
 377 |               "      <th>total_bill</th>\n",
 378 |               "      <th>tip</th>\n",
 379 |               "      <th>sex</th>\n",
 380 |               "      <th>smoker</th>\n",
 381 |               "      <th>day</th>\n",
 382 |               "      <th>time</th>\n",
 383 |               "      <th>size</th>\n",
 384 |               "    </tr>\n",
 385 |               "  </thead>\n",
 386 |               "  <tbody>\n",
 387 |               "    <tr>\n",
 388 |               "      <th>0</th>\n",
 389 |               "      <td>16.99</td>\n",
 390 |               "      <td>1.01</td>\n",
 391 |               "      <td>Female</td>\n",
 392 |               "      <td>No</td>\n",
 393 |               "      <td>Sun</td>\n",
 394 |               "      <td>Dinner</td>\n",
 395 |               "      <td>2</td>\n",
 396 |               "    </tr>\n",
 397 |               "    <tr>\n",
 398 |               "      <th>1</th>\n",
 399 |               "      <td>10.34</td>\n",
 400 |               "      <td>1.66</td>\n",
 401 |               "      <td>Male</td>\n",
 402 |               "      <td>No</td>\n",
 403 |               "      <td>Sun</td>\n",
 404 |               "      <td>Dinner</td>\n",
 405 |               "      <td>3</td>\n",
 406 |               "    </tr>\n",
 407 |               "    <tr>\n",
 408 |               "      <th>2</th>\n",
 409 |               "      <td>21.01</td>\n",
 410 |               "      <td>3.50</td>\n",
 411 |               "      <td>Male</td>\n",
 412 |               "      <td>No</td>\n",
 413 |               "      <td>Sun</td>\n",
 414 |               "      <td>Dinner</td>\n",
 415 |               "      <td>3</td>\n",
 416 |               "    </tr>\n",
 417 |               "    <tr>\n",
 418 |               "      <th>3</th>\n",
 419 |               "      <td>23.68</td>\n",
 420 |               "      <td>3.31</td>\n",
 421 |               "      <td>Male</td>\n",
 422 |               "      <td>No</td>\n",
 423 |               "      <td>Sun</td>\n",
 424 |               "      <td>Dinner</td>\n",
 425 |               "      <td>2</td>\n",
 426 |               "    </tr>\n",
 427 |               "    <tr>\n",
 428 |               "      <th>4</th>\n",
 429 |               "      <td>24.59</td>\n",
 430 |               "      <td>3.61</td>\n",
 431 |               "      <td>Female</td>\n",
 432 |               "      <td>No</td>\n",
 433 |               "      <td>Sun</td>\n",
 434 |               "      <td>Dinner</td>\n",
 435 |               "      <td>4</td>\n",
 436 |               "    </tr>\n",
 437 |               "    <tr>\n",
 438 |               "      <th>5</th>\n",
 439 |               "      <td>25.29</td>\n",
 440 |               "      <td>4.71</td>\n",
 441 |               "      <td>Male</td>\n",
 442 |               "      <td>No</td>\n",
 443 |               "      <td>Sun</td>\n",
 444 |               "      <td>Dinner</td>\n",
 445 |               "      <td>4</td>\n",
 446 |               "    </tr>\n",
 447 |               "    <tr>\n",
 448 |               "      <th>6</th>\n",
 449 |               "      <td>8.77</td>\n",
 450 |               "      <td>2.00</td>\n",
 451 |               "      <td>Male</td>\n",
 452 |               "      <td>No</td>\n",
 453 |               "      <td>Sun</td>\n",
 454 |               "      <td>Dinner</td>\n",
 455 |               "      <td>2</td>\n",
 456 |               "    </tr>\n",
 457 |               "    <tr>\n",
 458 |               "      <th>7</th>\n",
 459 |               "      <td>26.88</td>\n",
 460 |               "      <td>3.12</td>\n",
 461 |               "      <td>Male</td>\n",
 462 |               "      <td>No</td>\n",
 463 |               "      <td>Sun</td>\n",
 464 |               "      <td>Dinner</td>\n",
 465 |               "      <td>4</td>\n",
 466 |               "    </tr>\n",
 467 |               "    <tr>\n",
 468 |               "      <th>8</th>\n",
 469 |               "      <td>15.04</td>\n",
 470 |               "      <td>1.96</td>\n",
 471 |               "      <td>Male</td>\n",
 472 |               "      <td>No</td>\n",
 473 |               "      <td>Sun</td>\n",
 474 |               "      <td>Dinner</td>\n",
 475 |               "      <td>2</td>\n",
 476 |               "    </tr>\n",
 477 |               "    <tr>\n",
 478 |               "      <th>9</th>\n",
 479 |               "      <td>14.78</td>\n",
 480 |               "      <td>3.23</td>\n",
 481 |               "      <td>Male</td>\n",
 482 |               "      <td>No</td>\n",
 483 |               "      <td>Sun</td>\n",
 484 |               "      <td>Dinner</td>\n",
 485 |               "      <td>2</td>\n",
 486 |               "    </tr>\n",
 487 |               "    <tr>\n",
 488 |               "      <th>10</th>\n",
 489 |               "      <td>10.27</td>\n",
 490 |               "      <td>1.71</td>\n",
 491 |               "      <td>Male</td>\n",
 492 |               "      <td>No</td>\n",
 493 |               "      <td>Sun</td>\n",
 494 |               "      <td>Dinner</td>\n",
 495 |               "      <td>2</td>\n",
 496 |               "    </tr>\n",
 497 |               "    <tr>\n",
 498 |               "      <th>11</th>\n",
 499 |               "      <td>35.26</td>\n",
 500 |               "      <td>5.00</td>\n",
 501 |               "      <td>Female</td>\n",
 502 |               "      <td>No</td>\n",
 503 |               "      <td>Sun</td>\n",
 504 |               "      <td>Dinner</td>\n",
 505 |               "      <td>4</td>\n",
 506 |               "    </tr>\n",
 507 |               "    <tr>\n",
 508 |               "      <th>12</th>\n",
 509 |               "      <td>15.42</td>\n",
 510 |               "      <td>1.57</td>\n",
 511 |               "      <td>Male</td>\n",
 512 |               "      <td>No</td>\n",
 513 |               "      <td>Sun</td>\n",
 514 |               "      <td>Dinner</td>\n",
 515 |               "      <td>2</td>\n",
 516 |               "    </tr>\n",
 517 |               "    <tr>\n",
 518 |               "      <th>13</th>\n",
 519 |               "      <td>18.43</td>\n",
 520 |               "      <td>3.00</td>\n",
 521 |               "      <td>Male</td>\n",
 522 |               "      <td>No</td>\n",
 523 |               "      <td>Sun</td>\n",
 524 |               "      <td>Dinner</td>\n",
 525 |               "      <td>4</td>\n",
 526 |               "    </tr>\n",
 527 |               "    <tr>\n",
 528 |               "      <th>14</th>\n",
 529 |               "      <td>14.83</td>\n",
 530 |               "      <td>3.02</td>\n",
 531 |               "      <td>Female</td>\n",
 532 |               "      <td>No</td>\n",
 533 |               "      <td>Sun</td>\n",
 534 |               "      <td>Dinner</td>\n",
 535 |               "      <td>2</td>\n",
 536 |               "    </tr>\n",
 537 |               "    <tr>\n",
 538 |               "      <th>15</th>\n",
 539 |               "      <td>21.58</td>\n",
 540 |               "      <td>3.92</td>\n",
 541 |               "      <td>Male</td>\n",
 542 |               "      <td>No</td>\n",
 543 |               "      <td>Sun</td>\n",
 544 |               "      <td>Dinner</td>\n",
 545 |               "      <td>2</td>\n",
 546 |               "    </tr>\n",
 547 |               "    <tr>\n",
 548 |               "      <th>16</th>\n",
 549 |               "      <td>10.33</td>\n",
 550 |               "      <td>1.67</td>\n",
 551 |               "      <td>Female</td>\n",
 552 |               "      <td>No</td>\n",
 553 |               "      <td>Sun</td>\n",
 554 |               "      <td>Dinner</td>\n",
 555 |               "      <td>3</td>\n",
 556 |               "    </tr>\n",
 557 |               "    <tr>\n",
 558 |               "      <th>17</th>\n",
 559 |               "      <td>16.29</td>\n",
 560 |               "      <td>3.71</td>\n",
 561 |               "      <td>Male</td>\n",
 562 |               "      <td>No</td>\n",
 563 |               "      <td>Sun</td>\n",
 564 |               "      <td>Dinner</td>\n",
 565 |               "      <td>3</td>\n",
 566 |               "    </tr>\n",
 567 |               "    <tr>\n",
 568 |               "      <th>18</th>\n",
 569 |               "      <td>16.97</td>\n",
 570 |               "      <td>3.50</td>\n",
 571 |               "      <td>Female</td>\n",
 572 |               "      <td>No</td>\n",
 573 |               "      <td>Sun</td>\n",
 574 |               "      <td>Dinner</td>\n",
 575 |               "      <td>3</td>\n",
 576 |               "    </tr>\n",
 577 |               "    <tr>\n",
 578 |               "      <th>19</th>\n",
 579 |               "      <td>20.65</td>\n",
 580 |               "      <td>3.35</td>\n",
 581 |               "      <td>Male</td>\n",
 582 |               "      <td>No</td>\n",
 583 |               "      <td>Sat</td>\n",
 584 |               "      <td>Dinner</td>\n",
 585 |               "      <td>3</td>\n",
 586 |               "    </tr>\n",
 587 |               "    <tr>\n",
 588 |               "      <th>20</th>\n",
 589 |               "      <td>17.92</td>\n",
 590 |               "      <td>4.08</td>\n",
 591 |               "      <td>Male</td>\n",
 592 |               "      <td>No</td>\n",
 593 |               "      <td>Sat</td>\n",
 594 |               "      <td>Dinner</td>\n",
 595 |               "      <td>2</td>\n",
 596 |               "    </tr>\n",
 597 |               "    <tr>\n",
 598 |               "      <th>21</th>\n",
 599 |               "      <td>20.29</td>\n",
 600 |               "      <td>2.75</td>\n",
 601 |               "      <td>Female</td>\n",
 602 |               "      <td>No</td>\n",
 603 |               "      <td>Sat</td>\n",
 604 |               "      <td>Dinner</td>\n",
 605 |               "      <td>2</td>\n",
 606 |               "    </tr>\n",
 607 |               "    <tr>\n",
 608 |               "      <th>22</th>\n",
 609 |               "      <td>15.77</td>\n",
 610 |               "      <td>2.23</td>\n",
 611 |               "      <td>Female</td>\n",
 612 |               "      <td>No</td>\n",
 613 |               "      <td>Sat</td>\n",
 614 |               "      <td>Dinner</td>\n",
 615 |               "      <td>2</td>\n",
 616 |               "    </tr>\n",
 617 |               "    <tr>\n",
 618 |               "      <th>23</th>\n",
 619 |               "      <td>39.42</td>\n",
 620 |               "      <td>7.58</td>\n",
 621 |               "      <td>Male</td>\n",
 622 |               "      <td>No</td>\n",
 623 |               "      <td>Sat</td>\n",
 624 |               "      <td>Dinner</td>\n",
 625 |               "      <td>4</td>\n",
 626 |               "    </tr>\n",
 627 |               "    <tr>\n",
 628 |               "      <th>24</th>\n",
 629 |               "      <td>19.82</td>\n",
 630 |               "      <td>3.18</td>\n",
 631 |               "      <td>Male</td>\n",
 632 |               "      <td>No</td>\n",
 633 |               "      <td>Sat</td>\n",
 634 |               "      <td>Dinner</td>\n",
 635 |               "      <td>2</td>\n",
 636 |               "    </tr>\n",
 637 |               "    <tr>\n",
 638 |               "      <th>25</th>\n",
 639 |               "      <td>17.81</td>\n",
 640 |               "      <td>2.34</td>\n",
 641 |               "      <td>Male</td>\n",
 642 |               "      <td>No</td>\n",
 643 |               "      <td>Sat</td>\n",
 644 |               "      <td>Dinner</td>\n",
 645 |               "      <td>4</td>\n",
 646 |               "    </tr>\n",
 647 |               "    <tr>\n",
 648 |               "      <th>26</th>\n",
 649 |               "      <td>13.37</td>\n",
 650 |               "      <td>2.00</td>\n",
 651 |               "      <td>Male</td>\n",
 652 |               "      <td>No</td>\n",
 653 |               "      <td>Sat</td>\n",
 654 |               "      <td>Dinner</td>\n",
 655 |               "      <td>2</td>\n",
 656 |               "    </tr>\n",
 657 |               "    <tr>\n",
 658 |               "      <th>27</th>\n",
 659 |               "      <td>12.69</td>\n",
 660 |               "      <td>2.00</td>\n",
 661 |               "      <td>Male</td>\n",
 662 |               "      <td>No</td>\n",
 663 |               "      <td>Sat</td>\n",
 664 |               "      <td>Dinner</td>\n",
 665 |               "      <td>2</td>\n",
 666 |               "    </tr>\n",
 667 |               "    <tr>\n",
 668 |               "      <th>28</th>\n",
 669 |               "      <td>21.70</td>\n",
 670 |               "      <td>4.30</td>\n",
 671 |               "      <td>Male</td>\n",
 672 |               "      <td>No</td>\n",
 673 |               "      <td>Sat</td>\n",
 674 |               "      <td>Dinner</td>\n",
 675 |               "      <td>2</td>\n",
 676 |               "    </tr>\n",
 677 |               "    <tr>\n",
 678 |               "      <th>29</th>\n",
 679 |               "      <td>19.65</td>\n",
 680 |               "      <td>3.00</td>\n",
 681 |               "      <td>Female</td>\n",
 682 |               "      <td>No</td>\n",
 683 |               "      <td>Sat</td>\n",
 684 |               "      <td>Dinner</td>\n",
 685 |               "      <td>2</td>\n",
 686 |               "    </tr>\n",
 687 |               "    <tr>\n",
 688 |               "      <th>...</th>\n",
 689 |               "      <td>...</td>\n",
 690 |               "      <td>...</td>\n",
 691 |               "      <td>...</td>\n",
 692 |               "      <td>...</td>\n",
 693 |               "      <td>...</td>\n",
 694 |               "      <td>...</td>\n",
 695 |               "      <td>...</td>\n",
 696 |               "    </tr>\n",
 697 |               "    <tr>\n",
 698 |               "      <th>214</th>\n",
 699 |               "      <td>28.17</td>\n",
 700 |               "      <td>6.50</td>\n",
 701 |               "      <td>Female</td>\n",
 702 |               "      <td>Yes</td>\n",
 703 |               "      <td>Sat</td>\n",
 704 |               "      <td>Dinner</td>\n",
 705 |               "      <td>3</td>\n",
 706 |               "    </tr>\n",
 707 |               "    <tr>\n",
 708 |               "      <th>215</th>\n",
 709 |               "      <td>12.90</td>\n",
 710 |               "      <td>1.10</td>\n",
 711 |               "      <td>Female</td>\n",
 712 |               "      <td>Yes</td>\n",
 713 |               "      <td>Sat</td>\n",
 714 |               "      <td>Dinner</td>\n",
 715 |               "      <td>2</td>\n",
 716 |               "    </tr>\n",
 717 |               "    <tr>\n",
 718 |               "      <th>216</th>\n",
 719 |               "      <td>28.15</td>\n",
 720 |               "      <td>3.00</td>\n",
 721 |               "      <td>Male</td>\n",
 722 |               "      <td>Yes</td>\n",
 723 |               "      <td>Sat</td>\n",
 724 |               "      <td>Dinner</td>\n",
 725 |               "      <td>5</td>\n",
 726 |               "    </tr>\n",
 727 |               "    <tr>\n",
 728 |               "      <th>217</th>\n",
 729 |               "      <td>11.59</td>\n",
 730 |               "      <td>1.50</td>\n",
 731 |               "      <td>Male</td>\n",
 732 |               "      <td>Yes</td>\n",
 733 |               "      <td>Sat</td>\n",
 734 |               "      <td>Dinner</td>\n",
 735 |               "      <td>2</td>\n",
 736 |               "    </tr>\n",
 737 |               "    <tr>\n",
 738 |               "      <th>218</th>\n",
 739 |               "      <td>7.74</td>\n",
 740 |               "      <td>1.44</td>\n",
 741 |               "      <td>Male</td>\n",
 742 |               "      <td>Yes</td>\n",
 743 |               "      <td>Sat</td>\n",
 744 |               "      <td>Dinner</td>\n",
 745 |               "      <td>2</td>\n",
 746 |               "    </tr>\n",
 747 |               "    <tr>\n",
 748 |               "      <th>219</th>\n",
 749 |               "      <td>30.14</td>\n",
 750 |               "      <td>3.09</td>\n",
 751 |               "      <td>Female</td>\n",
 752 |               "      <td>Yes</td>\n",
 753 |               "      <td>Sat</td>\n",
 754 |               "      <td>Dinner</td>\n",
 755 |               "      <td>4</td>\n",
 756 |               "    </tr>\n",
 757 |               "    <tr>\n",
 758 |               "      <th>220</th>\n",
 759 |               "      <td>12.16</td>\n",
 760 |               "      <td>2.20</td>\n",
 761 |               "      <td>Male</td>\n",
 762 |               "      <td>Yes</td>\n",
 763 |               "      <td>Fri</td>\n",
 764 |               "      <td>Lunch</td>\n",
 765 |               "      <td>2</td>\n",
 766 |               "    </tr>\n",
 767 |               "    <tr>\n",
 768 |               "      <th>221</th>\n",
 769 |               "      <td>13.42</td>\n",
 770 |               "      <td>3.48</td>\n",
 771 |               "      <td>Female</td>\n",
 772 |               "      <td>Yes</td>\n",
 773 |               "      <td>Fri</td>\n",
 774 |               "      <td>Lunch</td>\n",
 775 |               "      <td>2</td>\n",
 776 |               "    </tr>\n",
 777 |               "    <tr>\n",
 778 |               "      <th>222</th>\n",
 779 |               "      <td>8.58</td>\n",
 780 |               "      <td>1.92</td>\n",
 781 |               "      <td>Male</td>\n",
 782 |               "      <td>Yes</td>\n",
 783 |               "      <td>Fri</td>\n",
 784 |               "      <td>Lunch</td>\n",
 785 |               "      <td>1</td>\n",
 786 |               "    </tr>\n",
 787 |               "    <tr>\n",
 788 |               "      <th>223</th>\n",
 789 |               "      <td>15.98</td>\n",
 790 |               "      <td>3.00</td>\n",
 791 |               "      <td>Female</td>\n",
 792 |               "      <td>No</td>\n",
 793 |               "      <td>Fri</td>\n",
 794 |               "      <td>Lunch</td>\n",
 795 |               "      <td>3</td>\n",
 796 |               "    </tr>\n",
 797 |               "    <tr>\n",
 798 |               "      <th>224</th>\n",
 799 |               "      <td>13.42</td>\n",
 800 |               "      <td>1.58</td>\n",
 801 |               "      <td>Male</td>\n",
 802 |               "      <td>Yes</td>\n",
 803 |               "      <td>Fri</td>\n",
 804 |               "      <td>Lunch</td>\n",
 805 |               "      <td>2</td>\n",
 806 |               "    </tr>\n",
 807 |               "    <tr>\n",
 808 |               "      <th>225</th>\n",
 809 |               "      <td>16.27</td>\n",
 810 |               "      <td>2.50</td>\n",
 811 |               "      <td>Female</td>\n",
 812 |               "      <td>Yes</td>\n",
 813 |               "      <td>Fri</td>\n",
 814 |               "      <td>Lunch</td>\n",
 815 |               "      <td>2</td>\n",
 816 |               "    </tr>\n",
 817 |               "    <tr>\n",
 818 |               "      <th>226</th>\n",
 819 |               "      <td>10.09</td>\n",
 820 |               "      <td>2.00</td>\n",
 821 |               "      <td>Female</td>\n",
 822 |               "      <td>Yes</td>\n",
 823 |               "      <td>Fri</td>\n",
 824 |               "      <td>Lunch</td>\n",
 825 |               "      <td>2</td>\n",
 826 |               "    </tr>\n",
 827 |               "    <tr>\n",
 828 |               "      <th>227</th>\n",
 829 |               "      <td>20.45</td>\n",
 830 |               "      <td>3.00</td>\n",
 831 |               "      <td>Male</td>\n",
 832 |               "      <td>No</td>\n",
 833 |               "      <td>Sat</td>\n",
 834 |               "      <td>Dinner</td>\n",
 835 |               "      <td>4</td>\n",
 836 |               "    </tr>\n",
 837 |               "    <tr>\n",
 838 |               "      <th>228</th>\n",
 839 |               "      <td>13.28</td>\n",
 840 |               "      <td>2.72</td>\n",
 841 |               "      <td>Male</td>\n",
 842 |               "      <td>No</td>\n",
 843 |               "      <td>Sat</td>\n",
 844 |               "      <td>Dinner</td>\n",
 845 |               "      <td>2</td>\n",
 846 |               "    </tr>\n",
 847 |               "    <tr>\n",
 848 |               "      <th>229</th>\n",
 849 |               "      <td>22.12</td>\n",
 850 |               "      <td>2.88</td>\n",
 851 |               "      <td>Female</td>\n",
 852 |               "      <td>Yes</td>\n",
 853 |               "      <td>Sat</td>\n",
 854 |               "      <td>Dinner</td>\n",
 855 |               "      <td>2</td>\n",
 856 |               "    </tr>\n",
 857 |               "    <tr>\n",
 858 |               "      <th>230</th>\n",
 859 |               "      <td>24.01</td>\n",
 860 |               "      <td>2.00</td>\n",
 861 |               "      <td>Male</td>\n",
 862 |               "      <td>Yes</td>\n",
 863 |               "      <td>Sat</td>\n",
 864 |               "      <td>Dinner</td>\n",
 865 |               "      <td>4</td>\n",
 866 |               "    </tr>\n",
 867 |               "    <tr>\n",
 868 |               "      <th>231</th>\n",
 869 |               "      <td>15.69</td>\n",
 870 |               "      <td>3.00</td>\n",
 871 |               "      <td>Male</td>\n",
 872 |               "      <td>Yes</td>\n",
 873 |               "      <td>Sat</td>\n",
 874 |               "      <td>Dinner</td>\n",
 875 |               "      <td>3</td>\n",
 876 |               "    </tr>\n",
 877 |               "    <tr>\n",
 878 |               "      <th>232</th>\n",
 879 |               "      <td>11.61</td>\n",
 880 |               "      <td>3.39</td>\n",
 881 |               "      <td>Male</td>\n",
 882 |               "      <td>No</td>\n",
 883 |               "      <td>Sat</td>\n",
 884 |               "      <td>Dinner</td>\n",
 885 |               "      <td>2</td>\n",
 886 |               "    </tr>\n",
 887 |               "    <tr>\n",
 888 |               "      <th>233</th>\n",
 889 |               "      <td>10.77</td>\n",
 890 |               "      <td>1.47</td>\n",
 891 |               "      <td>Male</td>\n",
 892 |               "      <td>No</td>\n",
 893 |               "      <td>Sat</td>\n",
 894 |               "      <td>Dinner</td>\n",
 895 |               "      <td>2</td>\n",
 896 |               "    </tr>\n",
 897 |               "    <tr>\n",
 898 |               "      <th>234</th>\n",
 899 |               "      <td>15.53</td>\n",
 900 |               "      <td>3.00</td>\n",
 901 |               "      <td>Male</td>\n",
 902 |               "      <td>Yes</td>\n",
 903 |               "      <td>Sat</td>\n",
 904 |               "      <td>Dinner</td>\n",
 905 |               "      <td>2</td>\n",
 906 |               "    </tr>\n",
 907 |               "    <tr>\n",
 908 |               "      <th>235</th>\n",
 909 |               "      <td>10.07</td>\n",
 910 |               "      <td>1.25</td>\n",
 911 |               "      <td>Male</td>\n",
 912 |               "      <td>No</td>\n",
 913 |               "      <td>Sat</td>\n",
 914 |               "      <td>Dinner</td>\n",
 915 |               "      <td>2</td>\n",
 916 |               "    </tr>\n",
 917 |               "    <tr>\n",
 918 |               "      <th>236</th>\n",
 919 |               "      <td>12.60</td>\n",
 920 |               "      <td>1.00</td>\n",
 921 |               "      <td>Male</td>\n",
 922 |               "      <td>Yes</td>\n",
 923 |               "      <td>Sat</td>\n",
 924 |               "      <td>Dinner</td>\n",
 925 |               "      <td>2</td>\n",
 926 |               "    </tr>\n",
 927 |               "    <tr>\n",
 928 |               "      <th>237</th>\n",
 929 |               "      <td>32.83</td>\n",
 930 |               "      <td>1.17</td>\n",
 931 |               "      <td>Male</td>\n",
 932 |               "      <td>Yes</td>\n",
 933 |               "      <td>Sat</td>\n",
 934 |               "      <td>Dinner</td>\n",
 935 |               "      <td>2</td>\n",
 936 |               "    </tr>\n",
 937 |               "    <tr>\n",
 938 |               "      <th>238</th>\n",
 939 |               "      <td>35.83</td>\n",
 940 |               "      <td>4.67</td>\n",
 941 |               "      <td>Female</td>\n",
 942 |               "      <td>No</td>\n",
 943 |               "      <td>Sat</td>\n",
 944 |               "      <td>Dinner</td>\n",
 945 |               "      <td>3</td>\n",
 946 |               "    </tr>\n",
 947 |               "    <tr>\n",
 948 |               "      <th>239</th>\n",
 949 |               "      <td>29.03</td>\n",
 950 |               "      <td>5.92</td>\n",
 951 |               "      <td>Male</td>\n",
 952 |               "      <td>No</td>\n",
 953 |               "      <td>Sat</td>\n",
 954 |               "      <td>Dinner</td>\n",
 955 |               "      <td>3</td>\n",
 956 |               "    </tr>\n",
 957 |               "    <tr>\n",
 958 |               "      <th>240</th>\n",
 959 |               "      <td>27.18</td>\n",
 960 |               "      <td>2.00</td>\n",
 961 |               "      <td>Female</td>\n",
 962 |               "      <td>Yes</td>\n",
 963 |               "      <td>Sat</td>\n",
 964 |               "      <td>Dinner</td>\n",
 965 |               "      <td>2</td>\n",
 966 |               "    </tr>\n",
 967 |               "    <tr>\n",
 968 |               "      <th>241</th>\n",
 969 |               "      <td>22.67</td>\n",
 970 |               "      <td>2.00</td>\n",
 971 |               "      <td>Male</td>\n",
 972 |               "      <td>Yes</td>\n",
 973 |               "      <td>Sat</td>\n",
 974 |               "      <td>Dinner</td>\n",
 975 |               "      <td>2</td>\n",
 976 |               "    </tr>\n",
 977 |               "    <tr>\n",
 978 |               "      <th>242</th>\n",
 979 |               "      <td>17.82</td>\n",
 980 |               "      <td>1.75</td>\n",
 981 |               "      <td>Male</td>\n",
 982 |               "      <td>No</td>\n",
 983 |               "      <td>Sat</td>\n",
 984 |               "      <td>Dinner</td>\n",
 985 |               "      <td>2</td>\n",
 986 |               "    </tr>\n",
 987 |               "    <tr>\n",
 988 |               "      <th>243</th>\n",
 989 |               "      <td>18.78</td>\n",
 990 |               "      <td>3.00</td>\n",
 991 |               "      <td>Female</td>\n",
 992 |               "      <td>No</td>\n",
 993 |               "      <td>Thur</td>\n",
 994 |               "      <td>Dinner</td>\n",
 995 |               "      <td>2</td>\n",
 996 |               "    </tr>\n",
 997 |               "  </tbody>\n",
 998 |               "</table>\n",
 999 |               "<p>244 rows × 7 columns</p>\n",
1000 |               "</div>"
1001 |             ]
1002 |           },
1003 |           "metadata": {}
1004 |         }
1005 |       ],
1006 |       "execution_count": 13,
1007 |       "metadata": {}
1008 |     },
1009 |     {
1010 |       "cell_type": "code",
1011 |       "source": [
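1012 |         "# dummy encoding: k-1 indicator (0/1) columns per categorical variable; one level is dropped as the baseline\n",
1013 |         "# one-hot encoding: k indicator (0/1) columns per categorical variable; one column per level"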
1014 |       ],
1015 |       "outputs": [],
1016 |       "execution_count": 14,
1017 |       "metadata": {}
1018 |     },
1019 |     {
1020 |       "cell_type": "code",
1021 |       "source": [
1022 |         "lr.fit(X=tips[['total_bill', 'sex']], y=tips['tip'])"
1023 |       ],
1024 |       "outputs": [
1025 |         {
1026 |           "output_type": "error",
1027 |           "ename": "ValueError",
1028 |           "evalue": "could not convert string to float: 'Female'",
1029 |           "traceback": [
1030 |             "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
1031 |             "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
1032 |             "\u001b[0;32m<ipython-input-15-e50af409ffe4>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mlr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtips\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'total_bill'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'sex'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtips\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'tip'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
1033 |             "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/base.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[1;32m    456\u001b[0m         \u001b[0mn_jobs_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_jobs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    457\u001b[0m         X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],\n\u001b[0;32m--> 458\u001b[0;31m                          y_numeric=True, multi_output=True)\n\u001b[0m\u001b[1;32m    459\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    460\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0msample_weight\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0matleast_1d\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msample_weight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
1034 |             "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_X_y\u001b[0;34m(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)\u001b[0m\n\u001b[1;32m    754\u001b[0m                     \u001b[0mensure_min_features\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mensure_min_features\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    755\u001b[0m                     \u001b[0mwarn_on_dtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mwarn_on_dtype\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 756\u001b[0;31m                     estimator=estimator)\n\u001b[0m\u001b[1;32m    757\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mmulti_output\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    758\u001b[0m         y = check_array(y, 'csr', force_all_finite=True, ensure_2d=False,\n",
1035 |             "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_array\u001b[0;34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)\u001b[0m\n\u001b[1;32m    565\u001b[0m         \u001b[0;31m# make sure we actually converted to numeric:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    566\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mdtype_numeric\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0marray\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkind\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"O\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 567\u001b[0;31m             \u001b[0marray\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0marray\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfloat64\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    568\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mallow_nd\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0marray\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[0;34m>=\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    569\u001b[0m             raise ValueError(\"Found array with dim %d. %s expected <= 2.\"\n",
1036 |             "\u001b[0;31mValueError\u001b[0m: could not convert string to float: 'Female'"
1037 |           ]
1038 |         }
1039 |       ],
1040 |       "execution_count": 15,
1041 |       "metadata": {}
1042 |     },
1043 |     {
1044 |       "cell_type": "code",
1045 |       "source": [
1046 |         "tips_dummy = tips[['tip', 'total_bill', 'sex']]"
1047 |       ],
1048 |       "outputs": [],
1049 |       "execution_count": 16,
1050 |       "metadata": {}
1051 |     },
1052 |     {
1053 |       "cell_type": "code",
1054 |       "source": [
1055 |         "tips_dummy.head()"
1056 |       ],
1057 |       "outputs": [
1058 |         {
1059 |           "output_type": "execute_result",
1060 |           "execution_count": 17,
1061 |           "data": {
1062 |             "text/plain": [
1063 |               "    tip  total_bill     sex\n",
1064 |               "0  1.01       16.99  Female\n",
1065 |               "1  1.66       10.34    Male\n",
1066 |               "2  3.50       21.01    Male\n",
1067 |               "3  3.31       23.68    Male\n",
1068 |               "4  3.61       24.59  Female"
1069 |             ],
1070 |             "text/html": [
1071 |               "<div>\n",
1072 |               "<style scoped>\n",
1073 |               "    .dataframe tbody tr th:only-of-type {\n",
1074 |               "        vertical-align: middle;\n",
1075 |               "    }\n",
1076 |               "\n",
1077 |               "    .dataframe tbody tr th {\n",
1078 |               "        vertical-align: top;\n",
1079 |               "    }\n",
1080 |               "\n",
1081 |               "    .dataframe thead th {\n",
1082 |               "        text-align: right;\n",
1083 |               "    }\n",
1084 |               "</style>\n",
1085 |               "<table border=\"1\" class=\"dataframe\">\n",
1086 |               "  <thead>\n",
1087 |               "    <tr style=\"text-align: right;\">\n",
1088 |               "      <th></th>\n",
1089 |               "      <th>tip</th>\n",
1090 |               "      <th>total_bill</th>\n",
1091 |               "      <th>sex</th>\n",
1092 |               "    </tr>\n",
1093 |               "  </thead>\n",
1094 |               "  <tbody>\n",
1095 |               "    <tr>\n",
1096 |               "      <th>0</th>\n",
1097 |               "      <td>1.01</td>\n",
1098 |               "      <td>16.99</td>\n",
1099 |               "      <td>Female</td>\n",
1100 |               "    </tr>\n",
1101 |               "    <tr>\n",
1102 |               "      <th>1</th>\n",
1103 |               "      <td>1.66</td>\n",
1104 |               "      <td>10.34</td>\n",
1105 |               "      <td>Male</td>\n",
1106 |               "    </tr>\n",
1107 |               "    <tr>\n",
1108 |               "      <th>2</th>\n",
1109 |               "      <td>3.50</td>\n",
1110 |               "      <td>21.01</td>\n",
1111 |               "      <td>Male</td>\n",
1112 |               "    </tr>\n",
1113 |               "    <tr>\n",
1114 |               "      <th>3</th>\n",
1115 |               "      <td>3.31</td>\n",
1116 |               "      <td>23.68</td>\n",
1117 |               "      <td>Male</td>\n",
1118 |               "    </tr>\n",
1119 |               "    <tr>\n",
1120 |               "      <th>4</th>\n",
1121 |               "      <td>3.61</td>\n",
1122 |               "      <td>24.59</td>\n",
1123 |               "      <td>Female</td>\n",
1124 |               "    </tr>\n",
1125 |               "  </tbody>\n",
1126 |               "</table>\n",
1127 |               "</div>"
1128 |             ]
1129 |           },
1130 |           "metadata": {}
1131 |         }
1132 |       ],
1133 |       "execution_count": 17,
1134 |       "metadata": {}
1135 |     },
1136 |     {
1137 |       "cell_type": "code",
1138 |       "source": [
1139 |         "tips_dummy = pd.get_dummies(tips_dummy, drop_first=True)"
1140 |       ],
1141 |       "outputs": [],
1142 |       "execution_count": 18,
1143 |       "metadata": {}
1144 |     },
1145 |     {
1146 |       "cell_type": "code",
1147 |       "source": [
1148 |         "tips_dummy.head()"
1149 |       ],
1150 |       "outputs": [
1151 |         {
1152 |           "output_type": "execute_result",
1153 |           "execution_count": 19,
1154 |           "data": {
1155 |             "text/plain": [
1156 |               "    tip  total_bill  sex_Female\n",
1157 |               "0  1.01       16.99           1\n",
1158 |               "1  1.66       10.34           0\n",
1159 |               "2  3.50       21.01           0\n",
1160 |               "3  3.31       23.68           0\n",
1161 |               "4  3.61       24.59           1"
1162 |             ],
1163 |             "text/html": [
1164 |               "<div>\n",
1165 |               "<style scoped>\n",
1166 |               "    .dataframe tbody tr th:only-of-type {\n",
1167 |               "        vertical-align: middle;\n",
1168 |               "    }\n",
1169 |               "\n",
1170 |               "    .dataframe tbody tr th {\n",
1171 |               "        vertical-align: top;\n",
1172 |               "    }\n",
1173 |               "\n",
1174 |               "    .dataframe thead th {\n",
1175 |               "        text-align: right;\n",
1176 |               "    }\n",
1177 |               "</style>\n",
1178 |               "<table border=\"1\" class=\"dataframe\">\n",
1179 |               "  <thead>\n",
1180 |               "    <tr style=\"text-align: right;\">\n",
1181 |               "      <th></th>\n",
1182 |               "      <th>tip</th>\n",
1183 |               "      <th>total_bill</th>\n",
1184 |               "      <th>sex_Female</th>\n",
1185 |               "    </tr>\n",
1186 |               "  </thead>\n",
1187 |               "  <tbody>\n",
1188 |               "    <tr>\n",
1189 |               "      <th>0</th>\n",
1190 |               "      <td>1.01</td>\n",
1191 |               "      <td>16.99</td>\n",
1192 |               "      <td>1</td>\n",
1193 |               "    </tr>\n",
1194 |               "    <tr>\n",
1195 |               "      <th>1</th>\n",
1196 |               "      <td>1.66</td>\n",
1197 |               "      <td>10.34</td>\n",
1198 |               "      <td>0</td>\n",
1199 |               "    </tr>\n",
1200 |               "    <tr>\n",
1201 |               "      <th>2</th>\n",
1202 |               "      <td>3.50</td>\n",
1203 |               "      <td>21.01</td>\n",
1204 |               "      <td>0</td>\n",
1205 |               "    </tr>\n",
1206 |               "    <tr>\n",
1207 |               "      <th>3</th>\n",
1208 |               "      <td>3.31</td>\n",
1209 |               "      <td>23.68</td>\n",
1210 |               "      <td>0</td>\n",
1211 |               "    </tr>\n",
1212 |               "    <tr>\n",
1213 |               "      <th>4</th>\n",
1214 |               "      <td>3.61</td>\n",
1215 |               "      <td>24.59</td>\n",
1216 |               "      <td>1</td>\n",
1217 |               "    </tr>\n",
1218 |               "  </tbody>\n",
1219 |               "</table>\n",
1220 |               "</div>"
1221 |             ]
1222 |           },
1223 |           "metadata": {}
1224 |         }
1225 |       ],
1226 |       "execution_count": 19,
1227 |       "metadata": {}
1228 |     },
1229 |     {
1230 |       "cell_type": "code",
1231 |       "source": [
1232 |         "lr.fit(X=tips_dummy.iloc[:, 1:], y=tips_dummy.iloc[:, 0])"
1233 |       ],
1234 |       "outputs": [
1235 |         {
1236 |           "output_type": "execute_result",
1237 |           "execution_count": 20,
1238 |           "data": {
1239 |             "text/plain": [
1240 |               "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n",
1241 |               "         normalize=False)"
1242 |             ]
1243 |           },
1244 |           "metadata": {}
1245 |         }
1246 |       ],
1247 |       "execution_count": 20,
1248 |       "metadata": {}
1249 |     },
1250 |     {
1251 |       "cell_type": "code",
1252 |       "source": [
1253 |         "tip_money = lr.coef_[0]"
1254 |       ],
1255 |       "outputs": [],
1256 |       "execution_count": 21,
1257 |       "metadata": {}
1258 |     },
1259 |     {
1260 |       "cell_type": "code",
1261 |       "source": [
1262 |         "tip_money"
1263 |       ],
1264 |       "outputs": [
1265 |         {
1266 |           "output_type": "execute_result",
1267 |           "execution_count": 22,
1268 |           "data": {
1269 |             "text/plain": [
1270 |               "0.10523235686615456"
1271 |             ]
1272 |           },
1273 |           "metadata": {}
1274 |         }
1275 |       ],
1276 |       "execution_count": 22,
1277 |       "metadata": {}
1278 |     },
1279 |     {
1280 |       "cell_type": "code",
1281 |       "source": [],
1282 |       "outputs": [],
1283 |       "execution_count": 24,
1284 |       "metadata": {}
1285 |     }
1286 |   ],
1287 |   "metadata": {
1288 |     "kernelspec": {
1289 |       "name": "python3",
1290 |       "language": "python",
1291 |       "display_name": "Python 3"
1292 |     },
1293 |     "language_info": {
1294 |       "name": "python",
1295 |       "version": "3.7.3",
1296 |       "mimetype": "text/x-python",
1297 |       "codemirror_mode": {
1298 |         "name": "ipython",
1299 |         "version": 3
1300 |       },
1301 |       "pygments_lexer": "ipython3",
1302 |       "nbconvert_exporter": "python",
1303 |       "file_extension": ".py"
1304 |     },
1305 |     "kernel_info": {
1306 |       "name": "python3"
1307 |     },
1308 |     "nteract": {
1309 |       "version": "0.14.3"
1310 |     }
1311 |   },
1312 |   "nbformat": 4,
1313 |   "nbformat_minor": 2
1314 | }


--------------------------------------------------------------------------------
/test_installation.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import seaborn as sns
3 | import sklearn as sk
4 | 


--------------------------------------------------------------------------------