├── .gitignore
├── LICENSE
├── README.md
├── data.zip
├── data
│   ├── billboard.csv
│   ├── country_timeseries.csv
│   ├── gapminder.tsv
│   ├── pew.csv
│   ├── table1.csv
│   ├── table2.csv
│   ├── table3.csv
│   ├── table4a.csv
│   ├── table4b.csv
│   └── weather.csv
├── exercises
│   └── exercises.ipynb
├── notebooks
│   ├── .gitkeep
│   ├── 01-intro.ipynb
│   ├── 02-tidy.ipynb
│   ├── 03-apply.ipynb
│   ├── 04-plots.ipynb
│   └── 05-model.ipynb
├── notes
│   ├── 01-intro.ipynb
│   ├── 02-tidy.ipynb
│   ├── 03-apply.ipynb
│   ├── 04-plots.ipynb
│   └── 05-models.ipynb
└── test_installation.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 |
50 | # Translations
51 | *.mo
52 | *.pot
53 |
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 |
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 |
63 | # Scrapy stuff:
64 | .scrapy
65 |
66 | # Sphinx documentation
67 | docs/_build/
68 |
69 | # PyBuilder
70 | target/
71 |
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 |
75 | # pyenv
76 | .python-version
77 |
78 | # celery beat schedule file
79 | celerybeat-schedule
80 |
81 | # SageMath parsed files
82 | *.sage.py
83 |
84 | # Environments
85 | .env
86 | .venv
87 | env/
88 | venv/
89 | ENV/
90 | env.bak/
91 | venv.bak/
92 |
93 | # Spyder project settings
94 | .spyderproject
95 | .spyproject
96 |
97 | # Rope project settings
98 | .ropeproject
99 |
100 | # mkdocs documentation
101 | /site
102 |
103 | # mypy
104 | .mypy_cache/
105 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Daniel Chen
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # scipy-2019-pandas
2 | Pandas tutorial for SciPy 2019
3 |
4 |
5 | # Installation
6 |
7 | 1. Install anaconda (use the Python 3 version): https://www.anaconda.com/distribution/
8 | 2. See the Software-Carpentry Installations for `bash`, `git`, `python`, and `text editor`: https://carpentries.github.io/workshop-template/
9 |
10 | # Testing your installation
11 |
12 | 1. Run the `test_installation.py` script (or copy/paste the import statements into a Python interpreter).
13 |
14 | ## How to run the Jupyter Notebook
15 |
16 | #### Windows/Mac
17 |
18 | There will be an [Anaconda Navigator](https://docs.continuum.io/anaconda/navigator/) application that installs to your system.
19 | You can launch the Jupyter notebook from there to run your python code.
20 |
21 | #### Linux
22 |
23 | Anaconda's Python installation should be your system's default python.
24 | Make sure you open a new terminal window for this to take effect.
25 | You can launch the Jupyter notebook by typing `jupyter notebook` in the terminal.
26 |
27 | ## Creating a Notebook
28 |
29 | Once you have the Jupyter notebook launched, there's a button towards the top right called `new`.
30 | Click this and select `Python 3`.
31 |
32 | # Get Data
33 |
34 | 1. Download or clone this repository.
35 | - Press the green button towards the top right
36 | - click download zip
37 | - extract
38 | - celebrate
39 |
--------------------------------------------------------------------------------
/data.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chendaniely/scipy-2019-pandas/7e515b7561fd5076ee462dd75af9488beadf6148/data.zip
--------------------------------------------------------------------------------
/data/country_timeseries.csv:
--------------------------------------------------------------------------------
1 | Date,Day,Cases_Guinea,Cases_Liberia,Cases_SierraLeone,Cases_Nigeria,Cases_Senegal,Cases_UnitedStates,Cases_Spain,Cases_Mali,Deaths_Guinea,Deaths_Liberia,Deaths_SierraLeone,Deaths_Nigeria,Deaths_Senegal,Deaths_UnitedStates,Deaths_Spain,Deaths_Mali
2 | 1/5/2015,289,2776,,10030,,,,,,1786,,2977,,,,,
3 | 1/4/2015,288,2775,,9780,,,,,,1781,,2943,,,,,
4 | 1/3/2015,287,2769,8166,9722,,,,,,1767,3496,2915,,,,,
5 | 1/2/2015,286,,8157,,,,,,,,3496,,,,,,
6 | 12/31/2014,284,2730,8115,9633,,,,,,1739,3471,2827,,,,,
7 | 12/28/2014,281,2706,8018,9446,,,,,,1708,3423,2758,,,,,
8 | 12/27/2014,280,2695,,9409,,,,,,1697,,2732,,,,,
9 | 12/24/2014,277,2630,7977,9203,,,,,,,3413,2655,,,,,
10 | 12/21/2014,273,2597,,9004,,,,,,1607,,2582,,,,,
11 | 12/20/2014,272,2571,7862,8939,,,,,,1586,3384,2556,,,,,
12 | 12/18/2014,271,,7830,,,,,,,,3376,,,,,,
13 | 12/14/2014,267,2416,,8356,,,,,,1525,,2085,,,,,
14 | 12/9/2014,262,,7797,,,,,,,,3290,,,,,,
15 | 12/7/2014,260,2292,,7897,20,1,4,1,7,1428,,1768,8,0,1,0,6
16 | 12/3/2014,256,,7719,,,,,,,,3177,,,,,,
17 | 11/30/2014,253,2164,,7312,20,1,4,1,7,1327,,1583,8,0,1,0,6
18 | 11/28/2014,251,,7635,,,,,,,,3145,,,,,,
19 | 11/23/2014,246,2134,,6599,20,1,4,1,7,1260,,1398,8,0,1,0,6
20 | 11/22/2014,245,,7168,,,,,,,,3016,,,,,,
21 | 11/18/2014,241,2047,7082,6190,20,1,4,1,6,1214,2963,1267,8,0,1,0,6
22 | 11/16/2014,239,1971,,6073,20,1,4,1,5,1192,,1250,8,0,1,0,5
23 | 11/15/2014,238,,7069,,,,,,,,2964,,,,,,
24 | 11/11/2014,234,1919,,5586,20,1,4,1,4,1166,,1187,8,0,1,0,3
25 | 11/10/2014,233,,6878,,,,,,,,2812,,,,,,
26 | 11/9/2014,232,1878,,5368,20,1,4,1,1,1142,,1169,8,0,1,0,1
27 | 11/8/2014,231,,6822,,,,,,,,2836,,,,,,
28 | 11/4/2014,227,,6619,4862,20,1,4,1,1,,2766,1130,8,0,1,0,1
29 | 11/3/2014,226,1760,,,,,,,,1054,,,,,,,
30 | 11/2/2014,225,1731,,4759,20,1,4,1,1,1041,,1070,8,0,1,0,1
31 | 10/31/2014,222,,6525,,,,,,,,2697,,,,,,
32 | 10/29/2014,220,1667,,5338,20,1,4,1,1,1018,,1510,8,0,1,0,1
33 | 10/27/2014,218,1906,,5235,20,1,4,1,1,997,,1500,8,0,1,0,1
34 | 10/25/2014,216,,6535,,,,,,,,2413,,,,,,
35 | 10/22/2014,214,,,3896,,,4,1,1,,,1281,,,1,0,1
36 | 10/21/2014,213,1553,,,,,,,,926,,,,,,,
37 | 10/19/2014,211,1540,,3706,20,1,3,1,,904,,1259,8,0,1,0,
38 | 10/18/2014,210,,4665,,,,,,,,2705,,,,,,
39 | 10/14/2014,206,1519,,3410,20,1,3,1,,862,,1200,8,0,0,1,
40 | 10/13/2014,205,,4262,,,,,,,,2484,,,,,,
41 | 10/12/2014,204,1472,,3252,20,1,2,1,,843,,1183,8,0,1,1,
42 | 10/11/2014,203,,4249,,,,,,,,2458,,,,,,
43 | 10/8/2014,200,,,2950,20,1,1,1,,,,930,8,0,1,1,
44 | 10/7/2014,199,1350,4076,,,,,,,778,2316,,,,,,
45 | 10/5/2014,197,1298,,2789,20,1,1,,,768,,879,8,0,0,,
46 | 10/4/2014,196,,3924,,,,,,,,2210,,,,,,
47 | 10/1/2014,193,1199,3834,2437,20,1,1,,,739,2069,623,8,0,0,,
48 | 9/28/2014,190,1157,3696,2304,20,1,,,,710,1998,622,8,0,,,
49 | 9/23/2014,185,1074,3458,2021,20,1,,,,648,1830,605,8,0,,,
50 | 9/21/2014,183,1022,3280,1940,20,1,,,,635,1677,597,8,0,,,
51 | 9/20/2014,182,,,1813,,,,,,,,593,,,,,
52 | 9/19/2014,181,1008,,,,,,,,632,,,,,,,
53 | 9/17/2014,179,,3022,,,,,,,,1578,,,,,,
54 | 9/14/2014,176,942,2710,1673,,,,,,601,1459,562,,,,,
55 | 9/13/2014,175,936,,1620,21,1,,,,595,1296,562,8,0,,,
56 | 9/10/2014,172,899,,1478,21,1,,,,568,,536,8,,,,
57 | 9/9/2014,171,,2407,,,,,,,,,,,,,,
58 | 9/7/2014,169,861,2081,1424,21,3,,,,557,1137,524,8,0,,,
59 | 9/5/2014,167,812,1871,1261,22,1,,,,517,1089,491,8,,,,
60 | 8/31/2014,162,771,1698,1216,21,1,,,,494,871,476,7,,,,
61 | 8/26/2014,157,648,1378,1026,17,,,,,430,694,422,6,,,,
62 | 8/20/2014,151,607,1082,910,16,,,,,406,624,392,5,,,,
63 | 8/18/2014,149,579,972,907,15,,,,,396,576,374,4,,,,
64 | 8/16/2014,147,543,834,848,15,,,,,394,466,365,4,,,,
65 | 8/13/2014,144,519,786,810,12,,,,,380,413,348,4,,,,
66 | 8/11/2014,142,510,670,783,12,,,,,377,355,334,3,,,,
67 | 8/9/2014,140,506,599,730,13,,,,,373,323,315,2,,,,
68 | 8/6/2014,137,495,554,717,13,,,,,367,294,298,2,,,,
69 | 8/4/2014,135,495,516,691,9,,,,,363,282,286,1,,,,
70 | 8/1/2014,132,485,468,646,4,,,,,358,255,273,1,,,,
71 | 7/30/2014,129,472,391,574,3,,,,,346,227,252,1,,,,
72 | 7/27/2014,126,460,329,533,1,,,,,339,156,233,1,,,,
73 | 7/23/2014,123,427,249,525,0,,,,,319,129,224,0,,,,
74 | 7/20/2014,120,415,224,454,,,,,,314,127,219,,,,,
75 | 7/17/2014,117,410,196,442,,,,,,310,116,206,,,,,
76 | 7/14/2014,114,411,174,397,,,,,,310,106,197,,,,,
77 | 7/12/2014,112,406,172,386,,,,,,304,105,194,,,,,
78 | 7/8/2014,108,409,142,337,,,,,,309,88,142,,,,,
79 | 7/6/2014,106,408,131,305,,,,,,307,84,127,,,,,
80 | 7/2/2014,102,412,115,252,,,,,,305,75,101,,,,,
81 | 6/30/2014,100,413,107,239,,,,,,303,65,99,,,,,
82 | 6/22/2014,92,,51,,,,,,,,34,,,,,,
83 | 6/20/2014,90,390,,158,,,,,,270,,34,,,,,
84 | 6/19/2014,89,,41,,,,,,,,25,,,,,,
85 | 6/18/2014,88,390,,136,,,,,,267,,28,,,,,
86 | 6/17/2014,87,,,97,,,,,,,,49,,,,,
87 | 6/16/2014,86,398,33,,,,,,,264,24,,,,,,
88 | 6/10/2014,80,351,13,89,,,,,,226,24,7,,,,,
89 | 6/5/2014,75,,13,81,,,,,,,,6,,,,,
90 | 6/3/2014,73,344,13,,,,,,,215,12,6,,,,,
91 | 6/1/2014,71,328,13,79,,,,,,208,12,6,,,,,
92 | 5/28/2014,67,291,13,50,,,,,,193,12,6,,,,,
93 | 5/27/2014,66,281,12,16,,,,,,186,11,5,,,,,
94 | 5/23/2014,62,258,12,0,,,,,,174,11,0,,,,,
95 | 5/12/2014,51,248,12,0,,,,,,171,11,0,,,,,
96 | 5/10/2014,49,233,12,0,,,,,,157,11,0,,,,,
97 | 5/7/2014,46,236,13,0,,,,,,158,11,0,,,,,
98 | 5/5/2014,44,235,13,0,,,,,,157,11,0,,,,,
99 | 5/3/2014,42,231,13,0,,,,,,155,11,0,,,,,
100 | 5/1/2014,40,226,13,0,,,,,,149,11,0,,,,,
101 | 4/26/2014,35,224,,0,,,,,,143,,0,,,,,
102 | 4/24/2014,33,,35,0,,,,,,,,0,,,,,
103 | 4/23/2014,32,218,,0,,,,,,141,,0,,,,,
104 | 4/22/2014,31,,,0,,,,,,,,0,,,,,
105 | 4/21/2014,30,,34,,,,,,,,11,,,,,,
106 | 4/20/2014,29,208,,,,,,,,136,6,,,,,,
107 | 4/17/2014,26,203,27,,,,,,,129,,,,,,,
108 | 4/16/2014,25,197,27,,,,,,,122,13,,,,,,
109 | 4/15/2014,24,,,12,,,,,,,,,,,,,
110 | 4/14/2014,23,168,,,,,,,,108,,,,,,,
111 | 4/11/2014,20,159,26,2,,,,,,106,13,2,,,,,
112 | 4/9/2014,18,158,25,2,,,,,,101,12,2,,,,,
113 | 4/7/2014,16,151,21,2,,,,,,95,10,2,,,,,
114 | 4/4/2014,13,143,18,2,,,,,,86,7,2,,,,,
115 | 4/1/2014,10,127,8,2,,,,,,83,5,2,,,,,
116 | 3/31/2014,9,122,8,2,,,,,,80,4,2,,,,,
117 | 3/29/2014,7,112,7,,,,,,,70,2,,,,,,
118 | 3/28/2014,6,112,3,2,,,,,,70,3,2,,,,,
119 | 3/27/2014,5,103,8,6,,,,,,66,6,5,,,,,
120 | 3/26/2014,4,86,,,,,,,,62,,,,,,,
121 | 3/25/2014,3,86,,,,,,,,60,,,,,,,
122 | 3/24/2014,2,86,,,,,,,,59,,,,,,,
123 | 3/22/2014,0,49,,,,,,,,29,,,,,,,
--------------------------------------------------------------------------------
/data/pew.csv:
--------------------------------------------------------------------------------
1 | "religion","<$10k","$10-20k","$20-30k","$30-40k","$40-50k","$50-75k","$75-100k","$100-150k",">150k","Don't know/refused"
2 | "Agnostic",27,34,60,81,76,137,122,109,84,96
3 | "Atheist",12,27,37,52,35,70,73,59,74,76
4 | "Buddhist",27,21,30,34,33,58,62,39,53,54
5 | "Catholic",418,617,732,670,638,1116,949,792,633,1489
6 | "Don’t know/refused",15,14,15,11,10,35,21,17,18,116
7 | "Evangelical Prot",575,869,1064,982,881,1486,949,723,414,1529
8 | "Hindu",1,9,7,9,11,34,47,48,54,37
9 | "Historically Black Prot",228,244,236,238,197,223,131,81,78,339
10 | "Jehovah's Witness",20,27,24,24,21,30,15,11,6,37
11 | "Jewish",19,19,25,25,30,95,69,87,151,162
12 | "Mainline Prot",289,495,619,655,651,1107,939,753,634,1328
13 | "Mormon",29,40,48,51,56,112,85,49,42,69
14 | "Muslim",6,7,9,10,9,23,16,8,6,22
15 | "Orthodox",13,17,23,32,32,47,38,42,46,73
16 | "Other Christian",9,7,11,13,13,14,18,14,12,18
17 | "Other Faiths",20,33,40,46,49,63,46,40,41,71
18 | "Other World Religions",5,2,3,4,2,7,3,4,4,8
19 | "Unaffiliated",217,299,374,365,341,528,407,321,258,597
20 |
--------------------------------------------------------------------------------
/data/table1.csv:
--------------------------------------------------------------------------------
1 | "country","year","cases","population"
2 | "Afghanistan",1999,745,19987071
3 | "Afghanistan",2000,2666,20595360
4 | "Brazil",1999,37737,172006362
5 | "Brazil",2000,80488,174504898
6 | "China",1999,212258,1272915272
7 | "China",2000,213766,1280428583
8 |
--------------------------------------------------------------------------------
/data/table2.csv:
--------------------------------------------------------------------------------
1 | "country","year","type","count"
2 | "Afghanistan",1999,"cases",745
3 | "Afghanistan",1999,"population",19987071
4 | "Afghanistan",2000,"cases",2666
5 | "Afghanistan",2000,"population",20595360
6 | "Brazil",1999,"cases",37737
7 | "Brazil",1999,"population",172006362
8 | "Brazil",2000,"cases",80488
9 | "Brazil",2000,"population",174504898
10 | "China",1999,"cases",212258
11 | "China",1999,"population",1272915272
12 | "China",2000,"cases",213766
13 | "China",2000,"population",1280428583
14 |
--------------------------------------------------------------------------------
/data/table3.csv:
--------------------------------------------------------------------------------
1 | "country","year","rate"
2 | "Afghanistan",1999,"745/19987071"
3 | "Afghanistan",2000,"2666/20595360"
4 | "Brazil",1999,"37737/172006362"
5 | "Brazil",2000,"80488/174504898"
6 | "China",1999,"212258/1272915272"
7 | "China",2000,"213766/1280428583"
8 |
--------------------------------------------------------------------------------
/data/table4a.csv:
--------------------------------------------------------------------------------
1 | "country","1999","2000"
2 | "Afghanistan",745,2666
3 | "Brazil",37737,80488
4 | "China",212258,213766
5 |
--------------------------------------------------------------------------------
/data/table4b.csv:
--------------------------------------------------------------------------------
1 | "country","1999","2000"
2 | "Afghanistan",19987071,20595360
3 | "Brazil",172006362,174504898
4 | "China",1272915272,1280428583
5 |
--------------------------------------------------------------------------------
/data/weather.csv:
--------------------------------------------------------------------------------
1 | "id","year","month","element","d1","d2","d3","d4","d5","d6","d7","d8","d9","d10","d11","d12","d13","d14","d15","d16","d17","d18","d19","d20","d21","d22","d23","d24","d25","d26","d27","d28","d29","d30","d31"
2 | "MX17004",2010,1,"tmax",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,27.8,NA
3 | "MX17004",2010,1,"tmin",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,14.5,NA
4 | "MX17004",2010,2,"tmax",NA,27.3,24.1,NA,NA,NA,NA,NA,NA,NA,29.7,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,29.9,NA,NA,NA,NA,NA,NA,NA,NA
5 | "MX17004",2010,2,"tmin",NA,14.4,14.4,NA,NA,NA,NA,NA,NA,NA,13.4,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,10.7,NA,NA,NA,NA,NA,NA,NA,NA
6 | "MX17004",2010,3,"tmax",NA,NA,NA,NA,32.1,NA,NA,NA,NA,34.5,NA,NA,NA,NA,NA,31.1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
7 | "MX17004",2010,3,"tmin",NA,NA,NA,NA,14.2,NA,NA,NA,NA,16.8,NA,NA,NA,NA,NA,17.6,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
8 | "MX17004",2010,4,"tmax",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,36.3,NA,NA,NA,NA
9 | "MX17004",2010,4,"tmin",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,16.7,NA,NA,NA,NA
10 | "MX17004",2010,5,"tmax",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,33.2,NA,NA,NA,NA
11 | "MX17004",2010,5,"tmin",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,18.2,NA,NA,NA,NA
12 | "MX17004",2010,6,"tmax",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,28,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,30.1,NA,NA
13 | "MX17004",2010,6,"tmin",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,17.5,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,18,NA,NA
14 | "MX17004",2010,7,"tmax",NA,NA,28.6,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,29.9,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
15 | "MX17004",2010,7,"tmin",NA,NA,17.5,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,16.5,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
16 | "MX17004",2010,8,"tmax",NA,NA,NA,NA,29.6,NA,NA,29,NA,NA,NA,NA,29.8,NA,NA,NA,NA,NA,NA,NA,NA,NA,26.4,NA,29.7,NA,NA,NA,28,NA,25.4
17 | "MX17004",2010,8,"tmin",NA,NA,NA,NA,15.8,NA,NA,17.3,NA,NA,NA,NA,16.5,NA,NA,NA,NA,NA,NA,NA,NA,NA,15,NA,15.6,NA,NA,NA,15.3,NA,15.4
18 | "MX17004",2010,10,"tmax",NA,NA,NA,NA,27,NA,28.1,NA,NA,NA,NA,NA,NA,29.5,28.7,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,31.2,NA,NA,NA
19 | "MX17004",2010,10,"tmin",NA,NA,NA,NA,14,NA,12.9,NA,NA,NA,NA,NA,NA,13,10.5,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,15,NA,NA,NA
20 | "MX17004",2010,11,"tmax",NA,31.3,NA,27.2,26.3,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,28.1,27.7,NA,NA,NA,NA
21 | "MX17004",2010,11,"tmin",NA,16.3,NA,12,7.9,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,12.1,14.2,NA,NA,NA,NA
22 | "MX17004",2010,12,"tmax",29.9,NA,NA,NA,NA,27.8,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
23 | "MX17004",2010,12,"tmin",13.8,NA,NA,NA,NA,10.5,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
24 |
--------------------------------------------------------------------------------
/notebooks/.gitkeep:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/notebooks/03-apply.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 3,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "def my_function(x, y):\n",
10 | " pass"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 4,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "def my_sq(x):\n",
20 | " return x ** 2"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 5,
26 | "metadata": {},
27 | "outputs": [
28 | {
29 | "data": {
30 | "text/plain": [
31 | "4"
32 | ]
33 | },
34 | "execution_count": 5,
35 | "metadata": {},
36 | "output_type": "execute_result"
37 | }
38 | ],
39 | "source": [
40 | "my_sq(2)"
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "execution_count": 6,
46 | "metadata": {},
47 | "outputs": [
48 | {
49 | "data": {
50 | "text/plain": [
51 | "16"
52 | ]
53 | },
54 | "execution_count": 6,
55 | "metadata": {},
56 | "output_type": "execute_result"
57 | }
58 | ],
59 | "source": [
60 | "my_sq(4)"
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": 11,
66 | "metadata": {},
67 | "outputs": [],
68 | "source": [
69 | "assert my_sq(4) == 16"
70 | ]
71 | },
72 | {
73 | "cell_type": "code",
74 | "execution_count": 12,
75 | "metadata": {},
76 | "outputs": [],
77 | "source": [
78 | "def avg_2(x, y):\n",
79 | " return (x + y) / 2"
80 | ]
81 | },
82 | {
83 | "cell_type": "code",
84 | "execution_count": 13,
85 | "metadata": {},
86 | "outputs": [
87 | {
88 | "data": {
89 | "text/plain": [
90 | "15.0"
91 | ]
92 | },
93 | "execution_count": 13,
94 | "metadata": {},
95 | "output_type": "execute_result"
96 | }
97 | ],
98 | "source": [
99 | "avg_2(10, 20)"
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "execution_count": 18,
105 | "metadata": {},
106 | "outputs": [],
107 | "source": [
108 | "import pandas as pd"
109 | ]
110 | },
111 | {
112 | "cell_type": "code",
113 | "execution_count": 19,
114 | "metadata": {},
115 | "outputs": [],
116 | "source": [
117 | "df = pd.DataFrame({\n",
118 | " 'a': [10, 20, 30],\n",
119 | " 'b': [20, 30, 40]\n",
120 | "})"
121 | ]
122 | },
123 | {
124 | "cell_type": "code",
125 | "execution_count": 20,
126 | "metadata": {},
127 | "outputs": [
128 | {
129 | "data": {
130 | "text/html": [
131 | "
\n",
132 | "\n",
145 | "
\n",
146 | " \n",
147 | " \n",
148 | " | \n",
149 | " a | \n",
150 | " b | \n",
151 | "
\n",
152 | " \n",
153 | " \n",
154 | " \n",
155 | " 0 | \n",
156 | " 10 | \n",
157 | " 20 | \n",
158 | "
\n",
159 | " \n",
160 | " 1 | \n",
161 | " 20 | \n",
162 | " 30 | \n",
163 | "
\n",
164 | " \n",
165 | " 2 | \n",
166 | " 30 | \n",
167 | " 40 | \n",
168 | "
\n",
169 | " \n",
170 | "
\n",
171 | "
"
172 | ],
173 | "text/plain": [
174 | " a b\n",
175 | "0 10 20\n",
176 | "1 20 30\n",
177 | "2 30 40"
178 | ]
179 | },
180 | "execution_count": 20,
181 | "metadata": {},
182 | "output_type": "execute_result"
183 | }
184 | ],
185 | "source": [
186 | "df"
187 | ]
188 | },
189 | {
190 | "cell_type": "code",
191 | "execution_count": 22,
192 | "metadata": {},
193 | "outputs": [
194 | {
195 | "data": {
196 | "text/plain": [
197 | "0 100\n",
198 | "1 400\n",
199 | "2 900\n",
200 | "Name: a, dtype: int64"
201 | ]
202 | },
203 | "execution_count": 22,
204 | "metadata": {},
205 | "output_type": "execute_result"
206 | }
207 | ],
208 | "source": [
209 | "df['a'] ** 2"
210 | ]
211 | },
212 | {
213 | "cell_type": "code",
214 | "execution_count": 23,
215 | "metadata": {},
216 | "outputs": [
217 | {
218 | "data": {
219 | "text/plain": [
220 | ""
221 | ]
222 | },
223 | "execution_count": 23,
224 | "metadata": {},
225 | "output_type": "execute_result"
226 | }
227 | ],
228 | "source": [
229 | "my_sq"
230 | ]
231 | },
232 | {
233 | "cell_type": "code",
234 | "execution_count": 24,
235 | "metadata": {},
236 | "outputs": [
237 | {
238 | "data": {
239 | "text/plain": [
240 | "0 100\n",
241 | "1 400\n",
242 | "2 900\n",
243 | "Name: a, dtype: int64"
244 | ]
245 | },
246 | "execution_count": 24,
247 | "metadata": {},
248 | "output_type": "execute_result"
249 | }
250 | ],
251 | "source": [
252 | "df['a'].apply(my_sq)"
253 | ]
254 | },
255 | {
256 | "cell_type": "code",
257 | "execution_count": 25,
258 | "metadata": {},
259 | "outputs": [],
260 | "source": [
261 | "def my_exp(x, e):\n",
262 | " return x ** e"
263 | ]
264 | },
265 | {
266 | "cell_type": "code",
267 | "execution_count": 26,
268 | "metadata": {},
269 | "outputs": [
270 | {
271 | "data": {
272 | "text/plain": [
273 | "1024"
274 | ]
275 | },
276 | "execution_count": 26,
277 | "metadata": {},
278 | "output_type": "execute_result"
279 | }
280 | ],
281 | "source": [
282 | "my_exp(2, 10)"
283 | ]
284 | },
285 | {
286 | "cell_type": "code",
287 | "execution_count": 27,
288 | "metadata": {},
289 | "outputs": [
290 | {
291 | "data": {
292 | "text/plain": [
293 | "0 10000\n",
294 | "1 160000\n",
295 | "2 810000\n",
296 | "Name: a, dtype: int64"
297 | ]
298 | },
299 | "execution_count": 27,
300 | "metadata": {},
301 | "output_type": "execute_result"
302 | }
303 | ],
304 | "source": [
305 | "df['a'].apply(my_exp, e=4)"
306 | ]
307 | },
308 | {
309 | "cell_type": "code",
310 | "execution_count": 28,
311 | "metadata": {},
312 | "outputs": [],
313 | "source": [
314 | "def print_me(x):\n",
315 | " print(x)"
316 | ]
317 | },
318 | {
319 | "cell_type": "code",
320 | "execution_count": 29,
321 | "metadata": {},
322 | "outputs": [
323 | {
324 | "name": "stdout",
325 | "output_type": "stream",
326 | "text": [
327 | "0 10\n",
328 | "1 20\n",
329 | "2 30\n",
330 | "Name: a, dtype: int64\n",
331 | "0 20\n",
332 | "1 30\n",
333 | "2 40\n",
334 | "Name: b, dtype: int64\n"
335 | ]
336 | },
337 | {
338 | "data": {
339 | "text/plain": [
340 | "a None\n",
341 | "b None\n",
342 | "dtype: object"
343 | ]
344 | },
345 | "execution_count": 29,
346 | "metadata": {},
347 | "output_type": "execute_result"
348 | }
349 | ],
350 | "source": [
351 | "df.apply(print_me)"
352 | ]
353 | },
354 | {
355 | "cell_type": "code",
356 | "execution_count": 32,
357 | "metadata": {},
358 | "outputs": [
359 | {
360 | "data": {
361 | "text/html": [
362 | "\n",
363 | "\n",
376 | "
\n",
377 | " \n",
378 | " \n",
379 | " | \n",
380 | " a | \n",
381 | " b | \n",
382 | "
\n",
383 | " \n",
384 | " \n",
385 | " \n",
386 | " 0 | \n",
387 | " 10 | \n",
388 | " 20 | \n",
389 | "
\n",
390 | " \n",
391 | " 1 | \n",
392 | " 20 | \n",
393 | " 30 | \n",
394 | "
\n",
395 | " \n",
396 | " 2 | \n",
397 | " 30 | \n",
398 | " 40 | \n",
399 | "
\n",
400 | " \n",
401 | "
\n",
402 | "
"
403 | ],
404 | "text/plain": [
405 | " a b\n",
406 | "0 10 20\n",
407 | "1 20 30\n",
408 | "2 30 40"
409 | ]
410 | },
411 | "execution_count": 32,
412 | "metadata": {},
413 | "output_type": "execute_result"
414 | }
415 | ],
416 | "source": [
417 | "df"
418 | ]
419 | },
420 | {
421 | "cell_type": "code",
422 | "execution_count": 30,
423 | "metadata": {},
424 | "outputs": [],
425 | "source": [
426 | "def avg_3(x, y, z):\n",
427 | " return (x + y + z) / 3"
428 | ]
429 | },
430 | {
431 | "cell_type": "code",
432 | "execution_count": 31,
433 | "metadata": {},
434 | "outputs": [
435 | {
436 | "ename": "TypeError",
437 | "evalue": "(\"avg_3() missing 2 required positional arguments: 'y' and 'z'\", 'occurred at index a')",
438 | "output_type": "error",
439 | "traceback": [
440 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
441 | "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
442 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mavg_3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
443 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)\u001b[0m\n\u001b[1;32m 6485\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6486\u001b[0m kwds=kwds)\n\u001b[0;32m-> 6487\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mop\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6488\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6489\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mapplymap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
444 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mget_result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 149\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_raw\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 150\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 151\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_standard\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 152\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 153\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mapply_empty_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
445 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mapply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 255\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 256\u001b[0m \u001b[0;31m# compute the result using the series generator\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 257\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_series_generator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 258\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 259\u001b[0m \u001b[0;31m# wrap results\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
446 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mapply_series_generator\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 284\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 285\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mseries_gen\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 286\u001b[0;31m \u001b[0mresults\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 287\u001b[0m \u001b[0mkeys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 288\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
447 | "\u001b[0;31mTypeError\u001b[0m: (\"avg_3() missing 2 required positional arguments: 'y' and 'z'\", 'occurred at index a')"
448 | ]
449 | }
450 | ],
451 | "source": [
452 | "df.apply(avg_3)"
453 | ]
454 | },
455 | {
456 | "cell_type": "code",
457 | "execution_count": 33,
458 | "metadata": {},
459 | "outputs": [],
460 | "source": [
461 | "import numpy as np"
462 | ]
463 | },
464 | {
465 | "cell_type": "code",
466 | "execution_count": 34,
467 | "metadata": {},
468 | "outputs": [],
469 | "source": [
470 | "def avg_3_apply(col):\n",
471 | " return np.mean(col)"
472 | ]
473 | },
474 | {
475 | "cell_type": "code",
476 | "execution_count": 35,
477 | "metadata": {},
478 | "outputs": [
479 | {
480 | "data": {
481 | "text/plain": [
482 | "a 20.0\n",
483 | "b 30.0\n",
484 | "dtype: float64"
485 | ]
486 | },
487 | "execution_count": 35,
488 | "metadata": {},
489 | "output_type": "execute_result"
490 | }
491 | ],
492 | "source": [
493 | "df.apply(avg_3_apply)"
494 | ]
495 | },
496 | {
497 | "cell_type": "code",
498 | "execution_count": 40,
499 | "metadata": {},
500 | "outputs": [
501 | {
502 | "data": {
503 | "text/html": [
504 | "\n",
505 | "\n",
518 | "
\n",
519 | " \n",
520 | " \n",
521 | " | \n",
522 | " a | \n",
523 | " b | \n",
524 | "
\n",
525 | " \n",
526 | " \n",
527 | " \n",
528 | " 0 | \n",
529 | " 10 | \n",
530 | " 20 | \n",
531 | "
\n",
532 | " \n",
533 | " 1 | \n",
534 | " 20 | \n",
535 | " 30 | \n",
536 | "
\n",
537 | " \n",
538 | " 2 | \n",
539 | " 30 | \n",
540 | " 40 | \n",
541 | "
\n",
542 | " \n",
543 | "
\n",
544 | "
"
545 | ],
546 | "text/plain": [
547 | " a b\n",
548 | "0 10 20\n",
549 | "1 20 30\n",
550 | "2 30 40"
551 | ]
552 | },
553 | "execution_count": 40,
554 | "metadata": {},
555 | "output_type": "execute_result"
556 | }
557 | ],
558 | "source": [
559 | "df"
560 | ]
561 | },
562 | {
563 | "cell_type": "code",
564 | "execution_count": 37,
565 | "metadata": {},
566 | "outputs": [],
567 | "source": [
568 | "def avg_3_apply(col):\n",
569 | " x = col[0]\n",
570 | " y = col[1]\n",
571 | " z = col[2]\n",
572 | " return (x + y + z) / 3"
573 | ]
574 | },
575 | {
576 | "cell_type": "code",
577 | "execution_count": 38,
578 | "metadata": {},
579 | "outputs": [
580 | {
581 | "data": {
582 | "text/plain": [
583 | "a 20.0\n",
584 | "b 30.0\n",
585 | "dtype: float64"
586 | ]
587 | },
588 | "execution_count": 38,
589 | "metadata": {},
590 | "output_type": "execute_result"
591 | }
592 | ],
593 | "source": [
594 | "df.apply(avg_3_apply)"
595 | ]
596 | },
597 | {
598 | "cell_type": "code",
599 | "execution_count": 39,
600 | "metadata": {},
601 | "outputs": [
602 | {
603 | "ename": "IndexError",
604 | "evalue": "('index out of bounds', 'occurred at index 0')",
605 | "output_type": "error",
606 | "traceback": [
607 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
608 | "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
609 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_value\u001b[0;34m(self, series, key)\u001b[0m\n\u001b[1;32m 4374\u001b[0m return self._engine.get_value(s, k,\n\u001b[0;32m-> 4375\u001b[0;31m tz=getattr(series.dtype, 'tz', None))\n\u001b[0m\u001b[1;32m 4376\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
610 | "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_value\u001b[0;34m()\u001b[0m\n",
611 | "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_value\u001b[0;34m()\u001b[0m\n",
612 | "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
613 | "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
614 | "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
615 | "\u001b[0;31mKeyError\u001b[0m: 2",
616 | "\nDuring handling of the above exception, another exception occurred:\n",
617 | "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)",
618 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mavg_3_apply\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'columns'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
619 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)\u001b[0m\n\u001b[1;32m 6485\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6486\u001b[0m kwds=kwds)\n\u001b[0;32m-> 6487\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mop\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6488\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6489\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mapplymap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
620 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mget_result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 149\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_raw\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 150\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 151\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_standard\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 152\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 153\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mapply_empty_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
621 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mapply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 255\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 256\u001b[0m \u001b[0;31m# compute the result using the series generator\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 257\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_series_generator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 258\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 259\u001b[0m \u001b[0;31m# wrap results\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
622 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mapply_series_generator\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 284\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 285\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mseries_gen\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 286\u001b[0;31m \u001b[0mresults\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 287\u001b[0m \u001b[0mkeys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 288\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
623 | "\u001b[0;32m\u001b[0m in \u001b[0;36mavg_3_apply\u001b[0;34m(col)\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcol\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcol\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mz\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcol\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mz\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
624 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 866\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_if_callable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 867\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 868\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_value\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 869\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 870\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mis_scalar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
625 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_value\u001b[0;34m(self, series, key)\u001b[0m\n\u001b[1;32m 4379\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4380\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4381\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mlibindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_value_box\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4382\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mIndexError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4383\u001b[0m \u001b[0;32mraise\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
626 | "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.get_value_box\u001b[0;34m()\u001b[0m\n",
627 | "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.get_value_at\u001b[0;34m()\u001b[0m\n",
628 | "\u001b[0;32mpandas/_libs/util.pxd\u001b[0m in \u001b[0;36mpandas._libs.util.get_value_at\u001b[0;34m()\u001b[0m\n",
629 | "\u001b[0;32mpandas/_libs/util.pxd\u001b[0m in \u001b[0;36mpandas._libs.util.validate_indexer\u001b[0;34m()\u001b[0m\n",
630 | "\u001b[0;31mIndexError\u001b[0m: ('index out of bounds', 'occurred at index 0')"
631 | ]
632 | }
633 | ],
634 | "source": [
635 | "df.apply(avg_3_apply, axis='columns')"
636 | ]
637 | },
638 | {
639 | "cell_type": "code",
640 | "execution_count": 41,
641 | "metadata": {},
642 | "outputs": [
643 | {
644 | "data": {
645 | "text/plain": [
646 | "20.0"
647 | ]
648 | },
649 | "execution_count": 41,
650 | "metadata": {},
651 | "output_type": "execute_result"
652 | }
653 | ],
654 | "source": [
655 | "df['a'].mean()"
656 | ]
657 | },
658 | {
659 | "cell_type": "code",
660 | "execution_count": 42,
661 | "metadata": {},
662 | "outputs": [
663 | {
664 | "data": {
665 | "text/plain": [
666 | "0 30\n",
667 | "1 50\n",
668 | "2 70\n",
669 | "dtype: int64"
670 | ]
671 | },
672 | "execution_count": 42,
673 | "metadata": {},
674 | "output_type": "execute_result"
675 | }
676 | ],
677 | "source": [
678 | "df['a'] + df['b']"
679 | ]
680 | },
681 | {
682 | "cell_type": "code",
683 | "execution_count": 45,
684 | "metadata": {},
685 | "outputs": [],
686 | "source": [
687 | "def avg_2_mod(x, y):\n",
688 | " if (x == 20):\n",
689 | " return np.NaN #np.NAN np.nan\n",
690 | " else:\n",
691 | " return(x + y) / 2"
692 | ]
693 | },
694 | {
695 | "cell_type": "code",
696 | "execution_count": 46,
697 | "metadata": {},
698 | "outputs": [
699 | {
700 | "data": {
701 | "text/html": [
702 | "\n",
703 | "\n",
716 | "
\n",
717 | " \n",
718 | " \n",
719 | " | \n",
720 | " a | \n",
721 | " b | \n",
722 | "
\n",
723 | " \n",
724 | " \n",
725 | " \n",
726 | " 0 | \n",
727 | " 10 | \n",
728 | " 20 | \n",
729 | "
\n",
730 | " \n",
731 | " 1 | \n",
732 | " 20 | \n",
733 | " 30 | \n",
734 | "
\n",
735 | " \n",
736 | " 2 | \n",
737 | " 30 | \n",
738 | " 40 | \n",
739 | "
\n",
740 | " \n",
741 | "
\n",
742 | "
"
743 | ],
744 | "text/plain": [
745 | " a b\n",
746 | "0 10 20\n",
747 | "1 20 30\n",
748 | "2 30 40"
749 | ]
750 | },
751 | "execution_count": 46,
752 | "metadata": {},
753 | "output_type": "execute_result"
754 | }
755 | ],
756 | "source": [
757 | "df"
758 | ]
759 | },
760 | {
761 | "cell_type": "code",
762 | "execution_count": 47,
763 | "metadata": {},
764 | "outputs": [
765 | {
766 | "ename": "ValueError",
767 | "evalue": "The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().",
768 | "output_type": "error",
769 | "traceback": [
770 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
771 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
772 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mavg_2_mod\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'a'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'b'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
773 | "\u001b[0;32m\u001b[0m in \u001b[0;36mavg_2_mod\u001b[0;34m(x, y)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mavg_2_mod\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m20\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mNaN\u001b[0m \u001b[0;31m#np.NAN np.nan\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mreturn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
774 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m__nonzero__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1476\u001b[0m raise ValueError(\"The truth value of a {0} is ambiguous. \"\n\u001b[1;32m 1477\u001b[0m \u001b[0;34m\"Use a.empty, a.bool(), a.item(), a.any() or a.all().\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1478\u001b[0;31m .format(self.__class__.__name__))\n\u001b[0m\u001b[1;32m 1479\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1480\u001b[0m \u001b[0m__bool__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m__nonzero__\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
775 | "\u001b[0;31mValueError\u001b[0m: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all()."
776 | ]
777 | }
778 | ],
779 | "source": [
780 | "avg_2_mod(df['a'], df['b'])"
781 | ]
782 | },
783 | {
784 | "cell_type": "code",
785 | "execution_count": 48,
786 | "metadata": {},
787 | "outputs": [],
788 | "source": [
789 | "import numpy as np"
790 | ]
791 | },
792 | {
793 | "cell_type": "code",
794 | "execution_count": 49,
795 | "metadata": {},
796 | "outputs": [],
797 | "source": [
798 | "avg_2_mod_vec = np.vectorize(avg_2_mod)"
799 | ]
800 | },
801 | {
802 | "cell_type": "code",
803 | "execution_count": 51,
804 | "metadata": {},
805 | "outputs": [
806 | {
807 | "data": {
808 | "text/html": [
809 | "\n",
810 | "\n",
823 | "
\n",
824 | " \n",
825 | " \n",
826 | " | \n",
827 | " a | \n",
828 | " b | \n",
829 | "
\n",
830 | " \n",
831 | " \n",
832 | " \n",
833 | " 0 | \n",
834 | " 10 | \n",
835 | " 20 | \n",
836 | "
\n",
837 | " \n",
838 | " 1 | \n",
839 | " 20 | \n",
840 | " 30 | \n",
841 | "
\n",
842 | " \n",
843 | " 2 | \n",
844 | " 30 | \n",
845 | " 40 | \n",
846 | "
\n",
847 | " \n",
848 | "
\n",
849 | "
"
850 | ],
851 | "text/plain": [
852 | " a b\n",
853 | "0 10 20\n",
854 | "1 20 30\n",
855 | "2 30 40"
856 | ]
857 | },
858 | "execution_count": 51,
859 | "metadata": {},
860 | "output_type": "execute_result"
861 | }
862 | ],
863 | "source": [
864 | "df"
865 | ]
866 | },
867 | {
868 | "cell_type": "code",
869 | "execution_count": 50,
870 | "metadata": {},
871 | "outputs": [
872 | {
873 | "data": {
874 | "text/plain": [
875 | "array([15., nan, 35.])"
876 | ]
877 | },
878 | "execution_count": 50,
879 | "metadata": {},
880 | "output_type": "execute_result"
881 | }
882 | ],
883 | "source": [
884 | "avg_2_mod_vec(df['a'], df['b'])"
885 | ]
886 | },
887 | {
888 | "cell_type": "code",
889 | "execution_count": 53,
890 | "metadata": {},
891 | "outputs": [],
892 | "source": [
893 | "@np.vectorize\n",
894 | "def avg_2_mod(x, y):\n",
895 | " if (x == 20):\n",
896 | " return np.NaN #np.NAN np.nan\n",
897 | " else:\n",
898 | " return(x + y) / 2"
899 | ]
900 | },
901 | {
902 | "cell_type": "code",
903 | "execution_count": 54,
904 | "metadata": {},
905 | "outputs": [
906 | {
907 | "data": {
908 | "text/plain": [
909 | "array([15., nan, 35.])"
910 | ]
911 | },
912 | "execution_count": 54,
913 | "metadata": {},
914 | "output_type": "execute_result"
915 | }
916 | ],
917 | "source": [
918 | "avg_2_mod(df['a'], df['b'])"
919 | ]
920 | },
921 | {
922 | "cell_type": "code",
923 | "execution_count": 55,
924 | "metadata": {},
925 | "outputs": [],
926 | "source": [
927 | "import numba"
928 | ]
929 | },
930 | {
931 | "cell_type": "code",
932 | "execution_count": 59,
933 | "metadata": {},
934 | "outputs": [],
935 | "source": [
936 | "@numba.vectorize\n",
937 | "def avg_2_mod_numba(x, y):\n",
938 | " if (x == 20):\n",
939 | " return np.NaN\n",
940 | " else:\n",
941 | " return(x + y) / 2"
942 | ]
943 | },
944 | {
945 | "cell_type": "code",
946 | "execution_count": 60,
947 | "metadata": {},
948 | "outputs": [
949 | {
950 | "data": {
951 | "text/plain": [
952 | "array([15., nan, 35.])"
953 | ]
954 | },
955 | "execution_count": 60,
956 | "metadata": {},
957 | "output_type": "execute_result"
958 | }
959 | ],
960 | "source": [
961 | "avg_2_mod_numba(df['a'].values, df['b'].values)"
962 | ]
963 | },
964 | {
965 | "cell_type": "code",
966 | "execution_count": 62,
967 | "metadata": {},
968 | "outputs": [
969 | {
970 | "name": "stdout",
971 | "output_type": "stream",
972 | "text": [
973 | "445 µs ± 7.79 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
974 | ]
975 | }
976 | ],
977 | "source": [
978 | "%%timeit\n",
979 | "avg_2(df['a'], df['b'])"
980 | ]
981 | },
982 | {
983 | "cell_type": "code",
984 | "execution_count": 63,
985 | "metadata": {},
986 | "outputs": [
987 | {
988 | "name": "stdout",
989 | "output_type": "stream",
990 | "text": [
991 | "211 µs ± 7.73 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
992 | ]
993 | }
994 | ],
995 | "source": [
996 | "%%timeit\n",
997 | "avg_2_mod(df['a'], df['b'])"
998 | ]
999 | },
1000 | {
1001 | "cell_type": "code",
1002 | "execution_count": 64,
1003 | "metadata": {},
1004 | "outputs": [
1005 | {
1006 | "name": "stdout",
1007 | "output_type": "stream",
1008 | "text": [
1009 | "8.01 µs ± 226 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)\n"
1010 | ]
1011 | }
1012 | ],
1013 | "source": [
1014 | "%%timeit\n",
1015 | "avg_2_mod_numba(df['a'].values, df['b'].values)"
1016 | ]
1017 | },
1018 | {
1019 | "cell_type": "code",
1020 | "execution_count": null,
1021 | "metadata": {},
1022 | "outputs": [],
1023 | "source": []
1024 | }
1025 | ],
1026 | "metadata": {
1027 | "kernelspec": {
1028 | "display_name": "Python 3",
1029 | "language": "python",
1030 | "name": "python3"
1031 | },
1032 | "language_info": {
1033 | "codemirror_mode": {
1034 | "name": "ipython",
1035 | "version": 3
1036 | },
1037 | "file_extension": ".py",
1038 | "mimetype": "text/x-python",
1039 | "name": "python",
1040 | "nbconvert_exporter": "python",
1041 | "pygments_lexer": "ipython3",
1042 | "version": "3.7.3"
1043 | }
1044 | },
1045 | "nbformat": 4,
1046 | "nbformat_minor": 2
1047 | }
1048 |
--------------------------------------------------------------------------------
/notebooks/05-model.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 3,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "import seaborn as sns\n",
11 | "from sklearn import linear_model"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 4,
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "tips = sns.load_dataset('tips')"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 5,
26 | "metadata": {},
27 | "outputs": [
28 | {
29 | "data": {
30 | "text/html": [
31 | "\n",
32 | "\n",
45 | "
\n",
46 | " \n",
47 | " \n",
48 | " | \n",
49 | " total_bill | \n",
50 | " tip | \n",
51 | " sex | \n",
52 | " smoker | \n",
53 | " day | \n",
54 | " time | \n",
55 | " size | \n",
56 | "
\n",
57 | " \n",
58 | " \n",
59 | " \n",
60 | " 0 | \n",
61 | " 16.99 | \n",
62 | " 1.01 | \n",
63 | " Female | \n",
64 | " No | \n",
65 | " Sun | \n",
66 | " Dinner | \n",
67 | " 2 | \n",
68 | "
\n",
69 | " \n",
70 | " 1 | \n",
71 | " 10.34 | \n",
72 | " 1.66 | \n",
73 | " Male | \n",
74 | " No | \n",
75 | " Sun | \n",
76 | " Dinner | \n",
77 | " 3 | \n",
78 | "
\n",
79 | " \n",
80 | " 2 | \n",
81 | " 21.01 | \n",
82 | " 3.50 | \n",
83 | " Male | \n",
84 | " No | \n",
85 | " Sun | \n",
86 | " Dinner | \n",
87 | " 3 | \n",
88 | "
\n",
89 | " \n",
90 | " 3 | \n",
91 | " 23.68 | \n",
92 | " 3.31 | \n",
93 | " Male | \n",
94 | " No | \n",
95 | " Sun | \n",
96 | " Dinner | \n",
97 | " 2 | \n",
98 | "
\n",
99 | " \n",
100 | " 4 | \n",
101 | " 24.59 | \n",
102 | " 3.61 | \n",
103 | " Female | \n",
104 | " No | \n",
105 | " Sun | \n",
106 | " Dinner | \n",
107 | " 4 | \n",
108 | "
\n",
109 | " \n",
110 | "
\n",
111 | "
"
112 | ],
113 | "text/plain": [
114 | " total_bill tip sex smoker day time size\n",
115 | "0 16.99 1.01 Female No Sun Dinner 2\n",
116 | "1 10.34 1.66 Male No Sun Dinner 3\n",
117 | "2 21.01 3.50 Male No Sun Dinner 3\n",
118 | "3 23.68 3.31 Male No Sun Dinner 2\n",
119 | "4 24.59 3.61 Female No Sun Dinner 4"
120 | ]
121 | },
122 | "execution_count": 5,
123 | "metadata": {},
124 | "output_type": "execute_result"
125 | }
126 | ],
127 | "source": [
128 | "tips.head()"
129 | ]
130 | },
131 | {
132 | "cell_type": "code",
133 | "execution_count": 6,
134 | "metadata": {},
135 | "outputs": [],
136 | "source": [
137 | "lr = linear_model.LinearRegression()"
138 | ]
139 | },
140 | {
141 | "cell_type": "code",
142 | "execution_count": 11,
143 | "metadata": {},
144 | "outputs": [
145 | {
146 | "data": {
147 | "text/plain": [
148 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n",
149 | " normalize=False)"
150 | ]
151 | },
152 | "execution_count": 11,
153 | "metadata": {},
154 | "output_type": "execute_result"
155 | }
156 | ],
157 | "source": [
158 | "lr.fit(X=tips[['total_bill', 'size']], y=tips['tip'])"
159 | ]
160 | },
161 | {
162 | "cell_type": "code",
163 | "execution_count": 8,
164 | "metadata": {},
165 | "outputs": [
166 | {
167 | "data": {
168 | "text/plain": [
169 | "array([0.09271334, 0.19259779])"
170 | ]
171 | },
172 | "execution_count": 8,
173 | "metadata": {},
174 | "output_type": "execute_result"
175 | }
176 | ],
177 | "source": [
178 | "lr.coef_"
179 | ]
180 | },
181 | {
182 | "cell_type": "code",
183 | "execution_count": 9,
184 | "metadata": {},
185 | "outputs": [
186 | {
187 | "data": {
188 | "text/plain": [
189 | "0.6689447408125027"
190 | ]
191 | },
192 | "execution_count": 9,
193 | "metadata": {},
194 | "output_type": "execute_result"
195 | }
196 | ],
197 | "source": [
198 | "lr.intercept_"
199 | ]
200 | },
201 | {
202 | "cell_type": "code",
203 | "execution_count": 12,
204 | "metadata": {},
205 | "outputs": [],
206 | "source": [
207 | "# dummy encoding\n",
208 | "# one-hot encoding"
209 | ]
210 | },
211 | {
212 | "cell_type": "code",
213 | "execution_count": 16,
214 | "metadata": {},
215 | "outputs": [
216 | {
217 | "data": {
218 | "text/html": [
219 | "\n",
220 | "\n",
233 | "
\n",
234 | " \n",
235 | " \n",
236 | " | \n",
237 | " total_bill | \n",
238 | " tip | \n",
239 | " size | \n",
240 | " sex_Female | \n",
241 | " smoker_No | \n",
242 | " day_Fri | \n",
243 | " day_Sat | \n",
244 | " day_Sun | \n",
245 | " time_Dinner | \n",
246 | "
\n",
247 | " \n",
248 | " \n",
249 | " \n",
250 | " 0 | \n",
251 | " 16.99 | \n",
252 | " 1.01 | \n",
253 | " 2 | \n",
254 | " 1 | \n",
255 | " 1 | \n",
256 | " 0 | \n",
257 | " 0 | \n",
258 | " 1 | \n",
259 | " 1 | \n",
260 | "
\n",
261 | " \n",
262 | " 1 | \n",
263 | " 10.34 | \n",
264 | " 1.66 | \n",
265 | " 3 | \n",
266 | " 0 | \n",
267 | " 1 | \n",
268 | " 0 | \n",
269 | " 0 | \n",
270 | " 1 | \n",
271 | " 1 | \n",
272 | "
\n",
273 | " \n",
274 | " 2 | \n",
275 | " 21.01 | \n",
276 | " 3.50 | \n",
277 | " 3 | \n",
278 | " 0 | \n",
279 | " 1 | \n",
280 | " 0 | \n",
281 | " 0 | \n",
282 | " 1 | \n",
283 | " 1 | \n",
284 | "
\n",
285 | " \n",
286 | " 3 | \n",
287 | " 23.68 | \n",
288 | " 3.31 | \n",
289 | " 2 | \n",
290 | " 0 | \n",
291 | " 1 | \n",
292 | " 0 | \n",
293 | " 0 | \n",
294 | " 1 | \n",
295 | " 1 | \n",
296 | "
\n",
297 | " \n",
298 | " 4 | \n",
299 | " 24.59 | \n",
300 | " 3.61 | \n",
301 | " 4 | \n",
302 | " 1 | \n",
303 | " 1 | \n",
304 | " 0 | \n",
305 | " 0 | \n",
306 | " 1 | \n",
307 | " 1 | \n",
308 | "
\n",
309 | " \n",
310 | "
\n",
311 | "
"
312 | ],
313 | "text/plain": [
314 | " total_bill tip size sex_Female smoker_No day_Fri day_Sat day_Sun \\\n",
315 | "0 16.99 1.01 2 1 1 0 0 1 \n",
316 | "1 10.34 1.66 3 0 1 0 0 1 \n",
317 | "2 21.01 3.50 3 0 1 0 0 1 \n",
318 | "3 23.68 3.31 2 0 1 0 0 1 \n",
319 | "4 24.59 3.61 4 1 1 0 0 1 \n",
320 | "\n",
321 | " time_Dinner \n",
322 | "0 1 \n",
323 | "1 1 \n",
324 | "2 1 \n",
325 | "3 1 \n",
326 | "4 1 "
327 | ]
328 | },
329 | "execution_count": 16,
330 | "metadata": {},
331 | "output_type": "execute_result"
332 | }
333 | ],
334 | "source": [
335 | "tips_dummy = pd.get_dummies(tips, drop_first=True)\n",
336 | "tips_dummy.head()"
337 | ]
338 | },
339 | {
340 | "cell_type": "code",
341 | "execution_count": 19,
342 | "metadata": {},
343 | "outputs": [
344 | {
345 | "data": {
346 | "text/plain": [
347 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n",
348 | " normalize=False)"
349 | ]
350 | },
351 | "execution_count": 19,
352 | "metadata": {},
353 | "output_type": "execute_result"
354 | }
355 | ],
356 | "source": [
357 | "lr = linear_model.LinearRegression()\n",
358 | "lr.fit(X=tips_dummy.iloc[:, 2:], y=tips_dummy['tip'])"
359 | ]
360 | },
361 | {
362 | "cell_type": "code",
363 | "execution_count": 20,
364 | "metadata": {},
365 | "outputs": [
366 | {
367 | "data": {
368 | "text/plain": [
369 | "array([ 0.71001644, -0.10057881, -0.20916402, -0.20180568, -0.36603136,\n",
370 | " -0.29452609, 0.48575489])"
371 | ]
372 | },
373 | "execution_count": 20,
374 | "metadata": {},
375 | "output_type": "execute_result"
376 | }
377 | ],
378 | "source": [
379 | "lr.coef_"
380 | ]
381 | },
382 | {
383 | "cell_type": "code",
384 | "execution_count": null,
385 | "metadata": {},
386 | "outputs": [],
387 | "source": []
388 | }
389 | ],
390 | "metadata": {
391 | "kernelspec": {
392 | "display_name": "Python 3",
393 | "language": "python",
394 | "name": "python3"
395 | },
396 | "language_info": {
397 | "codemirror_mode": {
398 | "name": "ipython",
399 | "version": 3
400 | },
401 | "file_extension": ".py",
402 | "mimetype": "text/x-python",
403 | "name": "python",
404 | "nbconvert_exporter": "python",
405 | "pygments_lexer": "ipython3",
406 | "version": "3.7.3"
407 | }
408 | },
409 | "nbformat": 4,
410 | "nbformat_minor": 2
411 | }
412 |
--------------------------------------------------------------------------------
/notes/03-apply.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "source": [
6 | "# writing a python function"
7 | ],
8 | "outputs": [],
9 | "execution_count": 1,
10 | "metadata": {}
11 | },
12 | {
13 | "cell_type": "code",
14 | "source": [
15 | "def my_function():\n",
16 | " pass"
17 | ],
18 | "outputs": [],
19 | "execution_count": 2,
20 | "metadata": {}
21 | },
22 | {
23 | "cell_type": "code",
24 | "source": [
25 | "def my_sq(x):\n",
26 | " return x ** 2"
27 | ],
28 | "outputs": [],
29 | "execution_count": 3,
30 | "metadata": {}
31 | },
32 | {
33 | "cell_type": "code",
34 | "source": [
35 | "my_sq(4)"
36 | ],
37 | "outputs": [
38 | {
39 | "output_type": "execute_result",
40 | "execution_count": 4,
41 | "data": {
42 | "text/plain": [
43 | "16"
44 | ]
45 | },
46 | "metadata": {}
47 | }
48 | ],
49 | "execution_count": 4,
50 | "metadata": {}
51 | },
52 | {
53 | "cell_type": "code",
54 | "source": [
55 | "assert my_sq(4) == 16"
56 | ],
57 | "outputs": [],
58 | "execution_count": 5,
59 | "metadata": {}
60 | },
61 | {
62 | "cell_type": "code",
63 | "source": [
64 | "def avg_2(x, y):\n",
65 | " return (x + y) / 2"
66 | ],
67 | "outputs": [],
68 | "execution_count": 6,
69 | "metadata": {}
70 | },
71 | {
72 | "cell_type": "code",
73 | "source": [
74 | "avg_2(10, 20)"
75 | ],
76 | "outputs": [
77 | {
78 | "output_type": "execute_result",
79 | "execution_count": 7,
80 | "data": {
81 | "text/plain": [
82 | "15.0"
83 | ]
84 | },
85 | "metadata": {}
86 | }
87 | ],
88 | "execution_count": 7,
89 | "metadata": {}
90 | },
91 | {
92 | "cell_type": "code",
93 | "source": [
94 | "import pandas as pd"
95 | ],
96 | "outputs": [],
97 | "execution_count": 8,
98 | "metadata": {}
99 | },
100 | {
101 | "cell_type": "code",
102 | "source": [
103 | "df = pd.DataFrame({\n",
104 | " 'a': [10, 20, 30],\n",
105 | " 'b': [20, 30, 40]\n",
106 | "})"
107 | ],
108 | "outputs": [],
109 | "execution_count": 9,
110 | "metadata": {}
111 | },
112 | {
113 | "cell_type": "code",
114 | "source": [
115 | "df['a'] ** 2"
116 | ],
117 | "outputs": [
118 | {
119 | "output_type": "execute_result",
120 | "execution_count": 10,
121 | "data": {
122 | "text/plain": [
123 | "0 100\n",
124 | "1 400\n",
125 | "2 900\n",
126 | "Name: a, dtype: int64"
127 | ]
128 | },
129 | "metadata": {}
130 | }
131 | ],
132 | "execution_count": 10,
133 | "metadata": {}
134 | },
135 | {
136 | "cell_type": "code",
137 | "source": [
138 | "df['a'].apply(my_sq)"
139 | ],
140 | "outputs": [
141 | {
142 | "output_type": "execute_result",
143 | "execution_count": 11,
144 | "data": {
145 | "text/plain": [
146 | "0 100\n",
147 | "1 400\n",
148 | "2 900\n",
149 | "Name: a, dtype: int64"
150 | ]
151 | },
152 | "metadata": {}
153 | }
154 | ],
155 | "execution_count": 11,
156 | "metadata": {}
157 | },
158 | {
159 | "cell_type": "code",
160 | "source": [
161 | "def my_exp(x, e):\n",
162 | " return x ** e"
163 | ],
164 | "outputs": [],
165 | "execution_count": 12,
166 | "metadata": {}
167 | },
168 | {
169 | "cell_type": "code",
170 | "source": [
171 | "my_exp(4, 2)"
172 | ],
173 | "outputs": [
174 | {
175 | "output_type": "execute_result",
176 | "execution_count": 13,
177 | "data": {
178 | "text/plain": [
179 | "16"
180 | ]
181 | },
182 | "metadata": {}
183 | }
184 | ],
185 | "execution_count": 13,
186 | "metadata": {}
187 | },
188 | {
189 | "cell_type": "code",
190 | "source": [
191 | "my_exp(4, 3)"
192 | ],
193 | "outputs": [
194 | {
195 | "output_type": "execute_result",
196 | "execution_count": 14,
197 | "data": {
198 | "text/plain": [
199 | "64"
200 | ]
201 | },
202 | "metadata": {}
203 | }
204 | ],
205 | "execution_count": 14,
206 | "metadata": {}
207 | },
208 | {
209 | "cell_type": "code",
210 | "source": [
211 | "df['a'].apply(my_exp, e=4)"
212 | ],
213 | "outputs": [
214 | {
215 | "output_type": "execute_result",
216 | "execution_count": 15,
217 | "data": {
218 | "text/plain": [
219 | "0 10000\n",
220 | "1 160000\n",
221 | "2 810000\n",
222 | "Name: a, dtype: int64"
223 | ]
224 | },
225 | "metadata": {}
226 | }
227 | ],
228 | "execution_count": 15,
229 | "metadata": {}
230 | },
231 | {
232 | "cell_type": "code",
233 | "source": [
234 | "def print_me(x):\n",
235 | " print(x)"
236 | ],
237 | "outputs": [],
238 | "execution_count": 16,
239 | "metadata": {}
240 | },
241 | {
242 | "cell_type": "code",
243 | "source": [
244 | "df.apply(print_me)"
245 | ],
246 | "outputs": [
247 | {
248 | "output_type": "stream",
249 | "name": "stdout",
250 | "text": [
251 | "0 10\n",
252 | "1 20\n",
253 | "2 30\n",
254 | "Name: a, dtype: int64\n",
255 | "0 20\n",
256 | "1 30\n",
257 | "2 40\n",
258 | "Name: b, dtype: int64\n"
259 | ]
260 | },
261 | {
262 | "output_type": "execute_result",
263 | "execution_count": 17,
264 | "data": {
265 | "text/plain": [
266 | "a None\n",
267 | "b None\n",
268 | "dtype: object"
269 | ]
270 | },
271 | "metadata": {}
272 | }
273 | ],
274 | "execution_count": 17,
275 | "metadata": {}
276 | },
277 | {
278 | "cell_type": "code",
279 | "source": [
280 | "def avg_3(x, y, z):\n",
281 | " return (x + y + z) / 3"
282 | ],
283 | "outputs": [],
284 | "execution_count": 18,
285 | "metadata": {}
286 | },
287 | {
288 | "cell_type": "code",
289 | "source": [
290 | "df.apply(avg_3)"
291 | ],
292 | "outputs": [
293 | {
294 | "output_type": "error",
295 | "ename": "TypeError",
296 | "evalue": "(\"avg_3() missing 2 required positional arguments: 'y' and 'z'\", 'occurred at index a')",
297 | "traceback": [
298 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
299 | "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
300 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mavg_3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
301 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)\u001b[0m\n\u001b[1;32m 6485\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6486\u001b[0m kwds=kwds)\n\u001b[0;32m-> 6487\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mop\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6488\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6489\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mapplymap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
302 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mget_result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 149\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_raw\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 150\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 151\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_standard\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 152\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 153\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mapply_empty_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
303 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mapply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 255\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 256\u001b[0m \u001b[0;31m# compute the result using the series generator\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 257\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_series_generator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 258\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 259\u001b[0m \u001b[0;31m# wrap results\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
304 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mapply_series_generator\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 284\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 285\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mseries_gen\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 286\u001b[0;31m \u001b[0mresults\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 287\u001b[0m \u001b[0mkeys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 288\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
305 | "\u001b[0;31mTypeError\u001b[0m: (\"avg_3() missing 2 required positional arguments: 'y' and 'z'\", 'occurred at index a')"
306 | ]
307 | }
308 | ],
309 | "execution_count": 19,
310 | "metadata": {}
311 | },
312 | {
313 | "cell_type": "code",
314 | "source": [
315 | "import numpy as np"
316 | ],
317 | "outputs": [],
318 | "execution_count": 20,
319 | "metadata": {}
320 | },
321 | {
322 | "cell_type": "code",
323 | "source": [
324 | "def avg_3_apply(col):\n",
325 | " return np.mean(col)"
326 | ],
327 | "outputs": [],
328 | "execution_count": 21,
329 | "metadata": {}
330 | },
331 | {
332 | "cell_type": "code",
333 | "source": [
334 | "df.apply(avg_3_apply)"
335 | ],
336 | "outputs": [
337 | {
338 | "output_type": "execute_result",
339 | "execution_count": 22,
340 | "data": {
341 | "text/plain": [
342 | "a 20.0\n",
343 | "b 30.0\n",
344 | "dtype: float64"
345 | ]
346 | },
347 | "metadata": {}
348 | }
349 | ],
350 | "execution_count": 22,
351 | "metadata": {}
352 | },
353 | {
354 | "cell_type": "code",
355 | "source": [
356 | "def avg_3_apply(col):\n",
357 | " x = col[0]\n",
358 | " y = col[1]\n",
359 | " z = col[2]\n",
360 | " return (x + y + z) / 3"
361 | ],
362 | "outputs": [],
363 | "execution_count": 23,
364 | "metadata": {}
365 | },
366 | {
367 | "cell_type": "code",
368 | "source": [
369 | "df.apply(avg_3_apply)"
370 | ],
371 | "outputs": [
372 | {
373 | "output_type": "execute_result",
374 | "execution_count": 24,
375 | "data": {
376 | "text/plain": [
377 | "a 20.0\n",
378 | "b 30.0\n",
379 | "dtype: float64"
380 | ]
381 | },
382 | "metadata": {}
383 | }
384 | ],
385 | "execution_count": 24,
386 | "metadata": {}
387 | },
388 | {
389 | "cell_type": "code",
390 | "source": [
391 | "df.apply(avg_3_apply, axis='columns')"
392 | ],
393 | "outputs": [
394 | {
395 | "output_type": "error",
396 | "ename": "IndexError",
397 | "evalue": "('index out of bounds', 'occurred at index 0')",
398 | "traceback": [
399 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
400 | "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
401 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_value\u001b[0;34m(self, series, key)\u001b[0m\n\u001b[1;32m 4374\u001b[0m return self._engine.get_value(s, k,\n\u001b[0;32m-> 4375\u001b[0;31m tz=getattr(series.dtype, 'tz', None))\n\u001b[0m\u001b[1;32m 4376\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
402 | "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_value\u001b[0;34m()\u001b[0m\n",
403 | "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_value\u001b[0;34m()\u001b[0m\n",
404 | "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
405 | "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
406 | "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
407 | "\u001b[0;31mKeyError\u001b[0m: 2",
408 | "\nDuring handling of the above exception, another exception occurred:\n",
409 | "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)",
410 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mavg_3_apply\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'columns'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
411 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)\u001b[0m\n\u001b[1;32m 6485\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6486\u001b[0m kwds=kwds)\n\u001b[0;32m-> 6487\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mop\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6488\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6489\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mapplymap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
412 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mget_result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 149\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_raw\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 150\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 151\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_standard\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 152\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 153\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mapply_empty_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
413 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mapply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 255\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 256\u001b[0m \u001b[0;31m# compute the result using the series generator\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 257\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_series_generator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 258\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 259\u001b[0m \u001b[0;31m# wrap results\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
414 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mapply_series_generator\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 284\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 285\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mseries_gen\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 286\u001b[0;31m \u001b[0mresults\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 287\u001b[0m \u001b[0mkeys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 288\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
415 | "\u001b[0;32m\u001b[0m in \u001b[0;36mavg_3_apply\u001b[0;34m(col)\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcol\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcol\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mz\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcol\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mz\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
416 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 866\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_if_callable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 867\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 868\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_value\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 869\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 870\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mis_scalar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
417 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_value\u001b[0;34m(self, series, key)\u001b[0m\n\u001b[1;32m 4379\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4380\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4381\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mlibindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_value_box\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4382\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mIndexError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4383\u001b[0m \u001b[0;32mraise\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
418 | "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.get_value_box\u001b[0;34m()\u001b[0m\n",
419 | "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.get_value_at\u001b[0;34m()\u001b[0m\n",
420 | "\u001b[0;32mpandas/_libs/util.pxd\u001b[0m in \u001b[0;36mpandas._libs.util.get_value_at\u001b[0;34m()\u001b[0m\n",
421 | "\u001b[0;32mpandas/_libs/util.pxd\u001b[0m in \u001b[0;36mpandas._libs.util.validate_indexer\u001b[0;34m()\u001b[0m\n",
422 | "\u001b[0;31mIndexError\u001b[0m: ('index out of bounds', 'occurred at index 0')"
423 | ]
424 | }
425 | ],
426 | "execution_count": 25,
427 | "metadata": {}
428 | },
429 | {
430 | "cell_type": "code",
431 | "source": [
432 | "df['a'].mean()"
433 | ],
434 | "outputs": [
435 | {
436 | "output_type": "execute_result",
437 | "execution_count": 26,
438 | "data": {
439 | "text/plain": [
440 | "20.0"
441 | ]
442 | },
443 | "metadata": {}
444 | }
445 | ],
446 | "execution_count": 26,
447 | "metadata": {}
448 | },
449 | {
450 | "cell_type": "code",
451 | "source": [
452 | "df['a'] + df['b']"
453 | ],
454 | "outputs": [
455 | {
456 | "output_type": "execute_result",
457 | "execution_count": 27,
458 | "data": {
459 | "text/plain": [
460 | "0 30\n",
461 | "1 50\n",
462 | "2 70\n",
463 | "dtype: int64"
464 | ]
465 | },
466 | "metadata": {}
467 | }
468 | ],
469 | "execution_count": 27,
470 | "metadata": {}
471 | },
472 | {
473 | "cell_type": "code",
474 | "source": [
475 | "def avg_2_mod(x, y):\n",
476 | " if (x == 20):\n",
477 | " return np.nan\n",
478 | " else:\n",
479 | " return (x + y) / 2"
480 | ],
481 | "outputs": [],
482 | "execution_count": 28,
483 | "metadata": {}
484 | },
485 | {
486 | "cell_type": "code",
487 | "source": [
488 | "avg_2(df['a'], df['b'])"
489 | ],
490 | "outputs": [
491 | {
492 | "output_type": "execute_result",
493 | "execution_count": 29,
494 | "data": {
495 | "text/plain": [
496 | "0 15.0\n",
497 | "1 25.0\n",
498 | "2 35.0\n",
499 | "dtype: float64"
500 | ]
501 | },
502 | "metadata": {}
503 | }
504 | ],
505 | "execution_count": 29,
506 | "metadata": {}
507 | },
508 | {
509 | "cell_type": "code",
510 | "source": [
511 | "avg_2_mod(df['a'], df['b'])"
512 | ],
513 | "outputs": [
514 | {
515 | "output_type": "error",
516 | "ename": "ValueError",
517 | "evalue": "The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().",
518 | "traceback": [
519 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
520 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
521 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mavg_2_mod\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'a'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'b'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
522 | "\u001b[0;32m\u001b[0m in \u001b[0;36mavg_2_mod\u001b[0;34m(x, y)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mavg_2_mod\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m20\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mNaN\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
523 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m__nonzero__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1476\u001b[0m raise ValueError(\"The truth value of a {0} is ambiguous. \"\n\u001b[1;32m 1477\u001b[0m \u001b[0;34m\"Use a.empty, a.bool(), a.item(), a.any() or a.all().\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1478\u001b[0;31m .format(self.__class__.__name__))\n\u001b[0m\u001b[1;32m 1479\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1480\u001b[0m \u001b[0m__bool__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m__nonzero__\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
524 | "\u001b[0;31mValueError\u001b[0m: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all()."
525 | ]
526 | }
527 | ],
528 | "execution_count": 30,
529 | "metadata": {}
530 | },
531 | {
532 | "cell_type": "code",
533 | "source": [
534 | "import numpy as np"
535 | ],
536 | "outputs": [],
537 | "execution_count": 31,
538 | "metadata": {}
539 | },
540 | {
541 | "cell_type": "code",
542 | "source": [
543 | "avg_2_mod_vec = np.vectorize(avg_2_mod)"
544 | ],
545 | "outputs": [],
546 | "execution_count": 32,
547 | "metadata": {}
548 | },
549 | {
550 | "cell_type": "code",
551 | "source": [
552 | "avg_2_mod_vec(df['a'], df['b'])"
553 | ],
554 | "outputs": [
555 | {
556 | "output_type": "execute_result",
557 | "execution_count": 33,
558 | "data": {
559 | "text/plain": [
560 | "array([15., nan, 35.])"
561 | ]
562 | },
563 | "metadata": {}
564 | }
565 | ],
566 | "execution_count": 33,
567 | "metadata": {}
568 | },
569 | {
570 | "cell_type": "code",
571 | "source": [
572 | "@np.vectorize\n",
573 | "def v_avg_2_mod(x, y):\n",
574 | " if (x == 20):\n",
575 | " return np.nan\n",
576 | " else:\n",
577 | " return (x + y) / 2"
578 | ],
579 | "outputs": [],
580 | "execution_count": 34,
581 | "metadata": {}
582 | },
583 | {
584 | "cell_type": "code",
585 | "source": [
586 | "v_avg_2_mod(df['a'], df['b'])"
587 | ],
588 | "outputs": [
589 | {
590 | "output_type": "execute_result",
591 | "execution_count": 35,
592 | "data": {
593 | "text/plain": [
594 | "array([15., nan, 35.])"
595 | ]
596 | },
597 | "metadata": {}
598 | }
599 | ],
600 | "execution_count": 35,
601 | "metadata": {}
602 | },
603 | {
604 | "cell_type": "code",
605 | "source": [
606 | "import numba"
607 | ],
608 | "outputs": [],
609 | "execution_count": 36,
610 | "metadata": {}
611 | },
612 | {
613 | "cell_type": "code",
614 | "source": [
615 | "@numba.vectorize\n",
616 | "def v_avg_2_mod_numba(x, y):\n",
617 | " if (x == 20):\n",
618 | " return np.nan\n",
619 | " else:\n",
620 | " return (x + y) / 2"
621 | ],
622 | "outputs": [],
623 | "execution_count": 37,
624 | "metadata": {}
625 | },
626 | {
627 | "cell_type": "code",
628 | "source": [
629 | "v_avg_2_mod_numba(df['a'].values, df['b'].values)"
630 | ],
631 | "outputs": [
632 | {
633 | "output_type": "execute_result",
634 | "execution_count": 38,
635 | "data": {
636 | "text/plain": [
637 | "array([15., nan, 35.])"
638 | ]
639 | },
640 | "metadata": {}
641 | }
642 | ],
643 | "execution_count": 38,
644 | "metadata": {}
645 | },
646 | {
647 | "cell_type": "code",
648 | "source": [
649 | "%%timeit\n",
650 | "avg_2(df['a'], df['b'])"
651 | ],
652 | "outputs": [
653 | {
654 | "output_type": "stream",
655 | "name": "stdout",
656 | "text": [
657 | "519 µs ± 34.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
658 | ]
659 | }
660 | ],
661 | "execution_count": 39,
662 | "metadata": {}
663 | },
664 | {
665 | "cell_type": "code",
666 | "source": [
667 | "%%timeit\n",
668 | "v_avg_2_mod(df['a'], df['b'])"
669 | ],
670 | "outputs": [
671 | {
672 | "output_type": "stream",
673 | "name": "stdout",
674 | "text": [
675 | "236 µs ± 33.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
676 | ]
677 | }
678 | ],
679 | "execution_count": 40,
680 | "metadata": {}
681 | },
682 | {
683 | "cell_type": "code",
684 | "source": [
685 | "%%timeit\n",
686 | "v_avg_2_mod_numba(df['a'].values, df['b'].values)"
687 | ],
688 | "outputs": [
689 | {
690 | "output_type": "stream",
691 | "name": "stdout",
692 | "text": [
693 | "8.28 µs ± 550 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)\n"
694 | ]
695 | }
696 | ],
697 | "execution_count": 41,
698 | "metadata": {}
699 | },
700 | {
701 | "cell_type": "code",
702 | "source": [],
703 | "outputs": [],
704 | "execution_count": 42,
705 | "metadata": {}
706 | }
707 | ],
708 | "metadata": {
709 | "kernelspec": {
710 | "name": "python3",
711 | "language": "python",
712 | "display_name": "Python 3"
713 | },
714 | "language_info": {
715 | "name": "python",
716 | "version": "3.7.3",
717 | "mimetype": "text/x-python",
718 | "codemirror_mode": {
719 | "name": "ipython",
720 | "version": 3
721 | },
722 | "pygments_lexer": "ipython3",
723 | "nbconvert_exporter": "python",
724 | "file_extension": ".py"
725 | },
726 | "kernel_info": {
727 | "name": "python3"
728 | },
729 | "nteract": {
730 | "version": "0.14.3"
731 | }
732 | },
733 | "nbformat": 4,
734 | "nbformat_minor": 2
735 | }
--------------------------------------------------------------------------------
/notes/05-models.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "source": [
6 | "import pandas as pd"
7 | ],
8 | "outputs": [],
9 | "execution_count": 1,
10 | "metadata": {}
11 | },
12 | {
13 | "cell_type": "code",
14 | "source": [
15 | "import seaborn as sns"
16 | ],
17 | "outputs": [],
18 | "execution_count": 2,
19 | "metadata": {}
20 | },
21 | {
22 | "cell_type": "code",
23 | "source": [
24 | "tips = sns.load_dataset('tips')"
25 | ],
26 | "outputs": [],
27 | "execution_count": 3,
28 | "metadata": {}
29 | },
30 | {
31 | "cell_type": "code",
32 | "source": [
33 | "tips.head()"
34 | ],
35 | "outputs": [
36 | {
37 | "output_type": "execute_result",
38 | "execution_count": 4,
39 | "data": {
40 | "text/plain": [
41 | " total_bill tip sex smoker day time size\n",
42 | "0 16.99 1.01 Female No Sun Dinner 2\n",
43 | "1 10.34 1.66 Male No Sun Dinner 3\n",
44 | "2 21.01 3.50 Male No Sun Dinner 3\n",
45 | "3 23.68 3.31 Male No Sun Dinner 2\n",
46 | "4 24.59 3.61 Female No Sun Dinner 4"
47 | ],
48 | "text/html": [
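49 | "<div>\n",
50 | "<style scoped>\n",
51 | "    .dataframe tbody tr th:only-of-type {\n",
52 | "        vertical-align: middle;\n",
53 | "    }\n",
54 | "\n",
55 | "    .dataframe tbody tr th {\n",
56 | "        vertical-align: top;\n",
57 | "    }\n",
58 | "\n",
59 | "    .dataframe thead th {\n",
60 | "        text-align: right;\n",
61 | "    }\n",
62 | "</style>\n",
63 | "<table border=\"1\" class=\"dataframe\">\n",
64 | "  <thead>\n",
65 | "    <tr style=\"text-align: right;\">\n",
66 | "      <th></th>\n",
67 | "      <th>total_bill</th>\n",
68 | "      <th>tip</th>\n",
69 | "      <th>sex</th>\n",
70 | "      <th>smoker</th>\n",
71 | "      <th>day</th>\n",
72 | "      <th>time</th>\n",
73 | "      <th>size</th>\n",
74 | "    </tr>\n",
75 | "  </thead>\n",
76 | "  <tbody>\n",
77 | "    <tr>\n",
78 | "      <th>0</th>\n",
79 | "      <td>16.99</td>\n",
80 | "      <td>1.01</td>\n",
81 | "      <td>Female</td>\n",
82 | "      <td>No</td>\n",
83 | "      <td>Sun</td>\n",
84 | "      <td>Dinner</td>\n",
85 | "      <td>2</td>\n",
86 | "    </tr>\n",
87 | "    <tr>\n",
88 | "      <th>1</th>\n",
89 | "      <td>10.34</td>\n",
90 | "      <td>1.66</td>\n",
91 | "      <td>Male</td>\n",
92 | "      <td>No</td>\n",
93 | "      <td>Sun</td>\n",
94 | "      <td>Dinner</td>\n",
95 | "      <td>3</td>\n",
96 | "    </tr>\n",
97 | "    <tr>\n",
98 | "      <th>2</th>\n",
99 | "      <td>21.01</td>\n",
100 | "      <td>3.50</td>\n",
101 | "      <td>Male</td>\n",
102 | "      <td>No</td>\n",
103 | "      <td>Sun</td>\n",
104 | "      <td>Dinner</td>\n",
105 | "      <td>3</td>\n",
106 | "    </tr>\n",
107 | "    <tr>\n",
108 | "      <th>3</th>\n",
109 | "      <td>23.68</td>\n",
110 | "      <td>3.31</td>\n",
111 | "      <td>Male</td>\n",
112 | "      <td>No</td>\n",
113 | "      <td>Sun</td>\n",
114 | "      <td>Dinner</td>\n",
115 | "      <td>2</td>\n",
116 | "    </tr>\n",
117 | "    <tr>\n",
118 | "      <th>4</th>\n",
119 | "      <td>24.59</td>\n",
120 | "      <td>3.61</td>\n",
121 | "      <td>Female</td>\n",
122 | "      <td>No</td>\n",
123 | "      <td>Sun</td>\n",
124 | "      <td>Dinner</td>\n",
125 | "      <td>4</td>\n",
126 | "    </tr>\n",
127 | "  </tbody>\n",
128 | "</table>\n",
129 | "</div>"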
130 | ]
131 | },
132 | "metadata": {}
133 | }
134 | ],
135 | "execution_count": 4,
136 | "metadata": {}
137 | },
138 | {
139 | "cell_type": "code",
140 | "source": [
141 | "from sklearn import linear_model"
142 | ],
143 | "outputs": [],
144 | "execution_count": 5,
145 | "metadata": {}
146 | },
147 | {
148 | "cell_type": "code",
149 | "source": [
150 | "lr = linear_model.LinearRegression()"
151 | ],
152 | "outputs": [],
153 | "execution_count": 6,
154 | "metadata": {}
155 | },
156 | {
157 | "cell_type": "code",
158 | "source": [
159 | "lr.fit(X=tips['total_bill'], y=tips['tip'])"
160 | ],
161 | "outputs": [
162 | {
163 | "output_type": "error",
164 | "ename": "ValueError",
165 | "evalue": "Expected 2D array, got 1D array instead:\narray=[16.99 10.34 21.01 23.68 24.59 25.29 8.77 26.88 15.04 14.78 10.27 35.26\n 15.42 18.43 14.83 21.58 10.33 16.29 16.97 20.65 17.92 20.29 15.77 39.42\n 19.82 17.81 13.37 12.69 21.7 19.65 9.55 18.35 15.06 20.69 17.78 24.06\n 16.31 16.93 18.69 31.27 16.04 17.46 13.94 9.68 30.4 18.29 22.23 32.4\n 28.55 18.04 12.54 10.29 34.81 9.94 25.56 19.49 38.01 26.41 11.24 48.27\n 20.29 13.81 11.02 18.29 17.59 20.08 16.45 3.07 20.23 15.01 12.02 17.07\n 26.86 25.28 14.73 10.51 17.92 27.2 22.76 17.29 19.44 16.66 10.07 32.68\n 15.98 34.83 13.03 18.28 24.71 21.16 28.97 22.49 5.75 16.32 22.75 40.17\n 27.28 12.03 21.01 12.46 11.35 15.38 44.3 22.42 20.92 15.36 20.49 25.21\n 18.24 14.31 14. 7.25 38.07 23.95 25.71 17.31 29.93 10.65 12.43 24.08\n 11.69 13.42 14.26 15.95 12.48 29.8 8.52 14.52 11.38 22.82 19.08 20.27\n 11.17 12.26 18.26 8.51 10.33 14.15 16. 13.16 17.47 34.3 41.19 27.05\n 16.43 8.35 18.64 11.87 9.78 7.51 14.07 13.13 17.26 24.55 19.77 29.85\n 48.17 25. 13.39 16.49 21.5 12.66 16.21 13.81 17.51 24.52 20.76 31.71\n 10.59 10.63 50.81 15.81 7.25 31.85 16.82 32.9 17.89 14.48 9.6 34.63\n 34.65 23.33 45.35 23.17 40.55 20.69 20.9 30.46 18.15 23.1 15.69 19.81\n 28.44 15.48 16.58 7.56 10.34 43.11 13. 13.51 18.71 12.74 13. 16.4\n 20.53 16.47 26.59 38.73 24.27 12.76 30.06 25.89 48.33 13.27 28.17 12.9\n 28.15 11.59 7.74 30.14 12.16 13.42 8.58 15.98 13.42 16.27 10.09 20.45\n 13.28 22.12 24.01 15.69 11.61 10.77 15.53 10.07 12.6 32.83 35.83 29.03\n 27.18 22.67 17.82 18.78].\nReshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.",
166 | "traceback": [
167 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
168 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
169 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mlr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtips\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'total_bill'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtips\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'tip'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
170 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/base.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[1;32m 456\u001b[0m \u001b[0mn_jobs_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_jobs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 457\u001b[0m X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],\n\u001b[0;32m--> 458\u001b[0;31m y_numeric=True, multi_output=True)\n\u001b[0m\u001b[1;32m 459\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 460\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0msample_weight\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0matleast_1d\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msample_weight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
171 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_X_y\u001b[0;34m(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)\u001b[0m\n\u001b[1;32m 754\u001b[0m \u001b[0mensure_min_features\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mensure_min_features\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 755\u001b[0m \u001b[0mwarn_on_dtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mwarn_on_dtype\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 756\u001b[0;31m estimator=estimator)\n\u001b[0m\u001b[1;32m 757\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmulti_output\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 758\u001b[0m y = check_array(y, 'csr', force_all_finite=True, ensure_2d=False,\n",
172 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_array\u001b[0;34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)\u001b[0m\n\u001b[1;32m 550\u001b[0m \u001b[0;34m\"Reshape your data either using array.reshape(-1, 1) if \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 551\u001b[0m \u001b[0;34m\"your data has a single feature or array.reshape(1, -1) \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 552\u001b[0;31m \"if it contains a single sample.\".format(array))\n\u001b[0m\u001b[1;32m 553\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 554\u001b[0m \u001b[0;31m# in the future np.flexible dtypes will be handled like object dtypes\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
173 | "\u001b[0;31mValueError\u001b[0m: Expected 2D array, got 1D array instead:\narray=[16.99 10.34 21.01 23.68 24.59 25.29 8.77 26.88 15.04 14.78 10.27 35.26\n 15.42 18.43 14.83 21.58 10.33 16.29 16.97 20.65 17.92 20.29 15.77 39.42\n 19.82 17.81 13.37 12.69 21.7 19.65 9.55 18.35 15.06 20.69 17.78 24.06\n 16.31 16.93 18.69 31.27 16.04 17.46 13.94 9.68 30.4 18.29 22.23 32.4\n 28.55 18.04 12.54 10.29 34.81 9.94 25.56 19.49 38.01 26.41 11.24 48.27\n 20.29 13.81 11.02 18.29 17.59 20.08 16.45 3.07 20.23 15.01 12.02 17.07\n 26.86 25.28 14.73 10.51 17.92 27.2 22.76 17.29 19.44 16.66 10.07 32.68\n 15.98 34.83 13.03 18.28 24.71 21.16 28.97 22.49 5.75 16.32 22.75 40.17\n 27.28 12.03 21.01 12.46 11.35 15.38 44.3 22.42 20.92 15.36 20.49 25.21\n 18.24 14.31 14. 7.25 38.07 23.95 25.71 17.31 29.93 10.65 12.43 24.08\n 11.69 13.42 14.26 15.95 12.48 29.8 8.52 14.52 11.38 22.82 19.08 20.27\n 11.17 12.26 18.26 8.51 10.33 14.15 16. 13.16 17.47 34.3 41.19 27.05\n 16.43 8.35 18.64 11.87 9.78 7.51 14.07 13.13 17.26 24.55 19.77 29.85\n 48.17 25. 13.39 16.49 21.5 12.66 16.21 13.81 17.51 24.52 20.76 31.71\n 10.59 10.63 50.81 15.81 7.25 31.85 16.82 32.9 17.89 14.48 9.6 34.63\n 34.65 23.33 45.35 23.17 40.55 20.69 20.9 30.46 18.15 23.1 15.69 19.81\n 28.44 15.48 16.58 7.56 10.34 43.11 13. 13.51 18.71 12.74 13. 16.4\n 20.53 16.47 26.59 38.73 24.27 12.76 30.06 25.89 48.33 13.27 28.17 12.9\n 28.15 11.59 7.74 30.14 12.16 13.42 8.58 15.98 13.42 16.27 10.09 20.45\n 13.28 22.12 24.01 15.69 11.61 10.77 15.53 10.07 12.6 32.83 35.83 29.03\n 27.18 22.67 17.82 18.78].\nReshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample."
174 | ]
175 | }
176 | ],
177 | "execution_count": 7,
178 | "metadata": {}
179 | },
180 | {
181 | "cell_type": "code",
182 | "source": [
183 | "lr.fit(X=tips['total_bill'].values.reshape(-1, 1), y=tips['tip'])"
184 | ],
185 | "outputs": [
186 | {
187 | "output_type": "execute_result",
188 | "execution_count": 8,
189 | "data": {
190 | "text/plain": [
191 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n",
192 | " normalize=False)"
193 | ]
194 | },
195 | "metadata": {}
196 | }
197 | ],
198 | "execution_count": 8,
199 | "metadata": {}
200 | },
201 | {
202 | "cell_type": "code",
203 | "source": [
204 | "lr.coef_"
205 | ],
206 | "outputs": [
207 | {
208 | "output_type": "execute_result",
209 | "execution_count": 9,
210 | "data": {
211 | "text/plain": [
212 | "array([0.10502452])"
213 | ]
214 | },
215 | "metadata": {}
216 | }
217 | ],
218 | "execution_count": 9,
219 | "metadata": {}
220 | },
221 | {
222 | "cell_type": "code",
223 | "source": [
224 | "lr.intercept_"
225 | ],
226 | "outputs": [
227 | {
228 | "output_type": "execute_result",
229 | "execution_count": 10,
230 | "data": {
231 | "text/plain": [
232 | "0.9202696135546731"
233 | ]
234 | },
235 | "metadata": {}
236 | }
237 | ],
238 | "execution_count": 10,
239 | "metadata": {}
240 | },
241 | {
242 | "cell_type": "code",
243 | "source": [
244 | "lr.fit(X=tips[['total_bill', 'size']], y=tips['tip'])"
245 | ],
246 | "outputs": [
247 | {
248 | "output_type": "execute_result",
249 | "execution_count": 11,
250 | "data": {
251 | "text/plain": [
252 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n",
253 | " normalize=False)"
254 | ]
255 | },
256 | "metadata": {}
257 | }
258 | ],
259 | "execution_count": 11,
260 | "metadata": {}
261 | },
262 | {
263 | "cell_type": "code",
264 | "source": [
265 | "lr.coef_"
266 | ],
267 | "outputs": [
268 | {
269 | "output_type": "execute_result",
270 | "execution_count": 12,
271 | "data": {
272 | "text/plain": [
273 | "array([0.09271334, 0.19259779])"
274 | ]
275 | },
276 | "metadata": {}
277 | }
278 | ],
279 | "execution_count": 12,
280 | "metadata": {}
281 | },
282 | {
283 | "cell_type": "code",
284 | "source": [
285 | "tips"
286 | ],
287 | "outputs": [
288 | {
289 | "output_type": "execute_result",
290 | "execution_count": 13,
291 | "data": {
292 | "text/plain": [
293 | " total_bill tip sex smoker day time size\n",
294 | "0 16.99 1.01 Female No Sun Dinner 2\n",
295 | "1 10.34 1.66 Male No Sun Dinner 3\n",
296 | "2 21.01 3.50 Male No Sun Dinner 3\n",
297 | "3 23.68 3.31 Male No Sun Dinner 2\n",
298 | "4 24.59 3.61 Female No Sun Dinner 4\n",
299 | "5 25.29 4.71 Male No Sun Dinner 4\n",
300 | "6 8.77 2.00 Male No Sun Dinner 2\n",
301 | "7 26.88 3.12 Male No Sun Dinner 4\n",
302 | "8 15.04 1.96 Male No Sun Dinner 2\n",
303 | "9 14.78 3.23 Male No Sun Dinner 2\n",
304 | "10 10.27 1.71 Male No Sun Dinner 2\n",
305 | "11 35.26 5.00 Female No Sun Dinner 4\n",
306 | "12 15.42 1.57 Male No Sun Dinner 2\n",
307 | "13 18.43 3.00 Male No Sun Dinner 4\n",
308 | "14 14.83 3.02 Female No Sun Dinner 2\n",
309 | "15 21.58 3.92 Male No Sun Dinner 2\n",
310 | "16 10.33 1.67 Female No Sun Dinner 3\n",
311 | "17 16.29 3.71 Male No Sun Dinner 3\n",
312 | "18 16.97 3.50 Female No Sun Dinner 3\n",
313 | "19 20.65 3.35 Male No Sat Dinner 3\n",
314 | "20 17.92 4.08 Male No Sat Dinner 2\n",
315 | "21 20.29 2.75 Female No Sat Dinner 2\n",
316 | "22 15.77 2.23 Female No Sat Dinner 2\n",
317 | "23 39.42 7.58 Male No Sat Dinner 4\n",
318 | "24 19.82 3.18 Male No Sat Dinner 2\n",
319 | "25 17.81 2.34 Male No Sat Dinner 4\n",
320 | "26 13.37 2.00 Male No Sat Dinner 2\n",
321 | "27 12.69 2.00 Male No Sat Dinner 2\n",
322 | "28 21.70 4.30 Male No Sat Dinner 2\n",
323 | "29 19.65 3.00 Female No Sat Dinner 2\n",
324 | ".. ... ... ... ... ... ... ...\n",
325 | "214 28.17 6.50 Female Yes Sat Dinner 3\n",
326 | "215 12.90 1.10 Female Yes Sat Dinner 2\n",
327 | "216 28.15 3.00 Male Yes Sat Dinner 5\n",
328 | "217 11.59 1.50 Male Yes Sat Dinner 2\n",
329 | "218 7.74 1.44 Male Yes Sat Dinner 2\n",
330 | "219 30.14 3.09 Female Yes Sat Dinner 4\n",
331 | "220 12.16 2.20 Male Yes Fri Lunch 2\n",
332 | "221 13.42 3.48 Female Yes Fri Lunch 2\n",
333 | "222 8.58 1.92 Male Yes Fri Lunch 1\n",
334 | "223 15.98 3.00 Female No Fri Lunch 3\n",
335 | "224 13.42 1.58 Male Yes Fri Lunch 2\n",
336 | "225 16.27 2.50 Female Yes Fri Lunch 2\n",
337 | "226 10.09 2.00 Female Yes Fri Lunch 2\n",
338 | "227 20.45 3.00 Male No Sat Dinner 4\n",
339 | "228 13.28 2.72 Male No Sat Dinner 2\n",
340 | "229 22.12 2.88 Female Yes Sat Dinner 2\n",
341 | "230 24.01 2.00 Male Yes Sat Dinner 4\n",
342 | "231 15.69 3.00 Male Yes Sat Dinner 3\n",
343 | "232 11.61 3.39 Male No Sat Dinner 2\n",
344 | "233 10.77 1.47 Male No Sat Dinner 2\n",
345 | "234 15.53 3.00 Male Yes Sat Dinner 2\n",
346 | "235 10.07 1.25 Male No Sat Dinner 2\n",
347 | "236 12.60 1.00 Male Yes Sat Dinner 2\n",
348 | "237 32.83 1.17 Male Yes Sat Dinner 2\n",
349 | "238 35.83 4.67 Female No Sat Dinner 3\n",
350 | "239 29.03 5.92 Male No Sat Dinner 3\n",
351 | "240 27.18 2.00 Female Yes Sat Dinner 2\n",
352 | "241 22.67 2.00 Male Yes Sat Dinner 2\n",
353 | "242 17.82 1.75 Male No Sat Dinner 2\n",
354 | "243 18.78 3.00 Female No Thur Dinner 2\n",
355 | "\n",
356 | "[244 rows x 7 columns]"
357 | ],
358 | "text/html": [
359 | "\n",
360 | "\n",
373 | "
\n",
374 | " \n",
375 | " \n",
376 | " | \n",
377 | " total_bill | \n",
378 | " tip | \n",
379 | " sex | \n",
380 | " smoker | \n",
381 | " day | \n",
382 | " time | \n",
383 | " size | \n",
384 | "
\n",
385 | " \n",
386 | " \n",
387 | " \n",
388 | " 0 | \n",
389 | " 16.99 | \n",
390 | " 1.01 | \n",
391 | " Female | \n",
392 | " No | \n",
393 | " Sun | \n",
394 | " Dinner | \n",
395 | " 2 | \n",
396 | "
\n",
397 | " \n",
398 | " 1 | \n",
399 | " 10.34 | \n",
400 | " 1.66 | \n",
401 | " Male | \n",
402 | " No | \n",
403 | " Sun | \n",
404 | " Dinner | \n",
405 | " 3 | \n",
406 | "
\n",
407 | " \n",
408 | " 2 | \n",
409 | " 21.01 | \n",
410 | " 3.50 | \n",
411 | " Male | \n",
412 | " No | \n",
413 | " Sun | \n",
414 | " Dinner | \n",
415 | " 3 | \n",
416 | "
\n",
417 | " \n",
418 | " 3 | \n",
419 | " 23.68 | \n",
420 | " 3.31 | \n",
421 | " Male | \n",
422 | " No | \n",
423 | " Sun | \n",
424 | " Dinner | \n",
425 | " 2 | \n",
426 | "
\n",
427 | " \n",
428 | " 4 | \n",
429 | " 24.59 | \n",
430 | " 3.61 | \n",
431 | " Female | \n",
432 | " No | \n",
433 | " Sun | \n",
434 | " Dinner | \n",
435 | " 4 | \n",
436 | "
\n",
437 | " \n",
438 | " 5 | \n",
439 | " 25.29 | \n",
440 | " 4.71 | \n",
441 | " Male | \n",
442 | " No | \n",
443 | " Sun | \n",
444 | " Dinner | \n",
445 | " 4 | \n",
446 | "
\n",
447 | " \n",
448 | " 6 | \n",
449 | " 8.77 | \n",
450 | " 2.00 | \n",
451 | " Male | \n",
452 | " No | \n",
453 | " Sun | \n",
454 | " Dinner | \n",
455 | " 2 | \n",
456 | "
\n",
457 | " \n",
458 | " 7 | \n",
459 | " 26.88 | \n",
460 | " 3.12 | \n",
461 | " Male | \n",
462 | " No | \n",
463 | " Sun | \n",
464 | " Dinner | \n",
465 | " 4 | \n",
466 | "
\n",
467 | " \n",
468 | " 8 | \n",
469 | " 15.04 | \n",
470 | " 1.96 | \n",
471 | " Male | \n",
472 | " No | \n",
473 | " Sun | \n",
474 | " Dinner | \n",
475 | " 2 | \n",
476 | "
\n",
477 | " \n",
478 | " 9 | \n",
479 | " 14.78 | \n",
480 | " 3.23 | \n",
481 | " Male | \n",
482 | " No | \n",
483 | " Sun | \n",
484 | " Dinner | \n",
485 | " 2 | \n",
486 | "
\n",
487 | " \n",
488 | " 10 | \n",
489 | " 10.27 | \n",
490 | " 1.71 | \n",
491 | " Male | \n",
492 | " No | \n",
493 | " Sun | \n",
494 | " Dinner | \n",
495 | " 2 | \n",
496 | "
\n",
497 | " \n",
498 | " 11 | \n",
499 | " 35.26 | \n",
500 | " 5.00 | \n",
501 | " Female | \n",
502 | " No | \n",
503 | " Sun | \n",
504 | " Dinner | \n",
505 | " 4 | \n",
506 | "
\n",
507 | " \n",
508 | " 12 | \n",
509 | " 15.42 | \n",
510 | " 1.57 | \n",
511 | " Male | \n",
512 | " No | \n",
513 | " Sun | \n",
514 | " Dinner | \n",
515 | " 2 | \n",
516 | "
\n",
517 | " \n",
518 | " 13 | \n",
519 | " 18.43 | \n",
520 | " 3.00 | \n",
521 | " Male | \n",
522 | " No | \n",
523 | " Sun | \n",
524 | " Dinner | \n",
525 | " 4 | \n",
526 | "
\n",
527 | " \n",
528 | " 14 | \n",
529 | " 14.83 | \n",
530 | " 3.02 | \n",
531 | " Female | \n",
532 | " No | \n",
533 | " Sun | \n",
534 | " Dinner | \n",
535 | " 2 | \n",
536 | "
\n",
537 | " \n",
538 | " 15 | \n",
539 | " 21.58 | \n",
540 | " 3.92 | \n",
541 | " Male | \n",
542 | " No | \n",
543 | " Sun | \n",
544 | " Dinner | \n",
545 | " 2 | \n",
546 | "
\n",
547 | " \n",
548 | " 16 | \n",
549 | " 10.33 | \n",
550 | " 1.67 | \n",
551 | " Female | \n",
552 | " No | \n",
553 | " Sun | \n",
554 | " Dinner | \n",
555 | " 3 | \n",
556 | "
\n",
557 | " \n",
558 | " 17 | \n",
559 | " 16.29 | \n",
560 | " 3.71 | \n",
561 | " Male | \n",
562 | " No | \n",
563 | " Sun | \n",
564 | " Dinner | \n",
565 | " 3 | \n",
566 | "
\n",
567 | " \n",
568 | " 18 | \n",
569 | " 16.97 | \n",
570 | " 3.50 | \n",
571 | " Female | \n",
572 | " No | \n",
573 | " Sun | \n",
574 | " Dinner | \n",
575 | " 3 | \n",
576 | "
\n",
577 | " \n",
578 | " 19 | \n",
579 | " 20.65 | \n",
580 | " 3.35 | \n",
581 | " Male | \n",
582 | " No | \n",
583 | " Sat | \n",
584 | " Dinner | \n",
585 | " 3 | \n",
586 | "
\n",
587 | " \n",
588 | " 20 | \n",
589 | " 17.92 | \n",
590 | " 4.08 | \n",
591 | " Male | \n",
592 | " No | \n",
593 | " Sat | \n",
594 | " Dinner | \n",
595 | " 2 | \n",
596 | "
\n",
597 | " \n",
598 | " 21 | \n",
599 | " 20.29 | \n",
600 | " 2.75 | \n",
601 | " Female | \n",
602 | " No | \n",
603 | " Sat | \n",
604 | " Dinner | \n",
605 | " 2 | \n",
606 | "
\n",
607 | " \n",
608 | " 22 | \n",
609 | " 15.77 | \n",
610 | " 2.23 | \n",
611 | " Female | \n",
612 | " No | \n",
613 | " Sat | \n",
614 | " Dinner | \n",
615 | " 2 | \n",
616 | "
\n",
617 | " \n",
618 | " 23 | \n",
619 | " 39.42 | \n",
620 | " 7.58 | \n",
621 | " Male | \n",
622 | " No | \n",
623 | " Sat | \n",
624 | " Dinner | \n",
625 | " 4 | \n",
626 | "
\n",
627 | " \n",
628 | " 24 | \n",
629 | " 19.82 | \n",
630 | " 3.18 | \n",
631 | " Male | \n",
632 | " No | \n",
633 | " Sat | \n",
634 | " Dinner | \n",
635 | " 2 | \n",
636 | "
\n",
637 | " \n",
638 | " 25 | \n",
639 | " 17.81 | \n",
640 | " 2.34 | \n",
641 | " Male | \n",
642 | " No | \n",
643 | " Sat | \n",
644 | " Dinner | \n",
645 | " 4 | \n",
646 | "
\n",
647 | " \n",
648 | " 26 | \n",
649 | " 13.37 | \n",
650 | " 2.00 | \n",
651 | " Male | \n",
652 | " No | \n",
653 | " Sat | \n",
654 | " Dinner | \n",
655 | " 2 | \n",
656 | "
\n",
657 | " \n",
658 | " 27 | \n",
659 | " 12.69 | \n",
660 | " 2.00 | \n",
661 | " Male | \n",
662 | " No | \n",
663 | " Sat | \n",
664 | " Dinner | \n",
665 | " 2 | \n",
666 | "
\n",
667 | " \n",
668 | " 28 | \n",
669 | " 21.70 | \n",
670 | " 4.30 | \n",
671 | " Male | \n",
672 | " No | \n",
673 | " Sat | \n",
674 | " Dinner | \n",
675 | " 2 | \n",
676 | "
\n",
677 | " \n",
678 | " 29 | \n",
679 | " 19.65 | \n",
680 | " 3.00 | \n",
681 | " Female | \n",
682 | " No | \n",
683 | " Sat | \n",
684 | " Dinner | \n",
685 | " 2 | \n",
686 | "
\n",
687 | " \n",
688 | " ... | \n",
689 | " ... | \n",
690 | " ... | \n",
691 | " ... | \n",
692 | " ... | \n",
693 | " ... | \n",
694 | " ... | \n",
695 | " ... | \n",
696 | "
\n",
697 | " \n",
698 | " 214 | \n",
699 | " 28.17 | \n",
700 | " 6.50 | \n",
701 | " Female | \n",
702 | " Yes | \n",
703 | " Sat | \n",
704 | " Dinner | \n",
705 | " 3 | \n",
706 | "
\n",
707 | " \n",
708 | " 215 | \n",
709 | " 12.90 | \n",
710 | " 1.10 | \n",
711 | " Female | \n",
712 | " Yes | \n",
713 | " Sat | \n",
714 | " Dinner | \n",
715 | " 2 | \n",
716 | "
\n",
717 | " \n",
718 | " 216 | \n",
719 | " 28.15 | \n",
720 | " 3.00 | \n",
721 | " Male | \n",
722 | " Yes | \n",
723 | " Sat | \n",
724 | " Dinner | \n",
725 | " 5 | \n",
726 | "
\n",
727 | " \n",
728 | " 217 | \n",
729 | " 11.59 | \n",
730 | " 1.50 | \n",
731 | " Male | \n",
732 | " Yes | \n",
733 | " Sat | \n",
734 | " Dinner | \n",
735 | " 2 | \n",
736 | "
\n",
737 | " \n",
738 | " 218 | \n",
739 | " 7.74 | \n",
740 | " 1.44 | \n",
741 | " Male | \n",
742 | " Yes | \n",
743 | " Sat | \n",
744 | " Dinner | \n",
745 | " 2 | \n",
746 | "
\n",
747 | " \n",
748 | " 219 | \n",
749 | " 30.14 | \n",
750 | " 3.09 | \n",
751 | " Female | \n",
752 | " Yes | \n",
753 | " Sat | \n",
754 | " Dinner | \n",
755 | " 4 | \n",
756 | "
\n",
757 | " \n",
758 | " 220 | \n",
759 | " 12.16 | \n",
760 | " 2.20 | \n",
761 | " Male | \n",
762 | " Yes | \n",
763 | " Fri | \n",
764 | " Lunch | \n",
765 | " 2 | \n",
766 | "
\n",
767 | " \n",
768 | " 221 | \n",
769 | " 13.42 | \n",
770 | " 3.48 | \n",
771 | " Female | \n",
772 | " Yes | \n",
773 | " Fri | \n",
774 | " Lunch | \n",
775 | " 2 | \n",
776 | "
\n",
777 | " \n",
778 | " 222 | \n",
779 | " 8.58 | \n",
780 | " 1.92 | \n",
781 | " Male | \n",
782 | " Yes | \n",
783 | " Fri | \n",
784 | " Lunch | \n",
785 | " 1 | \n",
786 | "
\n",
787 | " \n",
788 | " 223 | \n",
789 | " 15.98 | \n",
790 | " 3.00 | \n",
791 | " Female | \n",
792 | " No | \n",
793 | " Fri | \n",
794 | " Lunch | \n",
795 | " 3 | \n",
796 | "
\n",
797 | " \n",
798 | " 224 | \n",
799 | " 13.42 | \n",
800 | " 1.58 | \n",
801 | " Male | \n",
802 | " Yes | \n",
803 | " Fri | \n",
804 | " Lunch | \n",
805 | " 2 | \n",
806 | "
\n",
807 | " \n",
808 | " 225 | \n",
809 | " 16.27 | \n",
810 | " 2.50 | \n",
811 | " Female | \n",
812 | " Yes | \n",
813 | " Fri | \n",
814 | " Lunch | \n",
815 | " 2 | \n",
816 | "
\n",
817 | " \n",
818 | " 226 | \n",
819 | " 10.09 | \n",
820 | " 2.00 | \n",
821 | " Female | \n",
822 | " Yes | \n",
823 | " Fri | \n",
824 | " Lunch | \n",
825 | " 2 | \n",
826 | "
\n",
827 | " \n",
828 | " 227 | \n",
829 | " 20.45 | \n",
830 | " 3.00 | \n",
831 | " Male | \n",
832 | " No | \n",
833 | " Sat | \n",
834 | " Dinner | \n",
835 | " 4 | \n",
836 | "
\n",
837 | " \n",
838 | " 228 | \n",
839 | " 13.28 | \n",
840 | " 2.72 | \n",
841 | " Male | \n",
842 | " No | \n",
843 | " Sat | \n",
844 | " Dinner | \n",
845 | " 2 | \n",
846 | "
\n",
847 | " \n",
848 | " 229 | \n",
849 | " 22.12 | \n",
850 | " 2.88 | \n",
851 | " Female | \n",
852 | " Yes | \n",
853 | " Sat | \n",
854 | " Dinner | \n",
855 | " 2 | \n",
856 | "
\n",
857 | " \n",
858 | " 230 | \n",
859 | " 24.01 | \n",
860 | " 2.00 | \n",
861 | " Male | \n",
862 | " Yes | \n",
863 | " Sat | \n",
864 | " Dinner | \n",
865 | " 4 | \n",
866 | "
\n",
867 | " \n",
868 | " 231 | \n",
869 | " 15.69 | \n",
870 | " 3.00 | \n",
871 | " Male | \n",
872 | " Yes | \n",
873 | " Sat | \n",
874 | " Dinner | \n",
875 | " 3 | \n",
876 | "
\n",
877 | " \n",
878 | " 232 | \n",
879 | " 11.61 | \n",
880 | " 3.39 | \n",
881 | " Male | \n",
882 | " No | \n",
883 | " Sat | \n",
884 | " Dinner | \n",
885 | " 2 | \n",
886 | "
\n",
887 | " \n",
888 | " 233 | \n",
889 | " 10.77 | \n",
890 | " 1.47 | \n",
891 | " Male | \n",
892 | " No | \n",
893 | " Sat | \n",
894 | " Dinner | \n",
895 | " 2 | \n",
896 | "
\n",
897 | " \n",
898 | " 234 | \n",
899 | " 15.53 | \n",
900 | " 3.00 | \n",
901 | " Male | \n",
902 | " Yes | \n",
903 | " Sat | \n",
904 | " Dinner | \n",
905 | " 2 | \n",
906 | "
\n",
907 | " \n",
908 | " 235 | \n",
909 | " 10.07 | \n",
910 | " 1.25 | \n",
911 | " Male | \n",
912 | " No | \n",
913 | " Sat | \n",
914 | " Dinner | \n",
915 | " 2 | \n",
916 | "
\n",
917 | " \n",
918 | " 236 | \n",
919 | " 12.60 | \n",
920 | " 1.00 | \n",
921 | " Male | \n",
922 | " Yes | \n",
923 | " Sat | \n",
924 | " Dinner | \n",
925 | " 2 | \n",
926 | "
\n",
927 | " \n",
928 | " 237 | \n",
929 | " 32.83 | \n",
930 | " 1.17 | \n",
931 | " Male | \n",
932 | " Yes | \n",
933 | " Sat | \n",
934 | " Dinner | \n",
935 | " 2 | \n",
936 | "
\n",
937 | " \n",
938 | " 238 | \n",
939 | " 35.83 | \n",
940 | " 4.67 | \n",
941 | " Female | \n",
942 | " No | \n",
943 | " Sat | \n",
944 | " Dinner | \n",
945 | " 3 | \n",
946 | "
\n",
947 | " \n",
948 | " 239 | \n",
949 | " 29.03 | \n",
950 | " 5.92 | \n",
951 | " Male | \n",
952 | " No | \n",
953 | " Sat | \n",
954 | " Dinner | \n",
955 | " 3 | \n",
956 | "
\n",
957 | " \n",
958 | " 240 | \n",
959 | " 27.18 | \n",
960 | " 2.00 | \n",
961 | " Female | \n",
962 | " Yes | \n",
963 | " Sat | \n",
964 | " Dinner | \n",
965 | " 2 | \n",
966 | "
\n",
967 | " \n",
968 | " 241 | \n",
969 | " 22.67 | \n",
970 | " 2.00 | \n",
971 | " Male | \n",
972 | " Yes | \n",
973 | " Sat | \n",
974 | " Dinner | \n",
975 | " 2 | \n",
976 | "
\n",
977 | " \n",
978 | " 242 | \n",
979 | " 17.82 | \n",
980 | " 1.75 | \n",
981 | " Male | \n",
982 | " No | \n",
983 | " Sat | \n",
984 | " Dinner | \n",
985 | " 2 | \n",
986 | "
\n",
987 | " \n",
988 | " 243 | \n",
989 | " 18.78 | \n",
990 | " 3.00 | \n",
991 | " Female | \n",
992 | " No | \n",
993 | " Thur | \n",
994 | " Dinner | \n",
995 | " 2 | \n",
996 | "
\n",
997 | " \n",
998 | "
\n",
999 | "
244 rows × 7 columns
\n",
1000 | "
"
1001 | ]
1002 | },
1003 | "metadata": {}
1004 | }
1005 | ],
1006 | "execution_count": 13,
1007 | "metadata": {}
1008 | },
1009 | {
1010 | "cell_type": "code",
1011 | "source": [
1012 | "# dummy encoding\n",
1013 | "# one-hot encoding"
1014 | ],
1015 | "outputs": [],
1016 | "execution_count": 14,
1017 | "metadata": {}
1018 | },
1019 | {
1020 | "cell_type": "code",
1021 | "source": [
1022 | "lr.fit(X=tips[['total_bill', 'sex']], y=tips['tip'])"
1023 | ],
1024 | "outputs": [
1025 | {
1026 | "output_type": "error",
1027 | "ename": "ValueError",
1028 | "evalue": "could not convert string to float: 'Female'",
1029 | "traceback": [
1030 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
1031 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
1032 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mlr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtips\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'total_bill'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'sex'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtips\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'tip'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
1033 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/base.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[1;32m 456\u001b[0m \u001b[0mn_jobs_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_jobs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 457\u001b[0m X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],\n\u001b[0;32m--> 458\u001b[0;31m y_numeric=True, multi_output=True)\n\u001b[0m\u001b[1;32m 459\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 460\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0msample_weight\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0matleast_1d\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msample_weight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
1034 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_X_y\u001b[0;34m(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)\u001b[0m\n\u001b[1;32m 754\u001b[0m \u001b[0mensure_min_features\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mensure_min_features\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 755\u001b[0m \u001b[0mwarn_on_dtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mwarn_on_dtype\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 756\u001b[0;31m estimator=estimator)\n\u001b[0m\u001b[1;32m 757\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmulti_output\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 758\u001b[0m y = check_array(y, 'csr', force_all_finite=True, ensure_2d=False,\n",
1035 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_array\u001b[0;34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)\u001b[0m\n\u001b[1;32m 565\u001b[0m \u001b[0;31m# make sure we actually converted to numeric:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 566\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdtype_numeric\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0marray\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkind\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"O\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 567\u001b[0;31m \u001b[0marray\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0marray\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfloat64\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 568\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mallow_nd\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0marray\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[0;34m>=\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 569\u001b[0m raise ValueError(\"Found array with dim %d. %s expected <= 2.\"\n",
1036 | "\u001b[0;31mValueError\u001b[0m: could not convert string to float: 'Female'"
1037 | ]
1038 | }
1039 | ],
1040 | "execution_count": 15,
1041 | "metadata": {}
1042 | },
1043 | {
1044 | "cell_type": "code",
1045 | "source": [
1046 | "tips_dummy = tips[['tip', 'total_bill', 'sex']]"
1047 | ],
1048 | "outputs": [],
1049 | "execution_count": 16,
1050 | "metadata": {}
1051 | },
1052 | {
1053 | "cell_type": "code",
1054 | "source": [
1055 | "tips_dummy.head()"
1056 | ],
1057 | "outputs": [
1058 | {
1059 | "output_type": "execute_result",
1060 | "execution_count": 17,
1061 | "data": {
1062 | "text/plain": [
1063 | " tip total_bill sex\n",
1064 | "0 1.01 16.99 Female\n",
1065 | "1 1.66 10.34 Male\n",
1066 | "2 3.50 21.01 Male\n",
1067 | "3 3.31 23.68 Male\n",
1068 | "4 3.61 24.59 Female"
1069 | ],
1070 | "text/html": [
1071 | "\n",
1072 | "\n",
1085 | "
\n",
1086 | " \n",
1087 | " \n",
1088 | " | \n",
1089 | " tip | \n",
1090 | " total_bill | \n",
1091 | " sex | \n",
1092 | "
\n",
1093 | " \n",
1094 | " \n",
1095 | " \n",
1096 | " 0 | \n",
1097 | " 1.01 | \n",
1098 | " 16.99 | \n",
1099 | " Female | \n",
1100 | "
\n",
1101 | " \n",
1102 | " 1 | \n",
1103 | " 1.66 | \n",
1104 | " 10.34 | \n",
1105 | " Male | \n",
1106 | "
\n",
1107 | " \n",
1108 | " 2 | \n",
1109 | " 3.50 | \n",
1110 | " 21.01 | \n",
1111 | " Male | \n",
1112 | "
\n",
1113 | " \n",
1114 | " 3 | \n",
1115 | " 3.31 | \n",
1116 | " 23.68 | \n",
1117 | " Male | \n",
1118 | "
\n",
1119 | " \n",
1120 | " 4 | \n",
1121 | " 3.61 | \n",
1122 | " 24.59 | \n",
1123 | " Female | \n",
1124 | "
\n",
1125 | " \n",
1126 | "
\n",
1127 | "
"
1128 | ]
1129 | },
1130 | "metadata": {}
1131 | }
1132 | ],
1133 | "execution_count": 17,
1134 | "metadata": {}
1135 | },
1136 | {
1137 | "cell_type": "code",
1138 | "source": [
1139 | "tips_dummy = pd.get_dummies(tips_dummy, drop_first=True)"
1140 | ],
1141 | "outputs": [],
1142 | "execution_count": 18,
1143 | "metadata": {}
1144 | },
1145 | {
1146 | "cell_type": "code",
1147 | "source": [
1148 | "tips_dummy.head()"
1149 | ],
1150 | "outputs": [
1151 | {
1152 | "output_type": "execute_result",
1153 | "execution_count": 19,
1154 | "data": {
1155 | "text/plain": [
1156 | " tip total_bill sex_Female\n",
1157 | "0 1.01 16.99 1\n",
1158 | "1 1.66 10.34 0\n",
1159 | "2 3.50 21.01 0\n",
1160 | "3 3.31 23.68 0\n",
1161 | "4 3.61 24.59 1"
1162 | ],
1163 | "text/html": [
1164 | "\n",
1165 | "\n",
1178 | "
\n",
1179 | " \n",
1180 | " \n",
1181 | " | \n",
1182 | " tip | \n",
1183 | " total_bill | \n",
1184 | " sex_Female | \n",
1185 | "
\n",
1186 | " \n",
1187 | " \n",
1188 | " \n",
1189 | " 0 | \n",
1190 | " 1.01 | \n",
1191 | " 16.99 | \n",
1192 | " 1 | \n",
1193 | "
\n",
1194 | " \n",
1195 | " 1 | \n",
1196 | " 1.66 | \n",
1197 | " 10.34 | \n",
1198 | " 0 | \n",
1199 | "
\n",
1200 | " \n",
1201 | " 2 | \n",
1202 | " 3.50 | \n",
1203 | " 21.01 | \n",
1204 | " 0 | \n",
1205 | "
\n",
1206 | " \n",
1207 | " 3 | \n",
1208 | " 3.31 | \n",
1209 | " 23.68 | \n",
1210 | " 0 | \n",
1211 | "
\n",
1212 | " \n",
1213 | " 4 | \n",
1214 | " 3.61 | \n",
1215 | " 24.59 | \n",
1216 | " 1 | \n",
1217 | "
\n",
1218 | " \n",
1219 | "
\n",
1220 | "
"
1221 | ]
1222 | },
1223 | "metadata": {}
1224 | }
1225 | ],
1226 | "execution_count": 19,
1227 | "metadata": {}
1228 | },
1229 | {
1230 | "cell_type": "code",
1231 | "source": [
1232 | "lr.fit(X=tips_dummy.iloc[:, 1:], y=tips_dummy.iloc[:, 0])"
1233 | ],
1234 | "outputs": [
1235 | {
1236 | "output_type": "execute_result",
1237 | "execution_count": 20,
1238 | "data": {
1239 | "text/plain": [
1240 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n",
1241 | " normalize=False)"
1242 | ]
1243 | },
1244 | "metadata": {}
1245 | }
1246 | ],
1247 | "execution_count": 20,
1248 | "metadata": {}
1249 | },
1250 | {
1251 | "cell_type": "code",
1252 | "source": [
1253 | "tip_money = lr.coef_[0]"
1254 | ],
1255 | "outputs": [],
1256 | "execution_count": 21,
1257 | "metadata": {}
1258 | },
1259 | {
1260 | "cell_type": "code",
1261 | "source": [
1262 | "tip_money"
1263 | ],
1264 | "outputs": [
1265 | {
1266 | "output_type": "execute_result",
1267 | "execution_count": 22,
1268 | "data": {
1269 | "text/plain": [
1270 | "0.10523235686615456"
1271 | ]
1272 | },
1273 | "metadata": {}
1274 | }
1275 | ],
1276 | "execution_count": 22,
1277 | "metadata": {}
1278 | },
1279 | {
1280 | "cell_type": "code",
1281 | "source": [],
1282 | "outputs": [],
1283 | "execution_count": 24,
1284 | "metadata": {}
1285 | }
1286 | ],
1287 | "metadata": {
1288 | "kernelspec": {
1289 | "name": "python3",
1290 | "language": "python",
1291 | "display_name": "Python 3"
1292 | },
1293 | "language_info": {
1294 | "name": "python",
1295 | "version": "3.7.3",
1296 | "mimetype": "text/x-python",
1297 | "codemirror_mode": {
1298 | "name": "ipython",
1299 | "version": 3
1300 | },
1301 | "pygments_lexer": "ipython3",
1302 | "nbconvert_exporter": "python",
1303 | "file_extension": ".py"
1304 | },
1305 | "kernel_info": {
1306 | "name": "python3"
1307 | },
1308 | "nteract": {
1309 | "version": "0.14.3"
1310 | }
1311 | },
1312 | "nbformat": 4,
1313 | "nbformat_minor": 2
1314 | }
--------------------------------------------------------------------------------
/test_installation.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import seaborn as sns
3 | import sklearn as sk
4 |
--------------------------------------------------------------------------------