├── .gitignore ├── LICENSE ├── README.md ├── data.zip ├── data ├── billboard.csv ├── country_timeseries.csv ├── gapminder.tsv ├── pew.csv ├── table1.csv ├── table2.csv ├── table3.csv ├── table4a.csv ├── table4b.csv └── weather.csv ├── exercises └── exercises.ipynb ├── notebooks ├── .gitkeep ├── 01-intro.ipynb ├── 02-tidy.ipynb ├── 03-apply.ipynb ├── 04-plots.ipynb └── 05-model.ipynb ├── notes ├── 01-intro.ipynb ├── 02-tidy.ipynb ├── 03-apply.ipynb ├── 04-plots.ipynb └── 05-models.ipynb └── test_installation.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Daniel Chen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # scipy-2019-pandas 2 | Pandas tutorial for SciPy 2019 3 | 4 | 5 | # Installation 6 | 7 | 1. Install anaconda (use the Python 3 version): https://www.anaconda.com/distribution/ 8 | 2. See the Software-Carpentry Installations for `bash`, `git`, `python`, and `text editor`: https://carpentries.github.io/workshop-template/ 9 | 10 | # Testing your installation 11 | 12 | 1. Run the `test_installation.py` script (or copy/paste the import statements into a python interpreter) 13 | 14 | ## How to run the Jupyter Notebook 15 | 16 | #### Windows/Mac 17 | 18 | There will be an [Anaconda Navigator](https://docs.continuum.io/anaconda/navigator/) application that installs to your system. 19 | You can launch the Jupyter notebook from there to run your python code. 20 | 21 | #### Linux 22 | 23 | Anaconda's Python installation should be your system's default python. 24 | Make sure you open a new terminal window for this to take effect. 25 | You can launch the Jupyter notebook by typing `jupyter notebook` 26 | 27 | ## Creating a Notebook 28 | 29 | Once you have the Jupyter notebook launched, there's a button towards the top right called `new`. 30 | Click this and select `Python 3`. 31 | 32 | # Get Data 33 | 34 | 1. Download or clone this repository. 
35 | - Press the green button towards the top right 36 | - click download zip 37 | - extract 38 | - celebrate 39 | -------------------------------------------------------------------------------- /data.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chendaniely/scipy-2019-pandas/7e515b7561fd5076ee462dd75af9488beadf6148/data.zip -------------------------------------------------------------------------------- /data/country_timeseries.csv: -------------------------------------------------------------------------------- 1 | Date,Day,Cases_Guinea,Cases_Liberia,Cases_SierraLeone,Cases_Nigeria,Cases_Senegal,Cases_UnitedStates,Cases_Spain,Cases_Mali,Deaths_Guinea,Deaths_Liberia,Deaths_SierraLeone,Deaths_Nigeria,Deaths_Senegal,Deaths_UnitedStates,Deaths_Spain,Deaths_Mali 2 | 1/5/2015,289,2776,,10030,,,,,,1786,,2977,,,,, 3 | 1/4/2015,288,2775,,9780,,,,,,1781,,2943,,,,, 4 | 1/3/2015,287,2769,8166,9722,,,,,,1767,3496,2915,,,,, 5 | 1/2/2015,286,,8157,,,,,,,,3496,,,,,, 6 | 12/31/2014,284,2730,8115,9633,,,,,,1739,3471,2827,,,,, 7 | 12/28/2014,281,2706,8018,9446,,,,,,1708,3423,2758,,,,, 8 | 12/27/2014,280,2695,,9409,,,,,,1697,,2732,,,,, 9 | 12/24/2014,277,2630,7977,9203,,,,,,,3413,2655,,,,, 10 | 12/21/2014,273,2597,,9004,,,,,,1607,,2582,,,,, 11 | 12/20/2014,272,2571,7862,8939,,,,,,1586,3384,2556,,,,, 12 | 12/18/2014,271,,7830,,,,,,,,3376,,,,,, 13 | 12/14/2014,267,2416,,8356,,,,,,1525,,2085,,,,, 14 | 12/9/2014,262,,7797,,,,,,,,3290,,,,,, 15 | 12/7/2014,260,2292,,7897,20,1,4,1,7,1428,,1768,8,0,1,0,6 16 | 12/3/2014,256,,7719,,,,,,,,3177,,,,,, 17 | 11/30/2014,253,2164,,7312,20,1,4,1,7,1327,,1583,8,0,1,0,6 18 | 11/28/2014,251,,7635,,,,,,,,3145,,,,,, 19 | 11/23/2014,246,2134,,6599,20,1,4,1,7,1260,,1398,8,0,1,0,6 20 | 11/22/2014,245,,7168,,,,,,,,3016,,,,,, 21 | 11/18/2014,241,2047,7082,6190,20,1,4,1,6,1214,2963,1267,8,0,1,0,6 22 | 11/16/2014,239,1971,,6073,20,1,4,1,5,1192,,1250,8,0,1,0,5 23 | 
11/15/2014,238,,7069,,,,,,,,2964,,,,,, 24 | 11/11/2014,234,1919,,5586,20,1,4,1,4,1166,,1187,8,0,1,0,3 25 | 11/10/2014,233,,6878,,,,,,,,2812,,,,,, 26 | 11/9/2014,232,1878,,5368,20,1,4,1,1,1142,,1169,8,0,1,0,1 27 | 11/8/2014,231,,6822,,,,,,,,2836,,,,,, 28 | 11/4/2014,227,,6619,4862,20,1,4,1,1,,2766,1130,8,0,1,0,1 29 | 11/3/2014,226,1760,,,,,,,,1054,,,,,,, 30 | 11/2/2014,225,1731,,4759,20,1,4,1,1,1041,,1070,8,0,1,0,1 31 | 10/31/2014,222,,6525,,,,,,,,2697,,,,,, 32 | 10/29/2014,220,1667,,5338,20,1,4,1,1,1018,,1510,8,0,1,0,1 33 | 10/27/2014,218,1906,,5235,20,1,4,1,1,997,,1500,8,0,1,0,1 34 | 10/25/2014,216,,6535,,,,,,,,2413,,,,,, 35 | 10/22/2014,214,,,3896,,,4,1,1,,,1281,,,1,0,1 36 | 10/21/2014,213,1553,,,,,,,,926,,,,,,, 37 | 10/19/2014,211,1540,,3706,20,1,3,1,,904,,1259,8,0,1,0, 38 | 10/18/2014,210,,4665,,,,,,,,2705,,,,,, 39 | 10/14/2014,206,1519,,3410,20,1,3,1,,862,,1200,8,0,0,1, 40 | 10/13/2014,205,,4262,,,,,,,,2484,,,,,, 41 | 10/12/2014,204,1472,,3252,20,1,2,1,,843,,1183,8,0,1,1, 42 | 10/11/2014,203,,4249,,,,,,,,2458,,,,,, 43 | 10/8/2014,200,,,2950,20,1,1,1,,,,930,8,0,1,1, 44 | 10/7/2014,199,1350,4076,,,,,,,778,2316,,,,,, 45 | 10/5/2014,197,1298,,2789,20,1,1,,,768,,879,8,0,0,, 46 | 10/4/2014,196,,3924,,,,,,,,2210,,,,,, 47 | 10/1/2014,193,1199,3834,2437,20,1,1,,,739,2069,623,8,0,0,, 48 | 9/28/2014,190,1157,3696,2304,20,1,,,,710,1998,622,8,0,,, 49 | 9/23/2014,185,1074,3458,2021,20,1,,,,648,1830,605,8,0,,, 50 | 9/21/2014,183,1022,3280,1940,20,1,,,,635,1677,597,8,0,,, 51 | 9/20/2014,182,,,1813,,,,,,,,593,,,,, 52 | 9/19/2014,181,1008,,,,,,,,632,,,,,,, 53 | 9/17/2014,179,,3022,,,,,,,,1578,,,,,, 54 | 9/14/2014,176,942,2710,1673,,,,,,601,1459,562,,,,, 55 | 9/13/2014,175,936,,1620,21,1,,,,595,1296,562,8,0,,, 56 | 9/10/2014,172,899,,1478,21,1,,,,568,,536,8,,,, 57 | 9/9/2014,171,,2407,,,,,,,,,,,,,, 58 | 9/7/2014,169,861,2081,1424,21,3,,,,557,1137,524,8,0,,, 59 | 9/5/2014,167,812,1871,1261,22,1,,,,517,1089,491,8,,,, 60 | 8/31/2014,162,771,1698,1216,21,1,,,,494,871,476,7,,,, 61 | 
8/26/2014,157,648,1378,1026,17,,,,,430,694,422,6,,,, 62 | 8/20/2014,151,607,1082,910,16,,,,,406,624,392,5,,,, 63 | 8/18/2014,149,579,972,907,15,,,,,396,576,374,4,,,, 64 | 8/16/2014,147,543,834,848,15,,,,,394,466,365,4,,,, 65 | 8/13/2014,144,519,786,810,12,,,,,380,413,348,4,,,, 66 | 8/11/2014,142,510,670,783,12,,,,,377,355,334,3,,,, 67 | 8/9/2014,140,506,599,730,13,,,,,373,323,315,2,,,, 68 | 8/6/2014,137,495,554,717,13,,,,,367,294,298,2,,,, 69 | 8/4/2014,135,495,516,691,9,,,,,363,282,286,1,,,, 70 | 8/1/2014,132,485,468,646,4,,,,,358,255,273,1,,,, 71 | 7/30/2014,129,472,391,574,3,,,,,346,227,252,1,,,, 72 | 7/27/2014,126,460,329,533,1,,,,,339,156,233,1,,,, 73 | 7/23/2014,123,427,249,525,0,,,,,319,129,224,0,,,, 74 | 7/20/2014,120,415,224,454,,,,,,314,127,219,,,,, 75 | 7/17/2014,117,410,196,442,,,,,,310,116,206,,,,, 76 | 7/14/2014,114,411,174,397,,,,,,310,106,197,,,,, 77 | 7/12/2014,112,406,172,386,,,,,,304,105,194,,,,, 78 | 7/8/2014,108,409,142,337,,,,,,309,88,142,,,,, 79 | 7/6/2014,106,408,131,305,,,,,,307,84,127,,,,, 80 | 7/2/2014,102,412,115,252,,,,,,305,75,101,,,,, 81 | 6/30/2014,100,413,107,239,,,,,,303,65,99,,,,, 82 | 6/22/2014,92,,51,,,,,,,,34,,,,,, 83 | 6/20/2014,90,390,,158,,,,,,270,,34,,,,, 84 | 6/19/2014,89,,41,,,,,,,,25,,,,,, 85 | 6/18/2014,88,390,,136,,,,,,267,,28,,,,, 86 | 6/17/2014,87,,,97,,,,,,,,49,,,,, 87 | 6/16/2014,86,398,33,,,,,,,264,24,,,,,, 88 | 6/10/2014,80,351,13,89,,,,,,226,24,7,,,,, 89 | 6/5/2014,75,,13,81,,,,,,,,6,,,,, 90 | 6/3/2014,73,344,13,,,,,,,215,12,6,,,,, 91 | 6/1/2014,71,328,13,79,,,,,,208,12,6,,,,, 92 | 5/28/2014,67,291,13,50,,,,,,193,12,6,,,,, 93 | 5/27/2014,66,281,12,16,,,,,,186,11,5,,,,, 94 | 5/23/2014,62,258,12,0,,,,,,174,11,0,,,,, 95 | 5/12/2014,51,248,12,0,,,,,,171,11,0,,,,, 96 | 5/10/2014,49,233,12,0,,,,,,157,11,0,,,,, 97 | 5/7/2014,46,236,13,0,,,,,,158,11,0,,,,, 98 | 5/5/2014,44,235,13,0,,,,,,157,11,0,,,,, 99 | 5/3/2014,42,231,13,0,,,,,,155,11,0,,,,, 100 | 5/1/2014,40,226,13,0,,,,,,149,11,0,,,,, 101 | 
4/26/2014,35,224,,0,,,,,,143,,0,,,,, 102 | 4/24/2014,33,,35,0,,,,,,,,0,,,,, 103 | 4/23/2014,32,218,,0,,,,,,141,,0,,,,, 104 | 4/22/2014,31,,,0,,,,,,,,0,,,,, 105 | 4/21/2014,30,,34,,,,,,,,11,,,,,, 106 | 4/20/2014,29,208,,,,,,,,136,6,,,,,, 107 | 4/17/2014,26,203,27,,,,,,,129,,,,,,, 108 | 4/16/2014,25,197,27,,,,,,,122,13,,,,,, 109 | 4/15/2014,24,,,12,,,,,,,,,,,,, 110 | 4/14/2014,23,168,,,,,,,,108,,,,,,, 111 | 4/11/2014,20,159,26,2,,,,,,106,13,2,,,,, 112 | 4/9/2014,18,158,25,2,,,,,,101,12,2,,,,, 113 | 4/7/2014,16,151,21,2,,,,,,95,10,2,,,,, 114 | 4/4/2014,13,143,18,2,,,,,,86,7,2,,,,, 115 | 4/1/2014,10,127,8,2,,,,,,83,5,2,,,,, 116 | 3/31/2014,9,122,8,2,,,,,,80,4,2,,,,, 117 | 3/29/2014,7,112,7,,,,,,,70,2,,,,,, 118 | 3/28/2014,6,112,3,2,,,,,,70,3,2,,,,, 119 | 3/27/2014,5,103,8,6,,,,,,66,6,5,,,,, 120 | 3/26/2014,4,86,,,,,,,,62,,,,,,, 121 | 3/25/2014,3,86,,,,,,,,60,,,,,,, 122 | 3/24/2014,2,86,,,,,,,,59,,,,,,, 123 | 3/22/2014,0,49,,,,,,,,29,,,,,,, -------------------------------------------------------------------------------- /data/pew.csv: -------------------------------------------------------------------------------- 1 | "religion","<$10k","$10-20k","$20-30k","$30-40k","$40-50k","$50-75k","$75-100k","$100-150k",">150k","Don't know/refused" 2 | "Agnostic",27,34,60,81,76,137,122,109,84,96 3 | "Atheist",12,27,37,52,35,70,73,59,74,76 4 | "Buddhist",27,21,30,34,33,58,62,39,53,54 5 | "Catholic",418,617,732,670,638,1116,949,792,633,1489 6 | "Don’t know/refused",15,14,15,11,10,35,21,17,18,116 7 | "Evangelical Prot",575,869,1064,982,881,1486,949,723,414,1529 8 | "Hindu",1,9,7,9,11,34,47,48,54,37 9 | "Historically Black Prot",228,244,236,238,197,223,131,81,78,339 10 | "Jehovah's Witness",20,27,24,24,21,30,15,11,6,37 11 | "Jewish",19,19,25,25,30,95,69,87,151,162 12 | "Mainline Prot",289,495,619,655,651,1107,939,753,634,1328 13 | "Mormon",29,40,48,51,56,112,85,49,42,69 14 | "Muslim",6,7,9,10,9,23,16,8,6,22 15 | "Orthodox",13,17,23,32,32,47,38,42,46,73 16 | "Other 
Christian",9,7,11,13,13,14,18,14,12,18 17 | "Other Faiths",20,33,40,46,49,63,46,40,41,71 18 | "Other World Religions",5,2,3,4,2,7,3,4,4,8 19 | "Unaffiliated",217,299,374,365,341,528,407,321,258,597 20 | -------------------------------------------------------------------------------- /data/table1.csv: -------------------------------------------------------------------------------- 1 | "country","year","cases","population" 2 | "Afghanistan",1999,745,19987071 3 | "Afghanistan",2000,2666,20595360 4 | "Brazil",1999,37737,172006362 5 | "Brazil",2000,80488,174504898 6 | "China",1999,212258,1272915272 7 | "China",2000,213766,1280428583 8 | -------------------------------------------------------------------------------- /data/table2.csv: -------------------------------------------------------------------------------- 1 | "country","year","type","count" 2 | "Afghanistan",1999,"cases",745 3 | "Afghanistan",1999,"population",19987071 4 | "Afghanistan",2000,"cases",2666 5 | "Afghanistan",2000,"population",20595360 6 | "Brazil",1999,"cases",37737 7 | "Brazil",1999,"population",172006362 8 | "Brazil",2000,"cases",80488 9 | "Brazil",2000,"population",174504898 10 | "China",1999,"cases",212258 11 | "China",1999,"population",1272915272 12 | "China",2000,"cases",213766 13 | "China",2000,"population",1280428583 14 | -------------------------------------------------------------------------------- /data/table3.csv: -------------------------------------------------------------------------------- 1 | "country","year","rate" 2 | "Afghanistan",1999,"745/19987071" 3 | "Afghanistan",2000,"2666/20595360" 4 | "Brazil",1999,"37737/172006362" 5 | "Brazil",2000,"80488/174504898" 6 | "China",1999,"212258/1272915272" 7 | "China",2000,"213766/1280428583" 8 | -------------------------------------------------------------------------------- /data/table4a.csv: -------------------------------------------------------------------------------- 1 | "country","1999","2000" 2 | "Afghanistan",745,2666 3 | 
"Brazil",37737,80488 4 | "China",212258,213766 5 | -------------------------------------------------------------------------------- /data/table4b.csv: -------------------------------------------------------------------------------- 1 | "country","1999","2000" 2 | "Afghanistan",19987071,20595360 3 | "Brazil",172006362,174504898 4 | "China",1272915272,1280428583 5 | -------------------------------------------------------------------------------- /data/weather.csv: -------------------------------------------------------------------------------- 1 | "id","year","month","element","d1","d2","d3","d4","d5","d6","d7","d8","d9","d10","d11","d12","d13","d14","d15","d16","d17","d18","d19","d20","d21","d22","d23","d24","d25","d26","d27","d28","d29","d30","d31" 2 | "MX17004",2010,1,"tmax",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,27.8,NA 3 | "MX17004",2010,1,"tmin",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,14.5,NA 4 | "MX17004",2010,2,"tmax",NA,27.3,24.1,NA,NA,NA,NA,NA,NA,NA,29.7,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,29.9,NA,NA,NA,NA,NA,NA,NA,NA 5 | "MX17004",2010,2,"tmin",NA,14.4,14.4,NA,NA,NA,NA,NA,NA,NA,13.4,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,10.7,NA,NA,NA,NA,NA,NA,NA,NA 6 | "MX17004",2010,3,"tmax",NA,NA,NA,NA,32.1,NA,NA,NA,NA,34.5,NA,NA,NA,NA,NA,31.1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 7 | "MX17004",2010,3,"tmin",NA,NA,NA,NA,14.2,NA,NA,NA,NA,16.8,NA,NA,NA,NA,NA,17.6,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 8 | "MX17004",2010,4,"tmax",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,36.3,NA,NA,NA,NA 9 | "MX17004",2010,4,"tmin",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,16.7,NA,NA,NA,NA 10 | "MX17004",2010,5,"tmax",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,33.2,NA,NA,NA,NA 11 | 
"MX17004",2010,5,"tmin",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,18.2,NA,NA,NA,NA 12 | "MX17004",2010,6,"tmax",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,28,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,30.1,NA,NA 13 | "MX17004",2010,6,"tmin",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,17.5,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,18,NA,NA 14 | "MX17004",2010,7,"tmax",NA,NA,28.6,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,29.9,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 15 | "MX17004",2010,7,"tmin",NA,NA,17.5,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,16.5,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 16 | "MX17004",2010,8,"tmax",NA,NA,NA,NA,29.6,NA,NA,29,NA,NA,NA,NA,29.8,NA,NA,NA,NA,NA,NA,NA,NA,NA,26.4,NA,29.7,NA,NA,NA,28,NA,25.4 17 | "MX17004",2010,8,"tmin",NA,NA,NA,NA,15.8,NA,NA,17.3,NA,NA,NA,NA,16.5,NA,NA,NA,NA,NA,NA,NA,NA,NA,15,NA,15.6,NA,NA,NA,15.3,NA,15.4 18 | "MX17004",2010,10,"tmax",NA,NA,NA,NA,27,NA,28.1,NA,NA,NA,NA,NA,NA,29.5,28.7,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,31.2,NA,NA,NA 19 | "MX17004",2010,10,"tmin",NA,NA,NA,NA,14,NA,12.9,NA,NA,NA,NA,NA,NA,13,10.5,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,15,NA,NA,NA 20 | "MX17004",2010,11,"tmax",NA,31.3,NA,27.2,26.3,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,28.1,27.7,NA,NA,NA,NA 21 | "MX17004",2010,11,"tmin",NA,16.3,NA,12,7.9,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,12.1,14.2,NA,NA,NA,NA 22 | "MX17004",2010,12,"tmax",29.9,NA,NA,NA,NA,27.8,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 23 | "MX17004",2010,12,"tmin",13.8,NA,NA,NA,NA,10.5,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 24 | -------------------------------------------------------------------------------- /notebooks/.gitkeep: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /notebooks/03-apply.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "def my_function(x, y):\n", 10 | " pass" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 4, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "def my_sq(x):\n", 20 | " return x ** 2" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 5, 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "data": { 30 | "text/plain": [ 31 | "4" 32 | ] 33 | }, 34 | "execution_count": 5, 35 | "metadata": {}, 36 | "output_type": "execute_result" 37 | } 38 | ], 39 | "source": [ 40 | "my_sq(2)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 6, 46 | "metadata": {}, 47 | "outputs": [ 48 | { 49 | "data": { 50 | "text/plain": [ 51 | "16" 52 | ] 53 | }, 54 | "execution_count": 6, 55 | "metadata": {}, 56 | "output_type": "execute_result" 57 | } 58 | ], 59 | "source": [ 60 | "my_sq(4)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 11, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "assert my_sq(4) == 16" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 12, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "def avg_2(x, y):\n", 79 | " return (x + y) / 2" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 13, 85 | "metadata": {}, 86 | "outputs": [ 87 | { 88 | "data": { 89 | "text/plain": [ 90 | "15.0" 91 | ] 92 | }, 93 | "execution_count": 13, 94 | "metadata": {}, 95 | "output_type": "execute_result" 96 | } 97 | ], 98 | "source": [ 99 | "avg_2(10, 20)" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 18, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "import pandas as pd" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 19, 114 | 
"metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "df = pd.DataFrame({\n", 118 | " 'a': [10, 20, 30],\n", 119 | " 'b': [20, 30, 40]\n", 120 | "})" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 20, 126 | "metadata": {}, 127 | "outputs": [ 128 | { 129 | "data": { 130 | "text/html": [ 131 | "
\n", 132 | "\n", 145 | "\n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | "
ab
01020
12030
23040
\n", 171 | "
" 172 | ], 173 | "text/plain": [ 174 | " a b\n", 175 | "0 10 20\n", 176 | "1 20 30\n", 177 | "2 30 40" 178 | ] 179 | }, 180 | "execution_count": 20, 181 | "metadata": {}, 182 | "output_type": "execute_result" 183 | } 184 | ], 185 | "source": [ 186 | "df" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 22, 192 | "metadata": {}, 193 | "outputs": [ 194 | { 195 | "data": { 196 | "text/plain": [ 197 | "0 100\n", 198 | "1 400\n", 199 | "2 900\n", 200 | "Name: a, dtype: int64" 201 | ] 202 | }, 203 | "execution_count": 22, 204 | "metadata": {}, 205 | "output_type": "execute_result" 206 | } 207 | ], 208 | "source": [ 209 | "df['a'] ** 2" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 23, 215 | "metadata": {}, 216 | "outputs": [ 217 | { 218 | "data": { 219 | "text/plain": [ 220 | "" 221 | ] 222 | }, 223 | "execution_count": 23, 224 | "metadata": {}, 225 | "output_type": "execute_result" 226 | } 227 | ], 228 | "source": [ 229 | "my_sq" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": 24, 235 | "metadata": {}, 236 | "outputs": [ 237 | { 238 | "data": { 239 | "text/plain": [ 240 | "0 100\n", 241 | "1 400\n", 242 | "2 900\n", 243 | "Name: a, dtype: int64" 244 | ] 245 | }, 246 | "execution_count": 24, 247 | "metadata": {}, 248 | "output_type": "execute_result" 249 | } 250 | ], 251 | "source": [ 252 | "df['a'].apply(my_sq)" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": 25, 258 | "metadata": {}, 259 | "outputs": [], 260 | "source": [ 261 | "def my_exp(x, e):\n", 262 | " return x ** e" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 26, 268 | "metadata": {}, 269 | "outputs": [ 270 | { 271 | "data": { 272 | "text/plain": [ 273 | "1024" 274 | ] 275 | }, 276 | "execution_count": 26, 277 | "metadata": {}, 278 | "output_type": "execute_result" 279 | } 280 | ], 281 | "source": [ 282 | "my_exp(2, 10)" 283 | ] 284 | }, 285 | { 286 | "cell_type": 
"code", 287 | "execution_count": 27, 288 | "metadata": {}, 289 | "outputs": [ 290 | { 291 | "data": { 292 | "text/plain": [ 293 | "0 10000\n", 294 | "1 160000\n", 295 | "2 810000\n", 296 | "Name: a, dtype: int64" 297 | ] 298 | }, 299 | "execution_count": 27, 300 | "metadata": {}, 301 | "output_type": "execute_result" 302 | } 303 | ], 304 | "source": [ 305 | "df['a'].apply(my_exp, e=4)" 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": 28, 311 | "metadata": {}, 312 | "outputs": [], 313 | "source": [ 314 | "def print_me(x):\n", 315 | " print(x)" 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": 29, 321 | "metadata": {}, 322 | "outputs": [ 323 | { 324 | "name": "stdout", 325 | "output_type": "stream", 326 | "text": [ 327 | "0 10\n", 328 | "1 20\n", 329 | "2 30\n", 330 | "Name: a, dtype: int64\n", 331 | "0 20\n", 332 | "1 30\n", 333 | "2 40\n", 334 | "Name: b, dtype: int64\n" 335 | ] 336 | }, 337 | { 338 | "data": { 339 | "text/plain": [ 340 | "a None\n", 341 | "b None\n", 342 | "dtype: object" 343 | ] 344 | }, 345 | "execution_count": 29, 346 | "metadata": {}, 347 | "output_type": "execute_result" 348 | } 349 | ], 350 | "source": [ 351 | "df.apply(print_me)" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": 32, 357 | "metadata": {}, 358 | "outputs": [ 359 | { 360 | "data": { 361 | "text/html": [ 362 | "
\n", 363 | "\n", 376 | "\n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | "
ab
01020
12030
23040
\n", 402 | "
" 403 | ], 404 | "text/plain": [ 405 | " a b\n", 406 | "0 10 20\n", 407 | "1 20 30\n", 408 | "2 30 40" 409 | ] 410 | }, 411 | "execution_count": 32, 412 | "metadata": {}, 413 | "output_type": "execute_result" 414 | } 415 | ], 416 | "source": [ 417 | "df" 418 | ] 419 | }, 420 | { 421 | "cell_type": "code", 422 | "execution_count": 30, 423 | "metadata": {}, 424 | "outputs": [], 425 | "source": [ 426 | "def avg_3(x, y, z):\n", 427 | " return (x + y + z) / 3" 428 | ] 429 | }, 430 | { 431 | "cell_type": "code", 432 | "execution_count": 31, 433 | "metadata": {}, 434 | "outputs": [ 435 | { 436 | "ename": "TypeError", 437 | "evalue": "(\"avg_3() missing 2 required positional arguments: 'y' and 'z'\", 'occurred at index a')", 438 | "output_type": "error", 439 | "traceback": [ 440 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 441 | "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", 442 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mavg_3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 443 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)\u001b[0m\n\u001b[1;32m 6485\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6486\u001b[0m kwds=kwds)\n\u001b[0;32m-> 6487\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mop\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6488\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6489\u001b[0m 
\u001b[0;32mdef\u001b[0m \u001b[0mapplymap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 444 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mget_result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 149\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_raw\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 150\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 151\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_standard\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 152\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 153\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mapply_empty_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 445 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mapply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 255\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 256\u001b[0m \u001b[0;31m# compute the result using the series generator\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 257\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_series_generator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 258\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 259\u001b[0m \u001b[0;31m# wrap results\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 446 | 
"\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mapply_series_generator\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 284\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 285\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mseries_gen\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 286\u001b[0;31m \u001b[0mresults\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 287\u001b[0m \u001b[0mkeys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 288\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 447 | "\u001b[0;31mTypeError\u001b[0m: (\"avg_3() missing 2 required positional arguments: 'y' and 'z'\", 'occurred at index a')" 448 | ] 449 | } 450 | ], 451 | "source": [ 452 | "df.apply(avg_3)" 453 | ] 454 | }, 455 | { 456 | "cell_type": "code", 457 | "execution_count": 33, 458 | "metadata": {}, 459 | "outputs": [], 460 | "source": [ 461 | "import numpy as np" 462 | ] 463 | }, 464 | { 465 | "cell_type": "code", 466 | "execution_count": 34, 467 | "metadata": {}, 468 | "outputs": [], 469 | "source": [ 470 | "def avg_3_apply(col):\n", 471 | " return np.mean(col)" 472 | ] 473 | }, 474 | { 475 | "cell_type": "code", 476 
| "execution_count": 35, 477 | "metadata": {}, 478 | "outputs": [ 479 | { 480 | "data": { 481 | "text/plain": [ 482 | "a 20.0\n", 483 | "b 30.0\n", 484 | "dtype: float64" 485 | ] 486 | }, 487 | "execution_count": 35, 488 | "metadata": {}, 489 | "output_type": "execute_result" 490 | } 491 | ], 492 | "source": [ 493 | "df.apply(avg_3_apply)" 494 | ] 495 | }, 496 | { 497 | "cell_type": "code", 498 | "execution_count": 40, 499 | "metadata": {}, 500 | "outputs": [ 501 | { 502 | "data": { 503 | "text/html": [ 504 | "
\n", 505 | "\n", 518 | "\n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | "
ab
01020
12030
23040
\n", 544 | "
" 545 | ], 546 | "text/plain": [ 547 | " a b\n", 548 | "0 10 20\n", 549 | "1 20 30\n", 550 | "2 30 40" 551 | ] 552 | }, 553 | "execution_count": 40, 554 | "metadata": {}, 555 | "output_type": "execute_result" 556 | } 557 | ], 558 | "source": [ 559 | "df" 560 | ] 561 | }, 562 | { 563 | "cell_type": "code", 564 | "execution_count": 37, 565 | "metadata": {}, 566 | "outputs": [], 567 | "source": [ 568 | "def avg_3_apply(col):\n", 569 | " x = col[0]\n", 570 | " y = col[1]\n", 571 | " z = col[2]\n", 572 | " return (x + y + z) / 3" 573 | ] 574 | }, 575 | { 576 | "cell_type": "code", 577 | "execution_count": 38, 578 | "metadata": {}, 579 | "outputs": [ 580 | { 581 | "data": { 582 | "text/plain": [ 583 | "a 20.0\n", 584 | "b 30.0\n", 585 | "dtype: float64" 586 | ] 587 | }, 588 | "execution_count": 38, 589 | "metadata": {}, 590 | "output_type": "execute_result" 591 | } 592 | ], 593 | "source": [ 594 | "df.apply(avg_3_apply)" 595 | ] 596 | }, 597 | { 598 | "cell_type": "code", 599 | "execution_count": 39, 600 | "metadata": {}, 601 | "outputs": [ 602 | { 603 | "ename": "IndexError", 604 | "evalue": "('index out of bounds', 'occurred at index 0')", 605 | "output_type": "error", 606 | "traceback": [ 607 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 608 | "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", 609 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_value\u001b[0;34m(self, series, key)\u001b[0m\n\u001b[1;32m 4374\u001b[0m return self._engine.get_value(s, k,\n\u001b[0;32m-> 4375\u001b[0;31m tz=getattr(series.dtype, 'tz', None))\n\u001b[0m\u001b[1;32m 4376\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 610 | "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in 
\u001b[0;36mpandas._libs.index.IndexEngine.get_value\u001b[0;34m()\u001b[0m\n", 611 | "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_value\u001b[0;34m()\u001b[0m\n", 612 | "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", 613 | "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", 614 | "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", 615 | "\u001b[0;31mKeyError\u001b[0m: 2", 616 | "\nDuring handling of the above exception, another exception occurred:\n", 617 | "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", 618 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mavg_3_apply\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'columns'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 619 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)\u001b[0m\n\u001b[1;32m 6485\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6486\u001b[0m kwds=kwds)\n\u001b[0;32m-> 6487\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mop\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6488\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6489\u001b[0m 
\u001b[0;32mdef\u001b[0m \u001b[0mapplymap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 620 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mget_result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 149\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_raw\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 150\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 151\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_standard\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 152\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 153\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mapply_empty_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 621 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mapply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 255\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 256\u001b[0m \u001b[0;31m# compute the result using the series generator\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 257\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_series_generator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 258\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 259\u001b[0m \u001b[0;31m# wrap results\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 622 | 
"\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mapply_series_generator\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 284\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 285\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mseries_gen\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 286\u001b[0;31m \u001b[0mresults\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 287\u001b[0m \u001b[0mkeys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 288\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 623 | "\u001b[0;32m\u001b[0m in \u001b[0;36mavg_3_apply\u001b[0;34m(col)\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcol\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcol\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mz\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mcol\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mz\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 624 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 866\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_if_callable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 867\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 868\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_value\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 869\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 870\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mis_scalar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 625 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_value\u001b[0;34m(self, series, key)\u001b[0m\n\u001b[1;32m 4379\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4380\u001b[0m 
\u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4381\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mlibindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_value_box\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4382\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mIndexError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4383\u001b[0m \u001b[0;32mraise\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 626 | "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.get_value_box\u001b[0;34m()\u001b[0m\n", 627 | "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.get_value_at\u001b[0;34m()\u001b[0m\n", 628 | "\u001b[0;32mpandas/_libs/util.pxd\u001b[0m in \u001b[0;36mpandas._libs.util.get_value_at\u001b[0;34m()\u001b[0m\n", 629 | "\u001b[0;32mpandas/_libs/util.pxd\u001b[0m in \u001b[0;36mpandas._libs.util.validate_indexer\u001b[0;34m()\u001b[0m\n", 630 | "\u001b[0;31mIndexError\u001b[0m: ('index out of bounds', 'occurred at index 0')" 631 | ] 632 | } 633 | ], 634 | "source": [ 635 | "df.apply(avg_3_apply, axis='columns')" 636 | ] 637 | }, 638 | { 639 | "cell_type": "code", 640 | "execution_count": 41, 641 | "metadata": {}, 642 | "outputs": [ 643 | { 644 | "data": { 645 | "text/plain": [ 646 | "20.0" 647 | ] 648 | }, 649 | "execution_count": 41, 650 | "metadata": {}, 651 | "output_type": "execute_result" 652 | } 653 | ], 654 | "source": [ 655 | "df['a'].mean()" 656 | ] 657 | }, 658 | { 659 | "cell_type": "code", 660 | "execution_count": 42, 661 | "metadata": {}, 662 | "outputs": [ 663 | { 664 | "data": { 665 | "text/plain": [ 666 | "0 30\n", 667 | "1 50\n", 668 | "2 70\n", 669 | "dtype: int64" 670 | ] 671 | }, 672 | "execution_count": 42, 673 | "metadata": 
{}, 674 | "output_type": "execute_result" 675 | } 676 | ], 677 | "source": [ 678 | "df['a'] + df['b']" 679 | ] 680 | }, 681 | { 682 | "cell_type": "code", 683 | "execution_count": 45, 684 | "metadata": {}, 685 | "outputs": [], 686 | "source": [ 687 | "def avg_2_mod(x, y):\n", 688 | " if (x == 20):\n", 689 | " return np.NaN #np.NAN np.nan\n", 690 | " else:\n", 691 | " return(x + y) / 2" 692 | ] 693 | }, 694 | { 695 | "cell_type": "code", 696 | "execution_count": 46, 697 | "metadata": {}, 698 | "outputs": [ 699 | { 700 | "data": { 701 | "text/html": [ 702 | "
\n", 703 | "\n", 716 | "\n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | "
ab
01020
12030
23040
\n", 742 | "
" 743 | ], 744 | "text/plain": [ 745 | " a b\n", 746 | "0 10 20\n", 747 | "1 20 30\n", 748 | "2 30 40" 749 | ] 750 | }, 751 | "execution_count": 46, 752 | "metadata": {}, 753 | "output_type": "execute_result" 754 | } 755 | ], 756 | "source": [ 757 | "df" 758 | ] 759 | }, 760 | { 761 | "cell_type": "code", 762 | "execution_count": 47, 763 | "metadata": {}, 764 | "outputs": [ 765 | { 766 | "ename": "ValueError", 767 | "evalue": "The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().", 768 | "output_type": "error", 769 | "traceback": [ 770 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 771 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", 772 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mavg_2_mod\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'a'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'b'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 773 | "\u001b[0;32m\u001b[0m in \u001b[0;36mavg_2_mod\u001b[0;34m(x, y)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mavg_2_mod\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m20\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mNaN\u001b[0m \u001b[0;31m#np.NAN np.nan\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 
4\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mreturn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 774 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m__nonzero__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1476\u001b[0m raise ValueError(\"The truth value of a {0} is ambiguous. \"\n\u001b[1;32m 1477\u001b[0m \u001b[0;34m\"Use a.empty, a.bool(), a.item(), a.any() or a.all().\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1478\u001b[0;31m .format(self.__class__.__name__))\n\u001b[0m\u001b[1;32m 1479\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1480\u001b[0m \u001b[0m__bool__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m__nonzero__\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 775 | "\u001b[0;31mValueError\u001b[0m: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all()." 776 | ] 777 | } 778 | ], 779 | "source": [ 780 | "avg_2_mod(df['a'], df['b'])" 781 | ] 782 | }, 783 | { 784 | "cell_type": "code", 785 | "execution_count": 48, 786 | "metadata": {}, 787 | "outputs": [], 788 | "source": [ 789 | "import numpy as np" 790 | ] 791 | }, 792 | { 793 | "cell_type": "code", 794 | "execution_count": 49, 795 | "metadata": {}, 796 | "outputs": [], 797 | "source": [ 798 | "avg_2_mod_vec = np.vectorize(avg_2_mod)" 799 | ] 800 | }, 801 | { 802 | "cell_type": "code", 803 | "execution_count": 51, 804 | "metadata": {}, 805 | "outputs": [ 806 | { 807 | "data": { 808 | "text/html": [ 809 | "
\n", 810 | "\n", 823 | "\n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | "
ab
01020
12030
23040
\n", 849 | "
" 850 | ], 851 | "text/plain": [ 852 | " a b\n", 853 | "0 10 20\n", 854 | "1 20 30\n", 855 | "2 30 40" 856 | ] 857 | }, 858 | "execution_count": 51, 859 | "metadata": {}, 860 | "output_type": "execute_result" 861 | } 862 | ], 863 | "source": [ 864 | "df" 865 | ] 866 | }, 867 | { 868 | "cell_type": "code", 869 | "execution_count": 50, 870 | "metadata": {}, 871 | "outputs": [ 872 | { 873 | "data": { 874 | "text/plain": [ 875 | "array([15., nan, 35.])" 876 | ] 877 | }, 878 | "execution_count": 50, 879 | "metadata": {}, 880 | "output_type": "execute_result" 881 | } 882 | ], 883 | "source": [ 884 | "avg_2_mod_vec(df['a'], df['b'])" 885 | ] 886 | }, 887 | { 888 | "cell_type": "code", 889 | "execution_count": 53, 890 | "metadata": {}, 891 | "outputs": [], 892 | "source": [ 893 | "@np.vectorize\n", 894 | "def avg_2_mod(x, y):\n", 895 | " if (x == 20):\n", 896 | " return np.NaN #np.NAN np.nan\n", 897 | " else:\n", 898 | " return(x + y) / 2" 899 | ] 900 | }, 901 | { 902 | "cell_type": "code", 903 | "execution_count": 54, 904 | "metadata": {}, 905 | "outputs": [ 906 | { 907 | "data": { 908 | "text/plain": [ 909 | "array([15., nan, 35.])" 910 | ] 911 | }, 912 | "execution_count": 54, 913 | "metadata": {}, 914 | "output_type": "execute_result" 915 | } 916 | ], 917 | "source": [ 918 | "avg_2_mod(df['a'], df['b'])" 919 | ] 920 | }, 921 | { 922 | "cell_type": "code", 923 | "execution_count": 55, 924 | "metadata": {}, 925 | "outputs": [], 926 | "source": [ 927 | "import numba" 928 | ] 929 | }, 930 | { 931 | "cell_type": "code", 932 | "execution_count": 59, 933 | "metadata": {}, 934 | "outputs": [], 935 | "source": [ 936 | "@numba.vectorize\n", 937 | "def avg_2_mod_numba(x, y):\n", 938 | " if (x == 20):\n", 939 | " return np.NaN\n", 940 | " else:\n", 941 | " return(x + y) / 2" 942 | ] 943 | }, 944 | { 945 | "cell_type": "code", 946 | "execution_count": 60, 947 | "metadata": {}, 948 | "outputs": [ 949 | { 950 | "data": { 951 | "text/plain": [ 952 | "array([15., nan, 35.])" 953 | ] 
954 | }, 955 | "execution_count": 60, 956 | "metadata": {}, 957 | "output_type": "execute_result" 958 | } 959 | ], 960 | "source": [ 961 | "avg_2_mod_numba(df['a'].values, df['b'].values)" 962 | ] 963 | }, 964 | { 965 | "cell_type": "code", 966 | "execution_count": 62, 967 | "metadata": {}, 968 | "outputs": [ 969 | { 970 | "name": "stdout", 971 | "output_type": "stream", 972 | "text": [ 973 | "445 µs ± 7.79 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" 974 | ] 975 | } 976 | ], 977 | "source": [ 978 | "%%timeit\n", 979 | "avg_2(df['a'], df['b'])" 980 | ] 981 | }, 982 | { 983 | "cell_type": "code", 984 | "execution_count": 63, 985 | "metadata": {}, 986 | "outputs": [ 987 | { 988 | "name": "stdout", 989 | "output_type": "stream", 990 | "text": [ 991 | "211 µs ± 7.73 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" 992 | ] 993 | } 994 | ], 995 | "source": [ 996 | "%%timeit\n", 997 | "avg_2_mod(df['a'], df['b'])" 998 | ] 999 | }, 1000 | { 1001 | "cell_type": "code", 1002 | "execution_count": 64, 1003 | "metadata": {}, 1004 | "outputs": [ 1005 | { 1006 | "name": "stdout", 1007 | "output_type": "stream", 1008 | "text": [ 1009 | "8.01 µs ± 226 ns per loop (mean ± std. dev. 
of 7 runs, 100000 loops each)\n" 1010 | ] 1011 | } 1012 | ], 1013 | "source": [ 1014 | "%%timeit\n", 1015 | "avg_2_mod_numba(df['a'].values, df['b'].values)" 1016 | ] 1017 | }, 1018 | { 1019 | "cell_type": "code", 1020 | "execution_count": null, 1021 | "metadata": {}, 1022 | "outputs": [], 1023 | "source": [] 1024 | } 1025 | ], 1026 | "metadata": { 1027 | "kernelspec": { 1028 | "display_name": "Python 3", 1029 | "language": "python", 1030 | "name": "python3" 1031 | }, 1032 | "language_info": { 1033 | "codemirror_mode": { 1034 | "name": "ipython", 1035 | "version": 3 1036 | }, 1037 | "file_extension": ".py", 1038 | "mimetype": "text/x-python", 1039 | "name": "python", 1040 | "nbconvert_exporter": "python", 1041 | "pygments_lexer": "ipython3", 1042 | "version": "3.7.3" 1043 | } 1044 | }, 1045 | "nbformat": 4, 1046 | "nbformat_minor": 2 1047 | } 1048 | -------------------------------------------------------------------------------- /notebooks/05-model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import seaborn as sns\n", 11 | "from sklearn import linear_model" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 4, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "tips = sns.load_dataset('tips')" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 5, 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "data": { 30 | "text/html": [ 31 | "
\n", 32 | "\n", 45 | "\n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | "
total_billtipsexsmokerdaytimesize
016.991.01FemaleNoSunDinner2
110.341.66MaleNoSunDinner3
221.013.50MaleNoSunDinner3
323.683.31MaleNoSunDinner2
424.593.61FemaleNoSunDinner4
\n", 111 | "
" 112 | ], 113 | "text/plain": [ 114 | " total_bill tip sex smoker day time size\n", 115 | "0 16.99 1.01 Female No Sun Dinner 2\n", 116 | "1 10.34 1.66 Male No Sun Dinner 3\n", 117 | "2 21.01 3.50 Male No Sun Dinner 3\n", 118 | "3 23.68 3.31 Male No Sun Dinner 2\n", 119 | "4 24.59 3.61 Female No Sun Dinner 4" 120 | ] 121 | }, 122 | "execution_count": 5, 123 | "metadata": {}, 124 | "output_type": "execute_result" 125 | } 126 | ], 127 | "source": [ 128 | "tips.head()" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 6, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "lr = linear_model.LinearRegression()" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 11, 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "data": { 147 | "text/plain": [ 148 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n", 149 | " normalize=False)" 150 | ] 151 | }, 152 | "execution_count": 11, 153 | "metadata": {}, 154 | "output_type": "execute_result" 155 | } 156 | ], 157 | "source": [ 158 | "lr.fit(X=tips[['total_bill', 'size']], y=tips['tip'])" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 8, 164 | "metadata": {}, 165 | "outputs": [ 166 | { 167 | "data": { 168 | "text/plain": [ 169 | "array([0.09271334, 0.19259779])" 170 | ] 171 | }, 172 | "execution_count": 8, 173 | "metadata": {}, 174 | "output_type": "execute_result" 175 | } 176 | ], 177 | "source": [ 178 | "lr.coef_" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 9, 184 | "metadata": {}, 185 | "outputs": [ 186 | { 187 | "data": { 188 | "text/plain": [ 189 | "0.6689447408125027" 190 | ] 191 | }, 192 | "execution_count": 9, 193 | "metadata": {}, 194 | "output_type": "execute_result" 195 | } 196 | ], 197 | "source": [ 198 | "lr.intercept_" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 12, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "# 
dummy encoding\n", 208 | "# one-hot encoding" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 16, 214 | "metadata": {}, 215 | "outputs": [ 216 | { 217 | "data": { 218 | "text/html": [ 219 | "
\n", 220 | "\n", 233 | "\n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | "
total_billtipsizesex_Femalesmoker_Noday_Friday_Satday_Suntime_Dinner
016.991.012110011
110.341.663010011
221.013.503010011
323.683.312010011
424.593.614110011
\n", 311 | "
" 312 | ], 313 | "text/plain": [ 314 | " total_bill tip size sex_Female smoker_No day_Fri day_Sat day_Sun \\\n", 315 | "0 16.99 1.01 2 1 1 0 0 1 \n", 316 | "1 10.34 1.66 3 0 1 0 0 1 \n", 317 | "2 21.01 3.50 3 0 1 0 0 1 \n", 318 | "3 23.68 3.31 2 0 1 0 0 1 \n", 319 | "4 24.59 3.61 4 1 1 0 0 1 \n", 320 | "\n", 321 | " time_Dinner \n", 322 | "0 1 \n", 323 | "1 1 \n", 324 | "2 1 \n", 325 | "3 1 \n", 326 | "4 1 " 327 | ] 328 | }, 329 | "execution_count": 16, 330 | "metadata": {}, 331 | "output_type": "execute_result" 332 | } 333 | ], 334 | "source": [ 335 | "tips_dummy = pd.get_dummies(tips, drop_first=True)\n", 336 | "tips_dummy.head()" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": 19, 342 | "metadata": {}, 343 | "outputs": [ 344 | { 345 | "data": { 346 | "text/plain": [ 347 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n", 348 | " normalize=False)" 349 | ] 350 | }, 351 | "execution_count": 19, 352 | "metadata": {}, 353 | "output_type": "execute_result" 354 | } 355 | ], 356 | "source": [ 357 | "lr = linear_model.LinearRegression()\n", 358 | "lr.fit(X=tips_dummy.iloc[:, 2:], y=tips_dummy['tip'])" 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": 20, 364 | "metadata": {}, 365 | "outputs": [ 366 | { 367 | "data": { 368 | "text/plain": [ 369 | "array([ 0.71001644, -0.10057881, -0.20916402, -0.20180568, -0.36603136,\n", 370 | " -0.29452609, 0.48575489])" 371 | ] 372 | }, 373 | "execution_count": 20, 374 | "metadata": {}, 375 | "output_type": "execute_result" 376 | } 377 | ], 378 | "source": [ 379 | "lr.coef_" 380 | ] 381 | }, 382 | { 383 | "cell_type": "code", 384 | "execution_count": null, 385 | "metadata": {}, 386 | "outputs": [], 387 | "source": [] 388 | } 389 | ], 390 | "metadata": { 391 | "kernelspec": { 392 | "display_name": "Python 3", 393 | "language": "python", 394 | "name": "python3" 395 | }, 396 | "language_info": { 397 | "codemirror_mode": { 398 | "name": "ipython", 399 | "version": 3 
400 | }, 401 | "file_extension": ".py", 402 | "mimetype": "text/x-python", 403 | "name": "python", 404 | "nbconvert_exporter": "python", 405 | "pygments_lexer": "ipython3", 406 | "version": "3.7.3" 407 | } 408 | }, 409 | "nbformat": 4, 410 | "nbformat_minor": 2 411 | } 412 | -------------------------------------------------------------------------------- /notes/03-apply.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "source": [ 6 | "# writing a python function" 7 | ], 8 | "outputs": [], 9 | "execution_count": 1, 10 | "metadata": {} 11 | }, 12 | { 13 | "cell_type": "code", 14 | "source": [ 15 | "def my_function():\n", 16 | " pass" 17 | ], 18 | "outputs": [], 19 | "execution_count": 2, 20 | "metadata": {} 21 | }, 22 | { 23 | "cell_type": "code", 24 | "source": [ 25 | "def my_sq(x):\n", 26 | " return x ** 2" 27 | ], 28 | "outputs": [], 29 | "execution_count": 3, 30 | "metadata": {} 31 | }, 32 | { 33 | "cell_type": "code", 34 | "source": [ 35 | "my_sq(4)" 36 | ], 37 | "outputs": [ 38 | { 39 | "output_type": "execute_result", 40 | "execution_count": 4, 41 | "data": { 42 | "text/plain": [ 43 | "16" 44 | ] 45 | }, 46 | "metadata": {} 47 | } 48 | ], 49 | "execution_count": 4, 50 | "metadata": {} 51 | }, 52 | { 53 | "cell_type": "code", 54 | "source": [ 55 | "assert my_sq(4) == 16" 56 | ], 57 | "outputs": [], 58 | "execution_count": 5, 59 | "metadata": {} 60 | }, 61 | { 62 | "cell_type": "code", 63 | "source": [ 64 | "def avg_2(x, y):\n", 65 | " return (x + y) / 2" 66 | ], 67 | "outputs": [], 68 | "execution_count": 6, 69 | "metadata": {} 70 | }, 71 | { 72 | "cell_type": "code", 73 | "source": [ 74 | "avg_2(10, 20)" 75 | ], 76 | "outputs": [ 77 | { 78 | "output_type": "execute_result", 79 | "execution_count": 7, 80 | "data": { 81 | "text/plain": [ 82 | "15.0" 83 | ] 84 | }, 85 | "metadata": {} 86 | } 87 | ], 88 | "execution_count": 7, 89 | "metadata": {} 90 | }, 91 | { 92 | 
"cell_type": "code", 93 | "source": [ 94 | "import pandas as pd" 95 | ], 96 | "outputs": [], 97 | "execution_count": 8, 98 | "metadata": {} 99 | }, 100 | { 101 | "cell_type": "code", 102 | "source": [ 103 | "df = pd.DataFrame({\n", 104 | " 'a': [10, 20, 30],\n", 105 | " 'b': [20, 30, 40]\n", 106 | "})" 107 | ], 108 | "outputs": [], 109 | "execution_count": 9, 110 | "metadata": {} 111 | }, 112 | { 113 | "cell_type": "code", 114 | "source": [ 115 | "df['a'] ** 2" 116 | ], 117 | "outputs": [ 118 | { 119 | "output_type": "execute_result", 120 | "execution_count": 10, 121 | "data": { 122 | "text/plain": [ 123 | "0 100\n", 124 | "1 400\n", 125 | "2 900\n", 126 | "Name: a, dtype: int64" 127 | ] 128 | }, 129 | "metadata": {} 130 | } 131 | ], 132 | "execution_count": 10, 133 | "metadata": {} 134 | }, 135 | { 136 | "cell_type": "code", 137 | "source": [ 138 | "df['a'].apply(my_sq)" 139 | ], 140 | "outputs": [ 141 | { 142 | "output_type": "execute_result", 143 | "execution_count": 11, 144 | "data": { 145 | "text/plain": [ 146 | "0 100\n", 147 | "1 400\n", 148 | "2 900\n", 149 | "Name: a, dtype: int64" 150 | ] 151 | }, 152 | "metadata": {} 153 | } 154 | ], 155 | "execution_count": 11, 156 | "metadata": {} 157 | }, 158 | { 159 | "cell_type": "code", 160 | "source": [ 161 | "def my_exp(x, e):\n", 162 | " return x ** e" 163 | ], 164 | "outputs": [], 165 | "execution_count": 12, 166 | "metadata": {} 167 | }, 168 | { 169 | "cell_type": "code", 170 | "source": [ 171 | "my_exp(4, 2)" 172 | ], 173 | "outputs": [ 174 | { 175 | "output_type": "execute_result", 176 | "execution_count": 13, 177 | "data": { 178 | "text/plain": [ 179 | "16" 180 | ] 181 | }, 182 | "metadata": {} 183 | } 184 | ], 185 | "execution_count": 13, 186 | "metadata": {} 187 | }, 188 | { 189 | "cell_type": "code", 190 | "source": [ 191 | "my_exp(4, 3)" 192 | ], 193 | "outputs": [ 194 | { 195 | "output_type": "execute_result", 196 | "execution_count": 14, 197 | "data": { 198 | "text/plain": [ 199 | "64" 200 | ] 201 | 
}, 202 | "metadata": {} 203 | } 204 | ], 205 | "execution_count": 14, 206 | "metadata": {} 207 | }, 208 | { 209 | "cell_type": "code", 210 | "source": [ 211 | "df['a'].apply(my_exp, e=4)" 212 | ], 213 | "outputs": [ 214 | { 215 | "output_type": "execute_result", 216 | "execution_count": 15, 217 | "data": { 218 | "text/plain": [ 219 | "0 10000\n", 220 | "1 160000\n", 221 | "2 810000\n", 222 | "Name: a, dtype: int64" 223 | ] 224 | }, 225 | "metadata": {} 226 | } 227 | ], 228 | "execution_count": 15, 229 | "metadata": {} 230 | }, 231 | { 232 | "cell_type": "code", 233 | "source": [ 234 | "def print_me(x):\n", 235 | " print(x)" 236 | ], 237 | "outputs": [], 238 | "execution_count": 16, 239 | "metadata": {} 240 | }, 241 | { 242 | "cell_type": "code", 243 | "source": [ 244 | "df.apply(print_me)" 245 | ], 246 | "outputs": [ 247 | { 248 | "output_type": "stream", 249 | "name": "stdout", 250 | "text": [ 251 | "0 10\n", 252 | "1 20\n", 253 | "2 30\n", 254 | "Name: a, dtype: int64\n", 255 | "0 20\n", 256 | "1 30\n", 257 | "2 40\n", 258 | "Name: b, dtype: int64\n" 259 | ] 260 | }, 261 | { 262 | "output_type": "execute_result", 263 | "execution_count": 17, 264 | "data": { 265 | "text/plain": [ 266 | "a None\n", 267 | "b None\n", 268 | "dtype: object" 269 | ] 270 | }, 271 | "metadata": {} 272 | } 273 | ], 274 | "execution_count": 17, 275 | "metadata": {} 276 | }, 277 | { 278 | "cell_type": "code", 279 | "source": [ 280 | "def avg_3(x, y, z):\n", 281 | " return (x + y + z) / 3" 282 | ], 283 | "outputs": [], 284 | "execution_count": 18, 285 | "metadata": {} 286 | }, 287 | { 288 | "cell_type": "code", 289 | "source": [ 290 | "df.apply(avg_3)" 291 | ], 292 | "outputs": [ 293 | { 294 | "output_type": "error", 295 | "ename": "TypeError", 296 | "evalue": "(\"avg_3() missing 2 required positional arguments: 'y' and 'z'\", 'occurred at index a')", 297 | "traceback": [ 298 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 299 | 
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", 300 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mavg_3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 301 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)\u001b[0m\n\u001b[1;32m 6485\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6486\u001b[0m kwds=kwds)\n\u001b[0;32m-> 6487\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mop\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6488\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6489\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mapplymap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 302 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mget_result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 149\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_raw\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 150\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 151\u001b[0;31m \u001b[0;32mreturn\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_standard\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 152\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 153\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mapply_empty_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 303 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mapply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 255\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 256\u001b[0m \u001b[0;31m# compute the result using the series generator\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 257\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_series_generator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 258\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 259\u001b[0m \u001b[0;31m# wrap results\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 304 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mapply_series_generator\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 284\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 285\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mseries_gen\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 286\u001b[0;31m \u001b[0mresults\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 287\u001b[0m \u001b[0mkeys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 288\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 305 | "\u001b[0;31mTypeError\u001b[0m: (\"avg_3() missing 2 required positional arguments: 'y' and 'z'\", 'occurred at index a')" 306 | ] 307 | } 308 | ], 309 | "execution_count": 19, 310 | "metadata": {} 311 | }, 312 | { 313 | "cell_type": "code", 314 | "source": [ 315 | "import numpy as np" 316 | ], 317 | "outputs": [], 318 | "execution_count": 20, 319 | "metadata": {} 320 | }, 321 | { 322 | "cell_type": "code", 323 | "source": [ 324 | "def avg_3_apply(col):\n", 325 | " return np.mean(col)" 326 | ], 327 | "outputs": [], 328 | "execution_count": 21, 329 | "metadata": {} 330 | }, 331 | { 332 | "cell_type": "code", 333 | "source": [ 334 | "df.apply(avg_3_apply)" 335 | ], 336 | "outputs": [ 337 | { 338 | "output_type": "execute_result", 339 | "execution_count": 22, 340 | "data": { 341 | "text/plain": [ 342 | "a 20.0\n", 343 | "b 30.0\n", 344 | "dtype: float64" 345 | ] 346 | }, 347 | "metadata": {} 348 | } 349 | ], 350 | "execution_count": 22, 351 | "metadata": {} 352 | }, 353 | { 354 | "cell_type": "code", 355 | "source": [ 356 | "def avg_3_apply(col):\n", 357 | " x = col[0]\n", 358 | " y = col[1]\n", 359 | " z = col[2]\n", 360 | " return (x + y + z) / 3" 361 | ], 362 | "outputs": [], 363 | "execution_count": 23, 364 | "metadata": {} 365 | }, 366 | { 367 | "cell_type": "code", 368 | "source": [ 369 | "df.apply(avg_3_apply)" 370 | ], 
371 | "outputs": [ 372 | { 373 | "output_type": "execute_result", 374 | "execution_count": 24, 375 | "data": { 376 | "text/plain": [ 377 | "a 20.0\n", 378 | "b 30.0\n", 379 | "dtype: float64" 380 | ] 381 | }, 382 | "metadata": {} 383 | } 384 | ], 385 | "execution_count": 24, 386 | "metadata": {} 387 | }, 388 | { 389 | "cell_type": "code", 390 | "source": [ 391 | "df.apply(avg_3_apply, axis='columns')" 392 | ], 393 | "outputs": [ 394 | { 395 | "output_type": "error", 396 | "ename": "IndexError", 397 | "evalue": "('index out of bounds', 'occurred at index 0')", 398 | "traceback": [ 399 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 400 | "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", 401 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_value\u001b[0;34m(self, series, key)\u001b[0m\n\u001b[1;32m 4374\u001b[0m return self._engine.get_value(s, k,\n\u001b[0;32m-> 4375\u001b[0;31m tz=getattr(series.dtype, 'tz', None))\n\u001b[0m\u001b[1;32m 4376\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 402 | "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_value\u001b[0;34m()\u001b[0m\n", 403 | "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_value\u001b[0;34m()\u001b[0m\n", 404 | "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", 405 | "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", 406 | "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", 407 
| "\u001b[0;31mKeyError\u001b[0m: 2", 408 | "\nDuring handling of the above exception, another exception occurred:\n", 409 | "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", 410 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mavg_3_apply\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'columns'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 411 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)\u001b[0m\n\u001b[1;32m 6485\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6486\u001b[0m kwds=kwds)\n\u001b[0;32m-> 6487\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mop\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6488\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6489\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mapplymap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 412 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mget_result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 149\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_raw\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 150\u001b[0m 
\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 151\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_standard\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 152\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 153\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mapply_empty_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 413 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mapply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 255\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 256\u001b[0m \u001b[0;31m# compute the result using the series generator\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 257\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_series_generator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 258\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 259\u001b[0m \u001b[0;31m# wrap results\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 414 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mapply_series_generator\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 284\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 285\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mseries_gen\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 286\u001b[0;31m 
\u001b[0mresults\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 287\u001b[0m \u001b[0mkeys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 288\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 415 | "\u001b[0;32m\u001b[0m in \u001b[0;36mavg_3_apply\u001b[0;34m(col)\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcol\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcol\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mz\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcol\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mz\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 416 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 866\u001b[0m \u001b[0mkey\u001b[0m 
\u001b[0;34m=\u001b[0m \u001b[0mcom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_if_callable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 867\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 868\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_value\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 869\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 870\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mis_scalar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 417 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_value\u001b[0;34m(self, series, key)\u001b[0m\n\u001b[1;32m 4379\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4380\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4381\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mlibindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_value_box\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4382\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mIndexError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4383\u001b[0m \u001b[0;32mraise\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 418 | 
"\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.get_value_box\u001b[0;34m()\u001b[0m\n", 419 | "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.get_value_at\u001b[0;34m()\u001b[0m\n", 420 | "\u001b[0;32mpandas/_libs/util.pxd\u001b[0m in \u001b[0;36mpandas._libs.util.get_value_at\u001b[0;34m()\u001b[0m\n", 421 | "\u001b[0;32mpandas/_libs/util.pxd\u001b[0m in \u001b[0;36mpandas._libs.util.validate_indexer\u001b[0;34m()\u001b[0m\n", 422 | "\u001b[0;31mIndexError\u001b[0m: ('index out of bounds', 'occurred at index 0')" 423 | ] 424 | } 425 | ], 426 | "execution_count": 25, 427 | "metadata": {} 428 | }, 429 | { 430 | "cell_type": "code", 431 | "source": [ 432 | "df['a'].mean()" 433 | ], 434 | "outputs": [ 435 | { 436 | "output_type": "execute_result", 437 | "execution_count": 26, 438 | "data": { 439 | "text/plain": [ 440 | "20.0" 441 | ] 442 | }, 443 | "metadata": {} 444 | } 445 | ], 446 | "execution_count": 26, 447 | "metadata": {} 448 | }, 449 | { 450 | "cell_type": "code", 451 | "source": [ 452 | "df['a'] + df['b']" 453 | ], 454 | "outputs": [ 455 | { 456 | "output_type": "execute_result", 457 | "execution_count": 27, 458 | "data": { 459 | "text/plain": [ 460 | "0 30\n", 461 | "1 50\n", 462 | "2 70\n", 463 | "dtype: int64" 464 | ] 465 | }, 466 | "metadata": {} 467 | } 468 | ], 469 | "execution_count": 27, 470 | "metadata": {} 471 | }, 472 | { 473 | "cell_type": "code", 474 | "source": [ 475 | "def avg_2_mod(x, y):\n", 476 | " if (x == 20):\n", 477 | " return np.NaN\n", 478 | " else:\n", 479 | " return (x + y) / 2" 480 | ], 481 | "outputs": [], 482 | "execution_count": 28, 483 | "metadata": {} 484 | }, 485 | { 486 | "cell_type": "code", 487 | "source": [ 488 | "avg_2(df['a'], df['b'])" 489 | ], 490 | "outputs": [ 491 | { 492 | "output_type": "execute_result", 493 | "execution_count": 29, 494 | "data": { 495 | "text/plain": [ 496 | "0 15.0\n", 497 | "1 25.0\n", 498 | "2 35.0\n", 499 | "dtype: float64" 500 | ] 
501 | }, 502 | "metadata": {} 503 | } 504 | ], 505 | "execution_count": 29, 506 | "metadata": {} 507 | }, 508 | { 509 | "cell_type": "code", 510 | "source": [ 511 | "avg_2_mod(df['a'], df['b'])" 512 | ], 513 | "outputs": [ 514 | { 515 | "output_type": "error", 516 | "ename": "ValueError", 517 | "evalue": "The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().", 518 | "traceback": [ 519 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 520 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", 521 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mavg_2_mod\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'a'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'b'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 522 | "\u001b[0;32m\u001b[0m in \u001b[0;36mavg_2_mod\u001b[0;34m(x, y)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mavg_2_mod\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m20\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mNaN\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mreturn\u001b[0m 
\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 523 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m__nonzero__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1476\u001b[0m raise ValueError(\"The truth value of a {0} is ambiguous. \"\n\u001b[1;32m 1477\u001b[0m \u001b[0;34m\"Use a.empty, a.bool(), a.item(), a.any() or a.all().\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1478\u001b[0;31m .format(self.__class__.__name__))\n\u001b[0m\u001b[1;32m 1479\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1480\u001b[0m \u001b[0m__bool__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m__nonzero__\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 524 | "\u001b[0;31mValueError\u001b[0m: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all()." 
525 | ] 526 | } 527 | ], 528 | "execution_count": 30, 529 | "metadata": {} 530 | }, 531 | { 532 | "cell_type": "code", 533 | "source": [ 534 | "import numpy as np" 535 | ], 536 | "outputs": [], 537 | "execution_count": 31, 538 | "metadata": {} 539 | }, 540 | { 541 | "cell_type": "code", 542 | "source": [ 543 | "avg_2_mod_vec = np.vectorize(avg_2_mod)" 544 | ], 545 | "outputs": [], 546 | "execution_count": 32, 547 | "metadata": {} 548 | }, 549 | { 550 | "cell_type": "code", 551 | "source": [ 552 | "avg_2_mod_vec(df['a'], df['b'])" 553 | ], 554 | "outputs": [ 555 | { 556 | "output_type": "execute_result", 557 | "execution_count": 33, 558 | "data": { 559 | "text/plain": [ 560 | "array([15., nan, 35.])" 561 | ] 562 | }, 563 | "metadata": {} 564 | } 565 | ], 566 | "execution_count": 33, 567 | "metadata": {} 568 | }, 569 | { 570 | "cell_type": "code", 571 | "source": [ 572 | "@np.vectorize\n", 573 | "def v_avg_2_mod(x, y):\n", 574 | " if (x == 20):\n", 575 | " return np.NaN\n", 576 | " else:\n", 577 | " return (x + y) / 2" 578 | ], 579 | "outputs": [], 580 | "execution_count": 34, 581 | "metadata": {} 582 | }, 583 | { 584 | "cell_type": "code", 585 | "source": [ 586 | "v_avg_2_mod(df['a'], df['b'])" 587 | ], 588 | "outputs": [ 589 | { 590 | "output_type": "execute_result", 591 | "execution_count": 35, 592 | "data": { 593 | "text/plain": [ 594 | "array([15., nan, 35.])" 595 | ] 596 | }, 597 | "metadata": {} 598 | } 599 | ], 600 | "execution_count": 35, 601 | "metadata": {} 602 | }, 603 | { 604 | "cell_type": "code", 605 | "source": [ 606 | "import numba" 607 | ], 608 | "outputs": [], 609 | "execution_count": 36, 610 | "metadata": {} 611 | }, 612 | { 613 | "cell_type": "code", 614 | "source": [ 615 | "@numba.vectorize\n", 616 | "def v_avg_2_mod_numba(x, y):\n", 617 | " if (x == 20):\n", 618 | " return np.NaN\n", 619 | " else:\n", 620 | " return (x + y) / 2" 621 | ], 622 | "outputs": [], 623 | "execution_count": 37, 624 | "metadata": {} 625 | }, 626 | { 627 | "cell_type": 
"code", 628 | "source": [ 629 | "v_avg_2_mod_numba(df['a'].values, df['b'].values)" 630 | ], 631 | "outputs": [ 632 | { 633 | "output_type": "execute_result", 634 | "execution_count": 38, 635 | "data": { 636 | "text/plain": [ 637 | "array([15., nan, 35.])" 638 | ] 639 | }, 640 | "metadata": {} 641 | } 642 | ], 643 | "execution_count": 38, 644 | "metadata": {} 645 | }, 646 | { 647 | "cell_type": "code", 648 | "source": [ 649 | "%%timeit\n", 650 | "avg_2(df['a'], df['b'])" 651 | ], 652 | "outputs": [ 653 | { 654 | "output_type": "stream", 655 | "name": "stdout", 656 | "text": [ 657 | "519 µs ± 34.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" 658 | ] 659 | } 660 | ], 661 | "execution_count": 39, 662 | "metadata": {} 663 | }, 664 | { 665 | "cell_type": "code", 666 | "source": [ 667 | "%%timeit\n", 668 | "v_avg_2_mod(df['a'], df['b'])" 669 | ], 670 | "outputs": [ 671 | { 672 | "output_type": "stream", 673 | "name": "stdout", 674 | "text": [ 675 | "236 µs ± 33.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" 676 | ] 677 | } 678 | ], 679 | "execution_count": 40, 680 | "metadata": {} 681 | }, 682 | { 683 | "cell_type": "code", 684 | "source": [ 685 | "%%timeit\n", 686 | "v_avg_2_mod_numba(df['a'].values, df['b'].values)" 687 | ], 688 | "outputs": [ 689 | { 690 | "output_type": "stream", 691 | "name": "stdout", 692 | "text": [ 693 | "8.28 µs ± 550 ns per loop (mean ± std. dev. 
of 7 runs, 100000 loops each)\n" 694 | ] 695 | } 696 | ], 697 | "execution_count": 41, 698 | "metadata": {} 699 | }, 700 | { 701 | "cell_type": "code", 702 | "source": [], 703 | "outputs": [], 704 | "execution_count": 42, 705 | "metadata": {} 706 | } 707 | ], 708 | "metadata": { 709 | "kernelspec": { 710 | "name": "python3", 711 | "language": "python", 712 | "display_name": "Python 3" 713 | }, 714 | "language_info": { 715 | "name": "python", 716 | "version": "3.7.3", 717 | "mimetype": "text/x-python", 718 | "codemirror_mode": { 719 | "name": "ipython", 720 | "version": 3 721 | }, 722 | "pygments_lexer": "ipython3", 723 | "nbconvert_exporter": "python", 724 | "file_extension": ".py" 725 | }, 726 | "kernel_info": { 727 | "name": "python3" 728 | }, 729 | "nteract": { 730 | "version": "0.14.3" 731 | } 732 | }, 733 | "nbformat": 4, 734 | "nbformat_minor": 2 735 | } -------------------------------------------------------------------------------- /notes/05-models.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "source": [ 6 | "import pandas as pd" 7 | ], 8 | "outputs": [], 9 | "execution_count": 1, 10 | "metadata": {} 11 | }, 12 | { 13 | "cell_type": "code", 14 | "source": [ 15 | "import seaborn as sns" 16 | ], 17 | "outputs": [], 18 | "execution_count": 2, 19 | "metadata": {} 20 | }, 21 | { 22 | "cell_type": "code", 23 | "source": [ 24 | "tips = sns.load_dataset('tips')" 25 | ], 26 | "outputs": [], 27 | "execution_count": 3, 28 | "metadata": {} 29 | }, 30 | { 31 | "cell_type": "code", 32 | "source": [ 33 | "tips.head()" 34 | ], 35 | "outputs": [ 36 | { 37 | "output_type": "execute_result", 38 | "execution_count": 4, 39 | "data": { 40 | "text/plain": [ 41 | " total_bill tip sex smoker day time size\n", 42 | "0 16.99 1.01 Female No Sun Dinner 2\n", 43 | "1 10.34 1.66 Male No Sun Dinner 3\n", 44 | "2 21.01 3.50 Male No Sun Dinner 3\n", 45 | "3 23.68 3.31 Male No Sun Dinner 2\n", 
46 | "4 24.59 3.61 Female No Sun Dinner 4" 47 | ], 48 | "text/html": [ 49 | "
\n", 50 | "\n", 63 | "\n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | "
total_billtipsexsmokerdaytimesize
016.991.01FemaleNoSunDinner2
110.341.66MaleNoSunDinner3
221.013.50MaleNoSunDinner3
323.683.31MaleNoSunDinner2
424.593.61FemaleNoSunDinner4
\n", 129 | "
" 130 | ] 131 | }, 132 | "metadata": {} 133 | } 134 | ], 135 | "execution_count": 4, 136 | "metadata": {} 137 | }, 138 | { 139 | "cell_type": "code", 140 | "source": [ 141 | "from sklearn import linear_model" 142 | ], 143 | "outputs": [], 144 | "execution_count": 5, 145 | "metadata": {} 146 | }, 147 | { 148 | "cell_type": "code", 149 | "source": [ 150 | "lr = linear_model.LinearRegression()" 151 | ], 152 | "outputs": [], 153 | "execution_count": 6, 154 | "metadata": {} 155 | }, 156 | { 157 | "cell_type": "code", 158 | "source": [ 159 | "lr.fit(X=tips['total_bill'], y=tips['tip'])" 160 | ], 161 | "outputs": [ 162 | { 163 | "output_type": "error", 164 | "ename": "ValueError", 165 | "evalue": "Expected 2D array, got 1D array instead:\narray=[16.99 10.34 21.01 23.68 24.59 25.29 8.77 26.88 15.04 14.78 10.27 35.26\n 15.42 18.43 14.83 21.58 10.33 16.29 16.97 20.65 17.92 20.29 15.77 39.42\n 19.82 17.81 13.37 12.69 21.7 19.65 9.55 18.35 15.06 20.69 17.78 24.06\n 16.31 16.93 18.69 31.27 16.04 17.46 13.94 9.68 30.4 18.29 22.23 32.4\n 28.55 18.04 12.54 10.29 34.81 9.94 25.56 19.49 38.01 26.41 11.24 48.27\n 20.29 13.81 11.02 18.29 17.59 20.08 16.45 3.07 20.23 15.01 12.02 17.07\n 26.86 25.28 14.73 10.51 17.92 27.2 22.76 17.29 19.44 16.66 10.07 32.68\n 15.98 34.83 13.03 18.28 24.71 21.16 28.97 22.49 5.75 16.32 22.75 40.17\n 27.28 12.03 21.01 12.46 11.35 15.38 44.3 22.42 20.92 15.36 20.49 25.21\n 18.24 14.31 14. 7.25 38.07 23.95 25.71 17.31 29.93 10.65 12.43 24.08\n 11.69 13.42 14.26 15.95 12.48 29.8 8.52 14.52 11.38 22.82 19.08 20.27\n 11.17 12.26 18.26 8.51 10.33 14.15 16. 13.16 17.47 34.3 41.19 27.05\n 16.43 8.35 18.64 11.87 9.78 7.51 14.07 13.13 17.26 24.55 19.77 29.85\n 48.17 25. 13.39 16.49 21.5 12.66 16.21 13.81 17.51 24.52 20.76 31.71\n 10.59 10.63 50.81 15.81 7.25 31.85 16.82 32.9 17.89 14.48 9.6 34.63\n 34.65 23.33 45.35 23.17 40.55 20.69 20.9 30.46 18.15 23.1 15.69 19.81\n 28.44 15.48 16.58 7.56 10.34 43.11 13. 13.51 18.71 12.74 13. 
16.4\n 20.53 16.47 26.59 38.73 24.27 12.76 30.06 25.89 48.33 13.27 28.17 12.9\n 28.15 11.59 7.74 30.14 12.16 13.42 8.58 15.98 13.42 16.27 10.09 20.45\n 13.28 22.12 24.01 15.69 11.61 10.77 15.53 10.07 12.6 32.83 35.83 29.03\n 27.18 22.67 17.82 18.78].\nReshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.", 166 | "traceback": [ 167 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 168 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", 169 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mlr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtips\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'total_bill'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtips\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'tip'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 170 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/base.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[1;32m 456\u001b[0m \u001b[0mn_jobs_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_jobs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 457\u001b[0m X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],\n\u001b[0;32m--> 458\u001b[0;31m y_numeric=True, multi_output=True)\n\u001b[0m\u001b[1;32m 459\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 460\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0msample_weight\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mand\u001b[0m 
\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0matleast_1d\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msample_weight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 171 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_X_y\u001b[0;34m(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)\u001b[0m\n\u001b[1;32m 754\u001b[0m \u001b[0mensure_min_features\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mensure_min_features\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 755\u001b[0m \u001b[0mwarn_on_dtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mwarn_on_dtype\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 756\u001b[0;31m estimator=estimator)\n\u001b[0m\u001b[1;32m 757\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmulti_output\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 758\u001b[0m y = check_array(y, 'csr', force_all_finite=True, ensure_2d=False,\n", 172 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_array\u001b[0;34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)\u001b[0m\n\u001b[1;32m 550\u001b[0m \u001b[0;34m\"Reshape your data either using array.reshape(-1, 1) if \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 551\u001b[0m \u001b[0;34m\"your data has a single feature or array.reshape(1, -1) \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 
552\u001b[0;31m \"if it contains a single sample.\".format(array))\n\u001b[0m\u001b[1;32m 553\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 554\u001b[0m \u001b[0;31m# in the future np.flexible dtypes will be handled like object dtypes\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 173 | "\u001b[0;31mValueError\u001b[0m: Expected 2D array, got 1D array instead:\narray=[16.99 10.34 21.01 23.68 24.59 25.29 8.77 26.88 15.04 14.78 10.27 35.26\n 15.42 18.43 14.83 21.58 10.33 16.29 16.97 20.65 17.92 20.29 15.77 39.42\n 19.82 17.81 13.37 12.69 21.7 19.65 9.55 18.35 15.06 20.69 17.78 24.06\n 16.31 16.93 18.69 31.27 16.04 17.46 13.94 9.68 30.4 18.29 22.23 32.4\n 28.55 18.04 12.54 10.29 34.81 9.94 25.56 19.49 38.01 26.41 11.24 48.27\n 20.29 13.81 11.02 18.29 17.59 20.08 16.45 3.07 20.23 15.01 12.02 17.07\n 26.86 25.28 14.73 10.51 17.92 27.2 22.76 17.29 19.44 16.66 10.07 32.68\n 15.98 34.83 13.03 18.28 24.71 21.16 28.97 22.49 5.75 16.32 22.75 40.17\n 27.28 12.03 21.01 12.46 11.35 15.38 44.3 22.42 20.92 15.36 20.49 25.21\n 18.24 14.31 14. 7.25 38.07 23.95 25.71 17.31 29.93 10.65 12.43 24.08\n 11.69 13.42 14.26 15.95 12.48 29.8 8.52 14.52 11.38 22.82 19.08 20.27\n 11.17 12.26 18.26 8.51 10.33 14.15 16. 13.16 17.47 34.3 41.19 27.05\n 16.43 8.35 18.64 11.87 9.78 7.51 14.07 13.13 17.26 24.55 19.77 29.85\n 48.17 25. 13.39 16.49 21.5 12.66 16.21 13.81 17.51 24.52 20.76 31.71\n 10.59 10.63 50.81 15.81 7.25 31.85 16.82 32.9 17.89 14.48 9.6 34.63\n 34.65 23.33 45.35 23.17 40.55 20.69 20.9 30.46 18.15 23.1 15.69 19.81\n 28.44 15.48 16.58 7.56 10.34 43.11 13. 13.51 18.71 12.74 13. 
16.4\n 20.53 16.47 26.59 38.73 24.27 12.76 30.06 25.89 48.33 13.27 28.17 12.9\n 28.15 11.59 7.74 30.14 12.16 13.42 8.58 15.98 13.42 16.27 10.09 20.45\n 13.28 22.12 24.01 15.69 11.61 10.77 15.53 10.07 12.6 32.83 35.83 29.03\n 27.18 22.67 17.82 18.78].\nReshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample." 174 | ] 175 | } 176 | ], 177 | "execution_count": 7, 178 | "metadata": {} 179 | }, 180 | { 181 | "cell_type": "code", 182 | "source": [ 183 | "lr.fit(X=tips['total_bill'].values.reshape(-1, 1), y=tips['tip'])" 184 | ], 185 | "outputs": [ 186 | { 187 | "output_type": "execute_result", 188 | "execution_count": 8, 189 | "data": { 190 | "text/plain": [ 191 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n", 192 | " normalize=False)" 193 | ] 194 | }, 195 | "metadata": {} 196 | } 197 | ], 198 | "execution_count": 8, 199 | "metadata": {} 200 | }, 201 | { 202 | "cell_type": "code", 203 | "source": [ 204 | "lr.coef_" 205 | ], 206 | "outputs": [ 207 | { 208 | "output_type": "execute_result", 209 | "execution_count": 9, 210 | "data": { 211 | "text/plain": [ 212 | "array([0.10502452])" 213 | ] 214 | }, 215 | "metadata": {} 216 | } 217 | ], 218 | "execution_count": 9, 219 | "metadata": {} 220 | }, 221 | { 222 | "cell_type": "code", 223 | "source": [ 224 | "lr.intercept_" 225 | ], 226 | "outputs": [ 227 | { 228 | "output_type": "execute_result", 229 | "execution_count": 10, 230 | "data": { 231 | "text/plain": [ 232 | "0.9202696135546731" 233 | ] 234 | }, 235 | "metadata": {} 236 | } 237 | ], 238 | "execution_count": 10, 239 | "metadata": {} 240 | }, 241 | { 242 | "cell_type": "code", 243 | "source": [ 244 | "lr.fit(X=tips[['total_bill', 'size']], y=tips['tip'])" 245 | ], 246 | "outputs": [ 247 | { 248 | "output_type": "execute_result", 249 | "execution_count": 11, 250 | "data": { 251 | "text/plain": [ 252 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n", 
253 | " normalize=False)" 254 | ] 255 | }, 256 | "metadata": {} 257 | } 258 | ], 259 | "execution_count": 11, 260 | "metadata": {} 261 | }, 262 | { 263 | "cell_type": "code", 264 | "source": [ 265 | "lr.coef_" 266 | ], 267 | "outputs": [ 268 | { 269 | "output_type": "execute_result", 270 | "execution_count": 12, 271 | "data": { 272 | "text/plain": [ 273 | "array([0.09271334, 0.19259779])" 274 | ] 275 | }, 276 | "metadata": {} 277 | } 278 | ], 279 | "execution_count": 12, 280 | "metadata": {} 281 | }, 282 | { 283 | "cell_type": "code", 284 | "source": [ 285 | "tips" 286 | ], 287 | "outputs": [ 288 | { 289 | "output_type": "execute_result", 290 | "execution_count": 13, 291 | "data": { 292 | "text/plain": [ 293 | " total_bill tip sex smoker day time size\n", 294 | "0 16.99 1.01 Female No Sun Dinner 2\n", 295 | "1 10.34 1.66 Male No Sun Dinner 3\n", 296 | "2 21.01 3.50 Male No Sun Dinner 3\n", 297 | "3 23.68 3.31 Male No Sun Dinner 2\n", 298 | "4 24.59 3.61 Female No Sun Dinner 4\n", 299 | "5 25.29 4.71 Male No Sun Dinner 4\n", 300 | "6 8.77 2.00 Male No Sun Dinner 2\n", 301 | "7 26.88 3.12 Male No Sun Dinner 4\n", 302 | "8 15.04 1.96 Male No Sun Dinner 2\n", 303 | "9 14.78 3.23 Male No Sun Dinner 2\n", 304 | "10 10.27 1.71 Male No Sun Dinner 2\n", 305 | "11 35.26 5.00 Female No Sun Dinner 4\n", 306 | "12 15.42 1.57 Male No Sun Dinner 2\n", 307 | "13 18.43 3.00 Male No Sun Dinner 4\n", 308 | "14 14.83 3.02 Female No Sun Dinner 2\n", 309 | "15 21.58 3.92 Male No Sun Dinner 2\n", 310 | "16 10.33 1.67 Female No Sun Dinner 3\n", 311 | "17 16.29 3.71 Male No Sun Dinner 3\n", 312 | "18 16.97 3.50 Female No Sun Dinner 3\n", 313 | "19 20.65 3.35 Male No Sat Dinner 3\n", 314 | "20 17.92 4.08 Male No Sat Dinner 2\n", 315 | "21 20.29 2.75 Female No Sat Dinner 2\n", 316 | "22 15.77 2.23 Female No Sat Dinner 2\n", 317 | "23 39.42 7.58 Male No Sat Dinner 4\n", 318 | "24 19.82 3.18 Male No Sat Dinner 2\n", 319 | "25 17.81 2.34 Male No Sat Dinner 4\n", 320 | "26 13.37 2.00 Male No Sat 
Dinner 2\n", 321 | "27 12.69 2.00 Male No Sat Dinner 2\n", 322 | "28 21.70 4.30 Male No Sat Dinner 2\n", 323 | "29 19.65 3.00 Female No Sat Dinner 2\n", 324 | ".. ... ... ... ... ... ... ...\n", 325 | "214 28.17 6.50 Female Yes Sat Dinner 3\n", 326 | "215 12.90 1.10 Female Yes Sat Dinner 2\n", 327 | "216 28.15 3.00 Male Yes Sat Dinner 5\n", 328 | "217 11.59 1.50 Male Yes Sat Dinner 2\n", 329 | "218 7.74 1.44 Male Yes Sat Dinner 2\n", 330 | "219 30.14 3.09 Female Yes Sat Dinner 4\n", 331 | "220 12.16 2.20 Male Yes Fri Lunch 2\n", 332 | "221 13.42 3.48 Female Yes Fri Lunch 2\n", 333 | "222 8.58 1.92 Male Yes Fri Lunch 1\n", 334 | "223 15.98 3.00 Female No Fri Lunch 3\n", 335 | "224 13.42 1.58 Male Yes Fri Lunch 2\n", 336 | "225 16.27 2.50 Female Yes Fri Lunch 2\n", 337 | "226 10.09 2.00 Female Yes Fri Lunch 2\n", 338 | "227 20.45 3.00 Male No Sat Dinner 4\n", 339 | "228 13.28 2.72 Male No Sat Dinner 2\n", 340 | "229 22.12 2.88 Female Yes Sat Dinner 2\n", 341 | "230 24.01 2.00 Male Yes Sat Dinner 4\n", 342 | "231 15.69 3.00 Male Yes Sat Dinner 3\n", 343 | "232 11.61 3.39 Male No Sat Dinner 2\n", 344 | "233 10.77 1.47 Male No Sat Dinner 2\n", 345 | "234 15.53 3.00 Male Yes Sat Dinner 2\n", 346 | "235 10.07 1.25 Male No Sat Dinner 2\n", 347 | "236 12.60 1.00 Male Yes Sat Dinner 2\n", 348 | "237 32.83 1.17 Male Yes Sat Dinner 2\n", 349 | "238 35.83 4.67 Female No Sat Dinner 3\n", 350 | "239 29.03 5.92 Male No Sat Dinner 3\n", 351 | "240 27.18 2.00 Female Yes Sat Dinner 2\n", 352 | "241 22.67 2.00 Male Yes Sat Dinner 2\n", 353 | "242 17.82 1.75 Male No Sat Dinner 2\n", 354 | "243 18.78 3.00 Female No Thur Dinner 2\n", 355 | "\n", 356 | "[244 rows x 7 columns]" 357 | ], 358 | "text/html": [ 359 | "
\n", 360 | "\n", 373 | "\n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " 
\n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | 
" \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 
| " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 
987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | "
total_billtipsexsmokerdaytimesize
016.991.01FemaleNoSunDinner2
110.341.66MaleNoSunDinner3
221.013.50MaleNoSunDinner3
323.683.31MaleNoSunDinner2
424.593.61FemaleNoSunDinner4
525.294.71MaleNoSunDinner4
68.772.00MaleNoSunDinner2
726.883.12MaleNoSunDinner4
815.041.96MaleNoSunDinner2
914.783.23MaleNoSunDinner2
1010.271.71MaleNoSunDinner2
1135.265.00FemaleNoSunDinner4
1215.421.57MaleNoSunDinner2
1318.433.00MaleNoSunDinner4
1414.833.02FemaleNoSunDinner2
1521.583.92MaleNoSunDinner2
1610.331.67FemaleNoSunDinner3
1716.293.71MaleNoSunDinner3
1816.973.50FemaleNoSunDinner3
1920.653.35MaleNoSatDinner3
2017.924.08MaleNoSatDinner2
2120.292.75FemaleNoSatDinner2
2215.772.23FemaleNoSatDinner2
2339.427.58MaleNoSatDinner4
2419.823.18MaleNoSatDinner2
2517.812.34MaleNoSatDinner4
2613.372.00MaleNoSatDinner2
2712.692.00MaleNoSatDinner2
2821.704.30MaleNoSatDinner2
2919.653.00FemaleNoSatDinner2
........................
21428.176.50FemaleYesSatDinner3
21512.901.10FemaleYesSatDinner2
21628.153.00MaleYesSatDinner5
21711.591.50MaleYesSatDinner2
2187.741.44MaleYesSatDinner2
21930.143.09FemaleYesSatDinner4
22012.162.20MaleYesFriLunch2
22113.423.48FemaleYesFriLunch2
2228.581.92MaleYesFriLunch1
22315.983.00FemaleNoFriLunch3
22413.421.58MaleYesFriLunch2
22516.272.50FemaleYesFriLunch2
22610.092.00FemaleYesFriLunch2
22720.453.00MaleNoSatDinner4
22813.282.72MaleNoSatDinner2
22922.122.88FemaleYesSatDinner2
23024.012.00MaleYesSatDinner4
23115.693.00MaleYesSatDinner3
23211.613.39MaleNoSatDinner2
23310.771.47MaleNoSatDinner2
23415.533.00MaleYesSatDinner2
23510.071.25MaleNoSatDinner2
23612.601.00MaleYesSatDinner2
23732.831.17MaleYesSatDinner2
23835.834.67FemaleNoSatDinner3
23929.035.92MaleNoSatDinner3
24027.182.00FemaleYesSatDinner2
24122.672.00MaleYesSatDinner2
24217.821.75MaleNoSatDinner2
24318.783.00FemaleNoThurDinner2
\n", 999 | "

244 rows × 7 columns

\n", 1000 | "
" 1001 | ] 1002 | }, 1003 | "metadata": {} 1004 | } 1005 | ], 1006 | "execution_count": 13, 1007 | "metadata": {} 1008 | }, 1009 | { 1010 | "cell_type": "code", 1011 | "source": [ 1012 | "# dummy encoding\n", 1013 | "# one-hot encoding" 1014 | ], 1015 | "outputs": [], 1016 | "execution_count": 14, 1017 | "metadata": {} 1018 | }, 1019 | { 1020 | "cell_type": "code", 1021 | "source": [ 1022 | "lr.fit(X=tips[['total_bill', 'sex']], y=tips['tip'])" 1023 | ], 1024 | "outputs": [ 1025 | { 1026 | "output_type": "error", 1027 | "ename": "ValueError", 1028 | "evalue": "could not convert string to float: 'Female'", 1029 | "traceback": [ 1030 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 1031 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", 1032 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mlr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtips\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'total_bill'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'sex'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtips\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'tip'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 1033 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/base.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[1;32m 456\u001b[0m \u001b[0mn_jobs_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_jobs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 457\u001b[0m X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],\n\u001b[0;32m--> 458\u001b[0;31m y_numeric=True, 
multi_output=True)\n\u001b[0m\u001b[1;32m 459\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 460\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0msample_weight\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0matleast_1d\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msample_weight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 1034 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_X_y\u001b[0;34m(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)\u001b[0m\n\u001b[1;32m 754\u001b[0m \u001b[0mensure_min_features\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mensure_min_features\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 755\u001b[0m \u001b[0mwarn_on_dtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mwarn_on_dtype\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 756\u001b[0;31m estimator=estimator)\n\u001b[0m\u001b[1;32m 757\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmulti_output\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 758\u001b[0m y = check_array(y, 'csr', force_all_finite=True, ensure_2d=False,\n", 1035 | "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_array\u001b[0;34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)\u001b[0m\n\u001b[1;32m 565\u001b[0m \u001b[0;31m# make sure we actually 
converted to numeric:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 566\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdtype_numeric\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0marray\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkind\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"O\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 567\u001b[0;31m \u001b[0marray\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0marray\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfloat64\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 568\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mallow_nd\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0marray\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[0;34m>=\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 569\u001b[0m raise ValueError(\"Found array with dim %d. 
%s expected <= 2.\"\n", 1036 | "\u001b[0;31mValueError\u001b[0m: could not convert string to float: 'Female'" 1037 | ] 1038 | } 1039 | ], 1040 | "execution_count": 15, 1041 | "metadata": {} 1042 | }, 1043 | { 1044 | "cell_type": "code", 1045 | "source": [ 1046 | "tips_dummy = tips[['tip', 'total_bill', 'sex']]" 1047 | ], 1048 | "outputs": [], 1049 | "execution_count": 16, 1050 | "metadata": {} 1051 | }, 1052 | { 1053 | "cell_type": "code", 1054 | "source": [ 1055 | "tips_dummy.head()" 1056 | ], 1057 | "outputs": [ 1058 | { 1059 | "output_type": "execute_result", 1060 | "execution_count": 17, 1061 | "data": { 1062 | "text/plain": [ 1063 | " tip total_bill sex\n", 1064 | "0 1.01 16.99 Female\n", 1065 | "1 1.66 10.34 Male\n", 1066 | "2 3.50 21.01 Male\n", 1067 | "3 3.31 23.68 Male\n", 1068 | "4 3.61 24.59 Female" 1069 | ], 1070 | "text/html": [ 1071 | "
\n", 1072 | "\n", 1085 | "\n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | " \n", 1092 | " \n", 1093 | " \n", 1094 | " \n", 1095 | " \n", 1096 | " \n", 1097 | " \n", 1098 | " \n", 1099 | " \n", 1100 | " \n", 1101 | " \n", 1102 | " \n", 1103 | " \n", 1104 | " \n", 1105 | " \n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1109 | " \n", 1110 | " \n", 1111 | " \n", 1112 | " \n", 1113 | " \n", 1114 | " \n", 1115 | " \n", 1116 | " \n", 1117 | " \n", 1118 | " \n", 1119 | " \n", 1120 | " \n", 1121 | " \n", 1122 | " \n", 1123 | " \n", 1124 | " \n", 1125 | " \n", 1126 | "
tiptotal_billsex
01.0116.99Female
11.6610.34Male
23.5021.01Male
33.3123.68Male
43.6124.59Female
\n", 1127 | "
" 1128 | ] 1129 | }, 1130 | "metadata": {} 1131 | } 1132 | ], 1133 | "execution_count": 17, 1134 | "metadata": {} 1135 | }, 1136 | { 1137 | "cell_type": "code", 1138 | "source": [ 1139 | "tips_dummy = pd.get_dummies(tips_dummy, drop_first=True)" 1140 | ], 1141 | "outputs": [], 1142 | "execution_count": 18, 1143 | "metadata": {} 1144 | }, 1145 | { 1146 | "cell_type": "code", 1147 | "source": [ 1148 | "tips_dummy.head()" 1149 | ], 1150 | "outputs": [ 1151 | { 1152 | "output_type": "execute_result", 1153 | "execution_count": 19, 1154 | "data": { 1155 | "text/plain": [ 1156 | " tip total_bill sex_Female\n", 1157 | "0 1.01 16.99 1\n", 1158 | "1 1.66 10.34 0\n", 1159 | "2 3.50 21.01 0\n", 1160 | "3 3.31 23.68 0\n", 1161 | "4 3.61 24.59 1" 1162 | ], 1163 | "text/html": [ 1164 | "
\n", 1165 | "\n", 1178 | "\n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | " \n", 1186 | " \n", 1187 | " \n", 1188 | " \n", 1189 | " \n", 1190 | " \n", 1191 | " \n", 1192 | " \n", 1193 | " \n", 1194 | " \n", 1195 | " \n", 1196 | " \n", 1197 | " \n", 1198 | " \n", 1199 | " \n", 1200 | " \n", 1201 | " \n", 1202 | " \n", 1203 | " \n", 1204 | " \n", 1205 | " \n", 1206 | " \n", 1207 | " \n", 1208 | " \n", 1209 | " \n", 1210 | " \n", 1211 | " \n", 1212 | " \n", 1213 | " \n", 1214 | " \n", 1215 | " \n", 1216 | " \n", 1217 | " \n", 1218 | " \n", 1219 | "
tiptotal_billsex_Female
01.0116.991
11.6610.340
23.5021.010
33.3123.680
43.6124.591
\n", 1220 | "
" 1221 | ] 1222 | }, 1223 | "metadata": {} 1224 | } 1225 | ], 1226 | "execution_count": 19, 1227 | "metadata": {} 1228 | }, 1229 | { 1230 | "cell_type": "code", 1231 | "source": [ 1232 | "lr.fit(X=tips_dummy.iloc[:, 1:], y=tips_dummy.iloc[:, 0])" 1233 | ], 1234 | "outputs": [ 1235 | { 1236 | "output_type": "execute_result", 1237 | "execution_count": 20, 1238 | "data": { 1239 | "text/plain": [ 1240 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n", 1241 | " normalize=False)" 1242 | ] 1243 | }, 1244 | "metadata": {} 1245 | } 1246 | ], 1247 | "execution_count": 20, 1248 | "metadata": {} 1249 | }, 1250 | { 1251 | "cell_type": "code", 1252 | "source": [ 1253 | "tip_money = lr.coef_[0]" 1254 | ], 1255 | "outputs": [], 1256 | "execution_count": 21, 1257 | "metadata": {} 1258 | }, 1259 | { 1260 | "cell_type": "code", 1261 | "source": [ 1262 | "tip_money" 1263 | ], 1264 | "outputs": [ 1265 | { 1266 | "output_type": "execute_result", 1267 | "execution_count": 22, 1268 | "data": { 1269 | "text/plain": [ 1270 | "0.10523235686615456" 1271 | ] 1272 | }, 1273 | "metadata": {} 1274 | } 1275 | ], 1276 | "execution_count": 22, 1277 | "metadata": {} 1278 | }, 1279 | { 1280 | "cell_type": "code", 1281 | "source": [], 1282 | "outputs": [], 1283 | "execution_count": 24, 1284 | "metadata": {} 1285 | } 1286 | ], 1287 | "metadata": { 1288 | "kernelspec": { 1289 | "name": "python3", 1290 | "language": "python", 1291 | "display_name": "Python 3" 1292 | }, 1293 | "language_info": { 1294 | "name": "python", 1295 | "version": "3.7.3", 1296 | "mimetype": "text/x-python", 1297 | "codemirror_mode": { 1298 | "name": "ipython", 1299 | "version": 3 1300 | }, 1301 | "pygments_lexer": "ipython3", 1302 | "nbconvert_exporter": "python", 1303 | "file_extension": ".py" 1304 | }, 1305 | "kernel_info": { 1306 | "name": "python3" 1307 | }, 1308 | "nteract": { 1309 | "version": "0.14.3" 1310 | } 1311 | }, 1312 | "nbformat": 4, 1313 | "nbformat_minor": 2 1314 | } 
-------------------------------------------------------------------------------- /test_installation.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import seaborn as sns 3 | import sklearn as sk 4 | --------------------------------------------------------------------------------