├── .gitignore ├── 01_Introduction.ipynb ├── 02_Read_Create_Data.ipynb ├── 03_exploring_data.ipynb ├── 04_subsetting_data.ipynb ├── 05_aggregating_data.ipynb ├── 06_data_plotting.ipynb ├── 07_data_manipulation.ipynb ├── 08_missing_data.ipynb ├── 09_extra_tips.ipynb ├── 10_capstone_project.ipynb ├── LICENSE ├── README.md ├── data ├── wine-reviews │ ├── exercise_2.1.txt │ ├── winemag-data-130k-v2.csv │ └── winemag-data-130k-v2_tiny.json └── world-happiness │ ├── 2015.csv │ ├── 2016.csv │ └── 2017.csv ├── environment.yml └── solutions ├── 02_read_create_data_solutions.ipynb ├── 03_exploring_data_solutions.ipynb ├── 04_subsetting_data_solutions.ipynb ├── 05_aggregating_data_solutions.ipynb ├── 06_data_plotting_solutions.ipynb ├── 07_data_manipulation_solutions-Copy1.ipynb ├── 08_missing_data_solutions.ipynb ├── 09_extra_tips_solutions.ipynb └── 10_capstone_project_solutions.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /01_Introduction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Data Used\n", 8 | "The data used has been sourced from the following:\n", 9 | "- https://www.kaggle.com/zynicide/wine-reviews\n", 10 | "- https://www.kaggle.com/unsdsn/world-happiness\n", 11 | "\n", 12 | "in some cases it has been modified for size/training purposes" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "## Useful Links\n", 20 | "### [Installation](https://pandas.pydata.org/pandas-docs/stable/install.html): Official Pandas Installation Guide\n", 21 | "### [Basic Python](https://www.kaggle.com/learn/python): 
Free Python introductory course with tutorial and exercises\n", 22 | "### [Basic Jupyter Notebook](https://dzone.com/articles/getting-started-with-jupyterlab): Introductory tutorial on the use of Jupyter Lab\n", 23 | "### [Pandas cheatsheet](https://pandas.pydata.org/Pandas_Cheat_Sheet.pdf): Very useful reference guide to the main features of pandas" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "## Library version\n", 31 | "The library versions used for this tutorial (and its dependencies) are the following:\n", 32 | "\n", 33 | "The environment specifications can be found in `environment.yml`" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 1, 39 | "metadata": { 40 | "collapsed": true, 41 | "jupyter": { 42 | "outputs_hidden": true 43 | } 44 | }, 45 | "outputs": [ 46 | { 47 | "name": "stdout", 48 | "output_type": "stream", 49 | "text": [ 50 | "\n", 51 | "INSTALLED VERSIONS\n", 52 | "------------------\n", 53 | "commit : None\n", 54 | "python : 3.7.4.final.0\n", 55 | "python-bits : 64\n", 56 | "OS : Windows\n", 57 | "OS-release : 10\n", 58 | "machine : AMD64\n", 59 | "processor : Intel64 Family 6 Model 58 Stepping 9, GenuineIntel\n", 60 | "byteorder : little\n", 61 | "LC_ALL : None\n", 62 | "LANG : None\n", 63 | "LOCALE : None.None\n", 64 | "\n", 65 | "pandas : 0.25.0\n", 66 | "numpy : 1.16.4\n", 67 | "pytz : 2019.3\n", 68 | "dateutil : 2.8.1\n", 69 | "pip : 19.3.1\n", 70 | "setuptools : 41.6.0.post20191030\n", 71 | "Cython : None\n", 72 | "pytest : 5.2.4\n", 73 | "hypothesis : None\n", 74 | "sphinx : None\n", 75 | "blosc : None\n", 76 | "feather : None\n", 77 | "xlsxwriter : None\n", 78 | "lxml.etree : None\n", 79 | "html5lib : None\n", 80 | "pymysql : None\n", 81 | "psycopg2 : None\n", 82 | "jinja2 : 2.10.3\n", 83 | "IPython : 7.9.0\n", 84 | "pandas_datareader: None\n", 85 | "bs4 : 4.8.0\n", 86 | "bottleneck : 1.2.1\n", 87 | "fastparquet : None\n", 88 | "gcsfs : None\n", 89 | "lxml.etree : 
None\n", 90 | "matplotlib : 3.1.1\n", 91 | "numexpr : 2.7.0\n", 92 | "odfpy : None\n", 93 | "openpyxl : None\n", 94 | "pandas_gbq : None\n", 95 | "pyarrow : 0.15.1\n", 96 | "pytables : None\n", 97 | "s3fs : None\n", 98 | "scipy : 1.3.1\n", 99 | "sqlalchemy : None\n", 100 | "tables : None\n", 101 | "xarray : None\n", 102 | "xlrd : None\n", 103 | "xlwt : None\n", 104 | "xlsxwriter : None\n" 105 | ] 106 | } 107 | ], 108 | "source": [ 109 | "import pandas as pd\n", 110 | "pd.show_versions()" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [] 119 | } 120 | ], 121 | "metadata": { 122 | "kernelspec": { 123 | "display_name": "DataScience", 124 | "language": "python", 125 | "name": "ds_env" 126 | }, 127 | "language_info": { 128 | "codemirror_mode": { 129 | "name": "ipython", 130 | "version": 3 131 | }, 132 | "file_extension": ".py", 133 | "mimetype": "text/x-python", 134 | "name": "python", 135 | "nbconvert_exporter": "python", 136 | "pygments_lexer": "ipython3", 137 | "version": "3.7.4" 138 | } 139 | }, 140 | "nbformat": 4, 141 | "nbformat_minor": 4 142 | } 143 | -------------------------------------------------------------------------------- /02_Read_Create_Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Importing the library" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "## Reading in data\n", 24 | "To load data into python use pandas's ```read_..``` methods. 
There are plenty of read methods for different file formats.\n", 25 | "\n", 26 | "For example, loading data from Excel:\n", 27 | "```python\n", 28 | "excel_file = pd.ExcelFile('path_to_excel.xlsx')\n", 29 | "df = pd.read_excel(excel_file, 'Sheet1')\n", 30 | "```\n", 31 | "for other files try typing ```pd.read_``` and then hit the Tab key" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "**NOTE**: be careful when using the `\\` (backslash) as path separator (default in Windows), as it is used to \"escape\" characters to give them new meaning, e.g. `\\n` means new line.\n", 39 | "If you need to use backslash, put an 'r' in front of the string to have python read it \"raw\"." 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 2, 45 | "metadata": {}, 46 | "outputs": [ 47 | { 48 | "data": { 49 | "text/html": [ 50 | "
\n", 51 | "\n", 64 | "\n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | 
" \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | "
countrydescriptiondesignationpointspriceprovinceregion_1region_2taster_nametaster_twitter_handletitlevarietywinery
0ItalyAromas include tropical fruit, broom, brimston...Vulkà Bianco87NaNSicily & SardiniaEtnaNoneKerin O’Keefe@kerinokeefeNicosia 2013 Vulkà Bianco (Etna)White BlendNicosia
1PortugalThis is ripe and fruity, a wine that is smooth...Avidagos8715.0DouroNoneNoneRoger Voss@vossrogerQuinta dos Avidagos 2011 Avidagos Red (Douro)Portuguese RedQuinta dos Avidagos
10USSoft, supple plum envelopes an oaky structure ...Mountain Cuvée8719.0CaliforniaNapa ValleyNapaVirginie Boone@vbooneKirkland Signature 2011 Mountain Cuvée Caberne...Cabernet SauvignonKirkland Signature
100USFresh apple, lemon and pear flavors are accent...None8818.0New YorkFinger LakesFinger LakesAnna Lee C. IijimaNoneVentosa 2015 Pinot Gris (Finger Lakes)Pinot GrisVentosa
101USDusty mineral, smoke and struck flint lend a s...Red Oak Vineyard8720.0New YorkFinger LakesFinger LakesAnna Lee C. IijimaNoneLamoreaux Landing 2014 Red Oak Vineyard Riesli...RieslingLamoreaux Landing
102USIntensely smoky tones of struck flint and ash ...Yellow Dog Vineyard8720.0New YorkFinger LakesFinger LakesAnna Lee C. IijimaNoneLamoreaux Landing 2014 Yellow Dog Vineyard Rie...RieslingLamoreaux Landing
103ChileA bright nose with green apple and citric arom...Single Vineyard Falaris Hill8718.0Leyda ValleyNoneNoneMichael Schachner@wineschachLeyda 2015 Single Vineyard Falaris Hill Chardo...ChardonnayLeyda
104ItalyMade with 65% Sangiovese, 20% Merlot and 15% C...Nativo8716.0TuscanyToscanaNoneKerin O’Keefe@kerinokeefeMadonna Alta 2014 Nativo Red (Toscana)Red BlendMadonna Alta
105ItalyMade predominantly with Trebbiano and Malvasia...Villa Antinori8714.0TuscanyToscanaNoneKerin O’Keefe@kerinokeefeMarchesi Antinori 2015 Villa Antinori White (T...White BlendMarchesi Antinori
106ItalyA blend of Cabernet Sauvignon, Merlot, Caberne...Castiglioni8730.0TuscanyToscanaNoneKerin O’Keefe@kerinokeefeMarchesi de' Frescobaldi 2014 Castiglioni Red ...Red BlendMarchesi de' Frescobaldi
107ItalyAromas of yellow stone fruit, white spring flo...Ammiraglia Massovivo8718.0TuscanyToscanaNoneKerin O’Keefe@kerinokeefeMarchesi de' Frescobaldi 2015 Ammiraglia Masso...VermentinoMarchesi de' Frescobaldi
108USLots of spearmint, coyote mint, hot licorice, ...J.D. Hurley8726.0CaliforniaSanta Clara ValleyCentral CoastMatt Kettmann@mattkettmannMartin Ranch 2014 J.D. Hurley Zinfandel (Santa...ZinfandelMartin Ranch
109ItalyAn easy-drinking blend of Merlot, Sangiovese a...Le Volte8730.0TuscanyToscanaNoneKerin O’Keefe@kerinokeefeOrnellaia 2014 Le Volte Red (Toscana)Red BlendOrnellaia
11FranceThis is a dry wine, very spicy, with a tight, ...None8730.0AlsaceAlsaceNoneRoger Voss@vossrogerLeon Beyer 2012 Gewurztraminer (Alsace)GewürztraminerLeon Beyer
110FranceProduced from cru vines at the base of Mount B...Les Quartelets8723.0BeaujolaisBrouillyNoneRoger Voss@vossrogerPardon et Fils 2015 Les Quartelets (Brouilly)GamayPardon et Fils
\n", 326 | "
" 327 | ], 328 | "text/plain": [ 329 | " country description \\\n", 330 | "0 Italy Aromas include tropical fruit, broom, brimston... \n", 331 | "1 Portugal This is ripe and fruity, a wine that is smooth... \n", 332 | "10 US Soft, supple plum envelopes an oaky structure ... \n", 333 | "100 US Fresh apple, lemon and pear flavors are accent... \n", 334 | "101 US Dusty mineral, smoke and struck flint lend a s... \n", 335 | "102 US Intensely smoky tones of struck flint and ash ... \n", 336 | "103 Chile A bright nose with green apple and citric arom... \n", 337 | "104 Italy Made with 65% Sangiovese, 20% Merlot and 15% C... \n", 338 | "105 Italy Made predominantly with Trebbiano and Malvasia... \n", 339 | "106 Italy A blend of Cabernet Sauvignon, Merlot, Caberne... \n", 340 | "107 Italy Aromas of yellow stone fruit, white spring flo... \n", 341 | "108 US Lots of spearmint, coyote mint, hot licorice, ... \n", 342 | "109 Italy An easy-drinking blend of Merlot, Sangiovese a... \n", 343 | "11 France This is a dry wine, very spicy, with a tight, ... \n", 344 | "110 France Produced from cru vines at the base of Mount B... \n", 345 | "\n", 346 | " designation points price province \\\n", 347 | "0 Vulkà Bianco 87 NaN Sicily & Sardinia \n", 348 | "1 Avidagos 87 15.0 Douro \n", 349 | "10 Mountain Cuvée 87 19.0 California \n", 350 | "100 None 88 18.0 New York \n", 351 | "101 Red Oak Vineyard 87 20.0 New York \n", 352 | "102 Yellow Dog Vineyard 87 20.0 New York \n", 353 | "103 Single Vineyard Falaris Hill 87 18.0 Leyda Valley \n", 354 | "104 Nativo 87 16.0 Tuscany \n", 355 | "105 Villa Antinori 87 14.0 Tuscany \n", 356 | "106 Castiglioni 87 30.0 Tuscany \n", 357 | "107 Ammiraglia Massovivo 87 18.0 Tuscany \n", 358 | "108 J.D. 
Hurley 87 26.0 California \n", 359 | "109 Le Volte 87 30.0 Tuscany \n", 360 | "11 None 87 30.0 Alsace \n", 361 | "110 Les Quartelets 87 23.0 Beaujolais \n", 362 | "\n", 363 | " region_1 region_2 taster_name \\\n", 364 | "0 Etna None Kerin O’Keefe \n", 365 | "1 None None Roger Voss \n", 366 | "10 Napa Valley Napa Virginie Boone \n", 367 | "100 Finger Lakes Finger Lakes Anna Lee C. Iijima \n", 368 | "101 Finger Lakes Finger Lakes Anna Lee C. Iijima \n", 369 | "102 Finger Lakes Finger Lakes Anna Lee C. Iijima \n", 370 | "103 None None Michael Schachner \n", 371 | "104 Toscana None Kerin O’Keefe \n", 372 | "105 Toscana None Kerin O’Keefe \n", 373 | "106 Toscana None Kerin O’Keefe \n", 374 | "107 Toscana None Kerin O’Keefe \n", 375 | "108 Santa Clara Valley Central Coast Matt Kettmann \n", 376 | "109 Toscana None Kerin O’Keefe \n", 377 | "11 Alsace None Roger Voss \n", 378 | "110 Brouilly None Roger Voss \n", 379 | "\n", 380 | " taster_twitter_handle title \\\n", 381 | "0 @kerinokeefe Nicosia 2013 Vulkà Bianco (Etna) \n", 382 | "1 @vossroger Quinta dos Avidagos 2011 Avidagos Red (Douro) \n", 383 | "10 @vboone Kirkland Signature 2011 Mountain Cuvée Caberne... \n", 384 | "100 None Ventosa 2015 Pinot Gris (Finger Lakes) \n", 385 | "101 None Lamoreaux Landing 2014 Red Oak Vineyard Riesli... \n", 386 | "102 None Lamoreaux Landing 2014 Yellow Dog Vineyard Rie... \n", 387 | "103 @wineschach Leyda 2015 Single Vineyard Falaris Hill Chardo... \n", 388 | "104 @kerinokeefe Madonna Alta 2014 Nativo Red (Toscana) \n", 389 | "105 @kerinokeefe Marchesi Antinori 2015 Villa Antinori White (T... \n", 390 | "106 @kerinokeefe Marchesi de' Frescobaldi 2014 Castiglioni Red ... \n", 391 | "107 @kerinokeefe Marchesi de' Frescobaldi 2015 Ammiraglia Masso... \n", 392 | "108 @mattkettmann Martin Ranch 2014 J.D. Hurley Zinfandel (Santa... 
\n", 393 | "109 @kerinokeefe Ornellaia 2014 Le Volte Red (Toscana) \n", 394 | "11 @vossroger Leon Beyer 2012 Gewurztraminer (Alsace) \n", 395 | "110 @vossroger Pardon et Fils 2015 Les Quartelets (Brouilly) \n", 396 | "\n", 397 | " variety winery \n", 398 | "0 White Blend Nicosia \n", 399 | "1 Portuguese Red Quinta dos Avidagos \n", 400 | "10 Cabernet Sauvignon Kirkland Signature \n", 401 | "100 Pinot Gris Ventosa \n", 402 | "101 Riesling Lamoreaux Landing \n", 403 | "102 Riesling Lamoreaux Landing \n", 404 | "103 Chardonnay Leyda \n", 405 | "104 Red Blend Madonna Alta \n", 406 | "105 White Blend Marchesi Antinori \n", 407 | "106 Red Blend Marchesi de' Frescobaldi \n", 408 | "107 Vermentino Marchesi de' Frescobaldi \n", 409 | "108 Zinfandel Martin Ranch \n", 410 | "109 Red Blend Ornellaia \n", 411 | "11 Gewürztraminer Leon Beyer \n", 412 | "110 Gamay Pardon et Fils " 413 | ] 414 | }, 415 | "execution_count": 2, 416 | "metadata": {}, 417 | "output_type": "execute_result" 418 | } 419 | ], 420 | "source": [ 421 | "pd.read_json(r'data\\wine-reviews\\winemag-data-130k-v2_tiny.json')" 422 | ] 423 | }, 424 | { 425 | "cell_type": "markdown", 426 | "metadata": {}, 427 | "source": [ 428 | "### Additional options\n", 429 | "All the read methods have additional useful options.\n", 430 | "\n", 431 | "The main ones for `read_csv` (some of which are in common with other `read_..` methods): \n", 432 | "- `sep`: defines the type of column separator\n", 433 | "- `index_col`: defines which column should be read as the index (row label)\n", 434 | "- `usecols`: allows reading only a subset of the columns\n", 435 | "- `skiprows` and `skipfooter`: allows ignoring of initial/final rows\n", 436 | "- `nrows`: allows reading of a limited amount of rows\n", 437 | "- `na_values`: defines the values to be treated as null\n", 438 | "- `parse_dates`: defines the columns that contain dates to be parsed\n", 439 | "- `dayfirst`: defines that the day comes before the month in the date to be parsed\n", 
440 | "- `thousands`: defines the thousands numeric separator\n", 441 | "- `decimal`: defines the decimal numeric separator\n", 442 | "- `encoding`: defines the type of encoding to be used, default utf-8.\n", 443 | "\n", 444 | "for more information consult the corresponding [documentation](https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html).\n", 445 | "\n", 446 | "**TIP**: if you get a `UnicodeDecodeError: 'utf-8' codec` error, try adding `encoding='latin'` to the read options, that usually solves the issue." 447 | ] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "execution_count": 3, 452 | "metadata": {}, 453 | "outputs": [ 454 | { 455 | "data": { 456 | "text/html": [ 457 | "
\n", 458 | "\n", 471 | "\n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | "
countrydesignationprice
wine_id
0ItalyVulkà BiancoNaN
1PortugalAvidagos15.0
2USNaN14.0
3USReserve Late Harvest13.0
4USVintner's Reserve Wild Child Block65.0
\n", 519 | "
" 520 | ], 521 | "text/plain": [ 522 | " country designation price\n", 523 | "wine_id \n", 524 | "0 Italy Vulkà Bianco NaN\n", 525 | "1 Portugal Avidagos 15.0\n", 526 | "2 US NaN 14.0\n", 527 | "3 US Reserve Late Harvest 13.0\n", 528 | "4 US Vintner's Reserve Wild Child Block 65.0" 529 | ] 530 | }, 531 | "execution_count": 3, 532 | "metadata": {}, 533 | "output_type": "execute_result" 534 | } 535 | ], 536 | "source": [ 537 | "pd.read_csv(\n", 538 | " 'data/wine-reviews/winemag-data-130k-v2.csv',\n", 539 | " usecols=['wine_id', 'country', 'designation', 'price'],\n", 540 | " index_col='wine_id',\n", 541 | " nrows=5,\n", 542 | ")" 543 | ] 544 | }, 545 | { 546 | "cell_type": "markdown", 547 | "metadata": {}, 548 | "source": [ 549 | "### ***EXERCISE 2.1***\n", 550 | "Using the pandas documentation, find a way to read the file `data/wine-reviews/exercise_2.1.txt` in the following way:\n", 551 | "- read the file as tab-separated\n", 552 | "- parse the date in the `date of rating` column, in day/month format\n", 553 | "- set `country` as the index\n", 554 | "- skip the 3rd and 4th rows" 555 | ] 556 | }, 557 | { 558 | "cell_type": "code", 559 | "execution_count": 4, 560 | "metadata": {}, 561 | "outputs": [], 562 | "source": [ 563 | "# insert solution here" 564 | ] 565 | }, 566 | { 567 | "cell_type": "markdown", 568 | "metadata": {}, 569 | "source": [ 570 | "# Creating Data\n", 571 | "In order to create a pd.DataFrame from data we have to follow the constructor specifications.\n", 572 | "\n", 573 | "In general they can be created using:\n", 574 | "- dictionary\n", 575 | "- iterable (np.array, list, pd.Series..)" 576 | ] 577 | }, 578 | { 579 | "cell_type": "code", 580 | "execution_count": 5, 581 | "metadata": {}, 582 | "outputs": [ 583 | { 584 | "data": { 585 | "text/html": [ 586 | "
\n", 587 | "\n", 600 | "\n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | "
NameCatsDogs
0Bob21
1Mary02
\n", 624 | "
" 625 | ], 626 | "text/plain": [ 627 | " Name Cats Dogs\n", 628 | "0 Bob 2 1\n", 629 | "1 Mary 0 2" 630 | ] 631 | }, 632 | "execution_count": 5, 633 | "metadata": {}, 634 | "output_type": "execute_result" 635 | } 636 | ], 637 | "source": [ 638 | "# using dictionary\n", 639 | "data = {'Name': ['Bob', 'Mary'], 'Cats':[2,0], 'Dogs':[1,2]}\n", 640 | "\n", 641 | "pd.DataFrame(data)" 642 | ] 643 | }, 644 | { 645 | "cell_type": "code", 646 | "execution_count": 6, 647 | "metadata": {}, 648 | "outputs": [ 649 | { 650 | "data": { 651 | "text/html": [ 652 | "
\n", 653 | "\n", 666 | "\n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | "
NameCatsDogs
0Bob21
1Mary02
\n", 690 | "
" 691 | ], 692 | "text/plain": [ 693 | " Name Cats Dogs\n", 694 | "0 Bob 2 1\n", 695 | "1 Mary 0 2" 696 | ] 697 | }, 698 | "execution_count": 6, 699 | "metadata": {}, 700 | "output_type": "execute_result" 701 | } 702 | ], 703 | "source": [ 704 | "# using 2D iterable\n", 705 | "data = [['Bob', 2, 1], ['Mary', 0, 2]]\n", 706 | "labels = ['Name','Cats','Dogs'] \n", 707 | "\n", 708 | "pd.DataFrame(data, columns=labels)" 709 | ] 710 | }, 711 | { 712 | "cell_type": "markdown", 713 | "metadata": {}, 714 | "source": [ 715 | "### ***EXERCISE 2.2***\n", 716 | "Create a DataFrame like the following:\n", 717 | "\n", 718 | "| \t| r \t| g \t| b \t| hex \t|\n", 719 | "|--------\t|-----\t|-----\t|----\t|---------\t|\n", 720 | "| blue \t| 0 \t| 0 \t| 1 \t| #0000ff \t|\n", 721 | "| olive \t| 85 \t| 107 \t| 47 \t| #556B2F \t|\n", 722 | "| sienna \t| 160 \t| 82 \t| 45 \t| #A0522D \t|" 723 | ] 724 | }, 725 | { 726 | "cell_type": "code", 727 | "execution_count": 7, 728 | "metadata": {}, 729 | "outputs": [], 730 | "source": [ 731 | "# insert solution here" 732 | ] 733 | }, 734 | { 735 | "cell_type": "code", 736 | "execution_count": null, 737 | "metadata": {}, 738 | "outputs": [], 739 | "source": [] 740 | } 741 | ], 742 | "metadata": { 743 | "kernelspec": { 744 | "display_name": "Python 3", 745 | "language": "python", 746 | "name": "python3" 747 | }, 748 | "language_info": { 749 | "codemirror_mode": { 750 | "name": "ipython", 751 | "version": 3 752 | }, 753 | "file_extension": ".py", 754 | "mimetype": "text/x-python", 755 | "name": "python", 756 | "nbconvert_exporter": "python", 757 | "pygments_lexer": "ipython3", 758 | "version": "3.7.3" 759 | } 760 | }, 761 | "nbformat": 4, 762 | "nbformat_minor": 4 763 | } 764 | -------------------------------------------------------------------------------- /08_missing_data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | 
"metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# **Handling Missing Data**\n", 18 | "Pandas has convenient methods to check, remove, and fill missing data.\n", 19 | "It also ignores missing data when performing common operations such as mean, sum, etc.\n", 20 | "\n", 21 | "To signal missing data, pandas uses the NaN object of numpy, the numerical library it depends on. So in order to set a value as NaN we need to import it, conventionally as \"np\"." 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 2, 27 | "metadata": {}, 28 | "outputs": [ 29 | { 30 | "data": { 31 | "text/html": [ 32 | "
\n", 33 | "\n", 46 | "\n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | "
abcd
01.04.0812.0
12.0NaN9NaN
2NaN6.010NaN
3NaNNaN1115.0
\n", 87 | "
" 88 | ], 89 | "text/plain": [ 90 | " a b c d\n", 91 | "0 1.0 4.0 8 12.0\n", 92 | "1 2.0 NaN 9 NaN\n", 93 | "2 NaN 6.0 10 NaN\n", 94 | "3 NaN NaN 11 15.0" 95 | ] 96 | }, 97 | "execution_count": 2, 98 | "metadata": {}, 99 | "output_type": "execute_result" 100 | } 101 | ], 102 | "source": [ 103 | "df = pd.DataFrame({\n", 104 | " 'a': [1,2,np.nan,np.nan], \n", 105 | " 'b':[4, np.nan, 6, np.nan],\n", 106 | " 'c':[8, 9, 10, 11],\n", 107 | " 'd':[12, np.nan, np.nan, 15],\n", 108 | "})\n", 109 | "df" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "### Detecting missing data\n", 117 | "The method `.isna` (or `.isnull` in the older versions of pandas) can be used to return a mask that is True where the data is missing. To get the opposite mask, use `.notna` (or `.notnull`)." 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 3, 123 | "metadata": {}, 124 | "outputs": [ 125 | { 126 | "data": { 127 | "text/html": [ 128 | "
\n", 129 | "\n", 142 | "\n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | "
abcd
0FalseFalseFalseFalse
1FalseTrueFalseTrue
2TrueFalseFalseTrue
3TrueTrueFalseFalse
\n", 183 | "
" 184 | ], 185 | "text/plain": [ 186 | " a b c d\n", 187 | "0 False False False False\n", 188 | "1 False True False True\n", 189 | "2 True False False True\n", 190 | "3 True True False False" 191 | ] 192 | }, 193 | "execution_count": 3, 194 | "metadata": {}, 195 | "output_type": "execute_result" 196 | } 197 | ], 198 | "source": [ 199 | "df.isna()" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 4, 205 | "metadata": {}, 206 | "outputs": [ 207 | { 208 | "data": { 209 | "text/html": [ 210 | "
\n", 211 | "\n", 224 | "\n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | "
abcd
0TrueTrueTrueTrue
1TrueFalseTrueFalse
2FalseTrueTrueFalse
3FalseFalseTrueTrue
\n", 265 | "
" 266 | ], 267 | "text/plain": [ 268 | " a b c d\n", 269 | "0 True True True True\n", 270 | "1 True False True False\n", 271 | "2 False True True False\n", 272 | "3 False False True True" 273 | ] 274 | }, 275 | "execution_count": 4, 276 | "metadata": {}, 277 | "output_type": "execute_result" 278 | } 279 | ], 280 | "source": [ 281 | "df.notna()" 282 | ] 283 | }, 284 | { 285 | "cell_type": "markdown", 286 | "metadata": {}, 287 | "source": [ 288 | "### Removing missing data\n", 289 | "The method `dropna` handles removal of missing data in one of the following ways:\n", 290 | " - how='any': (default) removes all rows that have at least one missing value in any column\n", 291 | " - how='all': removes a row only if all of its columns contain missing data\n", 292 | "\n", 293 | "The method returns an edited copy of the data." 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": 5, 299 | "metadata": {}, 300 | "outputs": [ 301 | { 302 | "data": { 303 | "text/html": [ 304 | "
\n", 305 | "\n", 318 | "\n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | "
abcd
01.04.0812.0
\n", 338 | "
" 339 | ], 340 | "text/plain": [ 341 | " a b c d\n", 342 | "0 1.0 4.0 8 12.0" 343 | ] 344 | }, 345 | "execution_count": 5, 346 | "metadata": {}, 347 | "output_type": "execute_result" 348 | } 349 | ], 350 | "source": [ 351 | "df.dropna()\n", 352 | "# same as: df.dropna(how='any')" 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": 6, 358 | "metadata": {}, 359 | "outputs": [ 360 | { 361 | "data": { 362 | "text/html": [ 363 | "
\n", 364 | "\n", 377 | "\n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | "
abcd
01.04.0812.0
12.0NaN9NaN
2NaN6.010NaN
3NaNNaN1115.0
\n", 418 | "
" 419 | ], 420 | "text/plain": [ 421 | " a b c d\n", 422 | "0 1.0 4.0 8 12.0\n", 423 | "1 2.0 NaN 9 NaN\n", 424 | "2 NaN 6.0 10 NaN\n", 425 | "3 NaN NaN 11 15.0" 426 | ] 427 | }, 428 | "execution_count": 6, 429 | "metadata": {}, 430 | "output_type": "execute_result" 431 | } 432 | ], 433 | "source": [ 434 | "df.dropna(how='all')" 435 | ] 436 | }, 437 | { 438 | "cell_type": "markdown", 439 | "metadata": {}, 440 | "source": [ 441 | "If you want to remove columns instead of rows the axis argument has to be set accordingly: `axis='columns'`" 442 | ] 443 | }, 444 | { 445 | "cell_type": "code", 446 | "execution_count": 7, 447 | "metadata": {}, 448 | "outputs": [ 449 | { 450 | "data": { 451 | "text/html": [ 452 | "
\n", 453 | "\n", 466 | "\n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | "
c
08
19
210
311
\n", 492 | "
" 493 | ], 494 | "text/plain": [ 495 | " c\n", 496 | "0 8\n", 497 | "1 9\n", 498 | "2 10\n", 499 | "3 11" 500 | ] 501 | }, 502 | "execution_count": 7, 503 | "metadata": {}, 504 | "output_type": "execute_result" 505 | } 506 | ], 507 | "source": [ 508 | "df.dropna(axis='columns')" 509 | ] 510 | }, 511 | { 512 | "cell_type": "markdown", 513 | "metadata": {}, 514 | "source": [ 515 | "The optional `subset` argument can be used to specify a subset of columns to focus the search of missing data on." 516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": 8, 521 | "metadata": {}, 522 | "outputs": [ 523 | { 524 | "data": { 525 | "text/html": [ 526 | "
\n", 527 | "\n", 540 | "\n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | "
abcd
01.04.0812.0
3NaNNaN1115.0
\n", 567 | "
" 568 | ], 569 | "text/plain": [ 570 | " a b c d\n", 571 | "0 1.0 4.0 8 12.0\n", 572 | "3 NaN NaN 11 15.0" 573 | ] 574 | }, 575 | "execution_count": 8, 576 | "metadata": {}, 577 | "output_type": "execute_result" 578 | } 579 | ], 580 | "source": [ 581 | "df.dropna(how='any', subset=['c', 'd'])" 582 | ] 583 | }, 584 | { 585 | "cell_type": "markdown", 586 | "metadata": {}, 587 | "source": [ 588 | "### Replacing missing data\n", 589 | "`fillna` can be used as a convenient way to replace missing data in a DataFrame.\n", 590 | "\n", 591 | "The method returns an edited copy of the data." 592 | ] 593 | }, 594 | { 595 | "cell_type": "code", 596 | "execution_count": 9, 597 | "metadata": {}, 598 | "outputs": [ 599 | { 600 | "data": { 601 | "text/html": [ 602 | "
\n", 603 | "\n", 616 | "\n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | "
abcd
01.04.0812.0
12.0-999.09-999.0
2-999.06.010-999.0
3-999.0-999.01115.0
\n", 657 | "
" 658 | ], 659 | "text/plain": [ 660 | " a b c d\n", 661 | "0 1.0 4.0 8 12.0\n", 662 | "1 2.0 -999.0 9 -999.0\n", 663 | "2 -999.0 6.0 10 -999.0\n", 664 | "3 -999.0 -999.0 11 15.0" 665 | ] 666 | }, 667 | "execution_count": 9, 668 | "metadata": {}, 669 | "output_type": "execute_result" 670 | } 671 | ], 672 | "source": [ 673 | "df.fillna(-999)" 674 | ] 675 | }, 676 | { 677 | "cell_type": "markdown", 678 | "metadata": {}, 679 | "source": [ 680 | "Instead of a value, a method can be used to fill the missing data. The available methods are:\n", 681 | "- `pad` or `ffill`: propagate last valid observation forward to next valid\n", 682 | "- `backfill` or `bfill`: use next valid observation to fill gap." 683 | ] 684 | }, 685 | { 686 | "cell_type": "code", 687 | "execution_count": 10, 688 | "metadata": {}, 689 | "outputs": [ 690 | { 691 | "data": { 692 | "text/html": [ 693 | "
\n", 694 | "\n", 707 | "\n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | "
abcd
01.04.0812.0
12.04.0912.0
22.06.01012.0
32.06.01115.0
\n", 748 | "
" 749 | ], 750 | "text/plain": [ 751 | " a b c d\n", 752 | "0 1.0 4.0 8 12.0\n", 753 | "1 2.0 4.0 9 12.0\n", 754 | "2 2.0 6.0 10 12.0\n", 755 | "3 2.0 6.0 11 15.0" 756 | ] 757 | }, 758 | "execution_count": 10, 759 | "metadata": {}, 760 | "output_type": "execute_result" 761 | } 762 | ], 763 | "source": [ 764 | "df.fillna(method='ffill')" 765 | ] 766 | }, 767 | { 768 | "cell_type": "markdown", 769 | "metadata": {}, 770 | "source": [ 771 | "### ***EXERCISE 8.1***\n", 772 | "Prove that the main pandas operations ignore missing data by creating a copy of the following `s1` Series provided, where all NaN have been removed. Name the amended copy `s2`.\n", 773 | "Compare the `.mean()` results of the two Series.\n", 774 | "\n", 775 | "***HINT***: the same methods used for the dataframes above work for Series" 776 | ] 777 | }, 778 | { 779 | "cell_type": "code", 780 | "execution_count": 11, 781 | "metadata": {}, 782 | "outputs": [], 783 | "source": [ 784 | "s1 = pd.Series([1,2,np.nan,4])\n", 785 | "# insert solution here" 786 | ] 787 | }, 788 | { 789 | "cell_type": "markdown", 790 | "metadata": {}, 791 | "source": [ 792 | "### ***EXERCISE 8.2***\n", 793 | "Get the sum total of all the values in the `df` provided, only including rows where either 'quality1' or 'quality2' is not missing." 
794 | ] 795 | }, 796 | { 797 | "cell_type": "code", 798 | "execution_count": 12, 799 | "metadata": {}, 800 | "outputs": [], 801 | "source": [ 802 | "df = pd.DataFrame({\n", 803 | " 'quality1': [100,92,30,np.nan,np.nan,15], \n", 804 | " 'value':[7,4,8,1,9,2],\n", 805 | " 'quality2': [89,88,np.nan,np.nan,1,100], \n", 806 | "})\n", 807 | "# insert solution here" 808 | ] 809 | }, 810 | { 811 | "cell_type": "code", 812 | "execution_count": null, 813 | "metadata": {}, 814 | "outputs": [], 815 | "source": [] 816 | } 817 | ], 818 | "metadata": { 819 | "kernelspec": { 820 | "display_name": "DataScience", 821 | "language": "python", 822 | "name": "ds_env" 823 | }, 824 | "language_info": { 825 | "codemirror_mode": { 826 | "name": "ipython", 827 | "version": 3 828 | }, 829 | "file_extension": ".py", 830 | "mimetype": "text/x-python", 831 | "name": "python", 832 | "nbconvert_exporter": "python", 833 | "pygments_lexer": "ipython3", 834 | "version": "3.7.4" 835 | } 836 | }, 837 | "nbformat": 4, 838 | "nbformat_minor": 4 839 | } 840 | -------------------------------------------------------------------------------- /09_extra_tips.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "# **Extra Tips**\n", 17 | "This notebook will be a mix of extra useful functionalities that are good to know to work more effectively.\n", 18 | "- .str .dt methods\n", 19 | "- pd.set/reset_option (search from pd.describe_option())\n", 20 | "- styling tables\n", 21 | "- MultiIndex" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "## String and DateTime methods\n", 29 | "Pandas provides a way to use string and datetime methods on Series as if it was a single element:\n", 30 | 
"- To use string methods use `.str` between the Series and the method.\n", 31 | "- To use datetime methods use `.dt` between the Series and the method.\n", 32 | "\n", 33 | "This is very handy when wanting to create a derived column." 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 2, 39 | "metadata": {}, 40 | "outputs": [ 41 | { 42 | "data": { 43 | "text/html": [ 44 | "
\n", 45 | "\n", 58 | "\n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | "
mydatemystr
02020-01-01 00:00:00string_0
12020-01-01 06:00:00string_1
22020-01-01 12:00:00string_2
32020-01-01 18:00:00string_3
42020-01-02 00:00:00string_4
52020-01-02 06:00:00string_5
\n", 99 | "
" 100 | ], 101 | "text/plain": [ 102 | " mydate mystr\n", 103 | "0 2020-01-01 00:00:00 string_0\n", 104 | "1 2020-01-01 06:00:00 string_1\n", 105 | "2 2020-01-01 12:00:00 string_2\n", 106 | "3 2020-01-01 18:00:00 string_3\n", 107 | "4 2020-01-02 00:00:00 string_4\n", 108 | "5 2020-01-02 06:00:00 string_5" 109 | ] 110 | }, 111 | "execution_count": 2, 112 | "metadata": {}, 113 | "output_type": "execute_result" 114 | } 115 | ], 116 | "source": [ 117 | "df = pd.DataFrame({\n", 118 | " 'mydate': pd.date_range(start='2020-01-01', periods=6, freq='6H'), \n", 119 | " 'mystr':[f'string_{i}' for i in range(6)],\n", 120 | "})\n", 121 | "\n", 122 | "df" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 3, 128 | "metadata": {}, 129 | "outputs": [ 130 | { 131 | "data": { 132 | "text/html": [ 133 | "
\n", 134 | "\n", 147 | "\n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | "
mydatemystritem_idhourday
02020-01-01 00:00:00string_0#001
12020-01-01 06:00:00string_1#161
22020-01-01 12:00:00string_2#2121
32020-01-01 18:00:00string_3#3181
42020-01-02 00:00:00string_4#402
52020-01-02 06:00:00string_5#562
\n", 209 | "
" 210 | ], 211 | "text/plain": [ 212 | " mydate mystr item_id hour day\n", 213 | "0 2020-01-01 00:00:00 string_0 #0 0 1\n", 214 | "1 2020-01-01 06:00:00 string_1 #1 6 1\n", 215 | "2 2020-01-01 12:00:00 string_2 #2 12 1\n", 216 | "3 2020-01-01 18:00:00 string_3 #3 18 1\n", 217 | "4 2020-01-02 00:00:00 string_4 #4 0 2\n", 218 | "5 2020-01-02 06:00:00 string_5 #5 6 2" 219 | ] 220 | }, 221 | "execution_count": 3, 222 | "metadata": {}, 223 | "output_type": "execute_result" 224 | } 225 | ], 226 | "source": [ 227 | "df['item_id'] = df['mystr'].str.replace('string_', '#')\n", 228 | "df['hour'] = df['mydate'].dt.hour\n", 229 | "df['day'] = df['mydate'].dt.day\n", 230 | "\n", 231 | "df" 232 | ] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "metadata": {}, 237 | "source": [ 238 | "### ***EXERCISE 9.1***\n", 239 | "Using the `df` provided below, get the mean score of people whose name starts with 'J'" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 4, 245 | "metadata": {}, 246 | "outputs": [], 247 | "source": [ 248 | "df = pd.DataFrame({\n", 249 | " 'name': ['John', 'Albert', 'Jack', 'Josef', 'Bob', 'Juliette', 'Mary', 'Jane'], \n", 250 | " 'score': [5,8,6,4,8,7,3,5]\n", 251 | "})\n", 252 | "# insert solution here" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": 5, 258 | "metadata": {}, 259 | "outputs": [ 260 | { 261 | "data": { 262 | "text/plain": [ 263 | "5.4" 264 | ] 265 | }, 266 | "execution_count": 5, 267 | "metadata": {}, 268 | "output_type": "execute_result" 269 | } 270 | ], 271 | "source": [ 272 | "df.loc[df['name'].str.contains('J'), 'score'].mean()" 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "## Pandas Options\n", 280 | "The default pandas options can be changed and reset using the convenient `set_option` and `reset_option` functions.\n", 281 | "\n", 282 | "For example, pandas by default will show 60 rows before starting to hide some of them 
with \"...\". We can increase or decrease the number as needed and reset it." 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": 6, 288 | "metadata": {}, 289 | "outputs": [ 290 | { 291 | "name": "stdout", 292 | "output_type": "stream", 293 | "text": [ 294 | "pandas default number of rows to display: 60\n" 295 | ] 296 | }, 297 | { 298 | "data": { 299 | "text/html": [ 300 | "
\n", 301 | "\n", 314 | "\n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | "
mycol
010
111
212
313
414
515
616
717
\n", 356 | "
" 357 | ], 358 | "text/plain": [ 359 | " mycol\n", 360 | "0 10\n", 361 | "1 11\n", 362 | "2 12\n", 363 | "3 13\n", 364 | "4 14\n", 365 | "5 15\n", 366 | "6 16\n", 367 | "7 17" 368 | ] 369 | }, 370 | "execution_count": 6, 371 | "metadata": {}, 372 | "output_type": "execute_result" 373 | } 374 | ], 375 | "source": [ 376 | "print('pandas default number of rows to display:', pd.get_option('display.max_rows'))\n", 377 | "\n", 378 | "df = pd.DataFrame({'mycol': range(8)})+10\n", 379 | "df" 380 | ] 381 | }, 382 | { 383 | "cell_type": "code", 384 | "execution_count": 7, 385 | "metadata": {}, 386 | "outputs": [ 387 | { 388 | "name": "stdout", 389 | "output_type": "stream", 390 | "text": [ 391 | "Overwriting to show 4 at most..\n" 392 | ] 393 | }, 394 | { 395 | "data": { 396 | "text/html": [ 397 | "
\n", 398 | "\n", 411 | "\n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | "
mycol
010
111
......
616
717
\n", 441 | "

8 rows × 1 columns

\n", 442 | "
" 443 | ], 444 | "text/plain": [ 445 | " mycol\n", 446 | "0 10\n", 447 | "1 11\n", 448 | ".. ...\n", 449 | "6 16\n", 450 | "7 17\n", 451 | "\n", 452 | "[8 rows x 1 columns]" 453 | ] 454 | }, 455 | "execution_count": 7, 456 | "metadata": {}, 457 | "output_type": "execute_result" 458 | } 459 | ], 460 | "source": [ 461 | "print('Overwriting to show 4 at most..')\n", 462 | "pd.set_option('display.max_rows', 4)\n", 463 | "\n", 464 | "df" 465 | ] 466 | }, 467 | { 468 | "cell_type": "code", 469 | "execution_count": 8, 470 | "metadata": {}, 471 | "outputs": [ 472 | { 473 | "name": "stdout", 474 | "output_type": "stream", 475 | "text": [ 476 | "Resetting to the default 60\n" 477 | ] 478 | }, 479 | { 480 | "data": { 481 | "text/html": [ 482 | "
\n", 483 | "\n", 496 | "\n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | "
mycol
010
111
212
313
414
515
616
717
\n", 538 | "
" 539 | ], 540 | "text/plain": [ 541 | " mycol\n", 542 | "0 10\n", 543 | "1 11\n", 544 | "2 12\n", 545 | "3 13\n", 546 | "4 14\n", 547 | "5 15\n", 548 | "6 16\n", 549 | "7 17" 550 | ] 551 | }, 552 | "execution_count": 8, 553 | "metadata": {}, 554 | "output_type": "execute_result" 555 | } 556 | ], 557 | "source": [ 558 | "print('Resetting to the default 60')\n", 559 | "pd.reset_option('display.max_rows')\n", 560 | "\n", 561 | "df" 562 | ] 563 | }, 564 | { 565 | "cell_type": "code", 566 | "execution_count": null, 567 | "metadata": {}, 568 | "outputs": [], 569 | "source": [] 570 | } 571 | ], 572 | "metadata": { 573 | "kernelspec": { 574 | "display_name": "DataScience", 575 | "language": "python", 576 | "name": "ds_env" 577 | }, 578 | "language_info": { 579 | "codemirror_mode": { 580 | "name": "ipython", 581 | "version": 3 582 | }, 583 | "file_extension": ".py", 584 | "mimetype": "text/x-python", 585 | "name": "python", 586 | "nbconvert_exporter": "python", 587 | "pygments_lexer": "ipython3", 588 | "version": "3.7.4" 589 | } 590 | }, 591 | "nbformat": 4, 592 | "nbformat_minor": 4 593 | } 594 | -------------------------------------------------------------------------------- /10_capstone_project.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np\n", 11 | "import matplotlib.pyplot as plt\n", 12 | "%matplotlib inline" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "# Capstone Project\n", 20 | "To tie most of what we have learned together in this tutorial, let's analyse the *world-happiness* dataset.\n", 21 | "\n", 22 | "Below are the tasks that need to be done." 
23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "## Loading data\n", 30 | "load the data in `data/world-happiness/{year}.csv` for each year \n", 31 | "\n", 32 | "add to each a column 'data_year' to keep track of the source year" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "# insert solution here" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "dfs = {}\n", 51 | "for year in (2015, 2016, 2017):\n", 52 | " i_df = pd.read_csv(f'data/world-happiness/{year}.csv')\n", 53 | " i_df['data_year'] = year\n", 54 | " print(i_df.columns)\n", 55 | " dfs[year] = i_df" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "## Correcting column names\n", 63 | "correct the column names in the 2017 data so as to be consistent with the previous years" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "# insert solution here" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "# Standardize Country names\n", 80 | "correct the names in all DataFrames to common ones.\n", 81 | "\n", 82 | "**HINT**: you can use the following correction dictionary or create your own\n", 83 | "```python\n", 84 | "correcting_country_names = {\n", 85 | " 'Taiwan': 'Taiwan Province of China',\n", 86 | " 'Hong Kong': 'Hong Kong S.A.R., China',\n", 87 | "}\n", 88 | "```" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "# insert solution here" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "## Remove Inconsistent Countries \n", 105 | "remove the rows of the countries that 
are not in all datasets" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "# insert solution here" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "## Add Region to 2017 data\n", 122 | "add Region column to the 2017 data by using the same mapping of the previous years " 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "# insert solution here" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "## Remove Inconsistent Columns \n", 139 | "remove the columns that are not in all datasets" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "# insert solution here" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "## Combine All DataFrames\n", 156 | "now that all three dataframes have been standardized, we can combine them (vertically)" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [ 165 | "# insert solution here" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "## Analysing the data\n", 173 | "Use the cleaned dataset to answer the following questions/tasks.\n", 174 | "\n", 175 | "Feel free to explore more and answer your own questions." 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "# insert solution here" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": {}, 190 | "source": [ 191 | "### Which country is consistently more Generous? 
which least Generous?" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": {}, 198 | "outputs": [], 199 | "source": [ 200 | "# insert solution here" 201 | ] 202 | }, 203 | { 204 | "cell_type": "markdown", 205 | "metadata": {}, 206 | "source": [ 207 | "### Which country has seen the biggest drop in Happiness Score within 1 year? which the biggest increase?" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": null, 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "# insert solution here" 217 | ] 218 | }, 219 | { 220 | "cell_type": "markdown", 221 | "metadata": {}, 222 | "source": [ 223 | "### Which Region, on average, has seen the biggest drop in Happiness Score within 1 year? which the biggest increase?" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": null, 229 | "metadata": {}, 230 | "outputs": [], 231 | "source": [ 232 | "# insert solution here" 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": {}, 238 | "source": [ 239 | "### Which variables are most/least correlated with *Happiness Score*? make a scatterplot of them against *Happiness Score*.\n", 240 | "NOTE: remove *Happiness Rank* " 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": null, 246 | "metadata": {}, 247 | "outputs": [], 248 | "source": [ 249 | "# insert solution here" 250 | ] 251 | }, 252 | { 253 | "cell_type": "markdown", 254 | "metadata": {}, 255 | "source": [ 256 | "### Taking the data average, is it clear if we are getting happier or sadder? Which was the saddest year?" 
257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": null, 262 | "metadata": {}, 263 | "outputs": [], 264 | "source": [ 265 | "# insert solution here" 266 | ] 267 | } 268 | ], 269 | "metadata": { 270 | "kernelspec": { 271 | "display_name": "DataScience", 272 | "language": "python", 273 | "name": "ds_env" 274 | }, 275 | "language_info": { 276 | "codemirror_mode": { 277 | "name": "ipython", 278 | "version": 3 279 | }, 280 | "file_extension": ".py", 281 | "mimetype": "text/x-python", 282 | "name": "python", 283 | "nbconvert_exporter": "python", 284 | "pygments_lexer": "ipython3", 285 | "version": "3.7.4" 286 | } 287 | }, 288 | "nbformat": 4, 289 | "nbformat_minor": 4 290 | } 291 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Gabriele Calvo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pandas Tutorial 2 | This tutorial will guide you through the main features of *pandas* and test your knowledge along the way. 3 | 4 | The tutorial culminates in a Capstone Project where you can test your data processing and analysis skills. 5 | 6 | The *solutions* folder contains solutions to all exercises and the answer to the given Capstone Project. 7 | 8 | # Usage 9 | If you are familiar with jupyter notebooks and running them locally, I suggest [downloading](https://github.com/gabrielecalvo/pandas_tutorial/archive/refs/heads/master.zip) or cloning this repository. 10 | 11 | If you just want to play around with some of the notebooks, I suggest using [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/gabrielecalvo/pandas_tutorial/master). Note: it might take a few minutes to startup but will then give you access to an ephemeral Jupyter environment that has access to all notebooks in this repository. As it is ephemeral, once you close the browser all your work will disappear unless you download a copy of it. 12 | 13 | Alternatively, you can use [Colab](https://colab.research.google.com), selecting the GitHub tab and enter `gabrielecalvo/pandas_tutorial` in the search input. You will have to select and open each notebook separately, but it will be easier to copy them to your google drive to edit and store. 14 | 15 | 16 | ## Syllabus 17 | 1. Introduction 18 | - Data Used 19 | - Useful links 20 | - Library version 21 | 1. Reading/Creating data 22 | - read_.. 
23 | - using additional options 24 | - pd.DataFrame constructors 25 | 1. Exploring data 26 | - first/last/random rows 27 | - shape, describe, dtypes 28 | - columns, index 29 | - value_counts(normalize) 30 | - unique 31 | 1. Subsetting data 32 | - Bracket/Dot notation (& slicing) 33 | - loc, iloc 34 | - filter by column value 35 | - multiple filters 36 | 1. Data Aggregation & Reshaping 37 | - groupby (+ agg) 38 | - pivot_table 39 | - sorting by index, values, multiple-values 40 | - stack, unstack 41 | 1. Data Plotting 42 | - plot types 43 | - line plot 44 | - scatter plot 45 | - bar chart 46 | - frequency histogram 47 | - main features: 48 | - adding grid, resizing the figure, changing color, etc.. 49 | - setting axis limits 50 | - overlaying plots 51 | - multiple plots in one figure 52 | - saving to file 53 | 1. Data Manipulation 54 | - renaming columns 55 | - adding column, index 56 | - remove column, index 57 | - parsing: astype, pd.to_datetime 58 | - concat DataFrames 59 | - remove duplicate 60 | - apply, map, applymap 61 | - transposing 62 | 1. Handling Missing Data 63 | - isna, notna (+ sum, mean) 64 | - dropna (any, all) 65 | - fillna 66 | 1. Extra Tips 67 | - .str .dt methods 68 | - pd.set/reset_option (search from pd.describe_option()) 69 | 1. Capstone Project 70 | -------------------------------------------------------------------------------- /data/wine-reviews/exercise_2.1.txt: -------------------------------------------------------------------------------- 1 | wine_id country description designation points date of rating 2 | 0 Italy "Aromas include tropical fruit, broom, brimstone and dried herb. The palate isn't overly expressive, offering unripened apple, citrus and dried sage alongside brisk acidity." Vulkà Bianco 87 01/12/2015 3 | 1 Portugal "This is ripe and fruity, a wine that is smooth while still structured. Firm tannins are filled out with juicy red berry fruits and freshened with acidity. 
It's already drinkable, although it will certainly be better from 2016." Avidagos 87 13/03/2014 4 | 2 US "Tart and snappy, the flavors of lime flesh and rind dominate. Some green pineapple pokes through, with crisp acidity underscoring the flavors. The wine was all stainless-steel fermented." 87 26/09/2014 5 | 3 US "Pineapple rind, lemon pith and orange blossom start off the aromas. The palate is a bit more opulent, with notes of honey-drizzled guava and mango giving way to a slightly astringent, semidry finish." Reserve Late Harvest 87 07/08/2015 6 | 4 US "Much like the regular bottling from 2012, this comes across as rather rough and tannic, with rustic, earthy, herbal characteristics. Nonetheless, if you think of it as a pleasantly unfussy country wine, it's a good companion to a hearty winter stew." Vintner's Reserve Wild Child Block 87 17/10/2014 7 | 5 Spain "Blackberry and raspberry aromas show a typical Navarran whiff of green herbs and, in this case, horseradish. In the mouth, this is fairly full bodied, with tomatoey acidity. Spicy, herbal flavors complement dark plum fruit, while the finish is fresh but grabby." Ars In Vitro 87 26/11/2015 8 | -------------------------------------------------------------------------------- /data/wine-reviews/winemag-data-130k-v2_tiny.json: -------------------------------------------------------------------------------- 1 | {"country":{"0":"Italy","1":"Portugal","10":"US","100":"US","101":"US","102":"US","103":"Chile","104":"Italy","105":"Italy","106":"Italy","107":"Italy","108":"US","109":"Italy","11":"France","110":"France"},"description":{"0":"Aromas include tropical fruit, broom, brimstone and dried herb. The palate isn't overly expressive, offering unripened apple, citrus and dried sage alongside brisk acidity.","1":"This is ripe and fruity, a wine that is smooth while still structured. Firm tannins are filled out with juicy red berry fruits and freshened with acidity. 
It's already drinkable, although it will certainly be better from 2016.","10":"Soft, supple plum envelopes an oaky structure in this Cabernet, supported by 15% Merlot. Coffee and chocolate complete the picture, finishing strong at the end, resulting in a value-priced wine of attractive flavor and immediate accessibility.","100":"Fresh apple, lemon and pear flavors are accented by a hint of smoked nuts in this bold, full-bodied Pinot Gris. Rich and a bit creamy in mouthfeel yet balanced briskly, it's a satisfying white with wide pairing appeal. Drink now through 2019.","101":"Dusty mineral, smoke and struck flint lend a savory tone to this lean light-bodied Riesling. Off dry in style, the palate offers delicately concentrated flavors of red apple and nectarine off set by tangerine acidity. Drink now through 2021.","102":"Intensely smoky tones of struck flint and ash extend throughout this otherwise bright nimble Riesling. The palate boasts freshly pressed apple and pear flavors accentuated by zippy lemon-lime acidity. Drink now through 2021.","103":"A bright nose with green apple and citric aromas comes with mild oak. This is acidic and racy in composition, with roundness along the edges of the palate. Apple and nectarine flavors are lightly oaked and salty, while this holds form on a high-acid finish.","104":"Made with 65% Sangiovese, 20% Merlot and 15% Cabernet Sauvignon, this has subtle aromas of black-skinned fruit and thyme. The easygoing palate delivers black cherry and cinnamon alongside smooth tannins.","105":"Made predominantly with Trebbiano and Malvasia, along with Pinot Bianco, Pinot Grigio and Riesling Renano, this has inviting aromas of ripe orchard fruit and citrus. The round fruity palate doles out ripe apricot, orange zest and juicy pineapple.","106":"A blend of Cabernet Sauvignon, Merlot, Cabernet Franc and Sangiovese, this pleasant red has aromas of dark-skinned fruit, toast and a whiff of espresso. 
The light-bodied, straightforward palate offers cherry, red currant and a hint of light spice alongside zesty acidity and polished tannins.","107":"Aromas of yellow stone fruit, white spring flower and a whiff of citrus lift out of the glass. The soft round palate offers ripe apricot, yellow peach and a hint of candied nectarine zest.","108":"Lots of spearmint, coyote mint, hot licorice, ginger snaps and Dr Pepper spice up the strawberry fruit of this wine that provides a very herbal take on the grape. Oregano, marjoram, thyme and dill all make a showing on the sip, against a backbone of sweet cherry and blackberry fruit, finishing on cedar.","109":"An easy-drinking blend of Merlot, Sangiovese and Cabernet Sauvignon, this opens with aromas of red berry, toast, herb and roasted coffee bean. The soft palate offers black cherry, mocha and a hint of coffee alongside bright acidity and silky tannins. Drink now.","11":"This is a dry wine, very spicy, with a tight, taut texture and strongly mineral character layered with citrus as well as pepper. It's a food wine with its almost crisp aftertaste.","110":"Produced from cru vines at the base of Mount Brouilly, the wine has structure as well as ripe black-plum fruits. It is generous and its fruit is well balanced by acidity and solid tannins. The wines is ready to drink."},"designation":{"0":"Vulk\u00e0 Bianco","1":"Avidagos","10":"Mountain Cuv\u00e9e","100":null,"101":"Red Oak Vineyard","102":"Yellow Dog Vineyard","103":"Single Vineyard Falaris Hill","104":"Nativo","105":"Villa Antinori","106":"Castiglioni","107":"Ammiraglia Massovivo","108":"J.D. 
Hurley","109":"Le Volte","11":null,"110":"Les Quartelets"},"points":{"0":87,"1":87,"10":87,"100":88,"101":87,"102":87,"103":87,"104":87,"105":87,"106":87,"107":87,"108":87,"109":87,"11":87,"110":87},"price":{"0":null,"1":15.0,"10":19.0,"100":18.0,"101":20.0,"102":20.0,"103":18.0,"104":16.0,"105":14.0,"106":30.0,"107":18.0,"108":26.0,"109":30.0,"11":30.0,"110":23.0},"province":{"0":"Sicily & Sardinia","1":"Douro","10":"California","100":"New York","101":"New York","102":"New York","103":"Leyda Valley","104":"Tuscany","105":"Tuscany","106":"Tuscany","107":"Tuscany","108":"California","109":"Tuscany","11":"Alsace","110":"Beaujolais"},"region_1":{"0":"Etna","1":null,"10":"Napa Valley","100":"Finger Lakes","101":"Finger Lakes","102":"Finger Lakes","103":null,"104":"Toscana","105":"Toscana","106":"Toscana","107":"Toscana","108":"Santa Clara Valley","109":"Toscana","11":"Alsace","110":"Brouilly"},"region_2":{"0":null,"1":null,"10":"Napa","100":"Finger Lakes","101":"Finger Lakes","102":"Finger Lakes","103":null,"104":null,"105":null,"106":null,"107":null,"108":"Central Coast","109":null,"11":null,"110":null},"taster_name":{"0":"Kerin O\u2019Keefe","1":"Roger Voss","10":"Virginie Boone","100":"Anna Lee C. Iijima","101":"Anna Lee C. Iijima","102":"Anna Lee C. 
Iijima","103":"Michael Schachner","104":"Kerin O\u2019Keefe","105":"Kerin O\u2019Keefe","106":"Kerin O\u2019Keefe","107":"Kerin O\u2019Keefe","108":"Matt Kettmann","109":"Kerin O\u2019Keefe","11":"Roger Voss","110":"Roger Voss"},"taster_twitter_handle":{"0":"@kerinokeefe","1":"@vossroger","10":"@vboone","100":null,"101":null,"102":null,"103":"@wineschach","104":"@kerinokeefe","105":"@kerinokeefe","106":"@kerinokeefe","107":"@kerinokeefe","108":"@mattkettmann","109":"@kerinokeefe","11":"@vossroger","110":"@vossroger"},"title":{"0":"Nicosia 2013 Vulk\u00e0 Bianco (Etna)","1":"Quinta dos Avidagos 2011 Avidagos Red (Douro)","10":"Kirkland Signature 2011 Mountain Cuv\u00e9e Cabernet Sauvignon (Napa Valley)","100":"Ventosa 2015 Pinot Gris (Finger Lakes)","101":"Lamoreaux Landing 2014 Red Oak Vineyard Riesling (Finger Lakes)","102":"Lamoreaux Landing 2014 Yellow Dog Vineyard Riesling (Finger Lakes)","103":"Leyda 2015 Single Vineyard Falaris Hill Chardonnay (Leyda Valley)","104":"Madonna Alta 2014 Nativo Red (Toscana)","105":"Marchesi Antinori 2015 Villa Antinori White (Toscana)","106":"Marchesi de' Frescobaldi 2014 Castiglioni Red (Toscana)","107":"Marchesi de' Frescobaldi 2015 Ammiraglia Massovivo Vermentino (Toscana)","108":"Martin Ranch 2014 J.D. 
Hurley Zinfandel (Santa Clara Valley)","109":"Ornellaia 2014 Le Volte Red (Toscana)","11":"Leon Beyer 2012 Gewurztraminer (Alsace)","110":"Pardon et Fils 2015 Les Quartelets (Brouilly)"},"variety":{"0":"White Blend","1":"Portuguese Red","10":"Cabernet Sauvignon","100":"Pinot Gris","101":"Riesling","102":"Riesling","103":"Chardonnay","104":"Red Blend","105":"White Blend","106":"Red Blend","107":"Vermentino","108":"Zinfandel","109":"Red Blend","11":"Gew\u00fcrztraminer","110":"Gamay"},"winery":{"0":"Nicosia","1":"Quinta dos Avidagos","10":"Kirkland Signature","100":"Ventosa","101":"Lamoreaux Landing","102":"Lamoreaux Landing","103":"Leyda","104":"Madonna Alta","105":"Marchesi Antinori","106":"Marchesi de' Frescobaldi","107":"Marchesi de' Frescobaldi","108":"Martin Ranch","109":"Ornellaia","11":"Leon Beyer","110":"Pardon et Fils"}} -------------------------------------------------------------------------------- /data/world-happiness/2015.csv: -------------------------------------------------------------------------------- 1 | Country,Region,Happiness Rank,Happiness Score,Standard Error,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual 2 | Switzerland,Western Europe,1,7.587,0.03411,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738 3 | Iceland,Western Europe,2,7.561,0.04884,1.30232,1.40223,0.94784,0.62877,0.14145,0.4363,2.70201 4 | Denmark,Western Europe,3,7.527,0.03328,1.32548,1.36058,0.87464,0.64938,0.48357,0.34139,2.49204 5 | Norway,Western Europe,4,7.522,0.0388,1.459,1.33095,0.88521,0.66973,0.36503,0.34699,2.46531 6 | Canada,North America,5,7.427,0.03553,1.32629,1.32261,0.90563,0.63297,0.32957,0.45811,2.45176 7 | Finland,Western Europe,6,7.406,0.0314,1.29025,1.31826,0.88911,0.64169,0.41372,0.23351,2.61955 8 | Netherlands,Western Europe,7,7.378,0.02799,1.32944,1.28017,0.89284,0.61576,0.31814,0.4761,2.4657 9 | Sweden,Western 
Europe,8,7.364,0.03157,1.33171,1.28907,0.91087,0.6598,0.43844,0.36262,2.37119 10 | New Zealand,Australia and New Zealand,9,7.286,0.03371,1.25018,1.31967,0.90837,0.63938,0.42922,0.47501,2.26425 11 | Australia,Australia and New Zealand,10,7.284,0.04083,1.33358,1.30923,0.93156,0.65124,0.35637,0.43562,2.26646 12 | Israel,Middle East and Northern Africa,11,7.278,0.0347,1.22857,1.22393,0.91387,0.41319,0.07785,0.33172,3.08854 13 | Costa Rica,Latin America and Caribbean,12,7.226,0.04454,0.95578,1.23788,0.86027,0.63376,0.10583,0.25497,3.17728 14 | Austria,Western Europe,13,7.2,0.03751,1.33723,1.29704,0.89042,0.62433,0.18676,0.33088,2.5332 15 | Mexico,Latin America and Caribbean,14,7.187,0.04176,1.02054,0.91451,0.81444,0.48181,0.21312,0.14074,3.60214 16 | United States,North America,15,7.119,0.03839,1.39451,1.24711,0.86179,0.54604,0.1589,0.40105,2.51011 17 | Brazil,Latin America and Caribbean,16,6.983,0.04076,0.98124,1.23287,0.69702,0.49049,0.17521,0.14574,3.26001 18 | Luxembourg,Western Europe,17,6.946,0.03499,1.56391,1.21963,0.91894,0.61583,0.37798,0.28034,1.96961 19 | Ireland,Western Europe,18,6.94,0.03676,1.33596,1.36948,0.89533,0.61777,0.28703,0.45901,1.9757 20 | Belgium,Western Europe,19,6.937,0.03595,1.30782,1.28566,0.89667,0.5845,0.2254,0.2225,2.41484 21 | United Arab Emirates,Middle East and Northern Africa,20,6.901,0.03729,1.42727,1.12575,0.80925,0.64157,0.38583,0.26428,2.24743 22 | United Kingdom,Western Europe,21,6.867,0.01866,1.26637,1.28548,0.90943,0.59625,0.32067,0.51912,1.96994 23 | Oman,Middle East and Northern Africa,22,6.853,0.05335,1.36011,1.08182,0.76276,0.63274,0.32524,0.21542,2.47489 24 | Venezuela,Latin America and Caribbean,23,6.81,0.06476,1.04424,1.25596,0.72052,0.42908,0.11069,0.05841,3.19131 25 | Singapore,Southeastern Asia,24,6.798,0.0378,1.52186,1.02,1.02525,0.54252,0.4921,0.31105,1.88501 26 | Panama,Latin America and Caribbean,25,6.786,0.0491,1.06353,1.1985,0.79661,0.5421,0.0927,0.24434,2.84848 27 | Germany,Western 
Europe,26,6.75,0.01848,1.32792,1.29937,0.89186,0.61477,0.21843,0.28214,2.11569 28 | Chile,Latin America and Caribbean,27,6.67,0.058,1.10715,1.12447,0.85857,0.44132,0.12869,0.33363,2.67585 29 | Qatar,Middle East and Northern Africa,28,6.611,0.06257,1.69042,1.0786,0.79733,0.6404,0.52208,0.32573,1.55674 30 | France,Western Europe,29,6.575,0.03512,1.27778,1.26038,0.94579,0.55011,0.20646,0.12332,2.21126 31 | Argentina,Latin America and Caribbean,30,6.574,0.04612,1.05351,1.24823,0.78723,0.44974,0.08484,0.11451,2.836 32 | Czech Republic,Central and Eastern Europe,31,6.505,0.04168,1.17898,1.20643,0.84483,0.46364,0.02652,0.10686,2.67782 33 | Uruguay,Latin America and Caribbean,32,6.485,0.04539,1.06166,1.2089,0.8116,0.60362,0.24558,0.2324,2.32142 34 | Colombia,Latin America and Caribbean,33,6.477,0.05051,0.91861,1.24018,0.69077,0.53466,0.0512,0.18401,2.85737 35 | Thailand,Southeastern Asia,34,6.455,0.03557,0.9669,1.26504,0.7385,0.55664,0.03187,0.5763,2.31945 36 | Saudi Arabia,Middle East and Northern Africa,35,6.411,0.04633,1.39541,1.08393,0.72025,0.31048,0.32524,0.13706,2.43872 37 | Spain,Western Europe,36,6.329,0.03468,1.23011,1.31379,0.95562,0.45951,0.06398,0.18227,2.12367 38 | Malta,Western Europe,37,6.302,0.04206,1.2074,1.30203,0.88721,0.60365,0.13586,0.51752,1.6488 39 | Taiwan,Eastern Asia,38,6.298,0.03868,1.29098,1.07617,0.8753,0.3974,0.08129,0.25376,2.32323 40 | Kuwait,Middle East and Northern Africa,39,6.295,0.04456,1.55422,1.16594,0.72492,0.55499,0.25609,0.16228,1.87634 41 | Suriname,Latin America and Caribbean,40,6.269,0.09811,0.99534,0.972,0.6082,0.59657,0.13633,0.16991,2.79094 42 | Trinidad and Tobago,Latin America and Caribbean,41,6.168,0.10895,1.21183,1.18354,0.61483,0.55884,0.0114,0.31844,2.26882 43 | El Salvador,Latin America and Caribbean,42,6.13,0.05618,0.76454,1.02507,0.67737,0.4035,0.11776,0.10692,3.035 44 | Guatemala,Latin America and Caribbean,43,6.123,0.05224,0.74553,1.04356,0.64425,0.57733,0.09472,0.27489,2.74255 45 | Uzbekistan,Central and Eastern 
Europe,44,6.003,0.04361,0.63244,1.34043,0.59772,0.65821,0.30826,0.22837,2.23741 46 | Slovakia,Central and Eastern Europe,45,5.995,0.04267,1.16891,1.26999,0.78902,0.31751,0.03431,0.16893,2.24639 47 | Japan,Eastern Asia,46,5.987,0.03581,1.27074,1.25712,0.99111,0.49615,0.1806,0.10705,1.68435 48 | South Korea,Eastern Asia,47,5.984,0.04098,1.24461,0.95774,0.96538,0.33208,0.07857,0.18557,2.21978 49 | Ecuador,Latin America and Caribbean,48,5.975,0.04528,0.86402,0.99903,0.79075,0.48574,0.1809,0.11541,2.53942 50 | Bahrain,Middle East and Northern Africa,49,5.96,0.05412,1.32376,1.21624,0.74716,0.45492,0.306,0.17362,1.73797 51 | Italy,Western Europe,50,5.948,0.03914,1.25114,1.19777,0.95446,0.26236,0.02901,0.22823,2.02518 52 | Bolivia,Latin America and Caribbean,51,5.89,0.05642,0.68133,0.97841,0.5392,0.57414,0.088,0.20536,2.82334 53 | Moldova,Central and Eastern Europe,52,5.889,0.03799,0.59448,1.01528,0.61826,0.32818,0.01615,0.20951,3.10712 54 | Paraguay,Latin America and Caribbean,53,5.878,0.04563,0.75985,1.30477,0.66098,0.53899,0.08242,0.3424,2.18896 55 | Kazakhstan,Central and Eastern Europe,54,5.855,0.04114,1.12254,1.12241,0.64368,0.51649,0.08454,0.11827,2.24729 56 | Slovenia,Central and Eastern Europe,55,5.848,0.04251,1.18498,1.27385,0.87337,0.60855,0.03787,0.25328,1.61583 57 | Lithuania,Central and Eastern Europe,56,5.833,0.03843,1.14723,1.25745,0.73128,0.21342,0.01031,0.02641,2.44649 58 | Nicaragua,Latin America and Caribbean,57,5.828,0.05371,0.59325,1.14184,0.74314,0.55475,0.19317,0.27815,2.32407 59 | Peru,Latin America and Caribbean,58,5.824,0.04615,0.90019,0.97459,0.73017,0.41496,0.05989,0.14982,2.5945 60 | Belarus,Central and Eastern Europe,59,5.813,0.03938,1.03192,1.23289,0.73608,0.37938,0.1909,0.11046,2.1309 61 | Poland,Central and Eastern Europe,60,5.791,0.04263,1.12555,1.27948,0.77903,0.53122,0.04212,0.16759,1.86565 62 | Malaysia,Southeastern Asia,61,5.77,0.0433,1.12486,1.07023,0.72394,0.53024,0.10501,0.33075,1.88541 63 | Croatia,Central and Eastern 
Europe,62,5.759,0.04394,1.08254,0.79624,0.78805,0.25883,0.0243,0.05444,2.75414 64 | Libya,Middle East and Northern Africa,63,5.754,0.07832,1.13145,1.11862,0.7038,0.41668,0.11023,0.18295,2.09066 65 | Russia,Central and Eastern Europe,64,5.716,0.03135,1.13764,1.23617,0.66926,0.36679,0.03005,0.00199,2.27394 66 | Jamaica,Latin America and Caribbean,65,5.709,0.13693,0.81038,1.15102,0.68741,0.50442,0.02299,0.2123,2.32038 67 | North Cyprus,Western Europe,66,5.695,0.05635,1.20806,1.07008,0.92356,0.49027,0.1428,0.26169,1.59888 68 | Cyprus,Western Europe,67,5.689,0.0558,1.20813,0.89318,0.92356,0.40672,0.06146,0.30638,1.88931 69 | Algeria,Middle East and Northern Africa,68,5.605,0.05099,0.93929,1.07772,0.61766,0.28579,0.17383,0.07822,2.43209 70 | Kosovo,Central and Eastern Europe,69,5.589,0.05018,0.80148,0.81198,0.63132,0.24749,0.04741,0.2831,2.76579 71 | Turkmenistan,Central and Eastern Europe,70,5.548,0.04175,0.95847,1.22668,0.53886,0.4761,0.30844,0.16979,1.86984 72 | Mauritius,Sub-Saharan Africa,71,5.477,0.07197,1.00761,0.98521,0.7095,0.56066,0.07521,0.37744,1.76145 73 | Hong Kong,Eastern Asia,72,5.474,0.05051,1.38604,1.05818,1.01328,0.59608,0.37124,0.39478,0.65429 74 | Estonia,Central and Eastern Europe,73,5.429,0.04013,1.15174,1.22791,0.77361,0.44888,0.15184,0.0868,1.58782 75 | Indonesia,Southeastern Asia,74,5.399,0.02596,0.82827,1.08708,0.63793,0.46611,0,0.51535,1.86399 76 | Vietnam,Southeastern Asia,75,5.36,0.03107,0.63216,0.91226,0.74676,0.59444,0.10441,0.1686,2.20173 77 | Turkey,Middle East and Northern Africa,76,5.332,0.03864,1.06098,0.94632,0.73172,0.22815,0.15746,0.12253,2.08528 78 | Kyrgyzstan,Central and Eastern Europe,77,5.286,0.03823,0.47428,1.15115,0.65088,0.43477,0.04232,0.3003,2.2327 79 | Nigeria,Sub-Saharan Africa,78,5.268,0.04192,0.65435,0.90432,0.16007,0.34334,0.0403,0.27233,2.89319 80 | Bhutan,Southern Asia,79,5.253,0.03225,0.77042,1.10395,0.57407,0.53206,0.15445,0.47998,1.63794 81 | Azerbaijan,Central and Eastern 
Europe,80,5.212,0.03363,1.02389,0.93793,0.64045,0.3703,0.16065,0.07799,2.00073 82 | Pakistan,Southern Asia,81,5.194,0.03726,0.59543,0.41411,0.51466,0.12102,0.10464,0.33671,3.10709 83 | Jordan,Middle East and Northern Africa,82,5.192,0.04524,0.90198,1.05392,0.69639,0.40661,0.14293,0.11053,1.87996 84 | Montenegro,Central and Eastern Europe,82,5.192,0.05235,0.97438,0.90557,0.72521,0.1826,0.14296,0.1614,2.10017 85 | China,Eastern Asia,84,5.14,0.02424,0.89012,0.94675,0.81658,0.51697,0.02781,0.08185,1.8604 86 | Zambia,Sub-Saharan Africa,85,5.129,0.06988,0.47038,0.91612,0.29924,0.48827,0.12468,0.19591,2.6343 87 | Romania,Central and Eastern Europe,86,5.124,0.06607,1.04345,0.88588,0.7689,0.35068,0.00649,0.13748,1.93129 88 | Serbia,Central and Eastern Europe,87,5.123,0.04864,0.92053,1.00964,0.74836,0.20107,0.02617,0.19231,2.025 89 | Portugal,Western Europe,88,5.102,0.04802,1.15991,1.13935,0.87519,0.51469,0.01078,0.13719,1.26462 90 | Latvia,Central and Eastern Europe,89,5.098,0.0464,1.11312,1.09562,0.72437,0.29671,0.06332,0.18226,1.62215 91 | Philippines,Southeastern Asia,90,5.073,0.04934,0.70532,1.03516,0.58114,0.62545,0.12279,0.24991,1.7536 92 | Somaliland region,Sub-Saharan Africa,91,5.057,0.06161,0.18847,0.95152,0.43873,0.46582,0.39928,0.50318,2.11032 93 | Morocco,Middle East and Northern Africa,92,5.013,0.0342,0.73479,0.64095,0.60954,0.41691,0.08546,0.07172,2.45373 94 | Macedonia,Central and Eastern Europe,93,5.007,0.05376,0.91851,1.00232,0.73545,0.33457,0.05327,0.22359,1.73933 95 | Mozambique,Sub-Saharan Africa,94,4.971,0.07896,0.08308,1.02626,0.09131,0.34037,0.15603,0.22269,3.05137 96 | Albania,Central and Eastern Europe,95,4.959,0.05013,0.87867,0.80434,0.81325,0.35733,0.06413,0.14272,1.89894 97 | Bosnia and Herzegovina,Central and Eastern Europe,96,4.949,0.06913,0.83223,0.91916,0.79081,0.09245,0.00227,0.24808,2.06367 98 | Lesotho,Sub-Saharan Africa,97,4.898,0.09438,0.37545,1.04103,0.07612,0.31767,0.12504,0.16388,2.79832 99 | Dominican Republic,Latin America and 
Caribbean,98,4.885,0.07446,0.89537,1.17202,0.66825,0.57672,0.14234,0.21684,1.21305 100 | Laos,Southeastern Asia,99,4.876,0.06698,0.59066,0.73803,0.54909,0.59591,0.24249,0.42192,1.73799 101 | Mongolia,Eastern Asia,100,4.874,0.03313,0.82819,1.3006,0.60268,0.43626,0.02666,0.3323,1.34759 102 | Swaziland,Sub-Saharan Africa,101,4.867,0.08742,0.71206,1.07284,0.07566,0.30658,0.0306,0.18259,2.48676 103 | Greece,Western Europe,102,4.857,0.05062,1.15406,0.92933,0.88213,0.07699,0.01397,0,1.80101 104 | Lebanon,Middle East and Northern Africa,103,4.839,0.04337,1.02564,0.80001,0.83947,0.33916,0.04582,0.21854,1.57059 105 | Hungary,Central and Eastern Europe,104,4.8,0.06107,1.12094,1.20215,0.75905,0.32112,0.02758,0.128,1.24074 106 | Honduras,Latin America and Caribbean,105,4.788,0.05648,0.59532,0.95348,0.6951,0.40148,0.06825,0.23027,1.84408 107 | Tajikistan,Central and Eastern Europe,106,4.786,0.03198,0.39047,0.85563,0.57379,0.47216,0.15072,0.22974,2.11399 108 | Tunisia,Middle East and Northern Africa,107,4.739,0.03589,0.88113,0.60429,0.73793,0.26268,0.06358,0.06431,2.12466 109 | Palestinian Territories,Middle East and Northern Africa,108,4.715,0.04394,0.59867,0.92558,0.66015,0.24499,0.12905,0.11251,2.04384 110 | Bangladesh,Southern Asia,109,4.694,0.03077,0.39753,0.43106,0.60164,0.4082,0.12569,0.21222,2.51767 111 | Iran,Middle East and Northern Africa,110,4.686,0.04449,1.0088,0.54447,0.69805,0.30033,0.05863,0.38086,1.6944 112 | Ukraine,Central and Eastern Europe,111,4.681,0.04412,0.79907,1.20278,0.6739,0.25123,0.02961,0.15275,1.5714 113 | Iraq,Middle East and Northern Africa,112,4.677,0.05232,0.98549,0.81889,0.60237,0,0.13788,0.17922,1.95335 114 | South Africa,Sub-Saharan Africa,113,4.642,0.04585,0.92049,1.18468,0.27688,0.33207,0.08884,0.11973,1.71956 115 | Ghana,Sub-Saharan Africa,114,4.633,0.04742,0.54558,0.67954,0.40132,0.42342,0.04355,0.23087,2.30919 116 | Zimbabwe,Sub-Saharan Africa,115,4.61,0.0429,0.271,1.03276,0.33475,0.25861,0.08079,0.18987,2.44191 117 | Liberia,Sub-Saharan 
Africa,116,4.571,0.11068,0.0712,0.78968,0.34201,0.28531,0.06232,0.24362,2.77729 118 | India,Southern Asia,117,4.565,0.02043,0.64499,0.38174,0.51529,0.39786,0.08492,0.26475,2.27513 119 | Sudan,Sub-Saharan Africa,118,4.55,0.0674,0.52107,1.01404,0.36878,0.10081,0.1466,0.19062,2.20857 120 | Haiti,Latin America and Caribbean,119,4.518,0.07331,0.26673,0.74302,0.38847,0.24425,0.17175,0.46187,2.24173 121 | Congo (Kinshasa),Sub-Saharan Africa,120,4.517,0.0368,0,1.0012,0.09806,0.22605,0.07625,0.24834,2.86712 122 | Nepal,Southern Asia,121,4.514,0.03607,0.35997,0.86449,0.56874,0.38282,0.05907,0.32296,1.95637 123 | Ethiopia,Sub-Saharan Africa,122,4.512,0.0378,0.19073,0.60406,0.44055,0.4345,0.15048,0.24325,2.44876 124 | Sierra Leone,Sub-Saharan Africa,123,4.507,0.07068,0.33024,0.95571,0,0.4084,0.08786,0.21488,2.51009 125 | Mauritania,Sub-Saharan Africa,124,4.436,0.03947,0.45407,0.86908,0.35874,0.24232,0.17461,0.219,2.11773 126 | Kenya,Sub-Saharan Africa,125,4.419,0.04734,0.36471,0.99876,0.41435,0.42215,0.05839,0.37542,1.78555 127 | Djibouti,Sub-Saharan Africa,126,4.369,0.08096,0.44025,0.59207,0.36291,0.46074,0.28105,0.18093,2.05125 128 | Armenia,Central and Eastern Europe,127,4.35,0.04763,0.76821,0.77711,0.7299,0.19847,0.039,0.07855,1.75873 129 | Botswana,Sub-Saharan Africa,128,4.332,0.04934,0.99355,1.10464,0.04776,0.49495,0.12474,0.10461,1.46181 130 | Myanmar,Southeastern Asia,129,4.307,0.04351,0.27108,0.70905,0.48246,0.44017,0.19034,0.79588,1.41805 131 | Georgia,Central and Eastern Europe,130,4.297,0.04221,0.7419,0.38562,0.72926,0.40577,0.38331,0.05547,1.59541 132 | Malawi,Sub-Saharan Africa,131,4.292,0.0613,0.01604,0.41134,0.22562,0.43054,0.06977,0.33128,2.80791 133 | Sri Lanka,Southern Asia,132,4.271,0.03751,0.83524,1.01905,0.70806,0.53726,0.09179,0.40828,0.67108 134 | Cameroon,Sub-Saharan Africa,133,4.252,0.04678,0.4225,0.88767,0.23402,0.49309,0.05786,0.20618,1.95071 135 | Bulgaria,Central and Eastern 
Europe,134,4.218,0.04828,1.01216,1.10614,0.76649,0.30587,0.00872,0.11921,0.89991 136 | Egypt,Middle East and Northern Africa,135,4.194,0.0326,0.8818,0.747,0.61712,0.17288,0.06324,0.11291,1.59927 137 | Yemen,Middle East and Northern Africa,136,4.077,0.04367,0.54649,0.68093,0.40064,0.35571,0.07854,0.09131,1.92313 138 | Angola,Sub-Saharan Africa,137,4.033,0.04758,0.75778,0.8604,0.16683,0.10384,0.07122,0.12344,1.94939 139 | Mali,Sub-Saharan Africa,138,3.995,0.05602,0.26074,1.03526,0.20583,0.38857,0.12352,0.18798,1.79293 140 | Congo (Brazzaville),Sub-Saharan Africa,139,3.989,0.06682,0.67866,0.6629,0.31051,0.41466,0.11686,0.12388,1.68135 141 | Comoros,Sub-Saharan Africa,140,3.956,0.04797,0.23906,0.79273,0.36315,0.22917,0.199,0.17441,1.95812 142 | Uganda,Sub-Saharan Africa,141,3.931,0.04317,0.21102,1.13299,0.33861,0.45727,0.07267,0.29066,1.42766 143 | Senegal,Sub-Saharan Africa,142,3.904,0.03608,0.36498,0.97619,0.4354,0.36772,0.10713,0.20843,1.44395 144 | Gabon,Sub-Saharan Africa,143,3.896,0.04547,1.06024,0.90528,0.43372,0.31914,0.11091,0.06822,0.99895 145 | Niger,Sub-Saharan Africa,144,3.845,0.03602,0.0694,0.77265,0.29707,0.47692,0.15639,0.19387,1.87877 146 | Cambodia,Southeastern Asia,145,3.819,0.05069,0.46038,0.62736,0.61114,0.66246,0.07247,0.40359,0.98195 147 | Tanzania,Sub-Saharan Africa,146,3.781,0.05061,0.2852,1.00268,0.38215,0.32878,0.05747,0.34377,1.38079 148 | Madagascar,Sub-Saharan Africa,147,3.681,0.03633,0.20824,0.66801,0.46721,0.19184,0.08124,0.21333,1.851 149 | Central African Republic,Sub-Saharan Africa,148,3.678,0.06112,0.0785,0,0.06699,0.48879,0.08289,0.23835,2.7223 150 | Chad,Sub-Saharan Africa,149,3.667,0.0383,0.34193,0.76062,0.1501,0.23501,0.05269,0.18386,1.94296 151 | Guinea,Sub-Saharan Africa,150,3.656,0.0359,0.17417,0.46475,0.24009,0.37725,0.12139,0.28657,1.99172 152 | Ivory Coast,Sub-Saharan Africa,151,3.655,0.05141,0.46534,0.77115,0.15185,0.46866,0.17922,0.20165,1.41723 153 | Burkina Faso,Sub-Saharan 
Africa,152,3.587,0.04324,0.25812,0.85188,0.27125,0.39493,0.12832,0.21747,1.46494 154 | Afghanistan,Southern Asia,153,3.575,0.03084,0.31982,0.30285,0.30335,0.23414,0.09719,0.3651,1.9521 155 | Rwanda,Sub-Saharan Africa,154,3.465,0.03464,0.22208,0.7737,0.42864,0.59201,0.55191,0.22628,0.67042 156 | Benin,Sub-Saharan Africa,155,3.34,0.03656,0.28665,0.35386,0.3191,0.4845,0.0801,0.1826,1.63328 157 | Syria,Middle East and Northern Africa,156,3.006,0.05015,0.6632,0.47489,0.72193,0.15684,0.18906,0.47179,0.32858 158 | Burundi,Sub-Saharan Africa,157,2.905,0.08658,0.0153,0.41587,0.22396,0.1185,0.10062,0.19727,1.83302 159 | Togo,Sub-Saharan Africa,158,2.839,0.06727,0.20868,0.13995,0.28443,0.36453,0.10731,0.16681,1.56726 160 | -------------------------------------------------------------------------------- /data/world-happiness/2016.csv: -------------------------------------------------------------------------------- 1 | Country,Region,Happiness Rank,Happiness Score,Lower Confidence Interval,Upper Confidence Interval,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual 2 | Denmark,Western Europe,1,7.526,7.46,7.592,1.44178,1.16374,0.79504,0.57941,0.44453,0.36171,2.73939 3 | Switzerland,Western Europe,2,7.509,7.428,7.59,1.52733,1.14524,0.86303,0.58557,0.41203,0.28083,2.69463 4 | Iceland,Western Europe,3,7.501,7.333,7.669,1.42666,1.18326,0.86733,0.56624,0.14975,0.47678,2.83137 5 | Norway,Western Europe,4,7.498,7.421,7.575,1.57744,1.1269,0.79579,0.59609,0.35776,0.37895,2.66465 6 | Finland,Western Europe,5,7.413,7.351,7.475,1.40598,1.13464,0.81091,0.57104,0.41004,0.25492,2.82596 7 | Canada,North America,6,7.404,7.335,7.473,1.44015,1.0961,0.8276,0.5737,0.31329,0.44834,2.70485 8 | Netherlands,Western Europe,7,7.339,7.284,7.394,1.46468,1.02912,0.81231,0.55211,0.29927,0.47416,2.70749 9 | New Zealand,Australia and New Zealand,8,7.334,7.264,7.404,1.36066,1.17278,0.83096,0.58147,0.41904,0.49401,2.47553 10 | 
Australia,Australia and New Zealand,9,7.313,7.241,7.385,1.44443,1.10476,0.8512,0.56837,0.32331,0.47407,2.5465 11 | Sweden,Western Europe,10,7.291,7.227,7.355,1.45181,1.08764,0.83121,0.58218,0.40867,0.38254,2.54734 12 | Israel,Middle East and Northern Africa,11,7.267,7.199,7.335,1.33766,0.99537,0.84917,0.36432,0.08728,0.32288,3.31029 13 | Austria,Western Europe,12,7.119,7.045,7.193,1.45038,1.08383,0.80565,0.54355,0.21348,0.32865,2.69343 14 | United States,North America,13,7.104,7.02,7.188,1.50796,1.04782,0.779,0.48163,0.14868,0.41077,2.72782 15 | Costa Rica,Latin America and Caribbean,14,7.087,6.999,7.175,1.06879,1.02152,0.76146,0.55225,0.10547,0.22553,3.35168 16 | Puerto Rico,Latin America and Caribbean,15,7.039,6.794,7.284,1.35943,1.08113,0.77758,0.46823,0.12275,0.22202,3.0076 17 | Germany,Western Europe,16,6.994,6.93,7.058,1.44787,1.09774,0.81487,0.53466,0.28551,0.30452,2.50931 18 | Brazil,Latin America and Caribbean,17,6.952,6.875,7.029,1.08754,1.03938,0.61415,0.40425,0.14166,0.15776,3.50733 19 | Belgium,Western Europe,18,6.929,6.861,6.997,1.42539,1.05249,0.81959,0.51354,0.26248,0.2424,2.61355 20 | Ireland,Western Europe,19,6.907,6.836,6.978,1.48341,1.16157,0.81455,0.54008,0.29754,0.44963,2.15988 21 | Luxembourg,Western Europe,20,6.871,6.804,6.938,1.69752,1.03999,0.84542,0.5487,0.35329,0.27571,2.11055 22 | Mexico,Latin America and Caribbean,21,6.778,6.68,6.876,1.11508,0.7146,0.71143,0.37709,0.18355,0.11735,3.55906 23 | Singapore,Southeastern Asia,22,6.739,6.674,6.804,1.64555,0.86758,0.94719,0.4877,0.46987,0.32706,1.99375 24 | United Kingdom,Western Europe,23,6.725,6.647,6.803,1.40283,1.08672,0.80991,0.50036,0.27399,0.50156,2.14999 25 | Chile,Latin America and Caribbean,24,6.705,6.615,6.795,1.2167,0.90587,0.81883,0.37789,0.11451,0.31595,2.95505 26 | Panama,Latin America and Caribbean,25,6.701,6.601,6.801,1.18306,0.98912,0.70835,0.48927,0.08423,0.2418,3.00559 27 | Argentina,Latin America and 
Caribbean,26,6.65,6.56,6.74,1.15137,1.06612,0.69711,0.42284,0.07296,0.10989,3.12985 28 | Czech Republic,Central and Eastern Europe,27,6.596,6.515,6.677,1.30915,1.00793,0.76376,0.41418,0.03986,0.09929,2.96211 29 | United Arab Emirates,Middle East and Northern Africa,28,6.573,6.494,6.652,1.57352,0.87114,0.72993,0.56215,0.35561,0.26591,2.21507 30 | Uruguay,Latin America and Caribbean,29,6.545,6.456,6.634,1.18157,1.03143,0.72183,0.54388,0.21394,0.18056,2.67139 31 | Malta,Western Europe,30,6.488,6.409,6.567,1.30782,1.09879,0.80315,0.54994,0.17554,0.56237,1.99032 32 | Colombia,Latin America and Caribbean,31,6.481,6.384,6.578,1.03032,1.02169,0.59659,0.44735,0.05399,0.15626,3.17471 33 | France,Western Europe,32,6.478,6.397,6.559,1.39488,1.00508,0.83795,0.46562,0.17808,0.1216,2.4744 34 | Thailand,Southeastern Asia,33,6.474,6.396,6.552,1.0893,1.04477,0.64915,0.49553,0.02833,0.58696,2.5796 35 | Saudi Arabia,Middle East and Northern Africa,34,6.379,6.287,6.471,1.48953,0.84829,0.59267,0.37904,0.30008,0.15457,2.61482 36 | Taiwan,Eastern Asia,34,6.379,6.305,6.453,1.39729,0.92624,0.79565,0.32377,0.0663,0.25495,2.61523 37 | Qatar,Middle East and Northern Africa,36,6.375,6.178,6.572,1.82427,0.87964,0.71723,0.56679,0.48049,0.32388,1.58224 38 | Spain,Western Europe,37,6.361,6.288,6.434,1.34253,1.12945,0.87896,0.37545,0.06137,0.17665,2.39663 39 | Algeria,Middle East and Northern Africa,38,6.355,6.227,6.483,1.05266,0.83309,0.61804,0.21006,0.16157,0.07044,3.40904 40 | Guatemala,Latin America and Caribbean,39,6.324,6.213,6.435,0.83454,0.87119,0.54039,0.50379,0.08701,0.28808,3.19863 41 | Suriname,Latin America and Caribbean,40,6.269,6.073,6.465,1.09686,0.77866,0.50933,0.52234,0.12692,0.16665,3.06852 42 | Kuwait,Middle East and Northern Africa,41,6.239,6.154,6.324,1.61714,0.87758,0.63569,0.43166,0.23669,0.15965,2.28085 43 | Bahrain,Middle East and Northern Africa,42,6.218,6.128,6.308,1.44024,0.94397,0.65696,0.47375,0.25772,0.17147,2.27405 44 | Trinidad and Tobago,Latin America and 
Caribbean,43,6.168,5.95,6.386,1.32572,0.98569,0.52608,0.48453,0.01241,0.31935,2.51394 45 | Venezuela,Latin America and Caribbean,44,6.084,5.973,6.195,1.13367,1.03302,0.61904,0.19847,0.08304,0.0425,2.97468 46 | Slovakia,Central and Eastern Europe,45,6.078,5.996,6.16,1.27973,1.08268,0.70367,0.23391,0.02947,0.13837,2.61065 47 | El Salvador,Latin America and Caribbean,46,6.068,5.967,6.169,0.8737,0.80975,0.596,0.37269,0.10613,0.08877,3.22134 48 | Malaysia,Southeastern Asia,47,6.005,5.921,6.089,1.25142,0.88025,0.62366,0.39031,0.09081,0.41474,2.35384 49 | Nicaragua,Latin America and Caribbean,48,5.992,5.877,6.107,0.69384,0.89521,0.65213,0.46582,0.16292,0.29773,2.82428 50 | Uzbekistan,Central and Eastern Europe,49,5.987,5.896,6.078,0.73591,1.1681,0.50163,0.60848,0.28333,0.34326,2.34638 51 | Italy,Western Europe,50,5.977,5.898,6.056,1.35495,1.04167,0.85102,0.18827,0.02556,0.16684,2.34918 52 | Ecuador,Latin America and Caribbean,51,5.976,5.88,6.072,0.97306,0.85974,0.68613,0.4027,0.18037,0.10074,2.77366 53 | Belize,Latin America and Caribbean,52,5.956,5.71,6.202,0.87616,0.68655,0.45569,0.51231,0.10771,0.23684,3.08039 54 | Japan,Eastern Asia,53,5.921,5.85,5.992,1.38007,1.06054,0.91491,0.46761,0.18985,0.10224,1.80584 55 | Kazakhstan,Central and Eastern Europe,54,5.919,5.837,6.001,1.22943,0.95544,0.57386,0.4052,0.11132,0.15011,2.49325 56 | Moldova,Central and Eastern Europe,55,5.897,5.823,5.971,0.69177,0.83132,0.52309,0.25202,0.01903,0.19997,3.38007 57 | Russia,Central and Eastern Europe,56,5.856,5.789,5.923,1.23228,1.05261,0.58991,0.32682,0.03586,0.02736,2.59115 58 | Poland,Central and Eastern Europe,57,5.835,5.749,5.921,1.24585,1.04685,0.69058,0.4519,0.055,0.14443,2.20035 59 | South Korea,Eastern Asia,57,5.835,5.747,5.923,1.35948,0.72194,0.88645,0.25168,0.07716,0.18824,2.35015 60 | Bolivia,Latin America and Caribbean,59,5.822,5.74,5.904,0.79422,0.83779,0.4697,0.50961,0.07746,0.21698,2.91635 61 | Lithuania,Central and Eastern 
Europe,60,5.813,5.734,5.892,1.2692,1.06411,0.64674,0.18929,0.0182,0.02025,2.60525 62 | Belarus,Central and Eastern Europe,61,5.802,5.723,5.881,1.13062,1.04993,0.63104,0.29091,0.17457,0.13942,2.38582 63 | North Cyprus,Western Europe,62,5.771,5.67,5.872,1.31141,0.81826,0.84142,0.43596,0.16578,0.26322,1.93447 64 | Slovenia,Central and Eastern Europe,63,5.768,5.683,5.853,1.29947,1.05613,0.79151,0.53164,0.03635,0.25738,1.79522 65 | Peru,Latin America and Caribbean,64,5.743,5.647,5.839,0.99602,0.81255,0.62994,0.37502,0.05292,0.14527,2.73117 66 | Turkmenistan,Central and Eastern Europe,65,5.658,5.58,5.736,1.08017,1.03817,0.44006,0.37408,0.28467,0.22567,2.21489 67 | Mauritius,Sub-Saharan Africa,66,5.648,5.507,5.789,1.14372,0.75695,0.66189,0.46145,0.05203,0.36951,2.20223 68 | Libya,Middle East and Northern Africa,67,5.615,5.406,5.824,1.06688,0.95076,0.52304,0.40672,0.10339,0.17087,2.39374 69 | Latvia,Central and Eastern Europe,68,5.56,5.486,5.634,1.21788,0.95025,0.63952,0.27996,0.0889,0.17445,2.20859 70 | Cyprus,Western Europe,69,5.546,5.442,5.65,1.31857,0.70697,0.8488,0.29507,0.05228,0.27906,2.04497 71 | Paraguay,Latin America and Caribbean,70,5.538,5.453,5.623,0.89373,1.11111,0.58295,0.46235,0.07396,0.25296,2.16091 72 | Romania,Central and Eastern Europe,71,5.528,5.427,5.629,1.1697,0.72803,0.67602,0.36712,0.00679,0.12889,2.45184 73 | Estonia,Central and Eastern Europe,72,5.517,5.437,5.597,1.27964,1.05163,0.68098,0.41511,0.18519,0.08423,1.81985 74 | Jamaica,Latin America and Caribbean,73,5.51,5.315,5.705,0.89333,0.96372,0.59469,0.43597,0.04294,0.22245,2.35682 75 | Croatia,Central and Eastern Europe,74,5.488,5.402,5.574,1.18649,0.60809,0.70524,0.23907,0.04002,0.18434,2.52462 76 | Hong Kong,Eastern Asia,75,5.458,5.362,5.554,1.5107,0.87021,0.95277,0.48079,0.31647,0.40097,0.92614 77 | Somalia,Sub-Saharan Africa,76,5.44,5.321,5.559,0,0.33613,0.11466,0.56778,0.3118,0.27225,3.83772 78 | Kosovo,Central and Eastern 
Europe,77,5.401,5.308,5.494,0.90145,0.66062,0.54,0.14396,0.06547,0.27992,2.80998 79 | Turkey,Middle East and Northern Africa,78,5.389,5.295,5.483,1.16492,0.87717,0.64718,0.23889,0.12348,0.04707,2.29074 80 | Indonesia,Southeastern Asia,79,5.314,5.237,5.391,0.95104,0.87625,0.49374,0.39237,0.00322,0.56521,2.03171 81 | Jordan,Middle East and Northern Africa,80,5.303,5.187,5.419,0.99673,0.86216,0.60712,0.36023,0.13297,0.14262,2.20142 82 | Azerbaijan,Central and Eastern Europe,81,5.291,5.226,5.356,1.12373,0.76042,0.54504,0.35327,0.17914,0.0564,2.2735 83 | Philippines,Southeastern Asia,82,5.279,5.16,5.398,0.81217,0.87877,0.47036,0.54854,0.11757,0.21674,2.23484 84 | China,Eastern Asia,83,5.245,5.199,5.291,1.0278,0.79381,0.73561,0.44012,0.02745,0.04959,2.17087 85 | Bhutan,Southern Asia,84,5.196,5.138,5.254,0.8527,0.90836,0.49759,0.46074,0.1616,0.48546,1.82916 86 | Kyrgyzstan,Central and Eastern Europe,85,5.185,5.103,5.267,0.56044,0.95434,0.55449,0.40212,0.04762,0.38432,2.28136 87 | Serbia,Central and Eastern Europe,86,5.177,5.083,5.271,1.03437,0.81329,0.6458,0.15718,0.04339,0.20737,2.27539 88 | Bosnia and Herzegovina,Central and Eastern Europe,87,5.163,5.063,5.263,0.93383,0.64367,0.70766,0.09511,0,0.29889,2.48406 89 | Montenegro,Central and Eastern Europe,88,5.161,5.055,5.267,1.07838,0.74173,0.63533,0.15111,0.12721,0.17191,2.25531 90 | Dominican Republic,Latin America and Caribbean,89,5.155,5.037,5.273,1.02787,0.99496,0.57669,0.52259,0.12372,0.21286,1.69626 91 | Morocco,Middle East and Northern Africa,90,5.151,5.058,5.244,0.84058,0.38595,0.59471,0.25646,0.08404,0.04053,2.94891 92 | Hungary,Central and Eastern Europe,91,5.145,5.056,5.234,1.24142,0.93164,0.67608,0.1977,0.04472,0.099,1.95473 93 | Pakistan,Southern Asia,92,5.132,5.038,5.226,0.68816,0.26135,0.40306,0.14622,0.1388,0.31185,3.18286 94 | Lebanon,Middle East and Northern Africa,93,5.129,5.031,5.227,1.12268,0.64184,0.76171,0.26228,0.03061,0.23693,2.07339 95 | Portugal,Western 
Europe,94,5.123,5.03,5.216,1.27607,0.94367,0.79363,0.44727,0.01521,0.11691,1.53015 96 | Macedonia,Central and Eastern Europe,95,5.121,5.017,5.225,1.0193,0.78236,0.64738,0.27668,0.07047,0.23507,2.08947 97 | Vietnam,Southeastern Asia,96,5.061,4.991,5.131,0.74037,0.79117,0.66157,0.55954,0.11556,0.25075,1.9418 98 | Somaliland Region,Sub-Saharan Africa,97,5.057,4.934,5.18,0.25558,0.75862,0.33108,0.3913,0.36794,0.51479,2.43801 99 | Tunisia,Middle East and Northern Africa,98,5.045,4.965,5.125,0.97724,0.43165,0.59577,0.23553,0.0817,0.03936,2.68413 100 | Greece,Western Europe,99,5.033,4.935,5.131,1.24886,0.75473,0.80029,0.05822,0.04127,0,2.12944 101 | Tajikistan,Central and Eastern Europe,100,4.996,4.923,5.069,0.48835,0.75602,0.53119,0.43408,0.13509,0.25998,2.39106 102 | Mongolia,Eastern Asia,101,4.907,4.838,4.976,0.98853,1.08983,0.55469,0.35972,0.03285,0.34539,1.53586 103 | Laos,Southeastern Asia,102,4.876,4.742,5.01,0.68042,0.5497,0.38291,0.52168,0.22423,0.43079,2.08637 104 | Nigeria,Sub-Saharan Africa,103,4.875,4.75,5,0.75216,0.64498,0.05108,0.27854,0.0305,0.23219,2.88586 105 | Honduras,Latin America and Caribbean,104,4.871,4.75,4.992,0.69429,0.75596,0.58383,0.26755,0.06906,0.2044,2.29551 106 | Iran,Middle East and Northern Africa,105,4.813,4.703,4.923,1.11758,0.38857,0.64232,0.22544,0.0557,0.38538,1.99817 107 | Zambia,Sub-Saharan Africa,106,4.795,4.645,4.945,0.61202,0.6376,0.23573,0.42662,0.11479,0.17866,2.58991 108 | Nepal,Southern Asia,107,4.793,4.698,4.888,0.44626,0.69699,0.50073,0.37012,0.07008,0.3816,2.32694 109 | Palestinian Territories,Middle East and Northern Africa,108,4.754,4.649,4.859,0.67024,0.71629,0.56844,0.17744,0.10613,0.11154,2.40364 110 | Albania,Central and Eastern Europe,109,4.655,4.546,4.764,0.9553,0.50163,0.73007,0.31866,0.05301,0.1684,1.92816 111 | Bangladesh,Southern Asia,110,4.643,4.56,4.726,0.54177,0.24749,0.52989,0.39778,0.12583,0.19132,2.60904 112 | Sierra Leone,Sub-Saharan 
Africa,111,4.635,4.505,4.765,0.36485,0.628,0,0.30685,0.08196,0.23897,3.01402 113 | Iraq,Middle East and Northern Africa,112,4.575,4.446,4.704,1.07474,0.59205,0.51076,0.24856,0.13636,0.19589,1.81657 114 | Namibia,Sub-Saharan Africa,113,4.574,4.374,4.774,0.93287,0.70362,0.34745,0.48614,0.10398,0.07795,1.92198 115 | Cameroon,Sub-Saharan Africa,114,4.513,4.417,4.609,0.52497,0.62542,0.12698,0.42736,0.06126,0.2268,2.5198 116 | Ethiopia,Sub-Saharan Africa,115,4.508,4.425,4.591,0.29283,0.37932,0.34578,0.36703,0.1717,0.29522,2.65614 117 | South Africa,Sub-Saharan Africa,116,4.459,4.371,4.547,1.02416,0.96053,0.18611,0.42483,0.08415,0.13656,1.64227 118 | Sri Lanka,Southern Asia,117,4.415,4.322,4.508,0.97318,0.84783,0.62007,0.50817,0.07964,0.46978,0.91681 119 | India,Southern Asia,118,4.404,4.351,4.457,0.74036,0.29247,0.45091,0.40285,0.08722,0.25028,2.18032 120 | Myanmar,Southeastern Asia,119,4.395,4.327,4.463,0.34112,0.69981,0.3988,0.42692,0.20243,0.81971,1.50655 121 | Egypt,Middle East and Northern Africa,120,4.362,4.259,4.465,0.95395,0.49813,0.52116,0.18847,0.10393,0.12706,1.96895 122 | Armenia,Central and Eastern Europe,121,4.36,4.266,4.454,0.86086,0.62477,0.64083,0.14037,0.03616,0.07793,1.97864 123 | Kenya,Sub-Saharan Africa,122,4.356,4.259,4.453,0.52267,0.7624,0.30147,0.40576,0.06686,0.41328,1.88326 124 | Ukraine,Central and Eastern Europe,123,4.324,4.236,4.412,0.87287,1.01413,0.58628,0.12859,0.01829,0.20363,1.50066 125 | Ghana,Sub-Saharan Africa,124,4.276,4.185,4.367,0.63107,0.49353,0.29681,0.40973,0.0326,0.21203,2.2002 126 | Congo (Kinshasa),Sub-Saharan Africa,125,4.272,4.191,4.353,0.05661,0.80676,0.188,0.15602,0.06075,0.25458,2.74924 127 | Georgia,Central and Eastern Europe,126,4.252,4.164,4.34,0.83792,0.19249,0.64035,0.32461,0.3188,0.06786,1.87031 128 | Congo (Brazzaville),Sub-Saharan Africa,127,4.236,4.107,4.365,0.77109,0.47799,0.28212,0.37938,0.09753,0.12077,2.10681 129 | Senegal,Sub-Saharan 
Africa,128,4.219,4.151,4.287,0.44314,0.77416,0.40457,0.31056,0.11681,0.19103,1.97861 130 | Bulgaria,Central and Eastern Europe,129,4.217,4.104,4.33,1.11306,0.92542,0.67806,0.21219,0.00615,0.12793,1.15377 131 | Mauritania,Sub-Saharan Africa,130,4.201,4.127,4.275,0.61391,0.84142,0.28639,0.1268,0.17955,0.22686,1.9263 132 | Zimbabwe,Sub-Saharan Africa,131,4.193,4.101,4.285,0.35041,0.71478,0.1595,0.25429,0.08582,0.18503,2.4427 133 | Malawi,Sub-Saharan Africa,132,4.156,4.041,4.271,0.08709,0.147,0.29364,0.4143,0.07564,0.30968,2.82859 134 | Sudan,Sub-Saharan Africa,133,4.139,3.928,4.35,0.63069,0.81928,0.29759,0,0.10039,0.18077,2.10995 135 | Gabon,Sub-Saharan Africa,134,4.121,4.03,4.212,1.15851,0.72368,0.3494,0.28098,0.09314,0.06244,1.45332 136 | Mali,Sub-Saharan Africa,135,4.073,3.988,4.158,0.31292,0.86333,0.16347,0.27544,0.13647,0.21064,2.11087 137 | Haiti,Latin America and Caribbean,136,4.028,3.893,4.163,0.34097,0.29561,0.27494,0.12072,0.14476,0.47958,2.37116 138 | Botswana,Sub-Saharan Africa,137,3.974,3.875,4.073,1.09426,0.89186,0.34752,0.44089,0.10769,0.12425,0.96741 139 | Comoros,Sub-Saharan Africa,138,3.956,3.86,4.052,0.27509,0.60323,0.29981,0.15412,0.18437,0.1827,2.25632 140 | Ivory Coast,Sub-Saharan Africa,139,3.916,3.826,4.006,0.55507,0.57576,0.04476,0.40663,0.1553,0.20338,1.97478 141 | Cambodia,Southeastern Asia,140,3.907,3.798,4.016,0.55604,0.5375,0.42494,0.58852,0.08092,0.40339,1.31573 142 | Angola,Sub-Saharan Africa,141,3.866,3.753,3.979,0.84731,0.66366,0.04991,0.00589,0.08434,0.12071,2.09459 143 | Niger,Sub-Saharan Africa,142,3.856,3.781,3.931,0.1327,0.6053,0.26162,0.38041,0.17176,0.2097,2.09469 144 | South Sudan,Sub-Saharan Africa,143,3.832,3.596,4.068,0.39394,0.18519,0.15781,0.19662,0.13015,0.25899,2.50929 145 | Chad,Sub-Saharan Africa,144,3.763,3.672,3.854,0.42214,0.63178,0.03824,0.12807,0.04952,0.18667,2.30637 146 | Burkina Faso,Sub-Saharan Africa,145,3.739,3.647,3.831,0.31995,0.63054,0.21297,0.3337,0.12533,0.24353,1.87319 147 | Uganda,Sub-Saharan 
Africa,145,3.739,3.629,3.849,0.34719,0.90981,0.19625,0.43653,0.06442,0.27102,1.51416 148 | Yemen,Middle East and Northern Africa,147,3.724,3.621,3.827,0.57939,0.47493,0.31048,0.2287,0.05892,0.09821,1.97295 149 | Madagascar,Sub-Saharan Africa,148,3.695,3.621,3.769,0.27954,0.46115,0.37109,0.13684,0.07506,0.2204,2.15075 150 | Tanzania,Sub-Saharan Africa,149,3.666,3.561,3.771,0.47155,0.77623,0.357,0.3176,0.05099,0.31472,1.37769 151 | Liberia,Sub-Saharan Africa,150,3.622,3.463,3.781,0.10706,0.50353,0.23165,0.25748,0.04852,0.24063,2.23284 152 | Guinea,Sub-Saharan Africa,151,3.607,3.533,3.681,0.22415,0.3109,0.18829,0.30953,0.1192,0.29914,2.15604 153 | Rwanda,Sub-Saharan Africa,152,3.515,3.444,3.586,0.32846,0.61586,0.31865,0.5432,0.50521,0.23552,0.96819 154 | Benin,Sub-Saharan Africa,153,3.484,3.404,3.564,0.39499,0.10419,0.21028,0.39747,0.06681,0.2018,2.10812 155 | Afghanistan,Southern Asia,154,3.36,3.288,3.432,0.38227,0.11037,0.17344,0.1643,0.07112,0.31268,2.14558 156 | Togo,Sub-Saharan Africa,155,3.303,3.192,3.414,0.28123,0,0.24811,0.34678,0.11587,0.17517,2.1354 157 | Syria,Middle East and Northern Africa,156,3.069,2.936,3.202,0.74719,0.14866,0.62994,0.06912,0.17233,0.48397,0.81789 158 | Burundi,Sub-Saharan Africa,157,2.905,2.732,3.078,0.06831,0.23442,0.15747,0.0432,0.09419,0.2029,2.10404 159 | -------------------------------------------------------------------------------- /data/world-happiness/2017.csv: -------------------------------------------------------------------------------- 1 | "Country","Happiness.Rank","Happiness.Score","Whisker.high","Whisker.low","Economy..GDP.per.Capita.","Family","Health..Life.Expectancy.","Freedom","Generosity","Trust..Government.Corruption.","Dystopia.Residual" 2 | "Norway",1,7.53700017929077,7.59444482058287,7.47955553799868,1.61646318435669,1.53352355957031,0.796666502952576,0.635422587394714,0.36201223731041,0.315963834524155,2.27702665328979 3 | 
"Denmark",2,7.52199983596802,7.58172806486487,7.46227160707116,1.48238301277161,1.55112159252167,0.792565524578094,0.626006722450256,0.355280488729477,0.40077006816864,2.31370735168457 4 | "Iceland",3,7.50400018692017,7.62203047305346,7.38596990078688,1.480633020401,1.6105740070343,0.833552122116089,0.627162635326385,0.475540220737457,0.153526559472084,2.32271528244019 5 | "Switzerland",4,7.49399995803833,7.56177242040634,7.42622749567032,1.56497955322266,1.51691174507141,0.858131289482117,0.620070576667786,0.290549278259277,0.367007285356522,2.2767162322998 6 | "Finland",5,7.4689998626709,7.52754207581282,7.41045764952898,1.44357192516327,1.5402467250824,0.80915766954422,0.617950856685638,0.24548277258873,0.38261154294014,2.4301815032959 7 | "Netherlands",6,7.3769998550415,7.42742584124207,7.32657386884093,1.50394463539124,1.42893922328949,0.810696125030518,0.585384488105774,0.470489829778671,0.282661825418472,2.29480409622192 8 | "Canada",7,7.31599998474121,7.38440283536911,7.24759713411331,1.47920441627502,1.48134899139404,0.83455765247345,0.611100912094116,0.435539722442627,0.287371516227722,2.18726444244385 9 | "New Zealand",8,7.31400012969971,7.3795104418695,7.24848981752992,1.40570604801178,1.54819512367249,0.816759705543518,0.614062130451202,0.500005125999451,0.382816702127457,2.0464563369751 10 | "Sweden",9,7.28399991989136,7.34409487739205,7.22390496239066,1.49438726902008,1.47816216945648,0.830875158309937,0.612924098968506,0.385399252176285,0.384398728609085,2.09753799438477 11 | "Australia",10,7.28399991989136,7.35665122494102,7.2113486148417,1.484414935112,1.51004195213318,0.84388679265976,0.601607382297516,0.477699249982834,0.301183730363846,2.06521081924438 12 | "Israel",11,7.21299982070923,7.27985325649381,7.14614638492465,1.37538242340088,1.37628996372223,0.83840399980545,0.405988603830338,0.330082654953003,0.0852421000599861,2.80175733566284 13 | "Costa 
Rica",12,7.0789999961853,7.16811166629195,6.98988832607865,1.10970628261566,1.41640365123749,0.759509265422821,0.580131649971008,0.214613229036331,0.100106589496136,2.89863920211792 14 | "Austria",13,7.00600004196167,7.07066981211305,6.94133027181029,1.48709726333618,1.4599449634552,0.815328419208527,0.567766189575195,0.316472321748734,0.221060365438461,2.1385064125061 15 | "United States",14,6.99300003051758,7.07465674757957,6.91134331345558,1.54625928401947,1.41992056369781,0.77428662776947,0.505740523338318,0.392578780651093,0.135638788342476,2.2181134223938 16 | "Ireland",15,6.97700023651123,7.04335166752338,6.91064880549908,1.53570663928986,1.55823111534119,0.80978262424469,0.573110342025757,0.42785832285881,0.29838815331459,1.77386903762817 17 | "Germany",16,6.95100021362305,7.00538156926632,6.89661885797977,1.48792338371277,1.47252035140991,0.798950731754303,0.562511384487152,0.336269170045853,0.276731938123703,2.01576995849609 18 | "Belgium",17,6.89099979400635,6.95582075044513,6.82617883756757,1.46378076076508,1.46231269836426,0.818091869354248,0.539770722389221,0.231503337621689,0.251343131065369,2.12421035766602 19 | "Luxembourg",18,6.86299991607666,6.92368609987199,6.80231373228133,1.74194359779358,1.45758366584778,0.845089495182037,0.59662789106369,0.283180981874466,0.31883442401886,1.61951208114624 20 | "United Kingdom",19,6.71400022506714,6.78379176110029,6.64420868903399,1.44163393974304,1.49646008014679,0.805335938930511,0.508190035820007,0.492774158716202,0.265428066253662,1.70414352416992 21 | "Chile",20,6.65199995040894,6.73925056010485,6.56474934071302,1.25278460979462,1.28402495384216,0.819479703903198,0.376895278692245,0.326662421226501,0.0822879821062088,2.50958585739136 22 | "United Arab Emirates",21,6.64799976348877,6.72204730376601,6.57395222321153,1.62634336948395,1.26641023159027,0.726798236370087,0.60834527015686,0.3609419465065,0.324489563703537,1.734703540802 23 | 
"Brazil",22,6.63500022888184,6.72546950161457,6.5445309561491,1.10735321044922,1.43130600452423,0.616552352905273,0.437453746795654,0.16234989464283,0.111092761158943,2.76926708221436 24 | "Czech Republic",23,6.60900020599365,6.68386246263981,6.5341379493475,1.35268235206604,1.43388521671295,0.754444003105164,0.490946173667908,0.0881067588925362,0.0368729270994663,2.45186185836792 25 | "Argentina",24,6.59899997711182,6.69008508607745,6.50791486814618,1.18529546260834,1.44045114517212,0.695137083530426,0.494519203901291,0.109457060694695,0.059739887714386,2.61400532722473 26 | "Mexico",25,6.57800006866455,6.67114890769124,6.48485122963786,1.15318381786346,1.210862159729,0.709978997707367,0.412730008363724,0.120990432798862,0.132774114608765,2.83715486526489 27 | "Singapore",26,6.57200002670288,6.63672306910157,6.50727698430419,1.69227766990662,1.35381436347961,0.949492394924164,0.549840569496155,0.345965981483459,0.46430778503418,1.21636199951172 28 | "Malta",27,6.52699995040894,6.59839677289128,6.45560312792659,1.34327983856201,1.48841166496277,0.821944236755371,0.588767051696777,0.574730575084686,0.153066068887711,1.55686283111572 29 | "Uruguay",28,6.4539999961853,6.54590621769428,6.36209377467632,1.21755969524384,1.41222786903381,0.719216823577881,0.57939225435257,0.175096929073334,0.178061872720718,2.17240953445435 30 | "Guatemala",29,6.4539999961853,6.56687397271395,6.34112601965666,0.872001945972443,1.25558519363403,0.540239989757538,0.531310617923737,0.283488392829895,0.0772232785820961,2.89389109611511 31 | "Panama",30,6.4520001411438,6.55713071614504,6.34686956614256,1.23374843597412,1.37319254875183,0.706156134605408,0.550026834011078,0.21055693924427,0.070983923971653,2.30719995498657 32 | "France",31,6.44199991226196,6.51576780244708,6.36823202207685,1.43092346191406,1.38777685165405,0.844465851783752,0.470222115516663,0.129762306809425,0.172502428293228,2.00595474243164 33 | 
"Thailand",32,6.42399978637695,6.50911685571074,6.33888271704316,1.12786877155304,1.42579245567322,0.647239029407501,0.580200731754303,0.572123110294342,0.0316127352416515,2.03950834274292 34 | "Taiwan Province of China",33,6.42199993133545,6.49459602192044,6.34940384075046,1.43362653255463,1.38456535339355,0.793984234333038,0.361466586589813,0.258360475301743,0.0638292357325554,2.1266074180603 35 | "Spain",34,6.40299987792969,6.4710548453033,6.33494491055608,1.38439786434174,1.53209090232849,0.888960599899292,0.408781230449677,0.190133571624756,0.0709140971302986,1.92775774002075 36 | "Qatar",35,6.375,6.56847681432962,6.18152318567038,1.87076568603516,1.27429687976837,0.710098087787628,0.604130983352661,0.330473870038986,0.439299255609512,1.1454644203186 37 | "Colombia",36,6.35699987411499,6.45202005416155,6.26197969406843,1.07062232494354,1.4021829366684,0.595027923583984,0.477487415075302,0.149014472961426,0.0466687418520451,2.61606812477112 38 | "Saudi Arabia",37,6.3439998626709,6.44416661202908,6.24383311331272,1.53062355518341,1.28667759895325,0.590148329734802,0.449750572443008,0.147616013884544,0.27343225479126,2.0654296875 39 | "Trinidad and Tobago",38,6.16800022125244,6.38153389066458,5.95446655184031,1.36135590076447,1.3802285194397,0.519983291625977,0.518630743026733,0.325296461582184,0.00896481610834599,2.05324745178223 40 | "Kuwait",39,6.10500001907349,6.1919569888711,6.01804304927588,1.63295245170593,1.25969874858856,0.632105708122253,0.496337592601776,0.228289797902107,0.215159550309181,1.64042520523071 41 | "Slovakia",40,6.09800004959106,6.1773484121263,6.01865168705583,1.32539355754852,1.50505924224854,0.712732911109924,0.295817464590073,0.136544480919838,0.0242108516395092,2.09777665138245 42 | "Bahrain",41,6.08699989318848,6.17898906782269,5.99501071855426,1.48841226100922,1.32311046123505,0.653133034706116,0.536746919155121,0.172668486833572,0.257042169570923,1.65614938735962 43 | 
"Malaysia",42,6.08400011062622,6.17997963652015,5.98802058473229,1.29121541976929,1.28464603424072,0.618784427642822,0.402264982461929,0.416608929634094,0.0656007081270218,2.00444889068604 44 | "Nicaragua",43,6.07100009918213,6.18658360034227,5.95541659802198,0.737299203872681,1.28721570968628,0.653095960617065,0.447551846504211,0.301674216985703,0.130687981843948,2.51393055915833 45 | "Ecuador",44,6.00799989700317,6.10584767535329,5.91015211865306,1.00082039833069,1.28616881370544,0.685636222362518,0.4551981985569,0.150112465023994,0.140134647488594,2.29035258293152 46 | "El Salvador",45,6.00299978256226,6.108635122329,5.89736444279552,0.909784495830536,1.18212509155273,0.596018552780151,0.432452529668808,0.0782579854130745,0.0899809598922729,2.7145938873291 47 | "Poland",46,5.97300004959106,6.05390834122896,5.89209175795317,1.29178786277771,1.44571197032928,0.699475347995758,0.520342111587524,0.158465966582298,0.0593078061938286,1.79772281646729 48 | "Uzbekistan",47,5.97100019454956,6.06553757295012,5.876462816149,0.786441087722778,1.54896914958954,0.498272627592087,0.658248662948608,0.415983647108078,0.246528223156929,1.81691360473633 49 | "Italy",48,5.96400022506714,6.04273690596223,5.88526354417205,1.39506661891937,1.44492328166962,0.853144347667694,0.256450712680817,0.17278964817524,0.0280280914157629,1.81331205368042 50 | "Russia",49,5.96299982070923,6.03027490749955,5.89572473391891,1.28177809715271,1.46928238868713,0.547349333763123,0.373783111572266,0.0522638224065304,0.0329628810286522,2.20560741424561 51 | "Belize",50,5.95599985122681,6.19724231779575,5.71475738465786,0.907975316047668,1.08141779899597,0.450191766023636,0.547509372234344,0.240015640854836,0.0965810716152191,2.63195562362671 52 | "Japan",51,5.92000007629395,5.99071944460273,5.84928070798516,1.41691517829895,1.43633782863617,0.913475871086121,0.505625545978546,0.12057276815176,0.163760736584663,1.36322355270386 53 | 
"Lithuania",52,5.90199995040894,5.98266964137554,5.82133025944233,1.31458234786987,1.47351610660553,0.62894994020462,0.234231784939766,0.010164656676352,0.0118656428530812,2.22844052314758 54 | "Algeria",53,5.87200021743774,5.97828643366694,5.76571400120854,1.09186446666718,1.1462174654007,0.617584645748138,0.233335807919502,0.0694366469979286,0.146096110343933,2.56760382652283 55 | "Latvia",54,5.84999990463257,5.92026353821158,5.77973627105355,1.26074862480164,1.40471494197845,0.638566970825195,0.325707912445068,0.153074786067009,0.0738427266478539,1.99365520477295 56 | "South Korea",55,5.83799982070923,5.92255902826786,5.7534406131506,1.40167844295502,1.12827444076538,0.900214076042175,0.257921665906906,0.206674367189407,0.0632826685905457,1.88037800788879 57 | "Moldova",56,5.83799982070923,5.90837083846331,5.76762880295515,0.728870630264282,1.25182557106018,0.589465200901031,0.240729048848152,0.208779126405716,0.0100912861526012,2.80780839920044 58 | "Romania",57,5.82499980926514,5.91969415679574,5.73030546173453,1.21768391132355,1.15009129047394,0.685158312320709,0.457003742456436,0.133519917726517,0.00438790069893003,2.17683148384094 59 | "Bolivia",58,5.82299995422363,5.9039769025147,5.74202300593257,0.833756566047668,1.22761905193329,0.473630249500275,0.558732926845551,0.22556072473526,0.0604777261614799,2.44327902793884 60 | "Turkmenistan",59,5.82200002670288,5.88518087550998,5.75881917789578,1.13077676296234,1.49314916133881,0.437726080417633,0.41827192902565,0.24992498755455,0.259270340204239,1.83290982246399 61 | "Kazakhstan",60,5.81899976730347,5.90364177465439,5.73435775995255,1.28455626964569,1.38436901569366,0.606041550636292,0.437454283237457,0.201964423060417,0.119282886385918,1.78489255905151 62 | "North Cyprus",61,5.80999994277954,5.89736646488309,5.72263342067599,1.3469113111496,1.18630337715149,0.834647238254547,0.471203625202179,0.266845703125,0.155353352427483,1.54915761947632 63 | 
"Slovenia",62,5.75799989700317,5.84222516000271,5.67377463400364,1.3412059545517,1.45251882076263,0.790828227996826,0.572575807571411,0.242649093270302,0.0451289787888527,1.31331729888916 64 | "Peru",63,5.71500015258789,5.81194677859545,5.61805352658033,1.03522527217865,1.21877038478851,0.630166113376617,0.450002878904343,0.126819714903831,0.0470490865409374,2.20726943016052 65 | "Mauritius",64,5.62900018692017,5.72986219167709,5.52813818216324,1.18939554691315,1.20956099033356,0.638007462024689,0.491247326135635,0.360933750867844,0.0421815551817417,1.6975839138031 66 | "Cyprus",65,5.62099981307983,5.71469269931316,5.5273069268465,1.35593807697296,1.13136327266693,0.84471470117569,0.355111539363861,0.271254301071167,0.0412379764020443,1.62124919891357 67 | "Estonia",66,5.61100006103516,5.68813987419009,5.53386024788022,1.32087934017181,1.47667109966278,0.695168316364288,0.479131430387497,0.0988908112049103,0.183248922228813,1.35750865936279 68 | "Belarus",67,5.56899976730347,5.64611424401402,5.49188529059291,1.15655755996704,1.44494521617889,0.637714266777039,0.295400261878967,0.15513750910759,0.156313821673393,1.72323298454285 69 | "Libya",68,5.52500009536743,5.67695380687714,5.37304638385773,1.10180306434631,1.35756433010101,0.520169019699097,0.465733230113983,0.152073666453362,0.0926102101802826,1.83501124382019 70 | "Turkey",69,5.5,5.59486496329308,5.40513503670692,1.19827437400818,1.33775317668915,0.637605607509613,0.300740599632263,0.0466930419206619,0.0996715798974037,1.87927794456482 71 | "Paraguay",70,5.49300003051758,5.57738126963377,5.40861879140139,0.932537317276001,1.50728487968445,0.579250693321228,0.473507791757584,0.224150657653809,0.091065913438797,1.6853334903717 72 | "Hong Kong S.A.R., China",71,5.47200012207031,5.54959417313337,5.39440607100725,1.55167484283447,1.26279091835022,0.943062424659729,0.490968644618988,0.374465793371201,0.293933749198914,0.554633140563965 73 | 
"Philippines",72,5.42999982833862,5.54533505424857,5.31466460242867,0.85769921541214,1.25391757488251,0.468009054660797,0.585214674472809,0.193513423204422,0.0993318930268288,1.97260475158691 74 | "Serbia",73,5.39499998092651,5.49156965613365,5.29843030571938,1.06931757926941,1.25818979740143,0.65078467130661,0.208715528249741,0.220125883817673,0.0409037806093693,1.94708442687988 75 | "Jordan",74,5.33599996566772,5.44841002240777,5.22358990892768,0.991012394428253,1.23908889293671,0.604590058326721,0.418421149253845,0.172170460224152,0.11980327218771,1.79117655754089 76 | "Hungary",75,5.32399988174438,5.40303970918059,5.24496005430818,1.2860119342804,1.34313309192657,0.687763452529907,0.175863519310951,0.0784016624093056,0.0366369374096394,1.71645927429199 77 | "Jamaica",76,5.31099987030029,5.58139872848988,5.04060101211071,0.925579309463501,1.36821806430817,0.641022384166718,0.474307239055634,0.233818337321281,0.0552677810192108,1.61232566833496 78 | "Croatia",77,5.29300022125244,5.39177720457315,5.19422323793173,1.22255623340607,0.96798300743103,0.701288521289825,0.255772292613983,0.248002976179123,0.0431031100451946,1.85449242591858 79 | "Kosovo",78,5.27899980545044,5.36484799548984,5.19315161541104,0.951484382152557,1.13785350322723,0.541452050209045,0.260287940502167,0.319931447505951,0.0574716180562973,2.01054072380066 80 | "China",79,5.27299976348877,5.31927808977663,5.2267214372009,1.08116579055786,1.16083741188049,0.741415500640869,0.472787708044052,0.0288068410009146,0.0227942746132612,1.76493859291077 81 | "Pakistan",80,5.26900005340576,5.35998364135623,5.17801646545529,0.72688353061676,0.672690689563751,0.402047783136368,0.23521526157856,0.315446019172668,0.124348066747189,2.79248929023743 82 | "Indonesia",81,5.26200008392334,5.35288859814405,5.17111156970263,0.995538592338562,1.27444469928741,0.492345720529556,0.443323463201523,0.611704587936401,0.0153171354904771,1.42947697639465 83 | 
"Venezuela",82,5.25,5.3700319455564,5.1299680544436,1.12843120098114,1.43133759498596,0.617144227027893,0.153997123241425,0.0650196298956871,0.0644911229610443,1.78946375846863 84 | "Montenegro",83,5.23699998855591,5.34104444056749,5.13295553654432,1.12112903594971,1.23837649822235,0.667464673519135,0.194989055395126,0.197911024093628,0.0881741940975189,1.72919154167175 85 | "Morocco",84,5.2350001335144,5.31834096476436,5.15165930226445,0.878114581108093,0.774864435195923,0.59771066904068,0.408158332109451,0.0322099551558495,0.0877631828188896,2.45618939399719 86 | "Azerbaijan",85,5.23400020599365,5.29928653523326,5.16871387675405,1.15360176563263,1.15240025520325,0.540775775909424,0.398155838251114,0.0452693402767181,0.180987507104874,1.76248168945312 87 | "Dominican Republic",86,5.23000001907349,5.34906088516116,5.11093915298581,1.07937383651733,1.40241670608521,0.574873745441437,0.55258983373642,0.186967849731445,0.113945253193378,1.31946516036987 88 | "Greece",87,5.22700023651123,5.3252461694181,5.12875430360436,1.28948748111725,1.23941457271576,0.810198903083801,0.0957312509417534,0,0.04328977689147,1.74922156333923 89 | "Lebanon",88,5.22499990463257,5.31888228848577,5.13111752077937,1.07498753070831,1.12962424755096,0.735081076622009,0.288515985012054,0.264450758695602,0.037513829767704,1.69507384300232 90 | "Portugal",89,5.19500017166138,5.28504173308611,5.10495861023665,1.3151752948761,1.36704301834106,0.795843541622162,0.498465299606323,0.0951027125120163,0.0158694516867399,1.10768270492554 91 | "Bosnia and Herzegovina",90,5.18200016021729,5.27633568674326,5.08766463369131,0.982409417629242,1.0693359375,0.705186307430267,0.204403176903725,0.328867495059967,0,1.89217257499695 92 | "Honduras",91,5.18100023269653,5.30158279687166,5.0604176685214,0.730573117733002,1.14394497871399,0.582569479942322,0.348079860210419,0.236188873648643,0.0733454525470734,2.06581115722656 93 | 
"Macedonia",92,5.17500019073486,5.27217263966799,5.07782774180174,1.06457793712616,1.20789301395416,0.644948184490204,0.325905978679657,0.25376096367836,0.0602777935564518,1.6174693107605 94 | "Somalia",93,5.15100002288818,5.24248370990157,5.0595163358748,0.0226431842893362,0.721151351928711,0.113989137113094,0.602126955986023,0.291631311178207,0.282410323619843,3.11748456954956 95 | "Vietnam",94,5.07399988174438,5.14728076457977,5.000718998909,0.788547575473785,1.27749133110046,0.652168989181519,0.571055591106415,0.234968051314354,0.0876332372426987,1.46231865882874 96 | "Nigeria",95,5.07399988174438,5.20950013548136,4.93849962800741,0.783756256103516,1.21577048301697,0.0569157302379608,0.394952565431595,0.230947196483612,0.0261215660721064,2.36539053916931 97 | "Tajikistan",96,5.04099988937378,5.11142559587956,4.970574182868,0.524713635444641,1.27146327495575,0.529235124588013,0.471566706895828,0.248997643589973,0.146377146244049,1.84904932975769 98 | "Bhutan",97,5.01100015640259,5.07933456212282,4.94266575068235,0.885416388511658,1.34012651443481,0.495879292488098,0.501537680625916,0.474054545164108,0.173380389809608,1.14018440246582 99 | "Kyrgyzstan",98,5.00400018692017,5.08991990312934,4.91808047071099,0.596220076084137,1.39423859119415,0.553457796573639,0.454943388700485,0.42858037352562,0.0394391790032387,1.53672313690186 100 | "Nepal",99,4.96199989318848,5.06735607936978,4.85664370700717,0.479820191860199,1.17928326129913,0.504130780696869,0.440305948257446,0.394096165895462,0.0729755461215973,1.8912410736084 101 | "Mongolia",100,4.95499992370605,5.0216795091331,4.88832033827901,1.02723586559296,1.4930112361908,0.557783484458923,0.394143968820572,0.338464230298996,0.0329022891819477,1.11129236221313 102 | "South Africa",101,4.8289999961853,4.92943518772721,4.72856480464339,1.05469870567322,1.38478863239288,0.187080070376396,0.479246735572815,0.139362379908562,0.0725094974040985,1.51090860366821 103 | 
"Tunisia",102,4.80499982833862,4.88436700701714,4.72563264966011,1.00726580619812,0.868351459503174,0.613212049007416,0.289680689573288,0.0496933571994305,0.0867231488227844,1.89025115966797 104 | "Palestinian Territories",103,4.77500009536743,4.88184834256768,4.66815184816718,0.716249227523804,1.15564715862274,0.565666973590851,0.25471106171608,0.114173173904419,0.0892826020717621,1.8788902759552 105 | "Egypt",104,4.7350001335144,4.82513378962874,4.64486647740006,0.989701807498932,0.997471392154694,0.520187258720398,0.282110154628754,0.128631442785263,0.114381365478039,1.70216107368469 106 | "Bulgaria",105,4.71400022506714,4.80369470641017,4.62430574372411,1.1614590883255,1.43437945842743,0.708217680454254,0.289231717586517,0.113177694380283,0.0110515309497714,0.996139287948608 107 | "Sierra Leone",106,4.70900011062622,4.85064333498478,4.56735688626766,0.36842092871666,0.984136044979095,0.00556475389748812,0.318697690963745,0.293040901422501,0.0710951760411263,2.66845989227295 108 | "Cameroon",107,4.69500017166138,4.79654085725546,4.5934594860673,0.564305365085602,0.946018218994141,0.132892116904259,0.430388748645782,0.236298456788063,0.0513066314160824,2.3336455821991 109 | "Iran",108,4.69199991226196,4.79822470769286,4.58577511683106,1.15687310695648,0.711551249027252,0.639333188533783,0.249322608113289,0.387242913246155,0.048761073499918,1.49873495101929 110 | "Albania",109,4.64400005340576,4.75246400639415,4.53553610041738,0.996192753314972,0.803685247898102,0.731159746646881,0.381498634815216,0.201312944293022,0.0398642159998417,1.49044156074524 111 | "Bangladesh",110,4.60799980163574,4.68982165828347,4.52617794498801,0.586682975292206,0.735131740570068,0.533241033554077,0.478356659412384,0.172255352139473,0.123717859387398,1.97873616218567 112 | "Namibia",111,4.57399988174438,4.77035474091768,4.37764502257109,0.964434325695038,1.0984708070755,0.33861181139946,0.520303547382355,0.0771337449550629,0.0931469723582268,1.4818902015686 113 | 
"Kenya",112,4.55299997329712,4.65569159060717,4.45030835598707,0.560479462146759,1.06795072555542,0.309988349676132,0.452763766050339,0.444860309362411,0.0646413192152977,1.6519021987915 114 | "Mozambique",113,4.55000019073486,4.77410232633352,4.3258980551362,0.234305649995804,0.870701014995575,0.106654435396194,0.480791091918945,0.322228103876114,0.179436385631561,2.35565090179443 115 | "Myanmar",114,4.54500007629395,4.61473994642496,4.47526020616293,0.367110550403595,1.12323594093323,0.397522568702698,0.514492034912109,0.838075160980225,0.188816204667091,1.11529040336609 116 | "Senegal",115,4.53499984741211,4.6016037812829,4.46839591354132,0.479309022426605,1.17969191074371,0.409362852573395,0.377922266721725,0.183468893170357,0.115460447967052,1.78964614868164 117 | "Zambia",116,4.51399993896484,4.64410550147295,4.38389437645674,0.636406779289246,1.00318729877472,0.257835894823074,0.461603492498398,0.249580144882202,0.0782135501503944,1.82670545578003 118 | "Iraq",117,4.49700021743774,4.62259140968323,4.37140902519226,1.10271048545837,0.978613197803497,0.501180469989777,0.288555532693863,0.19963726401329,0.107215754687786,1.31890726089478 119 | "Gabon",118,4.46500015258789,4.5573617656529,4.37263853952289,1.1982102394104,1.1556202173233,0.356578588485718,0.312328577041626,0.0437853783369064,0.0760467872023582,1.32291626930237 120 | "Ethiopia",119,4.46000003814697,4.54272867664695,4.377271399647,0.339233845472336,0.86466920375824,0.353409707546234,0.408842742443085,0.312650740146637,0.165455713868141,2.01574373245239 121 | "Sri Lanka",120,4.44000005722046,4.55344719231129,4.32655292212963,1.00985014438629,1.25997638702393,0.625130832195282,0.561213254928589,0.490863561630249,0.0736539661884308,0.419389247894287 122 | "Armenia",121,4.37599992752075,4.46673461228609,4.28526524275541,0.900596737861633,1.00748372077942,0.637524425983429,0.198303267359734,0.0834880918264389,0.0266744215041399,1.5214991569519 123 | 
"India",122,4.31500005722046,4.37152201749384,4.25847809694707,0.792221248149872,0.754372596740723,0.455427616834641,0.469987004995346,0.231538489460945,0.0922268852591515,1.5191171169281 124 | "Mauritania",123,4.29199981689453,4.37716361626983,4.20683601751924,0.648457288742065,1.2720308303833,0.285349279642105,0.0960980430245399,0.201870024204254,0.136957004666328,1.65163731575012 125 | "Congo (Brazzaville)",124,4.29099988937378,4.41005350500345,4.17194627374411,0.808964252471924,0.832044363021851,0.28995743393898,0.435025870800018,0.120852127671242,0.0796181336045265,1.72413563728333 126 | "Georgia",125,4.28599977493286,4.37493396580219,4.19706558406353,0.950612664222717,0.57061493396759,0.649546980857849,0.309410035610199,0.0540088154375553,0.251666635274887,1.50013780593872 127 | "Congo (Kinshasa)",126,4.28000020980835,4.35781083270907,4.20218958690763,0.0921023488044739,1.22902345657349,0.191407024860382,0.235961347818375,0.246455833315849,0.0602413564920425,2.22495865821838 128 | "Mali",127,4.19000005722046,4.26967071101069,4.11032940343022,0.476180493831635,1.28147339820862,0.169365674257278,0.306613743305206,0.183354198932648,0.104970246553421,1.66819095611572 129 | "Ivory Coast",128,4.17999982833862,4.27518256321549,4.08481709346175,0.603048920631409,0.904780030250549,0.0486421696841717,0.447706192731857,0.201237469911575,0.130061775445938,1.84496426582336 130 | "Cambodia",129,4.16800022125244,4.27851781353354,4.05748262897134,0.601765096187592,1.00623834133148,0.429783403873444,0.633375823497772,0.385922968387604,0.0681059509515762,1.04294109344482 131 | "Sudan",130,4.13899993896484,4.34574716508389,3.9322527128458,0.65951669216156,1.21400856971741,0.290920823812485,0.0149958552792668,0.182317450642586,0.089847519993782,1.68706583976746 132 | "Ghana",131,4.11999988555908,4.22270720854402,4.01729256257415,0.667224824428558,0.873664736747742,0.295637726783752,0.423026293516159,0.256923943758011,0.0253363698720932,1.57786750793457 133 | 
"Ukraine",132,4.09600019454956,4.18541010454297,4.00659028455615,0.89465194940567,1.39453756809235,0.575903952121735,0.122974775731564,0.270061463117599,0.0230294708162546,0.814382314682007 134 | "Uganda",133,4.08099985122681,4.19579996705055,3.96619973540306,0.381430715322495,1.12982773780823,0.217632606625557,0.443185955286026,0.325766056776047,0.057069718837738,1.526362657547 135 | "Burkina Faso",134,4.03200006484985,4.12405906438828,3.93994106531143,0.3502277135849,1.04328000545502,0.215844258666039,0.324367851018906,0.250864684581757,0.120328105986118,1.72721290588379 136 | "Niger",135,4.02799987792969,4.11194681972265,3.94405293613672,0.161925330758095,0.993025004863739,0.26850500702858,0.36365869641304,0.228673845529556,0.138572946190834,1.87398338317871 137 | "Malawi",136,3.97000002861023,4.07747881740332,3.86252123981714,0.233442038297653,0.512568831443787,0.315089583396912,0.466914653778076,0.287170469760895,0.0727116540074348,2.08178615570068 138 | "Chad",137,3.93600010871887,4.0347115239501,3.83728869348764,0.438012987375259,0.953855872154236,0.0411347150802612,0.16234202682972,0.216113850474358,0.0535818822681904,2.07123804092407 139 | "Zimbabwe",138,3.875,3.97869964271784,3.77130035728216,0.375846534967422,1.08309590816498,0.196763753890991,0.336384207010269,0.189143493771553,0.0953753814101219,1.59797024726868 140 | "Lesotho",139,3.80800008773804,4.04434397548437,3.5716561999917,0.521021246910095,1.19009518623352,0,0.390661299228668,0.157497271895409,0.119094640016556,1.42983531951904 141 | "Angola",140,3.79500007629395,3.95164193540812,3.63835821717978,0.858428180217743,1.10441195964813,0.0498686656355858,0,0.097926490008831,0.0697203353047371,1.61448240280151 142 | "Afghanistan",141,3.79399991035461,3.87366141527891,3.71433840543032,0.401477217674255,0.581543326377869,0.180746778845787,0.106179520487785,0.311870932579041,0.0611578300595284,2.15080118179321 143 | 
"Botswana",142,3.76600003242493,3.87412266626954,3.65787739858031,1.12209415435791,1.22155499458313,0.341755509376526,0.505196332931519,0.0993484482169151,0.0985831990838051,0.3779137134552 144 | "Benin",143,3.65700006484985,3.74578355133533,3.56821657836437,0.431085407733917,0.435299843549728,0.209930211305618,0.425962775945663,0.207948461174965,0.0609290152788162,1.88563096523285 145 | "Madagascar",144,3.64400005340576,3.71431910589337,3.57368100091815,0.305808693170547,0.913020372390747,0.375223308801651,0.189196765422821,0.208732530474663,0.0672319754958153,1.58461260795593 146 | "Haiti",145,3.6029999256134,3.73471479773521,3.47128505349159,0.368610262870789,0.640449821949005,0.277321130037308,0.0303698573261499,0.489203780889511,0.0998721495270729,1.69716763496399 147 | "Yemen",146,3.59299993515015,3.69275031983852,3.49324955046177,0.591683447360992,0.93538224697113,0.310080915689468,0.249463722109795,0.104125209152699,0.0567674227058887,1.34560060501099 148 | "South Sudan",147,3.59100008010864,3.72553858578205,3.45646157443523,0.39724862575531,0.601323127746582,0.163486003875732,0.147062435746193,0.285670816898346,0.116793513298035,1.87956738471985 149 | "Liberia",148,3.53299999237061,3.65375626087189,3.41224372386932,0.119041793048382,0.872117936611176,0.229918196797371,0.332881182432175,0.26654988527298,0.0389482490718365,1.67328596115112 150 | "Guinea",149,3.50699996948242,3.58442812889814,3.4295718100667,0.244549930095673,0.791244685649872,0.194129139184952,0.348587512969971,0.264815092086792,0.110937617719173,1.55231189727783 151 | "Togo",150,3.49499988555908,3.59403811171651,3.39596165940166,0.305444717407227,0.431882530450821,0.247105568647385,0.38042613863945,0.196896150708199,0.0956650152802467,1.83722925186157 152 | "Rwanda",151,3.47099995613098,3.54303023353219,3.39896967872977,0.368745893239975,0.945707023143768,0.326424807310104,0.581843852996826,0.252756029367447,0.455220013856888,0.540061235427856 153 | 
"Syria",152,3.46199989318848,3.66366855680943,3.26033122956753,0.777153134346008,0.396102607250214,0.50053334236145,0.0815394446253777,0.493663728237152,0.151347130537033,1.06157350540161 154 | "Tanzania",153,3.34899997711182,3.46142975538969,3.23657019883394,0.511135876178741,1.04198980331421,0.364509284496307,0.390017777681351,0.354256361722946,0.0660351067781448,0.621130466461182 155 | "Burundi",154,2.90499997138977,3.07469033300877,2.73530960977077,0.091622568666935,0.629793584346771,0.151610791683197,0.0599007532000542,0.204435184597969,0.0841479450464249,1.68302416801453 156 | "Central African Republic",155,2.69300007820129,2.86488426923752,2.52111588716507,0,0,0.0187726859003305,0.270842045545578,0.280876487493515,0.0565650761127472,2.06600475311279 157 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: pandas_tutorial 2 | 3 | channels: 4 | - defaults 5 | 6 | dependencies: 7 | - python=3.7 8 | - ipykernel 9 | - pip 10 | - pandas 11 | - openpyxl 12 | - scipy 13 | - matplotlib 14 | - pip: 15 | - pyarrow 16 | -------------------------------------------------------------------------------- /solutions/02_read_create_data_solutions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "### ***EXERCISE 2.1***\n", 17 | "Using the pandas documentation, find a way to read the file `data/wine-reviews/exercise_2.1.txt` in the following way:\n", 18 | "- read the file as tab-separated\n", 19 | "- parse the date in the `date of rating` column, in day/month format\n", 20 | "- set `country` as the index\n", 21 | "- skip the 3rd and 4th rows" 22 | ] 23 | 
}, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 2, 27 | "metadata": {}, 28 | "outputs": [ 29 | { 30 | "data": { 31 | "text/html": [ 32 | "
\n", 33 | "\n", 46 | "\n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | "
wine_iddescriptiondesignationpointsdate of rating
country
Italy0Aromas include tropical fruit, broom, brimston...Vulkà Bianco872015-12-01
US3Pineapple rind, lemon pith and orange blossom ...Reserve Late Harvest872015-08-07
US4Much like the regular bottling from 2012, this...Vintner's Reserve Wild Child Block872014-10-17
Spain5Blackberry and raspberry aromas show a typical...Ars In Vitro872015-11-26
\n", 100 | "
" 101 | ], 102 | "text/plain": [ 103 | " wine_id description \\\n", 104 | "country \n", 105 | "Italy 0 Aromas include tropical fruit, broom, brimston... \n", 106 | "US 3 Pineapple rind, lemon pith and orange blossom ... \n", 107 | "US 4 Much like the regular bottling from 2012, this... \n", 108 | "Spain 5 Blackberry and raspberry aromas show a typical... \n", 109 | "\n", 110 | " designation points date of rating \n", 111 | "country \n", 112 | "Italy Vulkà Bianco 87 2015-12-01 \n", 113 | "US Reserve Late Harvest 87 2015-08-07 \n", 114 | "US Vintner's Reserve Wild Child Block 87 2014-10-17 \n", 115 | "Spain Ars In Vitro 87 2015-11-26 " 116 | ] 117 | }, 118 | "execution_count": 2, 119 | "metadata": {}, 120 | "output_type": "execute_result" 121 | } 122 | ], 123 | "source": [ 124 | "pd.read_csv(\n", 125 | " '../data/wine-reviews/exercise_2.1.txt', \n", 126 | " sep='\\t', \n", 127 | " parse_dates=['date of rating'], \n", 128 | " dayfirst=True,\n", 129 | " index_col='country',\n", 130 | " skiprows=[2,3]\n", 131 | ")" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "### ***EXERCISE 2.2***\n", 139 | "Create a DataFrame like the following:\n", 140 | "\n", 141 | "| \t| r \t| g \t| b \t| hex \t|\n", 142 | "|--------\t|-----\t|-----\t|----\t|---------\t|\n", 143 | "| blue \t| 0 \t| 0 \t| 1 \t| #0000ff \t|\n", 144 | "| olive \t| 85 \t| 107 \t| 47 \t| #556B2F \t|\n", 145 | "| sienna \t| 160 \t| 82 \t| 45 \t| #A0522D \t|" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 3, 151 | "metadata": {}, 152 | "outputs": [ 153 | { 154 | "data": { 155 | "text/html": [ 156 | "
\n", 157 | "\n", 170 | "\n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | "
rgbhex
blue001#0000ff
olive8510747#556B2F
sienna1608245#A0522D
\n", 204 | "
" 205 | ], 206 | "text/plain": [ 207 | " r g b hex\n", 208 | "blue 0 0 1 #0000ff\n", 209 | "olive 85 107 47 #556B2F\n", 210 | "sienna 160 82 45 #A0522D" 211 | ] 212 | }, 213 | "execution_count": 3, 214 | "metadata": {}, 215 | "output_type": "execute_result" 216 | } 217 | ], 218 | "source": [ 219 | "pd.DataFrame({\n", 220 | " 'r': {'blue': 0, 'olive': 85, 'sienna': 160},\n", 221 | " 'g': {'blue': 0, 'olive': 107, 'sienna': 82},\n", 222 | " 'b': {'blue': 1, 'olive': 47, 'sienna': 45},\n", 223 | " 'hex': {'blue': '#0000ff', 'olive': '#556B2F', 'sienna': '#A0522D'}\n", 224 | "})" 225 | ] 226 | } 227 | ], 228 | "metadata": { 229 | "kernelspec": { 230 | "display_name": "Python 3", 231 | "language": "python", 232 | "name": "python3" 233 | }, 234 | "language_info": { 235 | "codemirror_mode": { 236 | "name": "ipython", 237 | "version": 3 238 | }, 239 | "file_extension": ".py", 240 | "mimetype": "text/x-python", 241 | "name": "python", 242 | "nbconvert_exporter": "python", 243 | "pygments_lexer": "ipython3", 244 | "version": "3.7.3" 245 | } 246 | }, 247 | "nbformat": 4, 248 | "nbformat_minor": 4 249 | } 250 | -------------------------------------------------------------------------------- /solutions/03_exploring_data_solutions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "\n", 11 | "df = pd.read_csv('../data/wine-reviews/winemag-data-130k-v2.csv', index_col='wine_id')" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "### ***EXERCISE 3.1***\n", 19 | "Get a DataFrame with just `count` and `mean` (first 2 rows) of the numeric columns" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/html": [ 30 | "
\n", 31 | "\n", 44 | "\n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | "
pointsprice
count129971.000000120975.000000
mean88.44713835.363389
\n", 65 | "
" 66 | ], 67 | "text/plain": [ 68 | " points price\n", 69 | "count 129971.000000 120975.000000\n", 70 | "mean 88.447138 35.363389" 71 | ] 72 | }, 73 | "execution_count": 2, 74 | "metadata": {}, 75 | "output_type": "execute_result" 76 | } 77 | ], 78 | "source": [ 79 | "df.describe().head(2)" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "### ***EXERCISE 3.2***\n", 87 | "Show the 6 least common countries (in absolute value) and the 3 most common wine varieties (in percentage)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 3, 93 | "metadata": {}, 94 | "outputs": [ 95 | { 96 | "data": { 97 | "text/plain": [ 98 | "Luxembourg 6\n", 99 | "Armenia 2\n", 100 | "Bosnia and Herzegovina 2\n", 101 | "China 1\n", 102 | "Egypt 1\n", 103 | "Slovakia 1\n", 104 | "Name: country, dtype: int64" 105 | ] 106 | }, 107 | "execution_count": 3, 108 | "metadata": {}, 109 | "output_type": "execute_result" 110 | } 111 | ], 112 | "source": [ 113 | "df['country'].value_counts().tail(6)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 4, 119 | "metadata": {}, 120 | "outputs": [ 121 | { 122 | "data": { 123 | "text/plain": [ 124 | "Pinot Noir 13272\n", 125 | "Chardonnay 11753\n", 126 | "Cabernet Sauvignon 9472\n", 127 | "Name: variety, dtype: int64" 128 | ] 129 | }, 130 | "execution_count": 4, 131 | "metadata": {}, 132 | "output_type": "execute_result" 133 | } 134 | ], 135 | "source": [ 136 | "df['variety'].value_counts().head(3)" 137 | ] 138 | } 139 | ], 140 | "metadata": { 141 | "kernelspec": { 142 | "display_name": "Python 3", 143 | "language": "python", 144 | "name": "python3" 145 | }, 146 | "language_info": { 147 | "codemirror_mode": { 148 | "name": "ipython", 149 | "version": 3 150 | }, 151 | "file_extension": ".py", 152 | "mimetype": "text/x-python", 153 | "name": "python", 154 | "nbconvert_exporter": "python", 155 | "pygments_lexer": "ipython3", 156 | "version": "3.7.3" 157 | } 158 | }, 
159 | "nbformat": 4, 160 | "nbformat_minor": 4 161 | } 162 | -------------------------------------------------------------------------------- /solutions/04_subsetting_data_solutions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "data": { 10 | "text/plain": [ 11 | "Index(['country', 'description', 'designation', 'points', 'price', 'province',\n", 12 | " 'region_1', 'region_2', 'taster_name', 'taster_twitter_handle', 'title',\n", 13 | " 'variety', 'winery'],\n", 14 | " dtype='object')" 15 | ] 16 | }, 17 | "execution_count": 1, 18 | "metadata": {}, 19 | "output_type": "execute_result" 20 | } 21 | ], 22 | "source": [ 23 | "import pandas as pd\n", 24 | "\n", 25 | "df = pd.read_csv('../data/wine-reviews/winemag-data-130k-v2.csv', index_col='wine_id')\n", 26 | "df.columns" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "### ***EXERCISE 4.1***\n", 34 | "Select all the geographical information of the 32nd row in the `df` " 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 2, 40 | "metadata": {}, 41 | "outputs": [ 42 | { 43 | "data": { 44 | "text/plain": [ 45 | "country Italy\n", 46 | "province Sicily & Sardinia\n", 47 | "region_1 Sicilia\n", 48 | "region_2 NaN\n", 49 | "Name: 31, dtype: object" 50 | ] 51 | }, 52 | "execution_count": 2, 53 | "metadata": {}, 54 | "output_type": "execute_result" 55 | } 56 | ], 57 | "source": [ 58 | "geographical_cols = ['country', 'province', 'region_1', 'region_2']\n", 59 | "df.iloc[31].loc[geographical_cols]" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "### ***EXERCISE 4.2***\n", 67 | "Find the wines that meet the following conditions:\n", 68 | " - the taster name starts with \"J\"\n", 69 | " - the score (points) is either higher than 99 or lower than 81\n", 
70 | " - it does not come from France\n", 71 | " - the province contains a \"y\" (anywhere in the name)" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 3, 77 | "metadata": {}, 78 | "outputs": [ 79 | { 80 | "data": { 81 | "text/html": [ 82 | "
\n", 83 | "\n", 96 | "\n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | "
countrydescriptiondesignationpointspriceprovinceregion_1region_2taster_nametaster_twitter_handletitlevarietywinery
wine_id
19124New ZealandA tartly acidic wine, with some sweet molasses...NaN8016.0CanterburyNaNNaNJoe Czerwinski@JoeCzWillow Creek 2000 Pinot Noir (Canterbury)Pinot NoirWillow Creek
39959USSometimes Franc can have a lovely, alluringly ...Estate Bottled8019.0New YorkNorth Fork of Long IslandLong IslandJoe Czerwinski@JoeCzMacari 1997 Estate Bottled Cabernet Franc (Nor...Cabernet FrancMacari
39960USVanilla, butterscotch and some harsher oak-der...Reserve8018.0New YorkNorth Fork of Long IslandLong IslandJoe Czerwinski@JoeCzLaurel Lake 1998 Reserve Chardonnay (North For...ChardonnayLaurel Lake
73865ChileThere's not much point in making a reserve-sty...Prima Reserva8013.0Maipo ValleyNaNNaNJoe Czerwinski@JoeCzDe Martino 1999 Prima Reserva Merlot (Maipo Va...MerlotDe Martino
\n", 198 | "
" 199 | ], 200 | "text/plain": [ 201 | " country description \\\n", 202 | "wine_id \n", 203 | "19124 New Zealand A tartly acidic wine, with some sweet molasses... \n", 204 | "39959 US Sometimes Franc can have a lovely, alluringly ... \n", 205 | "39960 US Vanilla, butterscotch and some harsher oak-der... \n", 206 | "73865 Chile There's not much point in making a reserve-sty... \n", 207 | "\n", 208 | " designation points price province \\\n", 209 | "wine_id \n", 210 | "19124 NaN 80 16.0 Canterbury \n", 211 | "39959 Estate Bottled 80 19.0 New York \n", 212 | "39960 Reserve 80 18.0 New York \n", 213 | "73865 Prima Reserva 80 13.0 Maipo Valley \n", 214 | "\n", 215 | " region_1 region_2 taster_name \\\n", 216 | "wine_id \n", 217 | "19124 NaN NaN Joe Czerwinski \n", 218 | "39959 North Fork of Long Island Long Island Joe Czerwinski \n", 219 | "39960 North Fork of Long Island Long Island Joe Czerwinski \n", 220 | "73865 NaN NaN Joe Czerwinski \n", 221 | "\n", 222 | " taster_twitter_handle \\\n", 223 | "wine_id \n", 224 | "19124 @JoeCz \n", 225 | "39959 @JoeCz \n", 226 | "39960 @JoeCz \n", 227 | "73865 @JoeCz \n", 228 | "\n", 229 | " title variety \\\n", 230 | "wine_id \n", 231 | "19124 Willow Creek 2000 Pinot Noir (Canterbury) Pinot Noir \n", 232 | "39959 Macari 1997 Estate Bottled Cabernet Franc (Nor... Cabernet Franc \n", 233 | "39960 Laurel Lake 1998 Reserve Chardonnay (North For... Chardonnay \n", 234 | "73865 De Martino 1999 Prima Reserva Merlot (Maipo Va... 
Merlot \n", 235 | "\n", 236 | " winery \n", 237 | "wine_id \n", 238 | "19124 Willow Creek \n", 239 | "39959 Macari \n", 240 | "39960 Laurel Lake \n", 241 | "73865 De Martino " 242 | ] 243 | }, 244 | "execution_count": 3, 245 | "metadata": {}, 246 | "output_type": "execute_result" 247 | } 248 | ], 249 | "source": [ 250 | "df[\n", 251 | " df.taster_name.str.startswith('J')\n", 252 | " & ((df.points > 99) | (df.points < 81))\n", 253 | " & (df.country != 'France')\n", 254 | " & ((df.province.str.upper()).str.contains('Y'))\n", 255 | "]" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": {}, 262 | "outputs": [], 263 | "source": [] 264 | } 265 | ], 266 | "metadata": { 267 | "kernelspec": { 268 | "display_name": "Python 3", 269 | "language": "python", 270 | "name": "python3" 271 | }, 272 | "language_info": { 273 | "codemirror_mode": { 274 | "name": "ipython", 275 | "version": 3 276 | }, 277 | "file_extension": ".py", 278 | "mimetype": "text/x-python", 279 | "name": "python", 280 | "nbconvert_exporter": "python", 281 | "pygments_lexer": "ipython3", 282 | "version": "3.7.3" 283 | } 284 | }, 285 | "nbformat": 4, 286 | "nbformat_minor": 4 287 | } 288 | -------------------------------------------------------------------------------- /solutions/05_aggregating_data_solutions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "data": { 10 | "text/plain": [ 11 | "Index(['country', 'description', 'designation', 'points', 'price', 'province',\n", 12 | " 'region_1', 'region_2', 'taster_name', 'taster_twitter_handle', 'title',\n", 13 | " 'variety', 'winery'],\n", 14 | " dtype='object')" 15 | ] 16 | }, 17 | "execution_count": 1, 18 | "metadata": {}, 19 | "output_type": "execute_result" 20 | } 21 | ], 22 | "source": [ 23 | "import pandas as pd\n", 24 | "\n", 25 | "df = 
pd.read_csv('../data/wine-reviews/winemag-data-130k-v2.csv', index_col='wine_id')\n", 26 | "df.columns" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "### ***EXERCISE 5.1***\n", 34 | "Find the taster that has reviewed most wine countries\n", 35 | "\n", 36 | "**HINT**: to count the unique values in a Series you can use the method `nunique`" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 2, 42 | "metadata": {}, 43 | "outputs": [ 44 | { 45 | "data": { 46 | "text/plain": [ 47 | "taster_name\n", 48 | "Anna Lee C. Iijima 18\n", 49 | "Susan Kostrzewa 15\n", 50 | "Jeff Jenssen 14\n", 51 | "Michael Schachner 11\n", 52 | "Joe Czerwinski 11\n", 53 | "Name: country, dtype: int64" 54 | ] 55 | }, 56 | "execution_count": 2, 57 | "metadata": {}, 58 | "output_type": "execute_result" 59 | } 60 | ], 61 | "source": [ 62 | "df.groupby('taster_name')['country'].nunique()\\\n", 63 | " .sort_values(ascending=False).head()\n", 64 | "\n", 65 | "# sort_values and head are just to show the top 5\n", 66 | "# to just get the name of the top one use .idxmax()" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 3, 72 | "metadata": {}, 73 | "outputs": [ 74 | { 75 | "data": { 76 | "text/plain": [ 77 | "taster_name\n", 78 | "Roger Voss 779647.0\n", 79 | "Virginie Boone 443235.0\n", 80 | "Kerin O’Keefe 414248.0\n", 81 | "Michael Schachner 377231.0\n", 82 | "Paul Gregutt 319559.0\n", 83 | "Name: price, dtype: float64" 84 | ] 85 | }, 86 | "execution_count": 3, 87 | "metadata": {}, 88 | "output_type": "execute_result" 89 | } 90 | ], 91 | "source": [ 92 | "df.groupby('taster_name')['price'].sum()\\\n", 93 | " .sort_values(ascending=False).head()" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": {}, 99 | "source": [ 100 | "### ***EXERCISE 5.2***\n", 101 | "Find the most controversial wine variety" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 4, 107 | 
"metadata": {}, 108 | "outputs": [ 109 | { 110 | "data": { 111 | "text/plain": [ 112 | "variety\n", 113 | "Cabernet-Shiraz 7.071068\n", 114 | "Tempranillo-Syrah 6.363961\n", 115 | "Tinta del Toro 5.686241\n", 116 | "Trousseau Gris 5.196152\n", 117 | "Pinot Noir-Syrah 5.000000\n", 118 | "Name: points, dtype: float64" 119 | ] 120 | }, 121 | "execution_count": 4, 122 | "metadata": {}, 123 | "output_type": "execute_result" 124 | } 125 | ], 126 | "source": [ 127 | "df.groupby(['variety'])['points'].std()\\\n", 128 | " .sort_values(ascending=False).head()" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "### ***EXERCISE 5.3***\n", 136 | "Show the maximum point given by each taster for each region2" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 5, 142 | "metadata": {}, 143 | "outputs": [ 144 | { 145 | "data": { 146 | "text/plain": [ 147 | "taster_name region_2 \n", 148 | "Anna Lee C. Iijima Finger Lakes 94\n", 149 | " Long Island 94\n", 150 | " New York Other 91\n", 151 | "Christina Pickard Napa-Sonoma 89\n", 152 | " Sonoma 89\n", 153 | "Jim Gordon California Other 94\n", 154 | " Central Coast 94\n", 155 | " Central Valley 94\n", 156 | " Napa 95\n", 157 | " Napa-Sonoma 94\n", 158 | " North Coast 97\n", 159 | " Sierra Foothills 96\n", 160 | " Sonoma 95\n", 161 | "Joe Czerwinski California Other 91\n", 162 | " Central Coast 89\n", 163 | " Finger Lakes 90\n", 164 | " Long Island 88\n", 165 | " Napa 88\n", 166 | " Napa-Sonoma 87\n", 167 | " New York Other 86\n", 168 | " Oregon Other 90\n", 169 | " Sonoma 91\n", 170 | " Southern Oregon 88\n", 171 | " Willamette Valley 92\n", 172 | "Lauren Buzzeo Finger Lakes 86\n", 173 | " Long Island 86\n", 174 | "Matt Kettmann California Other 95\n", 175 | " Central Coast 97\n", 176 | " Central Valley 89\n", 177 | " Napa 93\n", 178 | " ... 
\n", 179 | "Paul Gregutt Central Valley 82\n", 180 | " Columbia Valley 100\n", 181 | " Napa 90\n", 182 | " Napa-Sonoma 90\n", 183 | " North Coast 88\n", 184 | " Oregon Other 99\n", 185 | " Sierra Foothills 90\n", 186 | " Sonoma 92\n", 187 | " Southern Oregon 94\n", 188 | " Washington Other 97\n", 189 | " Willamette Valley 96\n", 190 | "Roger Voss Central Coast 88\n", 191 | " Napa-Sonoma 91\n", 192 | "Sean P. Sullivan Columbia Valley 97\n", 193 | " Oregon Other 96\n", 194 | " Southern Oregon 92\n", 195 | " Washington Other 95\n", 196 | " Willamette Valley 92\n", 197 | "Susan Kostrzewa Finger Lakes 89\n", 198 | " Long Island 88\n", 199 | " New York Other 86\n", 200 | "Virginie Boone California Other 94\n", 201 | " Central Coast 94\n", 202 | " Central Valley 94\n", 203 | " Napa 99\n", 204 | " Napa-Sonoma 96\n", 205 | " North Coast 96\n", 206 | " Sierra Foothills 97\n", 207 | " Sonoma 98\n", 208 | " South Coast 90\n", 209 | "Name: points, Length: 72, dtype: int64" 210 | ] 211 | }, 212 | "execution_count": 5, 213 | "metadata": {}, 214 | "output_type": "execute_result" 215 | } 216 | ], 217 | "source": [ 218 | "df.groupby(['taster_name', 'region_2'])['points'].max()" 219 | ] 220 | }, 221 | { 222 | "cell_type": "markdown", 223 | "metadata": {}, 224 | "source": [ 225 | "### ***EXERCISE 5.4***\n", 226 | "Create the equivalent of the following using only `pivot_table` and `stack/unstack`:\n", 227 | "```python\n", 228 | "df.groupby(['country', 'variety'])['points'].mean()\n", 229 | "```\n", 230 | "\n", 231 | "**HINT** To get rid of `NaN` values, add `.dropna()` at the end" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": 6, 237 | "metadata": {}, 238 | "outputs": [ 239 | { 240 | "data": { 241 | "text/plain": [ 242 | "True" 243 | ] 244 | }, 245 | "execution_count": 6, 246 | "metadata": {}, 247 | "output_type": "execute_result" 248 | } 249 | ], 250 | "source": [ 251 | "a = df.groupby(['country', 'variety'])['points'].mean()\n", 252 | "b = 
df.pivot_table(index='variety', columns='country', values='points', aggfunc='mean').unstack().dropna()\n", 253 | "a.equals(b)" 254 | ] 255 | } 256 | ], 257 | "metadata": { 258 | "kernelspec": { 259 | "display_name": "Python 3", 260 | "language": "python", 261 | "name": "python3" 262 | }, 263 | "language_info": { 264 | "codemirror_mode": { 265 | "name": "ipython", 266 | "version": 3 267 | }, 268 | "file_extension": ".py", 269 | "mimetype": "text/x-python", 270 | "name": "python", 271 | "nbconvert_exporter": "python", 272 | "pygments_lexer": "ipython3", 273 | "version": "3.7.3" 274 | } 275 | }, 276 | "nbformat": 4, 277 | "nbformat_minor": 4 278 | } 279 | -------------------------------------------------------------------------------- /solutions/07_data_manipulation_solutions-Copy1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "### ***EXERCISE 7.1***\n", 17 | "Take the Dataframe provided below and add a column called 'hello' containing a list of 3 greetings of your choice.\n", 18 | "Then rename it to 'greetings'." 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "data": { 28 | "text/html": [ 29 | "
\n", 30 | "\n", 43 | "\n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | "
NameCatsDogsgreetings
0Bob21hi
1Mary02yo
2John34how do you do?
\n", 77 | "
" 78 | ], 79 | "text/plain": [ 80 | " Name Cats Dogs greetings\n", 81 | "0 Bob 2 1 hi\n", 82 | "1 Mary 0 2 yo\n", 83 | "2 John 3 4 how do you do?" 84 | ] 85 | }, 86 | "execution_count": 2, 87 | "metadata": {}, 88 | "output_type": "execute_result" 89 | } 90 | ], 91 | "source": [ 92 | "df = pd.DataFrame({'Name': ['Bob', 'Mary', 'John'], 'Cats':[2,0,3], 'Dogs':[1,2,4]})\n", 93 | "\n", 94 | "df['hello'] = ['hi', 'yo', 'how do you do?']\n", 95 | "df.rename(columns={'hello': 'greetings'}, inplace=True)\n", 96 | "\n", 97 | "df" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "### ***EXERCISE 7.2***\n", 105 | "Parse the dates and remove the duplicate timestamps of the data below" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 3, 111 | "metadata": {}, 112 | "outputs": [ 113 | { 114 | "data": { 115 | "text/html": [ 116 | "
\n", 117 | "\n", 130 | "\n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | "
012
2016-10-030.4701410.5846661.020962
2016-10-04-0.0923260.3407240.780739
2016-10-05-0.1288322.482028-2.250360
\n", 160 | "
" 161 | ], 162 | "text/plain": [ 163 | " 0 1 2\n", 164 | "2016-10-03 0.470141 0.584666 1.020962\n", 165 | "2016-10-04 -0.092326 0.340724 0.780739\n", 166 | "2016-10-05 -0.128832 2.482028 -2.250360" 167 | ] 168 | }, 169 | "execution_count": 3, 170 | "metadata": {}, 171 | "output_type": "execute_result" 172 | } 173 | ], 174 | "source": [ 175 | "df = pd.DataFrame({i: pd.np.random.randn(6) for i in range(3)}, index=[f'D0{i}M10Y2016' for i in (3,4,3,4,5,3)])\n", 176 | "df.index = pd.to_datetime(df.index, format='D%dM%mY%Y')\n", 177 | "df=df[~df.index.duplicated()]\n", 178 | "df" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "### ***EXERCISE 7.3***\n", 186 | "Create and apply a function over the rows that returns the square of the number of cats if the number of dogs is even, and 0 otherwise" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 4, 192 | "metadata": {}, 193 | "outputs": [ 194 | { 195 | "data": { 196 | "text/plain": [ 197 | "0 0\n", 198 | "1 0\n", 199 | "2 9\n", 200 | "dtype: int64" 201 | ] 202 | }, 203 | "execution_count": 4, 204 | "metadata": {}, 205 | "output_type": "execute_result" 206 | } 207 | ], 208 | "source": [ 209 | "df = pd.DataFrame({'Name': ['Bob', 'Mary', 'John'], 'Cats':[2,0,3], 'Dogs':[1,2,4]})\n", 210 | "def squared_or_zero_cats(row):\n", 211 | " return row['Cats']**2 if row['Dogs']%2==0 else 0\n", 212 | "\n", 213 | "df.apply(squared_or_zero_cats, axis=1)" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": null, 219 | "metadata": {}, 220 | "outputs": [], 221 | "source": [] 222 | } 223 | ], 224 | "metadata": { 225 | "kernelspec": { 226 | "display_name": "DataScience", 227 | "language": "python", 228 | "name": "ds_env" 229 | }, 230 | "language_info": { 231 | "codemirror_mode": { 232 | "name": "ipython", 233 | "version": 3 234 | }, 235 | "file_extension": ".py", 236 | "mimetype": "text/x-python", 237 | "name": "python", 238 | "nbconvert_exporter": 
"python", 239 | "pygments_lexer": "ipython3", 240 | "version": "3.7.4" 241 | } 242 | }, 243 | "nbformat": 4, 244 | "nbformat_minor": 4 245 | } 246 | -------------------------------------------------------------------------------- /solutions/08_missing_data_solutions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "### ***EXERCISE 8.1***\n", 18 | "Prove that the main pandas operations ignore missing data by creating a copy of the `s1` Series provided below, with all NaN values removed. Name the amended copy `s2`.\n", 19 | "Compare the `.mean()` results of the two Series.\n", 20 | "\n", 21 | "***HINT***: the same methods used for the dataframes above work for Series" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 2, 27 | "metadata": {}, 28 | "outputs": [ 29 | { 30 | "name": "stdout", 31 | "output_type": "stream", 32 | "text": [ 33 | "0 1.0\n", 34 | "1 2.0\n", 35 | "2 NaN\n", 36 | "3 4.0\n", 37 | "dtype: float64 \n", 38 | "sums up to: 2.3333333333333335 \n", 39 | "\n", 40 | " 0 1.0\n", 41 | "1 2.0\n", 42 | "3 4.0\n", 43 | "dtype: float64 \n", 44 | "sums up to: 2.3333333333333335\n" 45 | ] 46 | } 47 | ], 48 | "source": [ 49 | "s1 = pd.Series([1,2,np.nan,4])\n", 50 | "s2 = s1.dropna()\n", 51 | "\n", 52 | "print(s1, '\\nsums up to:', s1.mean(), '\\n\\n', s2, '\\nsums up to:', s2.mean())" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "### ***EXERCISE 8.2***\n", 60 | "Get the sum total of all the values in the `df` provided, only including rows where either 'quality1' or 'quality2' is not missing." 
61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 3, 66 | "metadata": {}, 67 | "outputs": [ 68 | { 69 | "data": { 70 | "text/plain": [ 71 | "30" 72 | ] 73 | }, 74 | "execution_count": 3, 75 | "metadata": {}, 76 | "output_type": "execute_result" 77 | } 78 | ], 79 | "source": [ 80 | "df = pd.DataFrame({\n", 81 | " 'quality1': [100,92,30,np.nan,np.nan,15], \n", 82 | " 'value':[7,4,8,1,9,2],\n", 83 | " 'quality2': [89,88,np.nan,np.nan,1,100], \n", 84 | "})\n", 85 | "\n", 86 | "df.dropna(how='all', subset=['quality1', 'quality2'])['value'].sum()" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [] 95 | } 96 | ], 97 | "metadata": { 98 | "kernelspec": { 99 | "display_name": "DataScience", 100 | "language": "python", 101 | "name": "ds_env" 102 | }, 103 | "language_info": { 104 | "codemirror_mode": { 105 | "name": "ipython", 106 | "version": 3 107 | }, 108 | "file_extension": ".py", 109 | "mimetype": "text/x-python", 110 | "name": "python", 111 | "nbconvert_exporter": "python", 112 | "pygments_lexer": "ipython3", 113 | "version": "3.7.4" 114 | } 115 | }, 116 | "nbformat": 4, 117 | "nbformat_minor": 4 118 | } 119 | -------------------------------------------------------------------------------- /solutions/09_extra_tips_solutions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "### ***EXERCISE 9.1***\n", 17 | "Using the `df` provided below, get the mean score of people whose name starts with 'J'" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": {}, 24 | "outputs": [ 25 | { 26 | "data": { 27 | "text/plain": [ 28 | "5.4" 29 | ] 30 | }, 31 | 
"execution_count": 2, 32 | "metadata": {}, 33 | "output_type": "execute_result" 34 | } 35 | ], 36 | "source": [ 37 | "df = pd.DataFrame({\n", 38 | " 'name': ['John', 'Albert', 'Jack', 'Josef', 'Bob', 'Juliette', 'Mary', 'Jane'], \n", 39 | " 'score': [5,8,6,4,8,7,3,5]\n", 40 | "})\n", 41 | "\n", 42 | "df.loc[df['name'].str.contains('J'), 'score'].mean()" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [] 51 | } 52 | ], 53 | "metadata": { 54 | "kernelspec": { 55 | "display_name": "DataScience", 56 | "language": "python", 57 | "name": "ds_env" 58 | }, 59 | "language_info": { 60 | "codemirror_mode": { 61 | "name": "ipython", 62 | "version": 3 63 | }, 64 | "file_extension": ".py", 65 | "mimetype": "text/x-python", 66 | "name": "python", 67 | "nbconvert_exporter": "python", 68 | "pygments_lexer": "ipython3", 69 | "version": "3.7.4" 70 | } 71 | }, 72 | "nbformat": 4, 73 | "nbformat_minor": 4 74 | } 75 | --------------------------------------------------------------------------------