├── .DS_Store ├── .gitignore ├── 00_sources └── poster1.png ├── 00_Books and Guides └── MS Excell for Data Analysis.pdf ├── 02_pandas_tips&tricks ├── pandas_tips_and_tricks │ ├── kashti.xlsx │ ├── tips_save.xlsx │ ├── excel ka data.csv │ └── tips_save.csv ├── test.py ├── Excercises │ ├── 07_grouping.ipynb │ ├── 08_grouping.ipynb │ ├── 04_filtering_and_sorting.ipynb │ ├── 11_apply.ipynb │ ├── 03_Know_your_Data.ipynb │ ├── 10_apply.ipynb │ ├── 09_grouping.ipynb │ ├── 05_filtering_and_sorting.ipynb │ ├── 02_Know_your_Data.ipynb │ ├── 01_Know_your_Data.ipynb │ └── 06_filtering_and_sorting.ipynb ├── Solutions │ ├── Food_Ananlysis_Report.md │ ├── 07_grouping.ipynb │ ├── 03_Know_your_Data.ipynb │ ├── 08_grouping.ipynb │ ├── 09_grouping.ipynb │ └── 01_Know_your_Data.ipynb └── tips.csv ├── 01_Introduction └── introduction.ipynb ├── README.md └── LICENSE /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AammarTufail/pythonkachilla_version2/HEAD/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | intro.md 2 | Contents to be covered.docx 3 | /02_pandas_tips&tricks/Excercises/exercises_templates/* -------------------------------------------------------------------------------- /00_sources/poster1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AammarTufail/pythonkachilla_version2/HEAD/00_sources/poster1.png -------------------------------------------------------------------------------- /00_Books and Guides/MS Excell for Data Analysis.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AammarTufail/pythonkachilla_version2/HEAD/00_Books and Guides/MS Excell for Data Analysis.pdf 
-------------------------------------------------------------------------------- /02_pandas_tips&tricks/pandas_tips_and_tricks/kashti.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AammarTufail/pythonkachilla_version2/HEAD/02_pandas_tips&tricks/pandas_tips_and_tricks/kashti.xlsx -------------------------------------------------------------------------------- /02_pandas_tips&tricks/pandas_tips_and_tricks/tips_save.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AammarTufail/pythonkachilla_version2/HEAD/02_pandas_tips&tricks/pandas_tips_and_tricks/tips_save.xlsx -------------------------------------------------------------------------------- /02_pandas_tips&tricks/pandas_tips_and_tricks/excel ka data.csv: -------------------------------------------------------------------------------- 1 | ,"pd.DataFrame([[1,","'12345',","'factory'],","[2,","'34567',","'warehouse']]," 2 | 0,"columns=['user_id',","'zip',",'location_type']),,, 3 | -------------------------------------------------------------------------------- /02_pandas_tips&tricks/test.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import seaborn as sns 5 | 6 | tips = sns.load_dataset('tips') 7 | sns.lineplot(x='day', y='total_bill', data=tips) -------------------------------------------------------------------------------- /01_Introduction/introduction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [] 9 | } 10 | ], 11 | "metadata": { 12 | "language_info": { 13 | "name": "python" 14 | }, 15 | "orig_nbformat": 4 16 | }, 17 | "nbformat": 4, 18 | "nbformat_minor": 2 19 | } 20 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pythonkachilla_version2 2 | This repository contains materials and files for Python ka chilla version 2.0 (version means course ka version not of python). 3 | This is an advance course led by Dr Aammar Tufail (Python for Data Science in Urdu language) 4 | ## youtube channel: [Codanics](https://www.youtube.com/c/Codanics) 5 | 6 | ### Playlist for this course [Click here](https://youtube.com/playlist?list=PL9XvIvvVL50EyRNp6fnYwMve1CJqJCHj8) 7 | 8 | ![Poster](00_sources/poster1.png) -------------------------------------------------------------------------------- /02_pandas_tips&tricks/Excercises/07_grouping.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Ex - GroupBy" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Introduction:\n", 15 | "\n", 16 | "GroupBy can be summarized as Split-Apply-Combine.\n", 17 | "\n", 18 | "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n", 19 | "\n", 20 | "Check out this [Diagram](http://i.imgur.com/yjNkiwL.png) \n", 21 | "### Step 1. Import the necessary libraries" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/drinks.csv). " 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "### Step 3. Assign it to a variable called drinks." 
43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "### Step 4. Which continent drinks more beer on average?" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "### Step 5. For each continent print the statistics for wine consumption." 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "### Step 6. Print the mean alcohol consumption per continent for every column" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "### Step 7. Print the median alcohol consumption per continent for every column" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "### Step 8. 
Print the mean, min and max values for spirit consumption.\n", 113 | "#### This time output a DataFrame" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [] 122 | } 123 | ], 124 | "metadata": { 125 | "kernelspec": { 126 | "display_name": "Python 3.9.7 ('base')", 127 | "language": "python", 128 | "name": "python3" 129 | }, 130 | "language_info": { 131 | "codemirror_mode": { 132 | "name": "ipython", 133 | "version": 2 134 | }, 135 | "file_extension": ".py", 136 | "mimetype": "text/x-python", 137 | "name": "python", 138 | "nbconvert_exporter": "python", 139 | "pygments_lexer": "ipython2", 140 | "version": "3.9.7" 141 | }, 142 | "vscode": { 143 | "interpreter": { 144 | "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6" 145 | } 146 | } 147 | }, 148 | "nbformat": 4, 149 | "nbformat_minor": 0 150 | } 151 | -------------------------------------------------------------------------------- /02_pandas_tips&tricks/Excercises/08_grouping.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Occupation" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Introduction:\n", 15 | "\n", 16 | "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n", 17 | "\n", 18 | "### Step 1. Import the necessary libraries" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [], 28 | "source": [] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/u.user). 
" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "### Step 3. Assign it to a variable called users." 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": { 48 | "collapsed": false 49 | }, 50 | "outputs": [], 51 | "source": [] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "### Step 4. Discover what is the mean age per occupation" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": { 64 | "collapsed": false 65 | }, 66 | "outputs": [], 67 | "source": [] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "### Step 5. Discover the Male ratio per occupation and sort it from the most to the least" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": { 80 | "collapsed": false 81 | }, 82 | "outputs": [], 83 | "source": [] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "### Step 6. For each occupation, calculate the minimum and maximum ages" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": { 96 | "collapsed": false 97 | }, 98 | "outputs": [], 99 | "source": [] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "### Step 7. For each combination of occupation and gender, calculate the mean age" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": { 112 | "collapsed": false 113 | }, 114 | "outputs": [], 115 | "source": [] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "### Step 8. 
For each occupation present the percentage of women and men" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": { 128 | "collapsed": false 129 | }, 130 | "outputs": [], 131 | "source": [] 132 | } 133 | ], 134 | "metadata": { 135 | "kernelspec": { 136 | "display_name": "Python 3.9.7 ('base')", 137 | "language": "python", 138 | "name": "python3" 139 | }, 140 | "language_info": { 141 | "codemirror_mode": { 142 | "name": "ipython", 143 | "version": 2 144 | }, 145 | "file_extension": ".py", 146 | "mimetype": "text/x-python", 147 | "name": "python", 148 | "nbconvert_exporter": "python", 149 | "pygments_lexer": "ipython2", 150 | "version": "3.9.7" 151 | }, 152 | "vscode": { 153 | "interpreter": { 154 | "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6" 155 | } 156 | } 157 | }, 158 | "nbformat": 4, 159 | "nbformat_minor": 0 160 | } 161 | -------------------------------------------------------------------------------- /02_pandas_tips&tricks/Excercises/04_filtering_and_sorting.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Ex1 - Filtering and Sorting Data" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Step 1. Import the necessary libraries" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": { 21 | "collapsed": false 22 | }, 23 | "outputs": [], 24 | "source": [] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). " 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "### Step 3. Assign it to a variable called chipo." 
38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": { 44 | "collapsed": false 45 | }, 46 | "outputs": [], 47 | "source": [] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "### Step 4. How many products cost more than $10.00?" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": { 60 | "collapsed": false 61 | }, 62 | "outputs": [], 63 | "source": [] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "### Step 5. What is the price of each item? \n", 70 | "###### print a data frame with only two columns item_name and item_price" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": { 77 | "collapsed": false 78 | }, 79 | "outputs": [], 80 | "source": [] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "### Step 6. Sort by the name of the item" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": { 93 | "collapsed": false 94 | }, 95 | "outputs": [], 96 | "source": [] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "### Step 7. What was the quantity of the most expensive item ordered?" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": { 109 | "collapsed": false 110 | }, 111 | "outputs": [], 112 | "source": [] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "### Step 8. How many times was a Veggie Salad Bowl ordered?" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": { 125 | "collapsed": false 126 | }, 127 | "outputs": [], 128 | "source": [] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "### Step 9. 
How many times did someone order more than one Canned Soda?" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": { 141 | "collapsed": false 142 | }, 143 | "outputs": [], 144 | "source": [] 145 | } 146 | ], 147 | "metadata": { 148 | "kernelspec": { 149 | "display_name": "Python 3.9.7 ('base')", 150 | "language": "python", 151 | "name": "python3" 152 | }, 153 | "language_info": { 154 | "codemirror_mode": { 155 | "name": "ipython", 156 | "version": 2 157 | }, 158 | "file_extension": ".py", 159 | "mimetype": "text/x-python", 160 | "name": "python", 161 | "nbconvert_exporter": "python", 162 | "pygments_lexer": "ipython2", 163 | "version": "3.9.7" 164 | }, 165 | "vscode": { 166 | "interpreter": { 167 | "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6" 168 | } 169 | } 170 | }, 171 | "nbformat": 4, 172 | "nbformat_minor": 0 173 | } 174 | -------------------------------------------------------------------------------- /02_pandas_tips&tricks/Excercises/11_apply.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# United States - Crime Rates - 1960 - 2014" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Introduction:\n", 15 | "\n", 16 | "This time you will create a data \n", 17 | "\n", 18 | "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n", 19 | "\n", 20 | "### Step 1. Import the necessary libraries" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": { 27 | "collapsed": false 28 | }, 29 | "outputs": [], 30 | "source": [] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "### Step 2. 
Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/04_Apply/US_Crime_Rates/US_Crime_Rates_1960_2014.csv). " 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "### Step 3. Assign it to a variable called crime." 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": { 50 | "collapsed": false 51 | }, 52 | "outputs": [], 53 | "source": [] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "### Step 4. What is the type of the columns?" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": { 66 | "collapsed": false 67 | }, 68 | "outputs": [], 69 | "source": [] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "##### Have you noticed that the type of Year is int64. But pandas has a different type to work with Time Series. Let's see it now.\n", 76 | "\n", 77 | "### Step 5. Convert the type of the column Year to datetime64" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": { 84 | "collapsed": false 85 | }, 86 | "outputs": [], 87 | "source": [] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "### Step 6. Set the Year column as the index of the dataframe" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": { 100 | "collapsed": false 101 | }, 102 | "outputs": [], 103 | "source": [] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "### Step 7. Delete the Total column" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": { 116 | "collapsed": false 117 | }, 118 | "outputs": [], 119 | "source": [] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "### Step 8. 
Group the year by decades and sum the values\n", 126 | "\n", 127 | "#### Pay attention to the Population column number, summing this column is a mistake" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": { 134 | "collapsed": false, 135 | "scrolled": true 136 | }, 137 | "outputs": [], 138 | "source": [] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "### Step 9. What is the most dangerous decade to live in the US?" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": { 151 | "collapsed": false 152 | }, 153 | "outputs": [], 154 | "source": [] 155 | } 156 | ], 157 | "metadata": { 158 | "anaconda-cloud": {}, 159 | "kernelspec": { 160 | "display_name": "Python [default]", 161 | "language": "python", 162 | "name": "python2" 163 | }, 164 | "language_info": { 165 | "codemirror_mode": { 166 | "name": "ipython", 167 | "version": 2 168 | }, 169 | "file_extension": ".py", 170 | "mimetype": "text/x-python", 171 | "name": "python", 172 | "nbconvert_exporter": "python", 173 | "pygments_lexer": "ipython2", 174 | "version": "2.7.12" 175 | } 176 | }, 177 | "nbformat": 4, 178 | "nbformat_minor": 0 179 | } 180 | -------------------------------------------------------------------------------- /02_pandas_tips&tricks/Excercises/03_Know_your_Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Ex3 - Getting and Knowing your Data" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Step 1. Go to https://www.kaggle.com/openfoodfacts/world-food-facts/data" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "### Step 2. Download the dataset to your computer and unzip it." 
22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "### Step 3. Use the tsv file and assign it to a dataframe called food" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": { 35 | "collapsed": true 36 | }, 37 | "outputs": [], 38 | "source": [] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "### Step 4. See the first 5 entries" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": { 51 | "collapsed": true 52 | }, 53 | "outputs": [], 54 | "source": [] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "### Step 5. What is the number of observations in the dataset?" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": { 67 | "collapsed": true 68 | }, 69 | "outputs": [], 70 | "source": [] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "### Step 6. What is the number of columns in the dataset?" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": { 83 | "collapsed": true 84 | }, 85 | "outputs": [], 86 | "source": [] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "### Step 7. Print the name of all the columns." 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": { 99 | "collapsed": true 100 | }, 101 | "outputs": [], 102 | "source": [] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "### Step 8. What is the name of 105th column?" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": { 115 | "collapsed": true 116 | }, 117 | "outputs": [], 118 | "source": [] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "### Step 9. 
What is the type of the observations of the 105th column?" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "metadata": { 131 | "collapsed": true 132 | }, 133 | "outputs": [], 134 | "source": [] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": {}, 139 | "source": [ 140 | "### Step 10. How is the dataset indexed?" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": { 147 | "collapsed": true 148 | }, 149 | "outputs": [], 150 | "source": [] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "metadata": {}, 155 | "source": [ 156 | "### Step 11. What is the product name of the 19th observation?" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": { 163 | "collapsed": true 164 | }, 165 | "outputs": [], 166 | "source": [] 167 | } 168 | ], 169 | "metadata": { 170 | "anaconda-cloud": {}, 171 | "kernelspec": { 172 | "display_name": "Python 3.9.7 ('base')", 173 | "language": "python", 174 | "name": "python3" 175 | }, 176 | "language_info": { 177 | "codemirror_mode": { 178 | "name": "ipython", 179 | "version": 2 180 | }, 181 | "file_extension": ".py", 182 | "mimetype": "text/x-python", 183 | "name": "python", 184 | "nbconvert_exporter": "python", 185 | "pygments_lexer": "ipython2", 186 | "version": "3.9.7" 187 | }, 188 | "vscode": { 189 | "interpreter": { 190 | "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6" 191 | } 192 | } 193 | }, 194 | "nbformat": 4, 195 | "nbformat_minor": 0 196 | } 197 | -------------------------------------------------------------------------------- /02_pandas_tips&tricks/Excercises/10_apply.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Student Alcohol Consumption" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 
13 | "source": [ 14 | "### Introduction:\n", 15 | "\n", 16 | "This time you will download a dataset from the UCI.\n", 17 | "\n", 18 | "### Step 1. Import the necessary libraries" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [], 28 | "source": [] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/04_Apply/Students_Alcohol_Consumption/student-mat.csv)." 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "### Step 3. Assign it to a variable called df." 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": { 48 | "collapsed": false 49 | }, 50 | "outputs": [], 51 | "source": [] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "### Step 4. For the purpose of this exercise slice the dataframe from 'school' until the 'guardian' column" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": { 64 | "collapsed": false 65 | }, 66 | "outputs": [], 67 | "source": [] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "### Step 5. Create a lambda function that will capitalize strings." 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": { 80 | "collapsed": false 81 | }, 82 | "outputs": [], 83 | "source": [] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "### Step 6. 
Capitalize both Mjob and Fjob" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": { 96 | "collapsed": false 97 | }, 98 | "outputs": [], 99 | "source": [] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "### Step 7. Print the last elements of the data set." 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": { 112 | "collapsed": false 113 | }, 114 | "outputs": [], 115 | "source": [] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "### Step 8. Did you notice the original dataframe is still lowercase? Why is that? Fix it and capitalize Mjob and Fjob." 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": { 128 | "collapsed": false 129 | }, 130 | "outputs": [], 131 | "source": [] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "### Step 9. Create a function called majority that returns a boolean value to a new column called legal_drinker (Consider majority as older than 17 years old)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": { 144 | "collapsed": false 145 | }, 146 | "outputs": [], 147 | "source": [] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "metadata": { 153 | "collapsed": false 154 | }, 155 | "outputs": [], 156 | "source": [] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": {}, 161 | "source": [ 162 | "### Step 10. Multiply every number of the dataset by 10. 
\n", 163 | "##### I know this makes no sense, don't forget it is just an exercise" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": { 170 | "collapsed": false 171 | }, 172 | "outputs": [], 173 | "source": [] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": { 179 | "collapsed": false 180 | }, 181 | "outputs": [], 182 | "source": [] 183 | } 184 | ], 185 | "metadata": { 186 | "anaconda-cloud": {}, 187 | "kernelspec": { 188 | "display_name": "Python 3.9.7 ('base')", 189 | "language": "python", 190 | "name": "python3" 191 | }, 192 | "language_info": { 193 | "codemirror_mode": { 194 | "name": "ipython", 195 | "version": 2 196 | }, 197 | "file_extension": ".py", 198 | "mimetype": "text/x-python", 199 | "name": "python", 200 | "nbconvert_exporter": "python", 201 | "pygments_lexer": "ipython2", 202 | "version": "3.9.7" 203 | }, 204 | "vscode": { 205 | "interpreter": { 206 | "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6" 207 | } 208 | } 209 | }, 210 | "nbformat": 4, 211 | "nbformat_minor": 0 212 | } 213 | -------------------------------------------------------------------------------- /02_pandas_tips&tricks/Excercises/09_grouping.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Regiment" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Introduction:\n", 15 | "\n", 16 | "Special thanks to: http://chrisalbon.com/ for sharing the dataset and materials.\n", 17 | "\n", 18 | "### Step 1. 
Import the necessary libraries" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [], 28 | "source": [] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "### Step 2. Create the DataFrame with the following values:" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 51, 40 | "metadata": { 41 | "collapsed": true 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "raw_data = {'regiment': ['Nighthawks', 'Nighthawks', 'Nighthawks', 'Nighthawks', 'Dragoons', 'Dragoons', 'Dragoons', 'Dragoons', 'Scouts', 'Scouts', 'Scouts', 'Scouts'], \n", 46 | " 'company': ['1st', '1st', '2nd', '2nd', '1st', '1st', '2nd', '2nd','1st', '1st', '2nd', '2nd'], \n", 47 | " 'name': ['Miller', 'Jacobson', 'Ali', 'Milner', 'Cooze', 'Jacon', 'Ryaner', 'Sone', 'Sloan', 'Piger', 'Riani', 'Ali'], \n", 48 | " 'preTestScore': [4, 24, 31, 2, 3, 4, 24, 31, 2, 3, 2, 3],\n", 49 | " 'postTestScore': [25, 94, 57, 62, 70, 25, 94, 57, 62, 70, 62, 70]}" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "### Step 3. Assign it to a variable called regiment.\n", 57 | "#### Don't forget to name each column" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": { 64 | "collapsed": false 65 | }, 66 | "outputs": [], 67 | "source": [] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "### Step 4. What is the mean preTestScore from the regiment Nighthawks? " 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": { 80 | "collapsed": false 81 | }, 82 | "outputs": [], 83 | "source": [] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "### Step 5. 
Present general statistics by company" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": { 96 | "collapsed": false 97 | }, 98 | "outputs": [], 99 | "source": [] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "### Step 6. What is the mean of each company's preTestScore?" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": { 112 | "collapsed": false 113 | }, 114 | "outputs": [], 115 | "source": [] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "### Step 7. Present the mean preTestScores grouped by regiment and company" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": { 128 | "collapsed": false 129 | }, 130 | "outputs": [], 131 | "source": [] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "### Step 8. Present the mean preTestScores grouped by regiment and company without heirarchical indexing" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": { 144 | "collapsed": false 145 | }, 146 | "outputs": [], 147 | "source": [] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "### Step 9. Group the entire dataframe by regiment and company" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": { 160 | "collapsed": false 161 | }, 162 | "outputs": [], 163 | "source": [] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "### Step 10. 
What is the number of observations in each regiment and company" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "metadata": { 176 | "collapsed": false 177 | }, 178 | "outputs": [], 179 | "source": [] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "### Step 11. Iterate over a group and print the name and the whole data from the regiment" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": { 192 | "collapsed": false 193 | }, 194 | "outputs": [], 195 | "source": [] 196 | } 197 | ], 198 | "metadata": { 199 | "kernelspec": { 200 | "display_name": "Python 3.9.7 ('base')", 201 | "language": "python", 202 | "name": "python3" 203 | }, 204 | "language_info": { 205 | "codemirror_mode": { 206 | "name": "ipython", 207 | "version": 2 208 | }, 209 | "file_extension": ".py", 210 | "mimetype": "text/x-python", 211 | "name": "python", 212 | "nbconvert_exporter": "python", 213 | "pygments_lexer": "ipython2", 214 | "version": "3.9.7" 215 | }, 216 | "vscode": { 217 | "interpreter": { 218 | "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6" 219 | } 220 | } 221 | }, 222 | "nbformat": 4, 223 | "nbformat_minor": 0 224 | } 225 | -------------------------------------------------------------------------------- /02_pandas_tips&tricks/Excercises/05_filtering_and_sorting.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Ex2 - Filtering and Sorting Data" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "This time we are going to pull data directly from the internet.\n", 15 | "\n", 16 | "### Step 1. 
Import the necessary libraries" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": { 23 | "collapsed": false 24 | }, 25 | "outputs": [], 26 | "source": [] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/02_Filtering_%26_Sorting/Euro12/Euro_2012_stats_TEAM.csv). " 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "### Step 3. Assign it to a variable called euro12." 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": { 46 | "collapsed": false 47 | }, 48 | "outputs": [], 49 | "source": [] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "### Step 4. Select only the Goal column." 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "collapsed": false 63 | }, 64 | "outputs": [], 65 | "source": [] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "### Step 5. How many teams participated in the Euro2012?" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": { 78 | "collapsed": false 79 | }, 80 | "outputs": [], 81 | "source": [] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "### Step 6. What is the number of columns in the dataset?" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": { 94 | "collapsed": false 95 | }, 96 | "outputs": [], 97 | "source": [] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "### Step 7. 
View only the columns Team, Yellow Cards and Red Cards and assign them to a dataframe called discipline" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": { 110 | "collapsed": false 111 | }, 112 | "outputs": [], 113 | "source": [] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "### Step 8. Sort the teams by Red Cards, then to Yellow Cards" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": { 126 | "collapsed": false, 127 | "scrolled": true 128 | }, 129 | "outputs": [], 130 | "source": [] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "### Step 9. Calculate the mean Yellow Cards given per Team" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": { 143 | "collapsed": false 144 | }, 145 | "outputs": [], 146 | "source": [] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "### Step 10. Filter teams that scored more than 6 goals" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": { 159 | "collapsed": false 160 | }, 161 | "outputs": [], 162 | "source": [] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": {}, 167 | "source": [ 168 | "### Step 11. Select the teams that start with G" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": { 175 | "collapsed": false 176 | }, 177 | "outputs": [], 178 | "source": [] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "### Step 12. 
Select the first 7 columns" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": { 191 | "collapsed": false 192 | }, 193 | "outputs": [], 194 | "source": [] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "metadata": {}, 199 | "source": [ 200 | "### Step 13. Select all columns except the last 3." 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "metadata": { 207 | "collapsed": false 208 | }, 209 | "outputs": [], 210 | "source": [] 211 | }, 212 | { 213 | "cell_type": "markdown", 214 | "metadata": {}, 215 | "source": [ 216 | "### Step 14. Present only the Shooting Accuracy from England, Italy and Russia" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": { 223 | "collapsed": false 224 | }, 225 | "outputs": [], 226 | "source": [] 227 | } 228 | ], 229 | "metadata": { 230 | "anaconda-cloud": {}, 231 | "kernelspec": { 232 | "display_name": "Python 3.9.7 ('base')", 233 | "language": "python", 234 | "name": "python3" 235 | }, 236 | "language_info": { 237 | "codemirror_mode": { 238 | "name": "ipython", 239 | "version": 2 240 | }, 241 | "file_extension": ".py", 242 | "mimetype": "text/x-python", 243 | "name": "python", 244 | "nbconvert_exporter": "python", 245 | "pygments_lexer": "ipython2", 246 | "version": "3.9.7" 247 | }, 248 | "vscode": { 249 | "interpreter": { 250 | "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6" 251 | } 252 | } 253 | }, 254 | "nbformat": 4, 255 | "nbformat_minor": 0 256 | } 257 | -------------------------------------------------------------------------------- /02_pandas_tips&tricks/Excercises/02_Know_your_Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Ex2 - Getting and Knowing your Data" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | 
"metadata": {}, 13 | "source": [ 14 | "### Step 1. Import the necessary libraries" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": { 21 | "collapsed": false 22 | }, 23 | "outputs": [], 24 | "source": [] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/u.user). " 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "### Step 3. Assign it to a variable called users and use the 'user_id' as index" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": { 44 | "collapsed": false 45 | }, 46 | "outputs": [], 47 | "source": [] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "### Step 4. See the first 25 entries" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": { 60 | "collapsed": false, 61 | "scrolled": true 62 | }, 63 | "outputs": [], 64 | "source": [] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "### Step 5. See the last 10 entries" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": { 77 | "collapsed": false, 78 | "scrolled": true 79 | }, 80 | "outputs": [], 81 | "source": [] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "### Step 6. What is the number of observations in the dataset?" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": { 94 | "collapsed": false 95 | }, 96 | "outputs": [], 97 | "source": [] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "### Step 7. What is the number of columns in the dataset?" 
104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": { 110 | "collapsed": false 111 | }, 112 | "outputs": [], 113 | "source": [] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "### Step 8. Print the name of all the columns." 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": { 126 | "collapsed": false 127 | }, 128 | "outputs": [], 129 | "source": [] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "### Step 9. How is the dataset indexed?" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": { 142 | "collapsed": false 143 | }, 144 | "outputs": [], 145 | "source": [] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "### Step 10. What is the data type of each column?" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": { 158 | "collapsed": false 159 | }, 160 | "outputs": [], 161 | "source": [] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": {}, 166 | "source": [ 167 | "### Step 11. Print only the occupation column" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": null, 173 | "metadata": { 174 | "collapsed": false 175 | }, 176 | "outputs": [], 177 | "source": [] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "### Step 12. How many different occupations are in this dataset?" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": { 190 | "collapsed": false 191 | }, 192 | "outputs": [], 193 | "source": [] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "### Step 13. What is the most frequent occupation?" 
200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": { 206 | "collapsed": false 207 | }, 208 | "outputs": [], 209 | "source": [] 210 | }, 211 | { 212 | "cell_type": "markdown", 213 | "metadata": {}, 214 | "source": [ 215 | "### Step 14. Summarize the DataFrame." 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": null, 221 | "metadata": { 222 | "collapsed": false 223 | }, 224 | "outputs": [], 225 | "source": [] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": {}, 230 | "source": [ 231 | "### Step 15. Summarize all the columns" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": null, 237 | "metadata": { 238 | "collapsed": false 239 | }, 240 | "outputs": [], 241 | "source": [] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": {}, 246 | "source": [ 247 | "### Step 16. Summarize only the occupation column" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": null, 253 | "metadata": { 254 | "collapsed": false 255 | }, 256 | "outputs": [], 257 | "source": [] 258 | }, 259 | { 260 | "cell_type": "markdown", 261 | "metadata": {}, 262 | "source": [ 263 | "### Step 17. What is the mean age of users?" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": null, 269 | "metadata": { 270 | "collapsed": false 271 | }, 272 | "outputs": [], 273 | "source": [] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "### Step 18. What is the age with least occurrence?" 
280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": null, 285 | "metadata": { 286 | "collapsed": false 287 | }, 288 | "outputs": [], 289 | "source": [] 290 | } 291 | ], 292 | "metadata": { 293 | "anaconda-cloud": {}, 294 | "kernelspec": { 295 | "display_name": "Python 3.9.7 ('base')", 296 | "language": "python", 297 | "name": "python3" 298 | }, 299 | "language_info": { 300 | "codemirror_mode": { 301 | "name": "ipython", 302 | "version": 2 303 | }, 304 | "file_extension": ".py", 305 | "mimetype": "text/x-python", 306 | "name": "python", 307 | "nbconvert_exporter": "python", 308 | "pygments_lexer": "ipython2", 309 | "version": "3.9.7" 310 | }, 311 | "vscode": { 312 | "interpreter": { 313 | "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6" 314 | } 315 | } 316 | }, 317 | "nbformat": 4, 318 | "nbformat_minor": 0 319 | } 320 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Creative Commons Legal Code 2 | 3 | CC0 1.0 Universal 4 | 5 | CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE 6 | LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN 7 | ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS 8 | INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES 9 | REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS 10 | PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM 11 | THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED 12 | HEREUNDER. 13 | 14 | Statement of Purpose 15 | 16 | The laws of most jurisdictions throughout the world automatically confer 17 | exclusive Copyright and Related Rights (defined below) upon the creator 18 | and subsequent owner(s) (each and all, an "owner") of an original work of 19 | authorship and/or a database (each, a "Work"). 
20 | 21 | Certain owners wish to permanently relinquish those rights to a Work for 22 | the purpose of contributing to a commons of creative, cultural and 23 | scientific works ("Commons") that the public can reliably and without fear 24 | of later claims of infringement build upon, modify, incorporate in other 25 | works, reuse and redistribute as freely as possible in any form whatsoever 26 | and for any purposes, including without limitation commercial purposes. 27 | These owners may contribute to the Commons to promote the ideal of a free 28 | culture and the further production of creative, cultural and scientific 29 | works, or to gain reputation or greater distribution for their Work in 30 | part through the use and efforts of others. 31 | 32 | For these and/or other purposes and motivations, and without any 33 | expectation of additional consideration or compensation, the person 34 | associating CC0 with a Work (the "Affirmer"), to the extent that he or she 35 | is an owner of Copyright and Related Rights in the Work, voluntarily 36 | elects to apply CC0 to the Work and publicly distribute the Work under its 37 | terms, with knowledge of his or her Copyright and Related Rights in the 38 | Work and the meaning and intended legal effect of CC0 on those rights. 39 | 40 | 1. Copyright and Related Rights. A Work made available under CC0 may be 41 | protected by copyright and related or neighboring rights ("Copyright and 42 | Related Rights"). Copyright and Related Rights include, but are not 43 | limited to, the following: 44 | 45 | i. the right to reproduce, adapt, distribute, perform, display, 46 | communicate, and translate a Work; 47 | ii. moral rights retained by the original author(s) and/or performer(s); 48 | iii. publicity and privacy rights pertaining to a person's image or 49 | likeness depicted in a Work; 50 | iv. rights protecting against unfair competition in regards to a Work, 51 | subject to the limitations in paragraph 4(a), below; 52 | v. 
rights protecting the extraction, dissemination, use and reuse of data 53 | in a Work; 54 | vi. database rights (such as those arising under Directive 96/9/EC of the 55 | European Parliament and of the Council of 11 March 1996 on the legal 56 | protection of databases, and under any national implementation 57 | thereof, including any amended or successor version of such 58 | directive); and 59 | vii. other similar, equivalent or corresponding rights throughout the 60 | world based on applicable law or treaty, and any national 61 | implementations thereof. 62 | 63 | 2. Waiver. To the greatest extent permitted by, but not in contravention 64 | of, applicable law, Affirmer hereby overtly, fully, permanently, 65 | irrevocably and unconditionally waives, abandons, and surrenders all of 66 | Affirmer's Copyright and Related Rights and associated claims and causes 67 | of action, whether now known or unknown (including existing as well as 68 | future claims and causes of action), in the Work (i) in all territories 69 | worldwide, (ii) for the maximum duration provided by applicable law or 70 | treaty (including future time extensions), (iii) in any current or future 71 | medium and for any number of copies, and (iv) for any purpose whatsoever, 72 | including without limitation commercial, advertising or promotional 73 | purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each 74 | member of the public at large and to the detriment of Affirmer's heirs and 75 | successors, fully intending that such Waiver shall not be subject to 76 | revocation, rescission, cancellation, termination, or any other legal or 77 | equitable action to disrupt the quiet enjoyment of the Work by the public 78 | as contemplated by Affirmer's express Statement of Purpose. 79 | 80 | 3. Public License Fallback. 
Should any part of the Waiver for any reason 81 | be judged legally invalid or ineffective under applicable law, then the 82 | Waiver shall be preserved to the maximum extent permitted taking into 83 | account Affirmer's express Statement of Purpose. In addition, to the 84 | extent the Waiver is so judged Affirmer hereby grants to each affected 85 | person a royalty-free, non transferable, non sublicensable, non exclusive, 86 | irrevocable and unconditional license to exercise Affirmer's Copyright and 87 | Related Rights in the Work (i) in all territories worldwide, (ii) for the 88 | maximum duration provided by applicable law or treaty (including future 89 | time extensions), (iii) in any current or future medium and for any number 90 | of copies, and (iv) for any purpose whatsoever, including without 91 | limitation commercial, advertising or promotional purposes (the 92 | "License"). The License shall be deemed effective as of the date CC0 was 93 | applied by Affirmer to the Work. Should any part of the License for any 94 | reason be judged legally invalid or ineffective under applicable law, such 95 | partial invalidity or ineffectiveness shall not invalidate the remainder 96 | of the License, and in such case Affirmer hereby affirms that he or she 97 | will not (i) exercise any of his or her remaining Copyright and Related 98 | Rights in the Work or (ii) assert any associated claims and causes of 99 | action with respect to the Work, in either case contrary to Affirmer's 100 | express Statement of Purpose. 101 | 102 | 4. Limitations and Disclaimers. 103 | 104 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 105 | surrendered, licensed or otherwise affected by this document. 106 | b. 
Affirmer offers the Work as-is and makes no representations or 107 | warranties of any kind concerning the Work, express, implied, 108 | statutory or otherwise, including without limitation warranties of 109 | title, merchantability, fitness for a particular purpose, non 110 | infringement, or the absence of latent or other defects, accuracy, or 111 | the present or absence of errors, whether or not discoverable, all to 112 | the greatest extent permissible under applicable law. 113 | c. Affirmer disclaims responsibility for clearing rights of other persons 114 | that may apply to the Work or any use thereof, including without 115 | limitation any person's Copyright and Related Rights in the Work. 116 | Further, Affirmer disclaims responsibility for obtaining any necessary 117 | consents, permissions or other rights required for any use of the 118 | Work. 119 | d. Affirmer understands and acknowledges that Creative Commons is not a 120 | party to this document and has no duty or obligation with respect to 121 | this CC0 or use of the Work. 122 | -------------------------------------------------------------------------------- /02_pandas_tips&tricks/Excercises/01_Know_your_Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Ex1 - Know your Data" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Step 1. Import the necessary libraries" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": { 21 | "collapsed": false 22 | }, 23 | "outputs": [], 24 | "source": [] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). 
" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "### Step 3. Assign it to a variable called chipo." 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": { 44 | "collapsed": false 45 | }, 46 | "outputs": [], 47 | "source": [] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "### Step 4. See the first 10 entries" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": { 60 | "collapsed": false, 61 | "scrolled": false 62 | }, 63 | "outputs": [], 64 | "source": [] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "### Step 5. What is the number of observations in the dataset?" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 1, 76 | "metadata": { 77 | "collapsed": false 78 | }, 79 | "outputs": [], 80 | "source": [ 81 | "# Solution 1\n", 82 | "\n" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 2, 88 | "metadata": { 89 | "collapsed": false 90 | }, 91 | "outputs": [], 92 | "source": [ 93 | "# Solution 2\n", 94 | "\n" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "### Step 6. What is the number of columns in the dataset?" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": { 108 | "collapsed": false 109 | }, 110 | "outputs": [], 111 | "source": [] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "### Step 7. Print the name of all the columns." 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": { 124 | "collapsed": false 125 | }, 126 | "outputs": [], 127 | "source": [] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "### Step 8. How is the dataset indexed?" 
134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": { 140 | "collapsed": false 141 | }, 142 | "outputs": [], 143 | "source": [] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": {}, 148 | "source": [ 149 | "### Step 9. Which was the most-ordered item? " 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": { 156 | "collapsed": false 157 | }, 158 | "outputs": [], 159 | "source": [] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | "source": [ 165 | "### Step 10. For the most-ordered item, how many items were ordered?" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": { 172 | "collapsed": false 173 | }, 174 | "outputs": [], 175 | "source": [] 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "metadata": {}, 180 | "source": [ 181 | "### Step 11. What was the most ordered item in the choice_description column?" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "metadata": { 188 | "collapsed": false 189 | }, 190 | "outputs": [], 191 | "source": [] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": {}, 196 | "source": [ 197 | "### Step 12. How many items were ordered in total?" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": null, 203 | "metadata": { 204 | "collapsed": false 205 | }, 206 | "outputs": [], 207 | "source": [] 208 | }, 209 | { 210 | "cell_type": "markdown", 211 | "metadata": {}, 212 | "source": [ 213 | "### Step 13. Turn the item price into a float" 214 | ] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "metadata": {}, 219 | "source": [ 220 | "#### Step 13.a. 
Check the item price type" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": null, 226 | "metadata": { 227 | "collapsed": false 228 | }, 229 | "outputs": [], 230 | "source": [] 231 | }, 232 | { 233 | "cell_type": "markdown", 234 | "metadata": {}, 235 | "source": [ 236 | "#### Step 13.b. Create a lambda function and change the type of item price" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": null, 242 | "metadata": { 243 | "collapsed": true 244 | }, 245 | "outputs": [], 246 | "source": [] 247 | }, 248 | { 249 | "cell_type": "markdown", 250 | "metadata": {}, 251 | "source": [ 252 | "#### Step 13.c. Check the item price type" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": null, 258 | "metadata": { 259 | "collapsed": false 260 | }, 261 | "outputs": [], 262 | "source": [] 263 | }, 264 | { 265 | "cell_type": "markdown", 266 | "metadata": {}, 267 | "source": [ 268 | "### Step 14. How much was the revenue for the period in the dataset?" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": null, 274 | "metadata": { 275 | "collapsed": false 276 | }, 277 | "outputs": [], 278 | "source": [] 279 | }, 280 | { 281 | "cell_type": "markdown", 282 | "metadata": {}, 283 | "source": [ 284 | "### Step 15. How many orders were made in the period?" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": null, 290 | "metadata": { 291 | "collapsed": false 292 | }, 293 | "outputs": [], 294 | "source": [] 295 | }, 296 | { 297 | "cell_type": "markdown", 298 | "metadata": {}, 299 | "source": [ 300 | "### Step 16. What is the average revenue amount per order?" 
301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": 3, 306 | "metadata": { 307 | "collapsed": false 308 | }, 309 | "outputs": [], 310 | "source": [ 311 | "# Solution 1\n", 312 | "\n" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": 4, 318 | "metadata": { 319 | "collapsed": false 320 | }, 321 | "outputs": [], 322 | "source": [ 323 | "# Solution 2\n", 324 | "\n" 325 | ] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | "metadata": {}, 330 | "source": [ 331 | "### Step 17. How many different items are sold?" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": null, 337 | "metadata": { 338 | "collapsed": false 339 | }, 340 | "outputs": [], 341 | "source": [] 342 | } 343 | ], 344 | "metadata": { 345 | "anaconda-cloud": {}, 346 | "kernelspec": { 347 | "display_name": "Python 3.9.7 ('base')", 348 | "language": "python", 349 | "name": "python3" 350 | }, 351 | "language_info": { 352 | "codemirror_mode": { 353 | "name": "ipython", 354 | "version": 2 355 | }, 356 | "file_extension": ".py", 357 | "mimetype": "text/x-python", 358 | "name": "python", 359 | "nbconvert_exporter": "python", 360 | "pygments_lexer": "ipython2", 361 | "version": "3.9.7" 362 | }, 363 | "vscode": { 364 | "interpreter": { 365 | "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6" 366 | } 367 | } 368 | }, 369 | "nbformat": 4, 370 | "nbformat_minor": 0 371 | } 372 | -------------------------------------------------------------------------------- /02_pandas_tips&tricks/Excercises/06_filtering_and_sorting.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Ex3 - Filtering and Sorting Data - Fictional Army" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Introduction:\n", 15 | "\n", 16 | "This exercise was inspired by this 
[page](http://chrisalbon.com/python/)\n", 17 | "\n", 18 | "### Step 1. Import the necessary libraries" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 3, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "import pandas as pd" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "### Step 2. This is the data given as a dictionary" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 4, 40 | "metadata": { 41 | "collapsed": true 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "# Create an example dataframe about a fictional army\n", 46 | "raw_data = {'regiment': ['Nighthawks', 'Nighthawks', 'Nighthawks', 'Nighthawks', 'Dragoons', 'Dragoons', 'Dragoons', 'Dragoons', 'Scouts', 'Scouts', 'Scouts', 'Scouts'],\n", 47 | " 'company': ['1st', '1st', '2nd', '2nd', '1st', '1st', '2nd', '2nd','1st', '1st', '2nd', '2nd'],\n", 48 | " 'deaths': [523, 52, 25, 616, 43, 234, 523, 62, 62, 73, 37, 35],\n", 49 | " 'battles': [5, 42, 2, 2, 4, 7, 8, 3, 4, 7, 8, 9],\n", 50 | " 'size': [1045, 957, 1099, 1400, 1592, 1006, 987, 849, 973, 1005, 1099, 1523],\n", 51 | " 'veterans': [1, 5, 62, 26, 73, 37, 949, 48, 48, 435, 63, 345],\n", 52 | " 'readiness': [1, 2, 3, 3, 2, 1, 2, 3, 2, 1, 2, 3],\n", 53 | " 'armored': [1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1],\n", 54 | " 'deserters': [4, 24, 31, 2, 3, 4, 24, 31, 2, 3, 2, 3],\n", 55 | " 'origin': ['Arizona', 'California', 'Texas', 'Florida', 'Maine', 'Iowa', 'Alaska', 'Washington', 'Oregon', 'Wyoming', 'Louisana', 'Georgia']}" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "### Step 3. Create a dataframe and assign it to a variable called army. \n", 63 | "\n", 64 | "#### Don't forget to include the columns names in the order presented in the dictionary ('regiment', 'company', 'deaths'...) so that the column index order is consistent with the solutions. If omitted, pandas will order the columns alphabetically." 
65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "### Step 4. Set the 'origin' column as the index of the dataframe" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "### Step 5. Print only the column veterans" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "### Step 6. Print the columns 'veterans' and 'deaths'" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "### Step 7. Print the name of all the columns." 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "### Step 8. Select the 'deaths', 'size' and 'deserters' columns from Maine and Alaska" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "### Step 9. 
Select the rows 3 to 7 and the columns 3 to 6" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": {}, 161 | "source": [ 162 | "### Step 10. Select every row after the fourth row and all columns" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "### Step 11. Select every row up to the 4th row and all columns" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "metadata": {}, 183 | "outputs": [], 184 | "source": [] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "### Step 12. Select the 3rd column up to the 7th column" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [] 199 | }, 200 | { 201 | "cell_type": "markdown", 202 | "metadata": {}, 203 | "source": [ 204 | "### Step 13. Select rows where df.deaths is greater than 50" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "metadata": {}, 211 | "outputs": [], 212 | "source": [] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "metadata": {}, 217 | "source": [ 218 | "### Step 14. Select rows where df.deaths is greater than 500 or less than 50" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "metadata": {}, 225 | "outputs": [], 226 | "source": [] 227 | }, 228 | { 229 | "cell_type": "markdown", 230 | "metadata": {}, 231 | "source": [ 232 | "### Step 15. 
Select all the regiments not named \"Dragoons\"" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": null, 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "metadata": {}, 245 | "source": [ 246 | "### Step 16. Select the rows called Texas and Arizona" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": null, 252 | "metadata": {}, 253 | "outputs": [], 254 | "source": [] 255 | }, 256 | { 257 | "cell_type": "markdown", 258 | "metadata": {}, 259 | "source": [ 260 | "### Step 17. Select the third cell in the row named Arizona" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": null, 266 | "metadata": {}, 267 | "outputs": [], 268 | "source": [] 269 | }, 270 | { 271 | "cell_type": "markdown", 272 | "metadata": {}, 273 | "source": [ 274 | "### Step 18. Select the third cell down in the column named deaths" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": null, 280 | "metadata": {}, 281 | "outputs": [], 282 | "source": [] 283 | } 284 | ], 285 | "metadata": { 286 | "kernelspec": { 287 | "display_name": "Python 3.9.7 ('base')", 288 | "language": "python", 289 | "name": "python3" 290 | }, 291 | "language_info": { 292 | "codemirror_mode": { 293 | "name": "ipython", 294 | "version": 3 295 | }, 296 | "file_extension": ".py", 297 | "mimetype": "text/x-python", 298 | "name": "python", 299 | "nbconvert_exporter": "python", 300 | "pygments_lexer": "ipython3", 301 | "version": "3.9.7" 302 | }, 303 | "vscode": { 304 | "interpreter": { 305 | "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6" 306 | } 307 | } 308 | }, 309 | "nbformat": 4, 310 | "nbformat_minor": 1 311 | } 312 | -------------------------------------------------------------------------------- /02_pandas_tips&tricks/Solutions/Food_Ananlysis_Report.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | | Submitted By | Kashif Raza | 4 | |--------|--------------| 5 | 6 |
7 | 8 | # Exploratory Data Analysis on Open Food Facts Data 9 | ## About the Dataset 10 | - Open Food Facts is a non-profit association of volunteers. 11 | 5000+ contributors like you have added 600 000+ products from 150 countries using our Android, iPhone or Windows Phone app or their camera to scan barcodes and upload pictures of products and their labels. 12 | ## Important steps performed in the analysis 13 | ## 1. Data Shape 14 | Shape of the original dataset is: 15 | ``` 16 | food.shape 17 | ``` 18 | (356027,163) 19 | - Total number of observations/rows: $356027$ 20 | - Total number of columns: $163$ 21 | 22 | ## 2. Data Structure 23 | The data is structured in the following way: 24 | ``` 25 | food.info() 26 | ``` 27 | - Data has a range index of: 356027 entries, 0 to 356026 28 | - The columns are: 163, starting from `code` to `water-hardness_100g` 29 | - Total memory usage is: 442.8+ MB 30 | - From $163$ columns: $107$ columns have the data type `float64` and the remaining $56$ have the data type `object` 31 | 32 | ## 3. Finding Missing Data 33 | To find the missing data, we can use the following code: 34 | ``` 35 | nan_per=food.isnull().sum().sort_values(ascending=False)/food.shape[0]*100 36 | ``` 37 | - Plotting the columns with the highest percentage of missing data: 38 | ``` 39 | plt.figure(dpi=600) 40 | nan_per.plot(kind='bar', title='Percentage of missing values per feature', figsize=(15,5), 41 | color='red', fontsize=5) 42 | ``` 43 | 44 | ![](Pictures/food_eda/nan1.png) 45 | 46 | - Plotting the NaN percentage against the feature columns: 47 | ``` 48 | plt.figure(figsize=(10,5)) 49 | plt.figure(dpi=600) 50 | sns.distplot(nan_per, bins=100, kde=False) 51 | plt.xlabel("NaN percentage") 52 | plt.ylabel("Number of columns") 53 | plt.title("Percentage of nans per feature column") 54 | ``` 55 | ![](Pictures/food_eda/nan2.png) 56 | 57 | ### **Key Takeaways** 58 | 1. A large percentage of NaN data is present in the columns with the highest percentage of missing data.
So not all of the information seems to be useful. 59 | 2. Many of the feature columns contain 100% NaN values 60 | 3. There is a group of columns that have about 20% NaN values. 61 | 4. Features consisting entirely of NaN values are not useful for the analysis. 62 | 5. Columns containing only NaN values can be dropped. 63 | 64 | ### **Useless feature columns** 65 | The useless feature columns are: 66 | ``` 67 | useless_features=nan_per[nan_per==100].index 68 | print('Useless features:', useless_features) 69 | ``` 70 | ![](Pictures/food_eda/useless_features.png) 71 | 72 | ### **Length of useless features** 73 | Length of the useless features: 74 | ``` 75 | print('Length of useless features:', len(useless_features)) 76 | ``` 77 | - Length of useless features: 16 78 | 79 | ### **Drop the useless features** 80 | Drop the useless features: 81 | ``` 82 | food.drop(useless_features, axis=1, inplace=True) 83 | print('Shape of the dataset after dropping the useless features:', food.shape) 84 | ``` 85 | Shape of the food dataset is: (356027, 147) 86 | 87 | ### **Features with zero NaN values** 88 | Features with zero NaN values are: 89 | ``` 90 | zero_nan_features=nan_per[nan_per==0].index 91 | print('Features with zero nan values:', zero_nan_features) 92 | ``` 93 | Zero NaN features: Index(['last_modified_datetime', 'last_modified_t'], dtype='object') 94 | 95 | ### **Splitting the data into NaN groups** 96 | Splitting the data into NaN groups: 97 | 1. Columns with low NaN values: (0-20%) 98 | 2. Columns with medium NaN values: (20-50%) 99 | 3.
Columns with High NaN values: (50-100%) 100 | ``` 101 | Low NaN features columns: 102 | sodium_100g 18.631 103 | salt_100g 18.619 104 | proteins_100g 17.377 105 | energy_100g 17.038 106 | brands_tags 8.165 107 | brands 8.159 108 | product_name 4.919 109 | countries 0.077 110 | countries_en 0.077 111 | countries_tags 0.077 112 | states_en 0.015 113 | states_tags 0.015 114 | states 0.015 115 | url 0.007 116 | code 0.007 117 | created_datetime 0.003 118 | created_t 0.001 119 | creator 0.001 120 | last_modified_datetime 0.000 121 | last_modified_t 0.000 122 | dtype: float64 123 | ``` 124 | Medium NaN features columns: 125 | ``` 126 | Medium NaN features: 127 | serving_size 39.156 128 | fiber_100g 38.015 129 | nutrition_grade_fr 28.417 130 | nutrition-score-fr_100g 28.417 131 | nutrition-score-uk_100g 28.417 132 | saturated-fat_100g 25.898 133 | sugars_100g 21.583 134 | carbohydrates_100g 21.573 135 | fat_100g 21.496 136 | additives 20.280 137 | additives_n 20.268 138 | ingredients_from_palm_oil_n 20.268 139 | ingredients_that_may_be_from_palm_oil_n 20.268 140 | ingredients_text 20.261 141 | dtype: float64 142 | ``` 143 | High NaN features columns has the 129 length. 
144 | 145 | ### **Columns with fewest NaN values plot** 146 | ``` 147 | plt.figure(figsize=(20,5)) 148 | lows = sns.barplot(x=low_nan_features.index.values, y=low_nan_features.values, palette="rocket") 149 | lows.set_xticklabels(low_nan_features.index.values,rotation=45) 150 | plt.title("Features with fewest nan-values") 151 | plt.ylabel("% of nans ") 152 | ``` 153 | 154 | ![](Pictures/food_eda/low_nan.png) 155 | 156 | - The plot shows that there are many features that occur multiple times, such as 157 | - countries 158 | - countries_tags 159 | - countries_en 160 | - additives 161 | - additives_n 162 | ### **Columns which have a NaN values percentage between 20-50%** 163 | 164 | ``` 165 | plt.figure(figsize=(20,5)) 166 | lows = sns.barplot(x=med_nan_features.index.values, y=med_nan_features.values, palette="Spectral") 167 | lows.set_xticklabels(med_nan_features.index.values,rotation=45) 168 | plt.title("Features with medium percentage of nan-values") 169 | plt.ylabel("% of nans ") 170 | ``` 171 | ![](Pictures/food_eda/med_nan.png) 172 | 173 | ### **Columns which have a NaN values percentage >50%** 174 | ``` 175 | plt.figure(figsize=(15,30)) 176 | high = sns.barplot(y=high_nan_features.index.values, x=high_nan_features.values, palette="Blues") 177 | plt.title("Features with most nan-values") 178 | plt.ylabel("% of nans ") 179 | ``` 180 | ![](Pictures/food_eda/high_nan.png) 181 | 182 | ### Dropping the columns with high NaN values 183 | - Dropping the columns that have high NaN values: 184 | ``` 185 | for i in high_nan_features.index: 186 | if i in food.columns: 187 | food.drop(i, axis=1, inplace=True) 188 | print('Shape of the dataset after dropping the high NaN features:', food.shape) 189 | ``` 190 | (356027, 34) 191 | - Now we have 34 features.
192 | 193 | ### **Drop the NaN values from the data** 194 | - Drop the NaN values from the data and then print the shape of the data 195 | ``` 196 | food.dropna(inplace=True) 197 | print('Shape of the food dataset after dropping NaN values is:', food.shape) 198 | ``` 199 | Shape of the food dataset after dropping NaN values is: (157157, 34) 200 | ## Data Structure of cleaned data 201 | - The new cleaned data has a total of 157157 entries. 202 | - Total number of features is: 34 203 | - 14 features have the data type of float64. 204 | - 20 features have the data type of object. 205 | - Total memory usage of the cleaned data is: 42.0 MB 206 | 207 | ## Step 4: Type casting/conversion of the data types 208 | - To convert the data type of a specific column: 209 | ``` 210 | food['serving_size'] = food['serving_size'].astype(str) 211 | food['product_name'] = food['product_name'].astype(str) 212 | ``` 213 | - Now both serving_size and product_name have the data type string. 214 | 215 | ## Step 5: Summary Statistics of the data 216 | - Summary Statistics of the data: 217 | ``` 218 | food.describe() 219 | ``` 220 | ![](Pictures/food_eda/summary_statistics.png) 221 | ## Step 6: Value Counts 222 | - Value Counts of the data: 223 | ``` 224 | food.product_name.value_counts() 225 | ``` 226 | - Ice Cream is the most popular product name. It occurs $405$ times, followed by Potato Chips, which occurs $276$ times. 227 | - The most frequently occurring products are: 228 | 229 | ![](Pictures/food_eda/most_occ_prod.png) 230 | - The least frequently occurring products are: 231 | 232 | ![](Pictures/food_eda/least_occ_prod.png) 233 | 234 | ## Step-7: Deal with Duplicates 235 | - Dropping the duplicates 236 | ``` 237 | food.drop_duplicates(inplace=True) 238 | print('Shape of Dataset after dropping the duplicates', food.shape) 239 | ``` 240 | Shape of Dataset after dropping the duplicates (157157, 34) 241 | 242 | ## Step-8: Check the Normality of Data 243 | - To check whether the distribution of the data is normal or not.
244 | - Distribution of data of `nutrition-score-fr_100g` column 245 | ![](Pictures/food_eda/dist_plot.png) 246 | 247 | ## Step-9: Correlation of Dataset 248 | - Correlation plot tells the relation between the column. Withe the increase or decrease in one quantity how much it affect the 2nd quantity. 249 | ![](Pictures/food_eda/corr.png) 250 | 251 | 252 | -------------------------------------------------------------------------------- /02_pandas_tips&tricks/tips.csv: -------------------------------------------------------------------------------- 1 | ,total_bill,tip,sex,smoker,day,time,size 2 | 0,16.99,1.01,Female,No,Sun,Dinner,2 3 | 1,10.34,1.66,Male,No,Sun,Dinner,3 4 | 2,21.01,3.5,Male,No,Sun,Dinner,3 5 | 3,23.68,3.31,Male,No,Sun,Dinner,2 6 | 4,24.59,3.61,Female,No,Sun,Dinner,4 7 | 5,25.29,4.71,Male,No,Sun,Dinner,4 8 | 6,8.77,2.0,Male,No,Sun,Dinner,2 9 | 7,26.88,3.12,Male,No,Sun,Dinner,4 10 | 8,15.04,1.96,Male,No,Sun,Dinner,2 11 | 9,14.78,3.23,Male,No,Sun,Dinner,2 12 | 10,10.27,1.71,Male,No,Sun,Dinner,2 13 | 11,35.26,5.0,Female,No,Sun,Dinner,4 14 | 12,15.42,1.57,Male,No,Sun,Dinner,2 15 | 13,18.43,3.0,Male,No,Sun,Dinner,4 16 | 14,14.83,3.02,Female,No,Sun,Dinner,2 17 | 15,21.58,3.92,Male,No,Sun,Dinner,2 18 | 16,10.33,1.67,Female,No,Sun,Dinner,3 19 | 17,16.29,3.71,Male,No,Sun,Dinner,3 20 | 18,16.97,3.5,Female,No,Sun,Dinner,3 21 | 19,20.65,3.35,Male,No,Sat,Dinner,3 22 | 20,17.92,4.08,Male,No,Sat,Dinner,2 23 | 21,20.29,2.75,Female,No,Sat,Dinner,2 24 | 22,15.77,2.23,Female,No,Sat,Dinner,2 25 | 23,39.42,7.58,Male,No,Sat,Dinner,4 26 | 24,19.82,3.18,Male,No,Sat,Dinner,2 27 | 25,17.81,2.34,Male,No,Sat,Dinner,4 28 | 26,13.37,2.0,Male,No,Sat,Dinner,2 29 | 27,12.69,2.0,Male,No,Sat,Dinner,2 30 | 28,21.7,4.3,Male,No,Sat,Dinner,2 31 | 29,19.65,3.0,Female,No,Sat,Dinner,2 32 | 30,9.55,1.45,Male,No,Sat,Dinner,2 33 | 31,18.35,2.5,Male,No,Sat,Dinner,4 34 | 32,15.06,3.0,Female,No,Sat,Dinner,2 35 | 33,20.69,2.45,Female,No,Sat,Dinner,4 36 | 34,17.78,3.27,Male,No,Sat,Dinner,2 37 | 
35,24.06,3.6,Male,No,Sat,Dinner,3 38 | 36,16.31,2.0,Male,No,Sat,Dinner,3 39 | 37,16.93,3.07,Female,No,Sat,Dinner,3 40 | 38,18.69,2.31,Male,No,Sat,Dinner,3 41 | 39,31.27,5.0,Male,No,Sat,Dinner,3 42 | 40,16.04,2.24,Male,No,Sat,Dinner,3 43 | 41,17.46,2.54,Male,No,Sun,Dinner,2 44 | 42,13.94,3.06,Male,No,Sun,Dinner,2 45 | 43,9.68,1.32,Male,No,Sun,Dinner,2 46 | 44,30.4,5.6,Male,No,Sun,Dinner,4 47 | 45,18.29,3.0,Male,No,Sun,Dinner,2 48 | 46,22.23,5.0,Male,No,Sun,Dinner,2 49 | 47,32.4,6.0,Male,No,Sun,Dinner,4 50 | 48,28.55,2.05,Male,No,Sun,Dinner,3 51 | 49,18.04,3.0,Male,No,Sun,Dinner,2 52 | 50,12.54,2.5,Male,No,Sun,Dinner,2 53 | 51,10.29,2.6,Female,No,Sun,Dinner,2 54 | 52,34.81,5.2,Female,No,Sun,Dinner,4 55 | 53,9.94,1.56,Male,No,Sun,Dinner,2 56 | 54,25.56,4.34,Male,No,Sun,Dinner,4 57 | 55,19.49,3.51,Male,No,Sun,Dinner,2 58 | 56,38.01,3.0,Male,Yes,Sat,Dinner,4 59 | 57,26.41,1.5,Female,No,Sat,Dinner,2 60 | 58,11.24,1.76,Male,Yes,Sat,Dinner,2 61 | 59,48.27,6.73,Male,No,Sat,Dinner,4 62 | 60,20.29,3.21,Male,Yes,Sat,Dinner,2 63 | 61,13.81,2.0,Male,Yes,Sat,Dinner,2 64 | 62,11.02,1.98,Male,Yes,Sat,Dinner,2 65 | 63,18.29,3.76,Male,Yes,Sat,Dinner,4 66 | 64,17.59,2.64,Male,No,Sat,Dinner,3 67 | 65,20.08,3.15,Male,No,Sat,Dinner,3 68 | 66,16.45,2.47,Female,No,Sat,Dinner,2 69 | 67,3.07,1.0,Female,Yes,Sat,Dinner,1 70 | 68,20.23,2.01,Male,No,Sat,Dinner,2 71 | 69,15.01,2.09,Male,Yes,Sat,Dinner,2 72 | 70,12.02,1.97,Male,No,Sat,Dinner,2 73 | 71,17.07,3.0,Female,No,Sat,Dinner,3 74 | 72,26.86,3.14,Female,Yes,Sat,Dinner,2 75 | 73,25.28,5.0,Female,Yes,Sat,Dinner,2 76 | 74,14.73,2.2,Female,No,Sat,Dinner,2 77 | 75,10.51,1.25,Male,No,Sat,Dinner,2 78 | 76,17.92,3.08,Male,Yes,Sat,Dinner,2 79 | 77,27.2,4.0,Male,No,Thur,Lunch,4 80 | 78,22.76,3.0,Male,No,Thur,Lunch,2 81 | 79,17.29,2.71,Male,No,Thur,Lunch,2 82 | 80,19.44,3.0,Male,Yes,Thur,Lunch,2 83 | 81,16.66,3.4,Male,No,Thur,Lunch,2 84 | 82,10.07,1.83,Female,No,Thur,Lunch,1 85 | 83,32.68,5.0,Male,Yes,Thur,Lunch,2 86 | 
84,15.98,2.03,Male,No,Thur,Lunch,2 87 | 85,34.83,5.17,Female,No,Thur,Lunch,4 88 | 86,13.03,2.0,Male,No,Thur,Lunch,2 89 | 87,18.28,4.0,Male,No,Thur,Lunch,2 90 | 88,24.71,5.85,Male,No,Thur,Lunch,2 91 | 89,21.16,3.0,Male,No,Thur,Lunch,2 92 | 90,28.97,3.0,Male,Yes,Fri,Dinner,2 93 | 91,22.49,3.5,Male,No,Fri,Dinner,2 94 | 92,5.75,1.0,Female,Yes,Fri,Dinner,2 95 | 93,16.32,4.3,Female,Yes,Fri,Dinner,2 96 | 94,22.75,3.25,Female,No,Fri,Dinner,2 97 | 95,40.17,4.73,Male,Yes,Fri,Dinner,4 98 | 96,27.28,4.0,Male,Yes,Fri,Dinner,2 99 | 97,12.03,1.5,Male,Yes,Fri,Dinner,2 100 | 98,21.01,3.0,Male,Yes,Fri,Dinner,2 101 | 99,12.46,1.5,Male,No,Fri,Dinner,2 102 | 100,11.35,2.5,Female,Yes,Fri,Dinner,2 103 | 101,15.38,3.0,Female,Yes,Fri,Dinner,2 104 | 102,44.3,2.5,Female,Yes,Sat,Dinner,3 105 | 103,22.42,3.48,Female,Yes,Sat,Dinner,2 106 | 104,20.92,4.08,Female,No,Sat,Dinner,2 107 | 105,15.36,1.64,Male,Yes,Sat,Dinner,2 108 | 106,20.49,4.06,Male,Yes,Sat,Dinner,2 109 | 107,25.21,4.29,Male,Yes,Sat,Dinner,2 110 | 108,18.24,3.76,Male,No,Sat,Dinner,2 111 | 109,14.31,4.0,Female,Yes,Sat,Dinner,2 112 | 110,14.0,3.0,Male,No,Sat,Dinner,2 113 | 111,7.25,1.0,Female,No,Sat,Dinner,1 114 | 112,38.07,4.0,Male,No,Sun,Dinner,3 115 | 113,23.95,2.55,Male,No,Sun,Dinner,2 116 | 114,25.71,4.0,Female,No,Sun,Dinner,3 117 | 115,17.31,3.5,Female,No,Sun,Dinner,2 118 | 116,29.93,5.07,Male,No,Sun,Dinner,4 119 | 117,10.65,1.5,Female,No,Thur,Lunch,2 120 | 118,12.43,1.8,Female,No,Thur,Lunch,2 121 | 119,24.08,2.92,Female,No,Thur,Lunch,4 122 | 120,11.69,2.31,Male,No,Thur,Lunch,2 123 | 121,13.42,1.68,Female,No,Thur,Lunch,2 124 | 122,14.26,2.5,Male,No,Thur,Lunch,2 125 | 123,15.95,2.0,Male,No,Thur,Lunch,2 126 | 124,12.48,2.52,Female,No,Thur,Lunch,2 127 | 125,29.8,4.2,Female,No,Thur,Lunch,6 128 | 126,8.52,1.48,Male,No,Thur,Lunch,2 129 | 127,14.52,2.0,Female,No,Thur,Lunch,2 130 | 128,11.38,2.0,Female,No,Thur,Lunch,2 131 | 129,22.82,2.18,Male,No,Thur,Lunch,3 132 | 130,19.08,1.5,Male,No,Thur,Lunch,2 133 | 
131,20.27,2.83,Female,No,Thur,Lunch,2 134 | 132,11.17,1.5,Female,No,Thur,Lunch,2 135 | 133,12.26,2.0,Female,No,Thur,Lunch,2 136 | 134,18.26,3.25,Female,No,Thur,Lunch,2 137 | 135,8.51,1.25,Female,No,Thur,Lunch,2 138 | 136,10.33,2.0,Female,No,Thur,Lunch,2 139 | 137,14.15,2.0,Female,No,Thur,Lunch,2 140 | 138,16.0,2.0,Male,Yes,Thur,Lunch,2 141 | 139,13.16,2.75,Female,No,Thur,Lunch,2 142 | 140,17.47,3.5,Female,No,Thur,Lunch,2 143 | 141,34.3,6.7,Male,No,Thur,Lunch,6 144 | 142,41.19,5.0,Male,No,Thur,Lunch,5 145 | 143,27.05,5.0,Female,No,Thur,Lunch,6 146 | 144,16.43,2.3,Female,No,Thur,Lunch,2 147 | 145,8.35,1.5,Female,No,Thur,Lunch,2 148 | 146,18.64,1.36,Female,No,Thur,Lunch,3 149 | 147,11.87,1.63,Female,No,Thur,Lunch,2 150 | 148,9.78,1.73,Male,No,Thur,Lunch,2 151 | 149,7.51,2.0,Male,No,Thur,Lunch,2 152 | 150,14.07,2.5,Male,No,Sun,Dinner,2 153 | 151,13.13,2.0,Male,No,Sun,Dinner,2 154 | 152,17.26,2.74,Male,No,Sun,Dinner,3 155 | 153,24.55,2.0,Male,No,Sun,Dinner,4 156 | 154,19.77,2.0,Male,No,Sun,Dinner,4 157 | 155,29.85,5.14,Female,No,Sun,Dinner,5 158 | 156,48.17,5.0,Male,No,Sun,Dinner,6 159 | 157,25.0,3.75,Female,No,Sun,Dinner,4 160 | 158,13.39,2.61,Female,No,Sun,Dinner,2 161 | 159,16.49,2.0,Male,No,Sun,Dinner,4 162 | 160,21.5,3.5,Male,No,Sun,Dinner,4 163 | 161,12.66,2.5,Male,No,Sun,Dinner,2 164 | 162,16.21,2.0,Female,No,Sun,Dinner,3 165 | 163,13.81,2.0,Male,No,Sun,Dinner,2 166 | 164,17.51,3.0,Female,Yes,Sun,Dinner,2 167 | 165,24.52,3.48,Male,No,Sun,Dinner,3 168 | 166,20.76,2.24,Male,No,Sun,Dinner,2 169 | 167,31.71,4.5,Male,No,Sun,Dinner,4 170 | 168,10.59,1.61,Female,Yes,Sat,Dinner,2 171 | 169,10.63,2.0,Female,Yes,Sat,Dinner,2 172 | 170,50.81,10.0,Male,Yes,Sat,Dinner,3 173 | 171,15.81,3.16,Male,Yes,Sat,Dinner,2 174 | 172,7.25,5.15,Male,Yes,Sun,Dinner,2 175 | 173,31.85,3.18,Male,Yes,Sun,Dinner,2 176 | 174,16.82,4.0,Male,Yes,Sun,Dinner,2 177 | 175,32.9,3.11,Male,Yes,Sun,Dinner,2 178 | 176,17.89,2.0,Male,Yes,Sun,Dinner,2 179 | 177,14.48,2.0,Male,Yes,Sun,Dinner,2 180 | 
178,9.6,4.0,Female,Yes,Sun,Dinner,2 181 | 179,34.63,3.55,Male,Yes,Sun,Dinner,2 182 | 180,34.65,3.68,Male,Yes,Sun,Dinner,4 183 | 181,23.33,5.65,Male,Yes,Sun,Dinner,2 184 | 182,45.35,3.5,Male,Yes,Sun,Dinner,3 185 | 183,23.17,6.5,Male,Yes,Sun,Dinner,4 186 | 184,40.55,3.0,Male,Yes,Sun,Dinner,2 187 | 185,20.69,5.0,Male,No,Sun,Dinner,5 188 | 186,20.9,3.5,Female,Yes,Sun,Dinner,3 189 | 187,30.46,2.0,Male,Yes,Sun,Dinner,5 190 | 188,18.15,3.5,Female,Yes,Sun,Dinner,3 191 | 189,23.1,4.0,Male,Yes,Sun,Dinner,3 192 | 190,15.69,1.5,Male,Yes,Sun,Dinner,2 193 | 191,19.81,4.19,Female,Yes,Thur,Lunch,2 194 | 192,28.44,2.56,Male,Yes,Thur,Lunch,2 195 | 193,15.48,2.02,Male,Yes,Thur,Lunch,2 196 | 194,16.58,4.0,Male,Yes,Thur,Lunch,2 197 | 195,7.56,1.44,Male,No,Thur,Lunch,2 198 | 196,10.34,2.0,Male,Yes,Thur,Lunch,2 199 | 197,43.11,5.0,Female,Yes,Thur,Lunch,4 200 | 198,13.0,2.0,Female,Yes,Thur,Lunch,2 201 | 199,13.51,2.0,Male,Yes,Thur,Lunch,2 202 | 200,18.71,4.0,Male,Yes,Thur,Lunch,3 203 | 201,12.74,2.01,Female,Yes,Thur,Lunch,2 204 | 202,13.0,2.0,Female,Yes,Thur,Lunch,2 205 | 203,16.4,2.5,Female,Yes,Thur,Lunch,2 206 | 204,20.53,4.0,Male,Yes,Thur,Lunch,4 207 | 205,16.47,3.23,Female,Yes,Thur,Lunch,3 208 | 206,26.59,3.41,Male,Yes,Sat,Dinner,3 209 | 207,38.73,3.0,Male,Yes,Sat,Dinner,4 210 | 208,24.27,2.03,Male,Yes,Sat,Dinner,2 211 | 209,12.76,2.23,Female,Yes,Sat,Dinner,2 212 | 210,30.06,2.0,Male,Yes,Sat,Dinner,3 213 | 211,25.89,5.16,Male,Yes,Sat,Dinner,4 214 | 212,48.33,9.0,Male,No,Sat,Dinner,4 215 | 213,13.27,2.5,Female,Yes,Sat,Dinner,2 216 | 214,28.17,6.5,Female,Yes,Sat,Dinner,3 217 | 215,12.9,1.1,Female,Yes,Sat,Dinner,2 218 | 216,28.15,3.0,Male,Yes,Sat,Dinner,5 219 | 217,11.59,1.5,Male,Yes,Sat,Dinner,2 220 | 218,7.74,1.44,Male,Yes,Sat,Dinner,2 221 | 219,30.14,3.09,Female,Yes,Sat,Dinner,4 222 | 220,12.16,2.2,Male,Yes,Fri,Lunch,2 223 | 221,13.42,3.48,Female,Yes,Fri,Lunch,2 224 | 222,8.58,1.92,Male,Yes,Fri,Lunch,1 225 | 223,15.98,3.0,Female,No,Fri,Lunch,3 226 | 224,13.42,1.58,Male,Yes,Fri,Lunch,2 
227 | 225,16.27,2.5,Female,Yes,Fri,Lunch,2 228 | 226,10.09,2.0,Female,Yes,Fri,Lunch,2 229 | 227,20.45,3.0,Male,No,Sat,Dinner,4 230 | 228,13.28,2.72,Male,No,Sat,Dinner,2 231 | 229,22.12,2.88,Female,Yes,Sat,Dinner,2 232 | 230,24.01,2.0,Male,Yes,Sat,Dinner,4 233 | 231,15.69,3.0,Male,Yes,Sat,Dinner,3 234 | 232,11.61,3.39,Male,No,Sat,Dinner,2 235 | 233,10.77,1.47,Male,No,Sat,Dinner,2 236 | 234,15.53,3.0,Male,Yes,Sat,Dinner,2 237 | 235,10.07,1.25,Male,No,Sat,Dinner,2 238 | 236,12.6,1.0,Male,Yes,Sat,Dinner,2 239 | 237,32.83,1.17,Male,Yes,Sat,Dinner,2 240 | 238,35.83,4.67,Female,No,Sat,Dinner,3 241 | 239,29.03,5.92,Male,No,Sat,Dinner,3 242 | 240,27.18,2.0,Female,Yes,Sat,Dinner,2 243 | 241,22.67,2.0,Male,Yes,Sat,Dinner,2 244 | 242,17.82,1.75,Male,No,Sat,Dinner,2 245 | 243,18.78,3.0,Female,No,Thur,Dinner,2 246 | -------------------------------------------------------------------------------- /02_pandas_tips&tricks/pandas_tips_and_tricks/tips_save.csv: -------------------------------------------------------------------------------- 1 | ,total_bill,tip,sex,smoker,day,time,size 2 | 0,16.99,1.01,Female,No,Sun,Dinner,2 3 | 1,10.34,1.66,Male,No,Sun,Dinner,3 4 | 2,21.01,3.5,Male,No,Sun,Dinner,3 5 | 3,23.68,3.31,Male,No,Sun,Dinner,2 6 | 4,24.59,3.61,Female,No,Sun,Dinner,4 7 | 5,25.29,4.71,Male,No,Sun,Dinner,4 8 | 6,8.77,2.0,Male,No,Sun,Dinner,2 9 | 7,26.88,3.12,Male,No,Sun,Dinner,4 10 | 8,15.04,1.96,Male,No,Sun,Dinner,2 11 | 9,14.78,3.23,Male,No,Sun,Dinner,2 12 | 10,10.27,1.71,Male,No,Sun,Dinner,2 13 | 11,35.26,5.0,Female,No,Sun,Dinner,4 14 | 12,15.42,1.57,Male,No,Sun,Dinner,2 15 | 13,18.43,3.0,Male,No,Sun,Dinner,4 16 | 14,14.83,3.02,Female,No,Sun,Dinner,2 17 | 15,21.58,3.92,Male,No,Sun,Dinner,2 18 | 16,10.33,1.67,Female,No,Sun,Dinner,3 19 | 17,16.29,3.71,Male,No,Sun,Dinner,3 20 | 18,16.97,3.5,Female,No,Sun,Dinner,3 21 | 19,20.65,3.35,Male,No,Sat,Dinner,3 22 | 20,17.92,4.08,Male,No,Sat,Dinner,2 23 | 21,20.29,2.75,Female,No,Sat,Dinner,2 24 | 22,15.77,2.23,Female,No,Sat,Dinner,2 25 | 
23,39.42,7.58,Male,No,Sat,Dinner,4 26 | 24,19.82,3.18,Male,No,Sat,Dinner,2 27 | 25,17.81,2.34,Male,No,Sat,Dinner,4 28 | 26,13.37,2.0,Male,No,Sat,Dinner,2 29 | 27,12.69,2.0,Male,No,Sat,Dinner,2 30 | 28,21.7,4.3,Male,No,Sat,Dinner,2 31 | 29,19.65,3.0,Female,No,Sat,Dinner,2 32 | 30,9.55,1.45,Male,No,Sat,Dinner,2 33 | 31,18.35,2.5,Male,No,Sat,Dinner,4 34 | 32,15.06,3.0,Female,No,Sat,Dinner,2 35 | 33,20.69,2.45,Female,No,Sat,Dinner,4 36 | 34,17.78,3.27,Male,No,Sat,Dinner,2 37 | 35,24.06,3.6,Male,No,Sat,Dinner,3 38 | 36,16.31,2.0,Male,No,Sat,Dinner,3 39 | 37,16.93,3.07,Female,No,Sat,Dinner,3 40 | 38,18.69,2.31,Male,No,Sat,Dinner,3 41 | 39,31.27,5.0,Male,No,Sat,Dinner,3 42 | 40,16.04,2.24,Male,No,Sat,Dinner,3 43 | 41,17.46,2.54,Male,No,Sun,Dinner,2 44 | 42,13.94,3.06,Male,No,Sun,Dinner,2 45 | 43,9.68,1.32,Male,No,Sun,Dinner,2 46 | 44,30.4,5.6,Male,No,Sun,Dinner,4 47 | 45,18.29,3.0,Male,No,Sun,Dinner,2 48 | 46,22.23,5.0,Male,No,Sun,Dinner,2 49 | 47,32.4,6.0,Male,No,Sun,Dinner,4 50 | 48,28.55,2.05,Male,No,Sun,Dinner,3 51 | 49,18.04,3.0,Male,No,Sun,Dinner,2 52 | 50,12.54,2.5,Male,No,Sun,Dinner,2 53 | 51,10.29,2.6,Female,No,Sun,Dinner,2 54 | 52,34.81,5.2,Female,No,Sun,Dinner,4 55 | 53,9.94,1.56,Male,No,Sun,Dinner,2 56 | 54,25.56,4.34,Male,No,Sun,Dinner,4 57 | 55,19.49,3.51,Male,No,Sun,Dinner,2 58 | 56,38.01,3.0,Male,Yes,Sat,Dinner,4 59 | 57,26.41,1.5,Female,No,Sat,Dinner,2 60 | 58,11.24,1.76,Male,Yes,Sat,Dinner,2 61 | 59,48.27,6.73,Male,No,Sat,Dinner,4 62 | 60,20.29,3.21,Male,Yes,Sat,Dinner,2 63 | 61,13.81,2.0,Male,Yes,Sat,Dinner,2 64 | 62,11.02,1.98,Male,Yes,Sat,Dinner,2 65 | 63,18.29,3.76,Male,Yes,Sat,Dinner,4 66 | 64,17.59,2.64,Male,No,Sat,Dinner,3 67 | 65,20.08,3.15,Male,No,Sat,Dinner,3 68 | 66,16.45,2.47,Female,No,Sat,Dinner,2 69 | 67,3.07,1.0,Female,Yes,Sat,Dinner,1 70 | 68,20.23,2.01,Male,No,Sat,Dinner,2 71 | 69,15.01,2.09,Male,Yes,Sat,Dinner,2 72 | 70,12.02,1.97,Male,No,Sat,Dinner,2 73 | 71,17.07,3.0,Female,No,Sat,Dinner,3 74 | 72,26.86,3.14,Female,Yes,Sat,Dinner,2 75 
| 73,25.28,5.0,Female,Yes,Sat,Dinner,2 76 | 74,14.73,2.2,Female,No,Sat,Dinner,2 77 | 75,10.51,1.25,Male,No,Sat,Dinner,2 78 | 76,17.92,3.08,Male,Yes,Sat,Dinner,2 79 | 77,27.2,4.0,Male,No,Thur,Lunch,4 80 | 78,22.76,3.0,Male,No,Thur,Lunch,2 81 | 79,17.29,2.71,Male,No,Thur,Lunch,2 82 | 80,19.44,3.0,Male,Yes,Thur,Lunch,2 83 | 81,16.66,3.4,Male,No,Thur,Lunch,2 84 | 82,10.07,1.83,Female,No,Thur,Lunch,1 85 | 83,32.68,5.0,Male,Yes,Thur,Lunch,2 86 | 84,15.98,2.03,Male,No,Thur,Lunch,2 87 | 85,34.83,5.17,Female,No,Thur,Lunch,4 88 | 86,13.03,2.0,Male,No,Thur,Lunch,2 89 | 87,18.28,4.0,Male,No,Thur,Lunch,2 90 | 88,24.71,5.85,Male,No,Thur,Lunch,2 91 | 89,21.16,3.0,Male,No,Thur,Lunch,2 92 | 90,28.97,3.0,Male,Yes,Fri,Dinner,2 93 | 91,22.49,3.5,Male,No,Fri,Dinner,2 94 | 92,5.75,1.0,Female,Yes,Fri,Dinner,2 95 | 93,16.32,4.3,Female,Yes,Fri,Dinner,2 96 | 94,22.75,3.25,Female,No,Fri,Dinner,2 97 | 95,40.17,4.73,Male,Yes,Fri,Dinner,4 98 | 96,27.28,4.0,Male,Yes,Fri,Dinner,2 99 | 97,12.03,1.5,Male,Yes,Fri,Dinner,2 100 | 98,21.01,3.0,Male,Yes,Fri,Dinner,2 101 | 99,12.46,1.5,Male,No,Fri,Dinner,2 102 | 100,11.35,2.5,Female,Yes,Fri,Dinner,2 103 | 101,15.38,3.0,Female,Yes,Fri,Dinner,2 104 | 102,44.3,2.5,Female,Yes,Sat,Dinner,3 105 | 103,22.42,3.48,Female,Yes,Sat,Dinner,2 106 | 104,20.92,4.08,Female,No,Sat,Dinner,2 107 | 105,15.36,1.64,Male,Yes,Sat,Dinner,2 108 | 106,20.49,4.06,Male,Yes,Sat,Dinner,2 109 | 107,25.21,4.29,Male,Yes,Sat,Dinner,2 110 | 108,18.24,3.76,Male,No,Sat,Dinner,2 111 | 109,14.31,4.0,Female,Yes,Sat,Dinner,2 112 | 110,14.0,3.0,Male,No,Sat,Dinner,2 113 | 111,7.25,1.0,Female,No,Sat,Dinner,1 114 | 112,38.07,4.0,Male,No,Sun,Dinner,3 115 | 113,23.95,2.55,Male,No,Sun,Dinner,2 116 | 114,25.71,4.0,Female,No,Sun,Dinner,3 117 | 115,17.31,3.5,Female,No,Sun,Dinner,2 118 | 116,29.93,5.07,Male,No,Sun,Dinner,4 119 | 117,10.65,1.5,Female,No,Thur,Lunch,2 120 | 118,12.43,1.8,Female,No,Thur,Lunch,2 121 | 119,24.08,2.92,Female,No,Thur,Lunch,4 122 | 120,11.69,2.31,Male,No,Thur,Lunch,2 123 | 
121,13.42,1.68,Female,No,Thur,Lunch,2 124 | 122,14.26,2.5,Male,No,Thur,Lunch,2 125 | 123,15.95,2.0,Male,No,Thur,Lunch,2 126 | 124,12.48,2.52,Female,No,Thur,Lunch,2 127 | 125,29.8,4.2,Female,No,Thur,Lunch,6 128 | 126,8.52,1.48,Male,No,Thur,Lunch,2 129 | 127,14.52,2.0,Female,No,Thur,Lunch,2 130 | 128,11.38,2.0,Female,No,Thur,Lunch,2 131 | 129,22.82,2.18,Male,No,Thur,Lunch,3 132 | 130,19.08,1.5,Male,No,Thur,Lunch,2 133 | 131,20.27,2.83,Female,No,Thur,Lunch,2 134 | 132,11.17,1.5,Female,No,Thur,Lunch,2 135 | 133,12.26,2.0,Female,No,Thur,Lunch,2 136 | 134,18.26,3.25,Female,No,Thur,Lunch,2 137 | 135,8.51,1.25,Female,No,Thur,Lunch,2 138 | 136,10.33,2.0,Female,No,Thur,Lunch,2 139 | 137,14.15,2.0,Female,No,Thur,Lunch,2 140 | 138,16.0,2.0,Male,Yes,Thur,Lunch,2 141 | 139,13.16,2.75,Female,No,Thur,Lunch,2 142 | 140,17.47,3.5,Female,No,Thur,Lunch,2 143 | 141,34.3,6.7,Male,No,Thur,Lunch,6 144 | 142,41.19,5.0,Male,No,Thur,Lunch,5 145 | 143,27.05,5.0,Female,No,Thur,Lunch,6 146 | 144,16.43,2.3,Female,No,Thur,Lunch,2 147 | 145,8.35,1.5,Female,No,Thur,Lunch,2 148 | 146,18.64,1.36,Female,No,Thur,Lunch,3 149 | 147,11.87,1.63,Female,No,Thur,Lunch,2 150 | 148,9.78,1.73,Male,No,Thur,Lunch,2 151 | 149,7.51,2.0,Male,No,Thur,Lunch,2 152 | 150,14.07,2.5,Male,No,Sun,Dinner,2 153 | 151,13.13,2.0,Male,No,Sun,Dinner,2 154 | 152,17.26,2.74,Male,No,Sun,Dinner,3 155 | 153,24.55,2.0,Male,No,Sun,Dinner,4 156 | 154,19.77,2.0,Male,No,Sun,Dinner,4 157 | 155,29.85,5.14,Female,No,Sun,Dinner,5 158 | 156,48.17,5.0,Male,No,Sun,Dinner,6 159 | 157,25.0,3.75,Female,No,Sun,Dinner,4 160 | 158,13.39,2.61,Female,No,Sun,Dinner,2 161 | 159,16.49,2.0,Male,No,Sun,Dinner,4 162 | 160,21.5,3.5,Male,No,Sun,Dinner,4 163 | 161,12.66,2.5,Male,No,Sun,Dinner,2 164 | 162,16.21,2.0,Female,No,Sun,Dinner,3 165 | 163,13.81,2.0,Male,No,Sun,Dinner,2 166 | 164,17.51,3.0,Female,Yes,Sun,Dinner,2 167 | 165,24.52,3.48,Male,No,Sun,Dinner,3 168 | 166,20.76,2.24,Male,No,Sun,Dinner,2 169 | 167,31.71,4.5,Male,No,Sun,Dinner,4 170 | 
168,10.59,1.61,Female,Yes,Sat,Dinner,2 171 | 169,10.63,2.0,Female,Yes,Sat,Dinner,2 172 | 170,50.81,10.0,Male,Yes,Sat,Dinner,3 173 | 171,15.81,3.16,Male,Yes,Sat,Dinner,2 174 | 172,7.25,5.15,Male,Yes,Sun,Dinner,2 175 | 173,31.85,3.18,Male,Yes,Sun,Dinner,2 176 | 174,16.82,4.0,Male,Yes,Sun,Dinner,2 177 | 175,32.9,3.11,Male,Yes,Sun,Dinner,2 178 | 176,17.89,2.0,Male,Yes,Sun,Dinner,2 179 | 177,14.48,2.0,Male,Yes,Sun,Dinner,2 180 | 178,9.6,4.0,Female,Yes,Sun,Dinner,2 181 | 179,34.63,3.55,Male,Yes,Sun,Dinner,2 182 | 180,34.65,3.68,Male,Yes,Sun,Dinner,4 183 | 181,23.33,5.65,Male,Yes,Sun,Dinner,2 184 | 182,45.35,3.5,Male,Yes,Sun,Dinner,3 185 | 183,23.17,6.5,Male,Yes,Sun,Dinner,4 186 | 184,40.55,3.0,Male,Yes,Sun,Dinner,2 187 | 185,20.69,5.0,Male,No,Sun,Dinner,5 188 | 186,20.9,3.5,Female,Yes,Sun,Dinner,3 189 | 187,30.46,2.0,Male,Yes,Sun,Dinner,5 190 | 188,18.15,3.5,Female,Yes,Sun,Dinner,3 191 | 189,23.1,4.0,Male,Yes,Sun,Dinner,3 192 | 190,15.69,1.5,Male,Yes,Sun,Dinner,2 193 | 191,19.81,4.19,Female,Yes,Thur,Lunch,2 194 | 192,28.44,2.56,Male,Yes,Thur,Lunch,2 195 | 193,15.48,2.02,Male,Yes,Thur,Lunch,2 196 | 194,16.58,4.0,Male,Yes,Thur,Lunch,2 197 | 195,7.56,1.44,Male,No,Thur,Lunch,2 198 | 196,10.34,2.0,Male,Yes,Thur,Lunch,2 199 | 197,43.11,5.0,Female,Yes,Thur,Lunch,4 200 | 198,13.0,2.0,Female,Yes,Thur,Lunch,2 201 | 199,13.51,2.0,Male,Yes,Thur,Lunch,2 202 | 200,18.71,4.0,Male,Yes,Thur,Lunch,3 203 | 201,12.74,2.01,Female,Yes,Thur,Lunch,2 204 | 202,13.0,2.0,Female,Yes,Thur,Lunch,2 205 | 203,16.4,2.5,Female,Yes,Thur,Lunch,2 206 | 204,20.53,4.0,Male,Yes,Thur,Lunch,4 207 | 205,16.47,3.23,Female,Yes,Thur,Lunch,3 208 | 206,26.59,3.41,Male,Yes,Sat,Dinner,3 209 | 207,38.73,3.0,Male,Yes,Sat,Dinner,4 210 | 208,24.27,2.03,Male,Yes,Sat,Dinner,2 211 | 209,12.76,2.23,Female,Yes,Sat,Dinner,2 212 | 210,30.06,2.0,Male,Yes,Sat,Dinner,3 213 | 211,25.89,5.16,Male,Yes,Sat,Dinner,4 214 | 212,48.33,9.0,Male,No,Sat,Dinner,4 215 | 213,13.27,2.5,Female,Yes,Sat,Dinner,2 216 | 
214,28.17,6.5,Female,Yes,Sat,Dinner,3 217 | 215,12.9,1.1,Female,Yes,Sat,Dinner,2 218 | 216,28.15,3.0,Male,Yes,Sat,Dinner,5 219 | 217,11.59,1.5,Male,Yes,Sat,Dinner,2 220 | 218,7.74,1.44,Male,Yes,Sat,Dinner,2 221 | 219,30.14,3.09,Female,Yes,Sat,Dinner,4 222 | 220,12.16,2.2,Male,Yes,Fri,Lunch,2 223 | 221,13.42,3.48,Female,Yes,Fri,Lunch,2 224 | 222,8.58,1.92,Male,Yes,Fri,Lunch,1 225 | 223,15.98,3.0,Female,No,Fri,Lunch,3 226 | 224,13.42,1.58,Male,Yes,Fri,Lunch,2 227 | 225,16.27,2.5,Female,Yes,Fri,Lunch,2 228 | 226,10.09,2.0,Female,Yes,Fri,Lunch,2 229 | 227,20.45,3.0,Male,No,Sat,Dinner,4 230 | 228,13.28,2.72,Male,No,Sat,Dinner,2 231 | 229,22.12,2.88,Female,Yes,Sat,Dinner,2 232 | 230,24.01,2.0,Male,Yes,Sat,Dinner,4 233 | 231,15.69,3.0,Male,Yes,Sat,Dinner,3 234 | 232,11.61,3.39,Male,No,Sat,Dinner,2 235 | 233,10.77,1.47,Male,No,Sat,Dinner,2 236 | 234,15.53,3.0,Male,Yes,Sat,Dinner,2 237 | 235,10.07,1.25,Male,No,Sat,Dinner,2 238 | 236,12.6,1.0,Male,Yes,Sat,Dinner,2 239 | 237,32.83,1.17,Male,Yes,Sat,Dinner,2 240 | 238,35.83,4.67,Female,No,Sat,Dinner,3 241 | 239,29.03,5.92,Male,No,Sat,Dinner,3 242 | 240,27.18,2.0,Female,Yes,Sat,Dinner,2 243 | 241,22.67,2.0,Male,Yes,Sat,Dinner,2 244 | 242,17.82,1.75,Male,No,Sat,Dinner,2 245 | 243,18.78,3.0,Female,No,Thur,Dinner,2 246 | -------------------------------------------------------------------------------- /02_pandas_tips&tricks/Solutions/07_grouping.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Ex - GroupBy" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/drinks.csv). 
" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "### Step 3. Assign it to a variable called drinks." 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 4, 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "data": { 40 | "text/html": [ 41 | "
\n", 42 | "\n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | "
countrybeer_servingsspirit_servingswine_servingstotal_litres_of_pure_alcoholcontinent
0Afghanistan0000.0AS
1Albania89132544.9EU
2Algeria250140.7AF
3Andorra24513831212.4EU
4Angola21757455.9AF
\n", 102 | "
" 103 | ], 104 | "text/plain": [ 105 | " country beer_servings spirit_servings wine_servings \\\n", 106 | "0 Afghanistan 0 0 0 \n", 107 | "1 Albania 89 132 54 \n", 108 | "2 Algeria 25 0 14 \n", 109 | "3 Andorra 245 138 312 \n", 110 | "4 Angola 217 57 45 \n", 111 | "\n", 112 | " total_litres_of_pure_alcohol continent \n", 113 | "0 0.0 AS \n", 114 | "1 4.9 EU \n", 115 | "2 0.7 AF \n", 116 | "3 12.4 EU \n", 117 | "4 5.9 AF " 118 | ] 119 | }, 120 | "execution_count": 4, 121 | "metadata": {}, 122 | "output_type": "execute_result" 123 | } 124 | ], 125 | "source": [ 126 | "drinks = pd.read_csv('https://raw.githubusercontent.com/justmarkham/DAT8/master/data/drinks.csv')\n", 127 | "drinks.head()" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "### Step 4. Which continent drinks more beer on average?" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 6, 140 | "metadata": {}, 141 | "outputs": [ 142 | { 143 | "data": { 144 | "text/plain": [ 145 | "continent\n", 146 | "AF 61.471698\n", 147 | "AS 37.045455\n", 148 | "EU 193.777778\n", 149 | "OC 89.687500\n", 150 | "SA 175.083333\n", 151 | "Name: beer_servings, dtype: float64" 152 | ] 153 | }, 154 | "execution_count": 6, 155 | "metadata": {}, 156 | "output_type": "execute_result" 157 | } 158 | ], 159 | "source": [ 160 | "drinks.groupby('continent').beer_servings.mean()" 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": {}, 166 | "source": [ 167 | "### Step 5. For each continent print the statistics for wine consumption." 
168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 9, 173 | "metadata": {}, 174 | "outputs": [ 175 | { 176 | "data": { 177 | "text/plain": [ 178 | "continent \n", 179 | "AF count 53.000000\n", 180 | " mean 16.264151\n", 181 | " std 38.846419\n", 182 | " min 0.000000\n", 183 | " 25% 1.000000\n", 184 | " 50% 2.000000\n", 185 | " 75% 13.000000\n", 186 | " max 233.000000\n", 187 | "AS count 44.000000\n", 188 | " mean 9.068182\n", 189 | " std 21.667034\n", 190 | " min 0.000000\n", 191 | " 25% 0.000000\n", 192 | " 50% 1.000000\n", 193 | " 75% 8.000000\n", 194 | " max 123.000000\n", 195 | "EU count 45.000000\n", 196 | " mean 142.222222\n", 197 | " std 97.421738\n", 198 | " min 0.000000\n", 199 | " 25% 59.000000\n", 200 | " 50% 128.000000\n", 201 | " 75% 195.000000\n", 202 | " max 370.000000\n", 203 | "OC count 16.000000\n", 204 | " mean 35.625000\n", 205 | " std 64.555790\n", 206 | " min 0.000000\n", 207 | " 25% 1.000000\n", 208 | " 50% 8.500000\n", 209 | " 75% 23.250000\n", 210 | " max 212.000000\n", 211 | "SA count 12.000000\n", 212 | " mean 62.416667\n", 213 | " std 88.620189\n", 214 | " min 1.000000\n", 215 | " 25% 3.000000\n", 216 | " 50% 12.000000\n", 217 | " 75% 98.500000\n", 218 | " max 221.000000\n", 219 | "dtype: float64" 220 | ] 221 | }, 222 | "execution_count": 9, 223 | "metadata": {}, 224 | "output_type": "execute_result" 225 | } 226 | ], 227 | "source": [ 228 | "drinks.groupby('continent').wine_servings.describe()" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 | "source": [ 235 | "### Step 6. Print the mean alcohol consumption per continent for every column" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 10, 241 | "metadata": {}, 242 | "outputs": [ 243 | { 244 | "data": { 245 | "text/html": [ 246 | "
\n", 247 | "\n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | "
beer_servingsspirit_servingswine_servingstotal_litres_of_pure_alcohol
continent
AF61.47169816.33962316.2641513.007547
AS37.04545560.8409099.0681822.170455
EU193.777778132.555556142.2222228.617778
OC89.68750058.43750035.6250003.381250
SA175.083333114.75000062.4166676.308333
\n", 302 | "
" 303 | ], 304 | "text/plain": [ 305 | " beer_servings spirit_servings wine_servings \\\n", 306 | "continent \n", 307 | "AF 61.471698 16.339623 16.264151 \n", 308 | "AS 37.045455 60.840909 9.068182 \n", 309 | "EU 193.777778 132.555556 142.222222 \n", 310 | "OC 89.687500 58.437500 35.625000 \n", 311 | "SA 175.083333 114.750000 62.416667 \n", 312 | "\n", 313 | " total_litres_of_pure_alcohol \n", 314 | "continent \n", 315 | "AF 3.007547 \n", 316 | "AS 2.170455 \n", 317 | "EU 8.617778 \n", 318 | "OC 3.381250 \n", 319 | "SA 6.308333 " 320 | ] 321 | }, 322 | "execution_count": 10, 323 | "metadata": {}, 324 | "output_type": "execute_result" 325 | } 326 | ], 327 | "source": [ 328 | "drinks.groupby('continent').mean()" 329 | ] 330 | }, 331 | { 332 | "cell_type": "markdown", 333 | "metadata": {}, 334 | "source": [ 335 | "### Step 7. Print the median alcohol consumption per continent for every column" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": 14, 341 | "metadata": {}, 342 | "outputs": [ 343 | { 344 | "data": { 345 | "text/html": [ 346 | "
\n", 347 | "\n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | "
beer_servingsspirit_servingswine_servingstotal_litres_of_pure_alcohol
continent
AF32.03.02.02.30
AS17.516.01.01.20
EU219.0122.0128.010.00
OC52.537.08.51.75
SA162.5108.512.06.85
\n", 402 | "
" 403 | ], 404 | "text/plain": [ 405 | " beer_servings spirit_servings wine_servings \\\n", 406 | "continent \n", 407 | "AF 32.0 3.0 2.0 \n", 408 | "AS 17.5 16.0 1.0 \n", 409 | "EU 219.0 122.0 128.0 \n", 410 | "OC 52.5 37.0 8.5 \n", 411 | "SA 162.5 108.5 12.0 \n", 412 | "\n", 413 | " total_litres_of_pure_alcohol \n", 414 | "continent \n", 415 | "AF 2.30 \n", 416 | "AS 1.20 \n", 417 | "EU 10.00 \n", 418 | "OC 1.75 \n", 419 | "SA 6.85 " 420 | ] 421 | }, 422 | "execution_count": 14, 423 | "metadata": {}, 424 | "output_type": "execute_result" 425 | } 426 | ], 427 | "source": [ 428 | "drinks.groupby('continent').median()" 429 | ] 430 | }, 431 | { 432 | "cell_type": "markdown", 433 | "metadata": {}, 434 | "source": [ 435 | "### Step 8. Print the mean, min and max values for spirit consumption.\n", 436 | "#### This time output a DataFrame" 437 | ] 438 | }, 439 | { 440 | "cell_type": "code", 441 | "execution_count": 15, 442 | "metadata": {}, 443 | "outputs": [ 444 | { 445 | "data": { 446 | "text/html": [ 447 | "
\n", 448 | "\n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | "
meanminmax
continent
AF16.3396230152
AS60.8409090326
EU132.5555560373
OC58.4375000254
SA114.75000025302
\n", 496 | "
" 497 | ], 498 | "text/plain": [ 499 | " mean min max\n", 500 | "continent \n", 501 | "AF 16.339623 0 152\n", 502 | "AS 60.840909 0 326\n", 503 | "EU 132.555556 0 373\n", 504 | "OC 58.437500 0 254\n", 505 | "SA 114.750000 25 302" 506 | ] 507 | }, 508 | "execution_count": 15, 509 | "metadata": {}, 510 | "output_type": "execute_result" 511 | } 512 | ], 513 | "source": [ 514 | "drinks.groupby('continent').spirit_servings.agg(['mean', 'min', 'max'])" 515 | ] 516 | } 517 | ], 518 | "metadata": { 519 | "kernelspec": { 520 | "display_name": "Python 3.9.7 ('base')", 521 | "language": "python", 522 | "name": "python3" 523 | }, 524 | "language_info": { 525 | "codemirror_mode": { 526 | "name": "ipython", 527 | "version": 3 528 | }, 529 | "file_extension": ".py", 530 | "mimetype": "text/x-python", 531 | "name": "python", 532 | "nbconvert_exporter": "python", 533 | "pygments_lexer": "ipython3", 534 | "version": "3.9.7" 535 | }, 536 | "toc": { 537 | "base_numbering": 1, 538 | "nav_menu": {}, 539 | "number_sections": true, 540 | "sideBar": true, 541 | "skip_h1_title": false, 542 | "title_cell": "Table of Contents", 543 | "title_sidebar": "Contents", 544 | "toc_cell": false, 545 | "toc_position": {}, 546 | "toc_section_display": true, 547 | "toc_window_display": false 548 | }, 549 | "vscode": { 550 | "interpreter": { 551 | "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6" 552 | } 553 | } 554 | }, 555 | "nbformat": 4, 556 | "nbformat_minor": 1 557 | } 558 | -------------------------------------------------------------------------------- /02_pandas_tips&tricks/Solutions/03_Know_your_Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Ex3 - Getting and knowing your Data" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Step 1. 
Go to https://www.kaggle.com/openfoodfacts/world-food-facts/data" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "### Step 2. Download the dataset to your computer and unzip it." 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 2, 27 | "metadata": { 28 | "collapsed": true 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "import pandas as pd\n", 33 | "import numpy as np" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "### Step 3. Use the tsv file and assign it to a dataframe called food" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 3, 46 | "metadata": {}, 47 | "outputs": [ 48 | { 49 | "name": "stderr", 50 | "output_type": "stream", 51 | "text": [ 52 | "//anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2717: DtypeWarning: Columns (0,3,5,19,20,24,25,26,27,28,36,37,38,39,48) have mixed types. Specify dtype option on import or set low_memory=False.\n", 53 | " interactivity=interactivity, compiler=compiler, result=result)\n" 54 | ] 55 | } 56 | ], 57 | "source": [ 58 | "food = pd.read_csv('~/Desktop/en.openfoodfacts.org.products.tsv', sep='\\t')" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "### Step 4. See the first 5 entries" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 4, 71 | "metadata": {}, 72 | "outputs": [ 73 | { 74 | "data": { 75 | "text/html": [ 76 | "
\n", 77 | "\n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | "
codeurlcreatorcreated_tcreated_datetimelast_modified_tlast_modified_datetimeproduct_namegeneric_namequantity...fruits-vegetables-nuts_100gfruits-vegetables-nuts-estimate_100gcollagen-meat-protein-ratio_100gcocoa_100gchlorophyl_100gcarbon-footprint_100gnutrition-score-fr_100gnutrition-score-uk_100gglycemic-index_100gwater-hardness_100g
03087http://world-en.openfoodfacts.org/product/0000...openfoodfacts-contributors14741038662016-09-17T09:17:46Z14741038932016-09-17T09:18:13ZFarine de blé noirNaN1kg...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
14530http://world-en.openfoodfacts.org/product/0000...usda-ndb-import14890699572017-03-09T14:32:37Z14890699572017-03-09T14:32:37ZBanana Chips Sweetened (Whole)NaNNaN...NaNNaNNaNNaNNaNNaN14.014.0NaNNaN
24559http://world-en.openfoodfacts.org/product/0000...usda-ndb-import14890699572017-03-09T14:32:37Z14890699572017-03-09T14:32:37ZPeanutsNaNNaN...NaNNaNNaNNaNNaNNaN0.00.0NaNNaN
316087http://world-en.openfoodfacts.org/product/0000...usda-ndb-import14890557312017-03-09T10:35:31Z14890557312017-03-09T10:35:31ZOrganic Salted Nut MixNaNNaN...NaNNaNNaNNaNNaNNaN12.012.0NaNNaN
416094http://world-en.openfoodfacts.org/product/0000...usda-ndb-import14890556532017-03-09T10:34:13Z14890556532017-03-09T10:34:13ZOrganic PolentaNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", 227 | "

5 rows × 163 columns

\n", 228 | "
" 229 | ], 230 | "text/plain": [ 231 | " code url \\\n", 232 | "0 3087 http://world-en.openfoodfacts.org/product/0000... \n", 233 | "1 4530 http://world-en.openfoodfacts.org/product/0000... \n", 234 | "2 4559 http://world-en.openfoodfacts.org/product/0000... \n", 235 | "3 16087 http://world-en.openfoodfacts.org/product/0000... \n", 236 | "4 16094 http://world-en.openfoodfacts.org/product/0000... \n", 237 | "\n", 238 | " creator created_t created_datetime \\\n", 239 | "0 openfoodfacts-contributors 1474103866 2016-09-17T09:17:46Z \n", 240 | "1 usda-ndb-import 1489069957 2017-03-09T14:32:37Z \n", 241 | "2 usda-ndb-import 1489069957 2017-03-09T14:32:37Z \n", 242 | "3 usda-ndb-import 1489055731 2017-03-09T10:35:31Z \n", 243 | "4 usda-ndb-import 1489055653 2017-03-09T10:34:13Z \n", 244 | "\n", 245 | " last_modified_t last_modified_datetime product_name \\\n", 246 | "0 1474103893 2016-09-17T09:18:13Z Farine de blé noir \n", 247 | "1 1489069957 2017-03-09T14:32:37Z Banana Chips Sweetened (Whole) \n", 248 | "2 1489069957 2017-03-09T14:32:37Z Peanuts \n", 249 | "3 1489055731 2017-03-09T10:35:31Z Organic Salted Nut Mix \n", 250 | "4 1489055653 2017-03-09T10:34:13Z Organic Polenta \n", 251 | "\n", 252 | " generic_name quantity ... fruits-vegetables-nuts_100g \\\n", 253 | "0 NaN 1kg ... NaN \n", 254 | "1 NaN NaN ... NaN \n", 255 | "2 NaN NaN ... NaN \n", 256 | "3 NaN NaN ... NaN \n", 257 | "4 NaN NaN ... 
NaN \n", 258 | "\n", 259 | " fruits-vegetables-nuts-estimate_100g collagen-meat-protein-ratio_100g \\\n", 260 | "0 NaN NaN \n", 261 | "1 NaN NaN \n", 262 | "2 NaN NaN \n", 263 | "3 NaN NaN \n", 264 | "4 NaN NaN \n", 265 | "\n", 266 | " cocoa_100g chlorophyl_100g carbon-footprint_100g nutrition-score-fr_100g \\\n", 267 | "0 NaN NaN NaN NaN \n", 268 | "1 NaN NaN NaN 14.0 \n", 269 | "2 NaN NaN NaN 0.0 \n", 270 | "3 NaN NaN NaN 12.0 \n", 271 | "4 NaN NaN NaN NaN \n", 272 | "\n", 273 | " nutrition-score-uk_100g glycemic-index_100g water-hardness_100g \n", 274 | "0 NaN NaN NaN \n", 275 | "1 14.0 NaN NaN \n", 276 | "2 0.0 NaN NaN \n", 277 | "3 12.0 NaN NaN \n", 278 | "4 NaN NaN NaN \n", 279 | "\n", 280 | "[5 rows x 163 columns]" 281 | ] 282 | }, 283 | "execution_count": 4, 284 | "metadata": {}, 285 | "output_type": "execute_result" 286 | } 287 | ], 288 | "source": [ 289 | "food.head()" 290 | ] 291 | }, 292 | { 293 | "cell_type": "markdown", 294 | "metadata": {}, 295 | "source": [ 296 | "### Step 5. What is the number of observations in the dataset?" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 5, 302 | "metadata": {}, 303 | "outputs": [ 304 | { 305 | "data": { 306 | "text/plain": [ 307 | "(356027, 163)" 308 | ] 309 | }, 310 | "execution_count": 5, 311 | "metadata": {}, 312 | "output_type": "execute_result" 313 | } 314 | ], 315 | "source": [ 316 | "food.shape #will give you both (observations/rows, columns)" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": 6, 322 | "metadata": {}, 323 | "outputs": [ 324 | { 325 | "data": { 326 | "text/plain": [ 327 | "356027" 328 | ] 329 | }, 330 | "execution_count": 6, 331 | "metadata": {}, 332 | "output_type": "execute_result" 333 | } 334 | ], 335 | "source": [ 336 | "food.shape[0] #will give you only the observations/rows number" 337 | ] 338 | }, 339 | { 340 | "cell_type": "markdown", 341 | "metadata": {}, 342 | "source": [ 343 | "### Step 6. 
What is the number of columns in the dataset?" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": 7, 349 | "metadata": {}, 350 | "outputs": [ 351 | { 352 | "name": "stdout", 353 | "output_type": "stream", 354 | "text": [ 355 | "(356027, 163)\n", 356 | "163\n", 357 | "\n", 358 | "RangeIndex: 356027 entries, 0 to 356026\n", 359 | "Columns: 163 entries, code to water-hardness_100g\n", 360 | "dtypes: float64(107), object(56)\n", 361 | "memory usage: 442.8+ MB\n" 362 | ] 363 | } 364 | ], 365 | "source": [ 366 | "print(food.shape) #will give you both (observations/rows, columns)\n", 367 | "print(food.shape[1]) #will give you only the columns number\n", 368 | "\n", 369 | "#OR\n", 370 | "\n", 371 | "food.info() #Columns: 163 entries" 372 | ] 373 | }, 374 | { 375 | "cell_type": "markdown", 376 | "metadata": {}, 377 | "source": [ 378 | "### Step 7. Print the name of all the columns." 379 | ] 380 | }, 381 | { 382 | "cell_type": "code", 383 | "execution_count": 8, 384 | "metadata": {}, 385 | "outputs": [ 386 | { 387 | "data": { 388 | "text/plain": [ 389 | "Index([u'code', u'url', u'creator', u'created_t', u'created_datetime',\n", 390 | " u'last_modified_t', u'last_modified_datetime', u'product_name',\n", 391 | " u'generic_name', u'quantity',\n", 392 | " ...\n", 393 | " u'fruits-vegetables-nuts_100g', u'fruits-vegetables-nuts-estimate_100g',\n", 394 | " u'collagen-meat-protein-ratio_100g', u'cocoa_100g', u'chlorophyl_100g',\n", 395 | " u'carbon-footprint_100g', u'nutrition-score-fr_100g',\n", 396 | " u'nutrition-score-uk_100g', u'glycemic-index_100g',\n", 397 | " u'water-hardness_100g'],\n", 398 | " dtype='object', length=163)" 399 | ] 400 | }, 401 | "execution_count": 8, 402 | "metadata": {}, 403 | "output_type": "execute_result" 404 | } 405 | ], 406 | "source": [ 407 | "food.columns" 408 | ] 409 | }, 410 | { 411 | "cell_type": "markdown", 412 | "metadata": {}, 413 | "source": [ 414 | "### Step 8. What is the name of 105th column?" 
415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": 9, 420 | "metadata": {}, 421 | "outputs": [ 422 | { 423 | "data": { 424 | "text/plain": [ 425 | "'-glucose_100g'" 426 | ] 427 | }, 428 | "execution_count": 9, 429 | "metadata": {}, 430 | "output_type": "execute_result" 431 | } 432 | ], 433 | "source": [ 434 | "food.columns[104]" 435 | ] 436 | }, 437 | { 438 | "cell_type": "markdown", 439 | "metadata": {}, 440 | "source": [ 441 | "### Step 9. What is the type of the observations of the 105th column?" 442 | ] 443 | }, 444 | { 445 | "cell_type": "code", 446 | "execution_count": 10, 447 | "metadata": {}, 448 | "outputs": [ 449 | { 450 | "data": { 451 | "text/plain": [ 452 | "dtype('float64')" 453 | ] 454 | }, 455 | "execution_count": 10, 456 | "metadata": {}, 457 | "output_type": "execute_result" 458 | } 459 | ], 460 | "source": [ 461 | "food.dtypes['-glucose_100g']" 462 | ] 463 | }, 464 | { 465 | "cell_type": "markdown", 466 | "metadata": {}, 467 | "source": [ 468 | "### Step 10. How is the dataset indexed?" 469 | ] 470 | }, 471 | { 472 | "cell_type": "code", 473 | "execution_count": 11, 474 | "metadata": {}, 475 | "outputs": [ 476 | { 477 | "data": { 478 | "text/plain": [ 479 | "RangeIndex(start=0, stop=356027, step=1)" 480 | ] 481 | }, 482 | "execution_count": 11, 483 | "metadata": {}, 484 | "output_type": "execute_result" 485 | } 486 | ], 487 | "source": [ 488 | "food.index" 489 | ] 490 | }, 491 | { 492 | "cell_type": "markdown", 493 | "metadata": {}, 494 | "source": [ 495 | "### Step 11. What is the product name of the 19th observation?" 
496 | ] 497 | }, 498 | { 499 | "cell_type": "code", 500 | "execution_count": 13, 501 | "metadata": {}, 502 | "outputs": [ 503 | { 504 | "data": { 505 | "text/plain": [ 506 | "'Lotus Organic Brown Jasmine Rice'" 507 | ] 508 | }, 509 | "execution_count": 13, 510 | "metadata": {}, 511 | "output_type": "execute_result" 512 | } 513 | ], 514 | "source": [ 515 | "food.values[18][7]" 516 | ] 517 | } 518 | ], 519 | "metadata": { 520 | "anaconda-cloud": {}, 521 | "kernelspec": { 522 | "display_name": "Python 3.10.5 64-bit (windows store)", 523 | "language": "python", 524 | "name": "python3" 525 | }, 526 | "language_info": { 527 | "codemirror_mode": { 528 | "name": "ipython", 529 | "version": 3 530 | }, 531 | "file_extension": ".py", 532 | "mimetype": "text/x-python", 533 | "name": "python", 534 | "nbconvert_exporter": "python", 535 | "pygments_lexer": "ipython3", 536 | "version": "3.10.5" 537 | }, 538 | "toc": { 539 | "base_numbering": 1, 540 | "nav_menu": {}, 541 | "number_sections": true, 542 | "sideBar": true, 543 | "skip_h1_title": false, 544 | "title_cell": "Table of Contents", 545 | "title_sidebar": "Contents", 546 | "toc_cell": false, 547 | "toc_position": {}, 548 | "toc_section_display": true, 549 | "toc_window_display": false 550 | }, 551 | "vscode": { 552 | "interpreter": { 553 | "hash": "2a50a5f25cb96edfd9254847fbf2f642dc98c240ea8775b6be580f22b3253a59" 554 | } 555 | } 556 | }, 557 | "nbformat": 4, 558 | "nbformat_minor": 1 559 | } 560 | -------------------------------------------------------------------------------- /02_pandas_tips&tricks/Solutions/08_grouping.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Occupation" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 64, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 
21 | "metadata": {}, 22 | "source": [ 23 | "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/u.user). " 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "### Step 3. Assign it to a variable called users." 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 65, 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "data": { 40 | "text/html": [ 41 | "
\n", 42 | "\n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | "
agegenderoccupationzip_code
user_id
124Mtechnician85711
253Fother94043
323Mwriter32067
424Mtechnician43537
533Fother15213
\n", 97 | "
" 98 | ], 99 | "text/plain": [ 100 | " age gender occupation zip_code\n", 101 | "user_id \n", 102 | "1 24 M technician 85711\n", 103 | "2 53 F other 94043\n", 104 | "3 23 M writer 32067\n", 105 | "4 24 M technician 43537\n", 106 | "5 33 F other 15213" 107 | ] 108 | }, 109 | "execution_count": 65, 110 | "metadata": {}, 111 | "output_type": "execute_result" 112 | } 113 | ], 114 | "source": [ 115 | "users = pd.read_table('https://raw.githubusercontent.com/justmarkham/DAT8/master/data/u.user', \n", 116 | " sep='|', index_col='user_id')\n", 117 | "users.head()" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "### Step 4. Discover what is the mean age per occupation" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 66, 130 | "metadata": {}, 131 | "outputs": [ 132 | { 133 | "data": { 134 | "text/plain": [ 135 | "occupation\n", 136 | "administrator 38.746835\n", 137 | "artist 31.392857\n", 138 | "doctor 43.571429\n", 139 | "educator 42.010526\n", 140 | "engineer 36.388060\n", 141 | "entertainment 29.222222\n", 142 | "executive 38.718750\n", 143 | "healthcare 41.562500\n", 144 | "homemaker 32.571429\n", 145 | "lawyer 36.750000\n", 146 | "librarian 40.000000\n", 147 | "marketing 37.615385\n", 148 | "none 26.555556\n", 149 | "other 34.523810\n", 150 | "programmer 33.121212\n", 151 | "retired 63.071429\n", 152 | "salesman 35.666667\n", 153 | "scientist 35.548387\n", 154 | "student 22.081633\n", 155 | "technician 33.148148\n", 156 | "writer 36.311111\n", 157 | "Name: age, dtype: float64" 158 | ] 159 | }, 160 | "execution_count": 66, 161 | "metadata": {}, 162 | "output_type": "execute_result" 163 | } 164 | ], 165 | "source": [ 166 | "users.groupby('occupation').age.mean()" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "### Step 5. 
Discover the Male ratio per occupation and sort it from the most to the least" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 150, 179 | "metadata": {}, 180 | "outputs": [ 181 | { 182 | "data": { 183 | "text/plain": [ 184 | "doctor 100.000000\n", 185 | "engineer 97.014925\n", 186 | "technician 96.296296\n", 187 | "retired 92.857143\n", 188 | "programmer 90.909091\n", 189 | "executive 90.625000\n", 190 | "scientist 90.322581\n", 191 | "entertainment 88.888889\n", 192 | "lawyer 83.333333\n", 193 | "salesman 75.000000\n", 194 | "educator 72.631579\n", 195 | "student 69.387755\n", 196 | "other 65.714286\n", 197 | "marketing 61.538462\n", 198 | "writer 57.777778\n", 199 | "none 55.555556\n", 200 | "administrator 54.430380\n", 201 | "artist 53.571429\n", 202 | "librarian 43.137255\n", 203 | "healthcare 31.250000\n", 204 | "homemaker 14.285714\n", 205 | "dtype: float64" 206 | ] 207 | }, 208 | "execution_count": 150, 209 | "metadata": {}, 210 | "output_type": "execute_result" 211 | } 212 | ], 213 | "source": [ 214 | "# create a function\n", 215 | "def gender_to_numeric(x):\n", 216 | " if x == 'M':\n", 217 | " return 1\n", 218 | " if x == 'F':\n", 219 | " return 0\n", 220 | "\n", 221 | "# apply the function to the gender column and create a new column\n", 222 | "users['gender_n'] = users['gender'].apply(gender_to_numeric)\n", 223 | "\n", 224 | "\n", 225 | "a = users.groupby('occupation').gender_n.sum() / users.occupation.value_counts() * 100 \n", 226 | "\n", 227 | "# sort to the most male \n", 228 | "a.sort_values(ascending = False)" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 | "source": [ 235 | "### Step 6. For each occupation, calculate the minimum and maximum ages" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 151, 241 | "metadata": {}, 242 | "outputs": [ 243 | { 244 | "data": { 245 | "text/html": [ 246 | "
\n", 247 | "\n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | "
minmax
occupation
administrator2170
artist1948
doctor2864
educator2363
engineer2270
entertainment1550
executive2269
healthcare2262
homemaker2050
lawyer2153
librarian2369
marketing2455
none1155
other1364
programmer2063
retired5173
salesman1866
scientist2355
student742
technician2155
writer1860
\n", 368 | "
" 369 | ], 370 | "text/plain": [ 371 | " min max\n", 372 | "occupation \n", 373 | "administrator 21 70\n", 374 | "artist 19 48\n", 375 | "doctor 28 64\n", 376 | "educator 23 63\n", 377 | "engineer 22 70\n", 378 | "entertainment 15 50\n", 379 | "executive 22 69\n", 380 | "healthcare 22 62\n", 381 | "homemaker 20 50\n", 382 | "lawyer 21 53\n", 383 | "librarian 23 69\n", 384 | "marketing 24 55\n", 385 | "none 11 55\n", 386 | "other 13 64\n", 387 | "programmer 20 63\n", 388 | "retired 51 73\n", 389 | "salesman 18 66\n", 390 | "scientist 23 55\n", 391 | "student 7 42\n", 392 | "technician 21 55\n", 393 | "writer 18 60" 394 | ] 395 | }, 396 | "execution_count": 151, 397 | "metadata": {}, 398 | "output_type": "execute_result" 399 | } 400 | ], 401 | "source": [ 402 | "users.groupby('occupation').age.agg(['min', 'max'])" 403 | ] 404 | }, 405 | { 406 | "cell_type": "markdown", 407 | "metadata": {}, 408 | "source": [ 409 | "### Step 7. For each combination of occupation and gender, calculate the mean age" 410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "execution_count": 152, 415 | "metadata": {}, 416 | "outputs": [ 417 | { 418 | "data": { 419 | "text/plain": [ 420 | "occupation gender\n", 421 | "administrator F 40.638889\n", 422 | " M 37.162791\n", 423 | "artist F 30.307692\n", 424 | " M 32.333333\n", 425 | "doctor M 43.571429\n", 426 | "educator F 39.115385\n", 427 | " M 43.101449\n", 428 | "engineer F 29.500000\n", 429 | " M 36.600000\n", 430 | "entertainment F 31.000000\n", 431 | " M 29.000000\n", 432 | "executive F 44.000000\n", 433 | " M 38.172414\n", 434 | "healthcare F 39.818182\n", 435 | " M 45.400000\n", 436 | "homemaker F 34.166667\n", 437 | " M 23.000000\n", 438 | "lawyer F 39.500000\n", 439 | " M 36.200000\n", 440 | "librarian F 40.000000\n", 441 | " M 40.000000\n", 442 | "marketing F 37.200000\n", 443 | " M 37.875000\n", 444 | "none F 36.500000\n", 445 | " M 18.600000\n", 446 | "other F 35.472222\n", 447 | " M 34.028986\n", 448 | "programmer F 
32.166667\n", 449 | " M 33.216667\n", 450 | "retired F 70.000000\n", 451 | " M 62.538462\n", 452 | "salesman F 27.000000\n", 453 | " M 38.555556\n", 454 | "scientist F 28.333333\n", 455 | " M 36.321429\n", 456 | "student F 20.750000\n", 457 | " M 22.669118\n", 458 | "technician F 38.000000\n", 459 | " M 32.961538\n", 460 | "writer F 37.631579\n", 461 | " M 35.346154\n", 462 | "Name: age, dtype: float64" 463 | ] 464 | }, 465 | "execution_count": 152, 466 | "metadata": {}, 467 | "output_type": "execute_result" 468 | } 469 | ], 470 | "source": [ 471 | "users.groupby(['occupation', 'gender']).age.mean()" 472 | ] 473 | }, 474 | { 475 | "cell_type": "markdown", 476 | "metadata": {}, 477 | "source": [ 478 | "### Step 8. For each occupation present the percentage of women and men" 479 | ] 480 | }, 481 | { 482 | "cell_type": "code", 483 | "execution_count": 154, 484 | "metadata": {}, 485 | "outputs": [ 486 | { 487 | "data": { 488 | "text/plain": [ 489 | "occupation gender\n", 490 | "administrator F 45.569620\n", 491 | " M 54.430380\n", 492 | "artist F 46.428571\n", 493 | " M 53.571429\n", 494 | "doctor M 100.000000\n", 495 | "educator F 27.368421\n", 496 | " M 72.631579\n", 497 | "engineer F 2.985075\n", 498 | " M 97.014925\n", 499 | "entertainment F 11.111111\n", 500 | " M 88.888889\n", 501 | "executive F 9.375000\n", 502 | " M 90.625000\n", 503 | "healthcare F 68.750000\n", 504 | " M 31.250000\n", 505 | "homemaker F 85.714286\n", 506 | " M 14.285714\n", 507 | "lawyer F 16.666667\n", 508 | " M 83.333333\n", 509 | "librarian F 56.862745\n", 510 | " M 43.137255\n", 511 | "marketing F 38.461538\n", 512 | " M 61.538462\n", 513 | "none F 44.444444\n", 514 | " M 55.555556\n", 515 | "other F 34.285714\n", 516 | " M 65.714286\n", 517 | "programmer F 9.090909\n", 518 | " M 90.909091\n", 519 | "retired F 7.142857\n", 520 | " M 92.857143\n", 521 | "salesman F 25.000000\n", 522 | " M 75.000000\n", 523 | "scientist F 9.677419\n", 524 | " M 90.322581\n", 525 | "student F 30.612245\n", 
526 | " M 69.387755\n", 527 | "technician F 3.703704\n", 528 | " M 96.296296\n", 529 | "writer F 42.222222\n", 530 | " M 57.777778\n", 531 | "Name: gender, dtype: float64" 532 | ] 533 | }, 534 | "execution_count": 154, 535 | "metadata": {}, 536 | "output_type": "execute_result" 537 | } 538 | ], 539 | "source": [ 540 | "# create a data frame and apply count to gender\n", 541 | "gender_ocup = users.groupby(['occupation', 'gender']).agg({'gender': 'count'})\n", 542 | "\n", 543 | "# create a DataFrame and apply count for each occupation\n", 544 | "occup_count = users.groupby(['occupation']).agg('count')\n", 545 | "\n", 546 | "# divide the gender_ocup per the occup_count and multiply per 100\n", 547 | "occup_gender = gender_ocup.div(occup_count, level = \"occupation\") * 100\n", 548 | "\n", 549 | "# present all rows from the 'gender column'\n", 550 | "occup_gender.loc[: , 'gender']" 551 | ] 552 | } 553 | ], 554 | "metadata": { 555 | "kernelspec": { 556 | "display_name": "Python 3.9.7 ('base')", 557 | "language": "python", 558 | "name": "python3" 559 | }, 560 | "language_info": { 561 | "codemirror_mode": { 562 | "name": "ipython", 563 | "version": 3 564 | }, 565 | "file_extension": ".py", 566 | "mimetype": "text/x-python", 567 | "name": "python", 568 | "nbconvert_exporter": "python", 569 | "pygments_lexer": "ipython3", 570 | "version": "3.9.7" 571 | }, 572 | "toc": { 573 | "base_numbering": 1, 574 | "nav_menu": {}, 575 | "number_sections": true, 576 | "sideBar": true, 577 | "skip_h1_title": false, 578 | "title_cell": "Table of Contents", 579 | "title_sidebar": "Contents", 580 | "toc_cell": false, 581 | "toc_position": {}, 582 | "toc_section_display": true, 583 | "toc_window_display": false 584 | }, 585 | "vscode": { 586 | "interpreter": { 587 | "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6" 588 | } 589 | } 590 | }, 591 | "nbformat": 4, 592 | "nbformat_minor": 1 593 | } 594 | 
-------------------------------------------------------------------------------- /02_pandas_tips&tricks/Solutions/09_grouping.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Regiment" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "### Step 2. Create the DataFrame with the following values:" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 4, 29 | "metadata": { 30 | "collapsed": true 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "raw_data = {'regiment': ['Nighthawks', 'Nighthawks', 'Nighthawks', 'Nighthawks', 'Dragoons', 'Dragoons', 'Dragoons', 'Dragoons', 'Scouts', 'Scouts', 'Scouts', 'Scouts'], \n", 35 | " 'company': ['1st', '1st', '2nd', '2nd', '1st', '1st', '2nd', '2nd','1st', '1st', '2nd', '2nd'], \n", 36 | " 'name': ['Miller', 'Jacobson', 'Ali', 'Milner', 'Cooze', 'Jacon', 'Ryaner', 'Sone', 'Sloan', 'Piger', 'Riani', 'Ali'], \n", 37 | " 'preTestScore': [4, 24, 31, 2, 3, 4, 24, 31, 2, 3, 2, 3],\n", 38 | " 'postTestScore': [25, 94, 57, 62, 70, 25, 94, 57, 62, 70, 62, 70]}" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "### Step 3. Assign it to a variable called regiment.\n", 46 | "#### Don't forget to name each column" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 6, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "data": { 56 | "text/html": [ 57 | "
\n", 58 | "\n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | "
regimentcompanynamepreTestScorepostTestScore
0Nighthawks1stMiller425
1Nighthawks1stJacobson2494
2Nighthawks2ndAli3157
3Nighthawks2ndMilner262
4Dragoons1stCooze370
5Dragoons1stJacon425
6Dragoons2ndRyaner2494
7Dragoons2ndSone3157
8Scouts1stSloan262
9Scouts1stPiger370
10Scouts2ndRiani262
11Scouts2ndAli370
\n", 168 | "
" 169 | ], 170 | "text/plain": [ 171 | " regiment company name preTestScore postTestScore\n", 172 | "0 Nighthawks 1st Miller 4 25\n", 173 | "1 Nighthawks 1st Jacobson 24 94\n", 174 | "2 Nighthawks 2nd Ali 31 57\n", 175 | "3 Nighthawks 2nd Milner 2 62\n", 176 | "4 Dragoons 1st Cooze 3 70\n", 177 | "5 Dragoons 1st Jacon 4 25\n", 178 | "6 Dragoons 2nd Ryaner 24 94\n", 179 | "7 Dragoons 2nd Sone 31 57\n", 180 | "8 Scouts 1st Sloan 2 62\n", 181 | "9 Scouts 1st Piger 3 70\n", 182 | "10 Scouts 2nd Riani 2 62\n", 183 | "11 Scouts 2nd Ali 3 70" 184 | ] 185 | }, 186 | "execution_count": 6, 187 | "metadata": {}, 188 | "output_type": "execute_result" 189 | } 190 | ], 191 | "source": [ 192 | "regiment = pd.DataFrame(raw_data, columns = raw_data.keys())\n", 193 | "regiment" 194 | ] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "metadata": {}, 199 | "source": [ 200 | "### Step 4. What is the mean preTestScore from the regiment Nighthawks? " 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": 26, 206 | "metadata": {}, 207 | "outputs": [ 208 | { 209 | "data": { 210 | "text/html": [ 211 | "
\n", 212 | "\n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | "
preTestScorepostTestScore
regiment
Dragoons15.5061.5
Nighthawks15.2559.5
Scouts2.5066.0
\n", 243 | "
" 244 | ], 245 | "text/plain": [ 246 | " preTestScore postTestScore\n", 247 | "regiment \n", 248 | "Dragoons 15.50 61.5\n", 249 | "Nighthawks 15.25 59.5\n", 250 | "Scouts 2.50 66.0" 251 | ] 252 | }, 253 | "execution_count": 26, 254 | "metadata": {}, 255 | "output_type": "execute_result" 256 | } 257 | ], 258 | "source": [ 259 | "regiment[regiment['regiment'] == 'Nighthawks'].groupby('regiment').mean()" 260 | ] 261 | }, 262 | { 263 | "cell_type": "markdown", 264 | "metadata": {}, 265 | "source": [ 266 | "### Step 5. Present general statistics by company" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": 29, 272 | "metadata": {}, 273 | "outputs": [ 274 | { 275 | "data": { 276 | "text/html": [ 277 | "
\n", 278 | "\n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | "
postTestScorepreTestScore
company
1stcount6.0000006.000000
mean57.6666676.666667
std27.4857548.524475
min25.0000002.000000
25%34.2500003.000000
50%66.0000003.500000
75%70.0000004.000000
max94.00000024.000000
2ndcount6.0000006.000000
mean67.00000015.500000
std14.05702714.652645
min57.0000002.000000
25%58.2500002.250000
50%62.00000013.500000
75%68.00000029.250000
max94.00000031.000000
\n", 378 | "
" 379 | ], 380 | "text/plain": [ 381 | " postTestScore preTestScore\n", 382 | "company \n", 383 | "1st count 6.000000 6.000000\n", 384 | " mean 57.666667 6.666667\n", 385 | " std 27.485754 8.524475\n", 386 | " min 25.000000 2.000000\n", 387 | " 25% 34.250000 3.000000\n", 388 | " 50% 66.000000 3.500000\n", 389 | " 75% 70.000000 4.000000\n", 390 | " max 94.000000 24.000000\n", 391 | "2nd count 6.000000 6.000000\n", 392 | " mean 67.000000 15.500000\n", 393 | " std 14.057027 14.652645\n", 394 | " min 57.000000 2.000000\n", 395 | " 25% 58.250000 2.250000\n", 396 | " 50% 62.000000 13.500000\n", 397 | " 75% 68.000000 29.250000\n", 398 | " max 94.000000 31.000000" 399 | ] 400 | }, 401 | "execution_count": 29, 402 | "metadata": {}, 403 | "output_type": "execute_result" 404 | } 405 | ], 406 | "source": [ 407 | "regiment.groupby('company').describe()" 408 | ] 409 | }, 410 | { 411 | "cell_type": "markdown", 412 | "metadata": {}, 413 | "source": [ 414 | "### Step 6. What is the mean of each company's preTestScore?" 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": 33, 420 | "metadata": {}, 421 | "outputs": [ 422 | { 423 | "data": { 424 | "text/plain": [ 425 | "company\n", 426 | "1st 6.666667\n", 427 | "2nd 15.500000\n", 428 | "Name: preTestScore, dtype: float64" 429 | ] 430 | }, 431 | "execution_count": 33, 432 | "metadata": {}, 433 | "output_type": "execute_result" 434 | } 435 | ], 436 | "source": [ 437 | "regiment.groupby('company').preTestScore.mean()" 438 | ] 439 | }, 440 | { 441 | "cell_type": "markdown", 442 | "metadata": {}, 443 | "source": [ 444 | "### Step 7. 
Present the mean preTestScores grouped by regiment and company" 445 | ] 446 | }, 447 | { 448 | "cell_type": "code", 449 | "execution_count": 35, 450 | "metadata": {}, 451 | "outputs": [ 452 | { 453 | "data": { 454 | "text/plain": [ 455 | "regiment company\n", 456 | "Dragoons 1st 3.5\n", 457 | " 2nd 27.5\n", 458 | "Nighthawks 1st 14.0\n", 459 | " 2nd 16.5\n", 460 | "Scouts 1st 2.5\n", 461 | " 2nd 2.5\n", 462 | "Name: preTestScore, dtype: float64" 463 | ] 464 | }, 465 | "execution_count": 35, 466 | "metadata": {}, 467 | "output_type": "execute_result" 468 | } 469 | ], 470 | "source": [ 471 | "regiment.groupby(['regiment', 'company']).preTestScore.mean()" 472 | ] 473 | }, 474 | { 475 | "cell_type": "markdown", 476 | "metadata": {}, 477 | "source": [ 478 | "### Step 8. Present the mean preTestScores grouped by regiment and company without heirarchical indexing" 479 | ] 480 | }, 481 | { 482 | "cell_type": "code", 483 | "execution_count": 36, 484 | "metadata": {}, 485 | "outputs": [ 486 | { 487 | "data": { 488 | "text/html": [ 489 | "
\n", 490 | "\n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | "
company1st2nd
regiment
Dragoons3.527.5
Nighthawks14.016.5
Scouts2.52.5
\n", 521 | "
" 522 | ], 523 | "text/plain": [ 524 | "company 1st 2nd\n", 525 | "regiment \n", 526 | "Dragoons 3.5 27.5\n", 527 | "Nighthawks 14.0 16.5\n", 528 | "Scouts 2.5 2.5" 529 | ] 530 | }, 531 | "execution_count": 36, 532 | "metadata": {}, 533 | "output_type": "execute_result" 534 | } 535 | ], 536 | "source": [ 537 | "regiment.groupby(['regiment', 'company']).preTestScore.mean().unstack()" 538 | ] 539 | }, 540 | { 541 | "cell_type": "markdown", 542 | "metadata": {}, 543 | "source": [ 544 | "### Step 9. Group the entire dataframe by regiment and company" 545 | ] 546 | }, 547 | { 548 | "cell_type": "code", 549 | "execution_count": 37, 550 | "metadata": {}, 551 | "outputs": [ 552 | { 553 | "data": { 554 | "text/html": [ 555 | "
\n", 556 | "\n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | "
preTestScorepostTestScore
regimentcompany
Dragoons1st3.547.5
2nd27.575.5
Nighthawks1st14.059.5
2nd16.559.5
Scouts1st2.566.0
2nd2.566.0
\n", 607 | "
" 608 | ], 609 | "text/plain": [ 610 | " preTestScore postTestScore\n", 611 | "regiment company \n", 612 | "Dragoons 1st 3.5 47.5\n", 613 | " 2nd 27.5 75.5\n", 614 | "Nighthawks 1st 14.0 59.5\n", 615 | " 2nd 16.5 59.5\n", 616 | "Scouts 1st 2.5 66.0\n", 617 | " 2nd 2.5 66.0" 618 | ] 619 | }, 620 | "execution_count": 37, 621 | "metadata": {}, 622 | "output_type": "execute_result" 623 | } 624 | ], 625 | "source": [ 626 | "regiment.groupby(['regiment', 'company']).mean()" 627 | ] 628 | }, 629 | { 630 | "cell_type": "markdown", 631 | "metadata": {}, 632 | "source": [ 633 | "### Step 10. What is the number of observations in each regiment and company" 634 | ] 635 | }, 636 | { 637 | "cell_type": "code", 638 | "execution_count": 41, 639 | "metadata": {}, 640 | "outputs": [ 641 | { 642 | "data": { 643 | "text/plain": [ 644 | "company regiment \n", 645 | "1st Dragoons 2\n", 646 | " Nighthawks 2\n", 647 | " Scouts 2\n", 648 | "2nd Dragoons 2\n", 649 | " Nighthawks 2\n", 650 | " Scouts 2\n", 651 | "dtype: int64" 652 | ] 653 | }, 654 | "execution_count": 41, 655 | "metadata": {}, 656 | "output_type": "execute_result" 657 | } 658 | ], 659 | "source": [ 660 | "regiment.groupby(['company', 'regiment']).size()" 661 | ] 662 | }, 663 | { 664 | "cell_type": "markdown", 665 | "metadata": {}, 666 | "source": [ 667 | "### Step 11. 
Iterate over a group and print the name and the whole data from the regiment" 668 | ] 669 | }, 670 | { 671 | "cell_type": "code", 672 | "execution_count": 50, 673 | "metadata": {}, 674 | "outputs": [ 675 | { 676 | "name": "stdout", 677 | "output_type": "stream", 678 | "text": [ 679 | "Dragoons\n", 680 | " regiment company name preTestScore postTestScore\n", 681 | "4 Dragoons 1st Cooze 3 70\n", 682 | "5 Dragoons 1st Jacon 4 25\n", 683 | "6 Dragoons 2nd Ryaner 24 94\n", 684 | "7 Dragoons 2nd Sone 31 57\n", 685 | "Nighthawks\n", 686 | " regiment company name preTestScore postTestScore\n", 687 | "0 Nighthawks 1st Miller 4 25\n", 688 | "1 Nighthawks 1st Jacobson 24 94\n", 689 | "2 Nighthawks 2nd Ali 31 57\n", 690 | "3 Nighthawks 2nd Milner 2 62\n", 691 | "Scouts\n", 692 | " regiment company name preTestScore postTestScore\n", 693 | "8 Scouts 1st Sloan 2 62\n", 694 | "9 Scouts 1st Piger 3 70\n", 695 | "10 Scouts 2nd Riani 2 62\n", 696 | "11 Scouts 2nd Ali 3 70\n" 697 | ] 698 | } 699 | ], 700 | "source": [ 701 | "# Group the dataframe by regiment, and for each regiment,\n", 702 | "for name, group in regiment.groupby('regiment'):\n", 703 | " # print the name of the regiment\n", 704 | " print(name)\n", 705 | " # print the data of that regiment\n", 706 | " print(group)" 707 | ] 708 | } 709 | ], 710 | "metadata": { 711 | "kernelspec": { 712 | "display_name": "Python 3.9.7 ('base')", 713 | "language": "python", 714 | "name": "python3" 715 | }, 716 | "language_info": { 717 | "codemirror_mode": { 718 | "name": "ipython", 719 | "version": 3 720 | }, 721 | "file_extension": ".py", 722 | "mimetype": "text/x-python", 723 | "name": "python", 724 | "nbconvert_exporter": "python", 725 | "pygments_lexer": "ipython3", 726 | "version": "3.9.7" 727 | }, 728 | "toc": { 729 | "base_numbering": 1, 730 | "nav_menu": {}, 731 | "number_sections": true, 732 | "sideBar": true, 733 | "skip_h1_title": false, 734 | "title_cell": "Table of Contents", 735 | "title_sidebar": "Contents", 736 | 
"toc_cell": false, 737 | "toc_position": {}, 738 | "toc_section_display": true, 739 | "toc_window_display": false 740 | }, 741 | "vscode": { 742 | "interpreter": { 743 | "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6" 744 | } 745 | } 746 | }, 747 | "nbformat": 4, 748 | "nbformat_minor": 1 749 | } 750 | -------------------------------------------------------------------------------- /02_pandas_tips&tricks/Solutions/01_Know_your_Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Ex1 - Know your Data" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Step 1. Import the necessary libraries" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 34, 20 | "metadata": { 21 | "collapsed": false 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "import pandas as pd\n", 26 | "import numpy as np" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). " 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "### Step 3. Assign it to a variable called chipo." 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 35, 46 | "metadata": { 47 | "collapsed": false 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "url = 'https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv'\n", 52 | "\n", 53 | "chipo = pd.read_csv(url, sep='\\t')" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "### Step 4. 
See the first 10 entries" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 36, 66 | "metadata": { 67 | "collapsed": false, 68 | "scrolled": false 69 | }, 70 | "outputs": [ 71 | { 72 | "data": { 73 | "text/html": [ 74 | "
\n", 75 | "\n", 88 | "\n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | "
order_idquantityitem_namechoice_descriptionitem_price
011Chips and Fresh Tomato SalsaNaN$2.39
111Izze[Clementine]$3.39
211Nantucket Nectar[Apple]$3.39
311Chips and Tomatillo-Green Chili SalsaNaN$2.39
422Chicken Bowl[Tomatillo-Red Chili Salsa (Hot), [Black Beans...$16.98
531Chicken Bowl[Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...$10.98
631Side of ChipsNaN$1.69
741Steak Burrito[Tomatillo Red Chili Salsa, [Fajita Vegetables...$11.75
841Steak Soft Tacos[Tomatillo Green Chili Salsa, [Pinto Beans, Ch...$9.25
951Steak Burrito[Fresh Tomato Salsa, [Rice, Black Beans, Pinto...$9.25
\n", 182 | "
" 183 | ], 184 | "text/plain": [ 185 | " order_id quantity item_name \\\n", 186 | "0 1 1 Chips and Fresh Tomato Salsa \n", 187 | "1 1 1 Izze \n", 188 | "2 1 1 Nantucket Nectar \n", 189 | "3 1 1 Chips and Tomatillo-Green Chili Salsa \n", 190 | "4 2 2 Chicken Bowl \n", 191 | "5 3 1 Chicken Bowl \n", 192 | "6 3 1 Side of Chips \n", 193 | "7 4 1 Steak Burrito \n", 194 | "8 4 1 Steak Soft Tacos \n", 195 | "9 5 1 Steak Burrito \n", 196 | "\n", 197 | " choice_description item_price \n", 198 | "0 NaN $2.39 \n", 199 | "1 [Clementine] $3.39 \n", 200 | "2 [Apple] $3.39 \n", 201 | "3 NaN $2.39 \n", 202 | "4 [Tomatillo-Red Chili Salsa (Hot), [Black Beans... $16.98 \n", 203 | "5 [Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou... $10.98 \n", 204 | "6 NaN $1.69 \n", 205 | "7 [Tomatillo Red Chili Salsa, [Fajita Vegetables... $11.75 \n", 206 | "8 [Tomatillo Green Chili Salsa, [Pinto Beans, Ch... $9.25 \n", 207 | "9 [Fresh Tomato Salsa, [Rice, Black Beans, Pinto... $9.25 " 208 | ] 209 | }, 210 | "execution_count": 36, 211 | "metadata": {}, 212 | "output_type": "execute_result" 213 | } 214 | ], 215 | "source": [ 216 | "chipo.head(10)" 217 | ] 218 | }, 219 | { 220 | "cell_type": "markdown", 221 | "metadata": {}, 222 | "source": [ 223 | "### Step 5. What is the number of observations in the dataset?" 
224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": 37, 229 | "metadata": { 230 | "collapsed": false 231 | }, 232 | "outputs": [ 233 | { 234 | "name": "stdout", 235 | "output_type": "stream", 236 | "text": [ 237 | "The number of observation are: 4622\n" 238 | ] 239 | } 240 | ], 241 | "source": [ 242 | "# Solution 1\n", 243 | "\n", 244 | "print('The number of observation are:' , chipo.shape[0])" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": 38, 250 | "metadata": { 251 | "collapsed": false 252 | }, 253 | "outputs": [ 254 | { 255 | "name": "stdout", 256 | "output_type": "stream", 257 | "text": [ 258 | "\n", 259 | "RangeIndex: 4622 entries, 0 to 4621\n", 260 | "Data columns (total 5 columns):\n", 261 | " # Column Non-Null Count Dtype \n", 262 | "--- ------ -------------- ----- \n", 263 | " 0 order_id 4622 non-null int64 \n", 264 | " 1 quantity 4622 non-null int64 \n", 265 | " 2 item_name 4622 non-null object\n", 266 | " 3 choice_description 3376 non-null object\n", 267 | " 4 item_price 4622 non-null object\n", 268 | "dtypes: int64(2), object(3)\n", 269 | "memory usage: 180.7+ KB\n" 270 | ] 271 | } 272 | ], 273 | "source": [ 274 | "# Solution 2\n", 275 | "chipo.info()\n" 276 | ] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "metadata": {}, 281 | "source": [ 282 | "### Step 6. What is the number of columns in the dataset?" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": 39, 288 | "metadata": { 289 | "collapsed": false 290 | }, 291 | "outputs": [ 292 | { 293 | "data": { 294 | "text/plain": [ 295 | "5" 296 | ] 297 | }, 298 | "execution_count": 39, 299 | "metadata": {}, 300 | "output_type": "execute_result" 301 | } 302 | ], 303 | "source": [ 304 | "chipo.shape[1]" 305 | ] 306 | }, 307 | { 308 | "cell_type": "markdown", 309 | "metadata": {}, 310 | "source": [ 311 | "### Step 7. Print the name of all the columns." 
312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": 40, 317 | "metadata": { 318 | "collapsed": false 319 | }, 320 | "outputs": [ 321 | { 322 | "data": { 323 | "text/plain": [ 324 | "Index(['order_id', 'quantity', 'item_name', 'choice_description',\n", 325 | " 'item_price'],\n", 326 | " dtype='object')" 327 | ] 328 | }, 329 | "execution_count": 40, 330 | "metadata": {}, 331 | "output_type": "execute_result" 332 | } 333 | ], 334 | "source": [ 335 | "chipo.columns" 336 | ] 337 | }, 338 | { 339 | "cell_type": "markdown", 340 | "metadata": {}, 341 | "source": [ 342 | "### Step 8. How is the dataset indexed?" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": 41, 348 | "metadata": { 349 | "collapsed": false 350 | }, 351 | "outputs": [ 352 | { 353 | "data": { 354 | "text/plain": [ 355 | "RangeIndex(start=0, stop=4622, step=1)" 356 | ] 357 | }, 358 | "execution_count": 41, 359 | "metadata": {}, 360 | "output_type": "execute_result" 361 | } 362 | ], 363 | "source": [ 364 | "chipo.index" 365 | ] 366 | }, 367 | { 368 | "cell_type": "markdown", 369 | "metadata": {}, 370 | "source": [ 371 | "### Step 9. Which was the most-ordered item? " 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": 42, 377 | "metadata": { 378 | "collapsed": false 379 | }, 380 | "outputs": [ 381 | { 382 | "data": { 383 | "text/html": [ 384 | "
\n", 385 | "\n", 398 | "\n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | "
order_idquantity
item_name
Chicken Bowl713926761
\n", 419 | "
" 420 | ], 421 | "text/plain": [ 422 | " order_id quantity\n", 423 | "item_name \n", 424 | "Chicken Bowl 713926 761" 425 | ] 426 | }, 427 | "execution_count": 42, 428 | "metadata": {}, 429 | "output_type": "execute_result" 430 | } 431 | ], 432 | "source": [ 433 | "chipo_count = chipo.groupby('item_name').sum()\n", 434 | "chipo_count_more = chipo_count.sort_values(['quantity'], ascending=False)\n", 435 | "chipo_count_more.head(1)" 436 | ] 437 | }, 438 | { 439 | "cell_type": "code", 440 | "execution_count": 43, 441 | "metadata": {}, 442 | "outputs": [ 443 | { 444 | "data": { 445 | "text/html": [ 446 | "
\n", 447 | "\n", 460 | "\n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | "
order_idquantity
item_name
Chicken Bowl713926761
\n", 481 | "
" 482 | ], 483 | "text/plain": [ 484 | " order_id quantity\n", 485 | "item_name \n", 486 | "Chicken Bowl 713926 761" 487 | ] 488 | }, 489 | "execution_count": 43, 490 | "metadata": {}, 491 | "output_type": "execute_result" 492 | } 493 | ], 494 | "source": [ 495 | "# second way\n", 496 | "c = chipo.groupby('item_name').sum()\n", 497 | "c = c.sort_values(['quantity'], ascending=False)\n", 498 | "c.head(1)" 499 | ] 500 | }, 501 | { 502 | "cell_type": "markdown", 503 | "metadata": {}, 504 | "source": [ 505 | "### Step 10. For the most-ordered item, how many items were ordered?" 506 | ] 507 | }, 508 | { 509 | "cell_type": "code", 510 | "execution_count": 44, 511 | "metadata": { 512 | "collapsed": false 513 | }, 514 | "outputs": [ 515 | { 516 | "data": { 517 | "text/html": [ 518 | "
\n", 519 | "\n", 532 | "\n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | "
order_idquantity
item_name
Chicken Bowl713926761
\n", 553 | "
" 554 | ], 555 | "text/plain": [ 556 | " order_id quantity\n", 557 | "item_name \n", 558 | "Chicken Bowl 713926 761" 559 | ] 560 | }, 561 | "execution_count": 44, 562 | "metadata": {}, 563 | "output_type": "execute_result" 564 | } 565 | ], 566 | "source": [ 567 | "c = chipo.groupby('item_name')\n", 568 | "c = c.sum()\n", 569 | "c = c.sort_values(['quantity'], ascending=False)\n", 570 | "c.head(1)" 571 | ] 572 | }, 573 | { 574 | "cell_type": "markdown", 575 | "metadata": {}, 576 | "source": [ 577 | "### Step 11. What was the most ordered item in the choice_description column?" 578 | ] 579 | }, 580 | { 581 | "cell_type": "code", 582 | "execution_count": 45, 583 | "metadata": { 584 | "collapsed": false 585 | }, 586 | "outputs": [ 587 | { 588 | "data": { 589 | "text/html": [ 590 | "
\n", 591 | "\n", 604 | "\n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | "
order_idquantity
choice_description
[Diet Coke]123455159
\n", 625 | "
" 626 | ], 627 | "text/plain": [ 628 | " order_id quantity\n", 629 | "choice_description \n", 630 | "[Diet Coke] 123455 159" 631 | ] 632 | }, 633 | "execution_count": 45, 634 | "metadata": {}, 635 | "output_type": "execute_result" 636 | } 637 | ], 638 | "source": [ 639 | "c = chipo.groupby('choice_description').sum()\n", 640 | "c = c.sort_values(['quantity'], ascending=False)\n", 641 | "c.head(1)" 642 | ] 643 | }, 644 | { 645 | "cell_type": "markdown", 646 | "metadata": {}, 647 | "source": [ 648 | "### Step 12. How many items were orderd in total?" 649 | ] 650 | }, 651 | { 652 | "cell_type": "code", 653 | "execution_count": 46, 654 | "metadata": { 655 | "collapsed": false 656 | }, 657 | "outputs": [ 658 | { 659 | "data": { 660 | "text/plain": [ 661 | "4972" 662 | ] 663 | }, 664 | "execution_count": 46, 665 | "metadata": {}, 666 | "output_type": "execute_result" 667 | } 668 | ], 669 | "source": [ 670 | "total_items_ordered = chipo.quantity.sum()\n", 671 | "total_items_ordered" 672 | ] 673 | }, 674 | { 675 | "cell_type": "markdown", 676 | "metadata": {}, 677 | "source": [ 678 | "### Step 13. Turn the item price into a float" 679 | ] 680 | }, 681 | { 682 | "cell_type": "markdown", 683 | "metadata": {}, 684 | "source": [ 685 | "#### Step 13.a. Check the item price type" 686 | ] 687 | }, 688 | { 689 | "cell_type": "code", 690 | "execution_count": 47, 691 | "metadata": { 692 | "collapsed": false 693 | }, 694 | "outputs": [ 695 | { 696 | "data": { 697 | "text/plain": [ 698 | "dtype('O')" 699 | ] 700 | }, 701 | "execution_count": 47, 702 | "metadata": {}, 703 | "output_type": "execute_result" 704 | } 705 | ], 706 | "source": [ 707 | "chipo.item_price.dtype" 708 | ] 709 | }, 710 | { 711 | "cell_type": "markdown", 712 | "metadata": {}, 713 | "source": [ 714 | "#### Step 13.b. 
Create a lambda function and change the type of item price" 715 | ] 716 | }, 717 | { 718 | "cell_type": "code", 719 | "execution_count": 48, 720 | "metadata": { 721 | "collapsed": true 722 | }, 723 | "outputs": [], 724 | "source": [ 725 | "dollarizer = lambda x: float(x[1:-1])\n", 726 | "chipo.item_price = chipo.item_price.apply(dollarizer)" 727 | ] 728 | }, 729 | { 730 | "cell_type": "markdown", 731 | "metadata": {}, 732 | "source": [ 733 | "#### Step 13.c. Check the item price type" 734 | ] 735 | }, 736 | { 737 | "cell_type": "code", 738 | "execution_count": 49, 739 | "metadata": { 740 | "collapsed": false 741 | }, 742 | "outputs": [ 743 | { 744 | "data": { 745 | "text/plain": [ 746 | "dtype('float64')" 747 | ] 748 | }, 749 | "execution_count": 49, 750 | "metadata": {}, 751 | "output_type": "execute_result" 752 | } 753 | ], 754 | "source": [ 755 | "chipo.item_price.dtype" 756 | ] 757 | }, 758 | { 759 | "cell_type": "markdown", 760 | "metadata": {}, 761 | "source": [ 762 | "### Step 14. How much was the revenue for the period in the dataset?" 763 | ] 764 | }, 765 | { 766 | "cell_type": "code", 767 | "execution_count": 50, 768 | "metadata": { 769 | "collapsed": false 770 | }, 771 | "outputs": [ 772 | { 773 | "name": "stdout", 774 | "output_type": "stream", 775 | "text": [ 776 | "Revenue was: $39237.02\n" 777 | ] 778 | } 779 | ], 780 | "source": [ 781 | "revenue = (chipo['quantity']* chipo['item_price']).sum()\n", 782 | "\n", 783 | "print('Revenue was: $' + str(np.round(revenue,2)))" 784 | ] 785 | }, 786 | { 787 | "cell_type": "markdown", 788 | "metadata": {}, 789 | "source": [ 790 | "### Step 15. How many orders were made in the period?" 
791 | ] 792 | }, 793 | { 794 | "cell_type": "code", 795 | "execution_count": 51, 796 | "metadata": { 797 | "collapsed": false 798 | }, 799 | "outputs": [ 800 | { 801 | "data": { 802 | "text/plain": [ 803 | "1834" 804 | ] 805 | }, 806 | "execution_count": 51, 807 | "metadata": {}, 808 | "output_type": "execute_result" 809 | } 810 | ], 811 | "source": [ 812 | "orders = chipo.order_id.value_counts().count()\n", 813 | "orders" 814 | ] 815 | }, 816 | { 817 | "cell_type": "markdown", 818 | "metadata": {}, 819 | "source": [ 820 | "### Step 16. What is the average revenue amount per order?" 821 | ] 822 | }, 823 | { 824 | "cell_type": "code", 825 | "execution_count": 52, 826 | "metadata": { 827 | "collapsed": false 828 | }, 829 | "outputs": [ 830 | { 831 | "data": { 832 | "text/plain": [ 833 | "21.394231188658654" 834 | ] 835 | }, 836 | "execution_count": 52, 837 | "metadata": {}, 838 | "output_type": "execute_result" 839 | } 840 | ], 841 | "source": [ 842 | "# Solution 1\n", 843 | "# Solution 1\n", 844 | "\n", 845 | "chipo['revenue'] = chipo['quantity'] * chipo['item_price']\n", 846 | "order_grouped = chipo.groupby(by=['order_id']).sum()\n", 847 | "order_grouped.mean()['revenue']\n" 848 | ] 849 | }, 850 | { 851 | "cell_type": "code", 852 | "execution_count": 53, 853 | "metadata": { 854 | "collapsed": false 855 | }, 856 | "outputs": [ 857 | { 858 | "data": { 859 | "text/plain": [ 860 | "21.394231188658654" 861 | ] 862 | }, 863 | "execution_count": 53, 864 | "metadata": {}, 865 | "output_type": "execute_result" 866 | } 867 | ], 868 | "source": [ 869 | "# Solution 2\n", 870 | "\n", 871 | "chipo.groupby(by=['order_id']).sum().mean()['revenue']\n" 872 | ] 873 | }, 874 | { 875 | "cell_type": "markdown", 876 | "metadata": {}, 877 | "source": [ 878 | "### Step 17. How many different items are sold?" 
879 | ] 880 | }, 881 | { 882 | "cell_type": "code", 883 | "execution_count": 54, 884 | "metadata": { 885 | "collapsed": false 886 | }, 887 | "outputs": [ 888 | { 889 | "data": { 890 | "text/plain": [ 891 | "50" 892 | ] 893 | }, 894 | "execution_count": 54, 895 | "metadata": {}, 896 | "output_type": "execute_result" 897 | } 898 | ], 899 | "source": [ 900 | "chipo.item_name.value_counts().count()" 901 | ] 902 | } 903 | ], 904 | "metadata": { 905 | "anaconda-cloud": {}, 906 | "kernelspec": { 907 | "display_name": "Python 3.9.7 ('base')", 908 | "language": "python", 909 | "name": "python3" 910 | }, 911 | "language_info": { 912 | "codemirror_mode": { 913 | "name": "ipython", 914 | "version": 3 915 | }, 916 | "file_extension": ".py", 917 | "mimetype": "text/x-python", 918 | "name": "python", 919 | "nbconvert_exporter": "python", 920 | "pygments_lexer": "ipython3", 921 | "version": "3.9.7" 922 | }, 923 | "vscode": { 924 | "interpreter": { 925 | "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6" 926 | } 927 | } 928 | }, 929 | "nbformat": 4, 930 | "nbformat_minor": 0 931 | } 932 | --------------------------------------------------------------------------------