├── .DS_Store ├── .gitignore ├── 00_sources └── poster1.png ├── 00_Books and Guides └── MS Excell for Data Analysis.pdf ├── 02_pandas_tips&tricks ├── pandas_tips_and_tricks │ ├── kashti.xlsx │ ├── tips_save.xlsx │ ├── excel ka data.csv │ └── tips_save.csv ├── test.py ├── Excercises │ ├── 07_grouping.ipynb │ ├── 08_grouping.ipynb │ ├── 04_filtering_and_sorting.ipynb │ ├── 11_apply.ipynb │ ├── 03_Know_your_Data.ipynb │ ├── 10_apply.ipynb │ ├── 09_grouping.ipynb │ ├── 05_filtering_and_sorting.ipynb │ ├── 02_Know_your_Data.ipynb │ ├── 01_Know_your_Data.ipynb │ └── 06_filtering_and_sorting.ipynb ├── Solutions │ ├── Food_Ananlysis_Report.md │ ├── 07_grouping.ipynb │ ├── 03_Know_your_Data.ipynb │ ├── 08_grouping.ipynb │ ├── 09_grouping.ipynb │ └── 01_Know_your_Data.ipynb └── tips.csv ├── 01_Introduction └── introduction.ipynb ├── README.md └── LICENSE /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AammarTufail/pythonkachilla_version2/HEAD/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | intro.md 2 | Contents to be covered.docx 3 | /02_pandas_tips&tricks/Excercises/exercises_templates/* -------------------------------------------------------------------------------- /00_sources/poster1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AammarTufail/pythonkachilla_version2/HEAD/00_sources/poster1.png -------------------------------------------------------------------------------- /00_Books and Guides/MS Excell for Data Analysis.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AammarTufail/pythonkachilla_version2/HEAD/00_Books and Guides/MS Excell for Data Analysis.pdf 
-------------------------------------------------------------------------------- /02_pandas_tips&tricks/pandas_tips_and_tricks/kashti.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AammarTufail/pythonkachilla_version2/HEAD/02_pandas_tips&tricks/pandas_tips_and_tricks/kashti.xlsx -------------------------------------------------------------------------------- /02_pandas_tips&tricks/pandas_tips_and_tricks/tips_save.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AammarTufail/pythonkachilla_version2/HEAD/02_pandas_tips&tricks/pandas_tips_and_tricks/tips_save.xlsx -------------------------------------------------------------------------------- /02_pandas_tips&tricks/pandas_tips_and_tricks/excel ka data.csv: -------------------------------------------------------------------------------- 1 | ,"pd.DataFrame([[1,","'12345',","'factory'],","[2,","'34567',","'warehouse']]," 2 | 0,"columns=['user_id',","'zip',",'location_type']),,, 3 | -------------------------------------------------------------------------------- /02_pandas_tips&tricks/test.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import seaborn as sns 5 | 6 | tips = sns.load_dataset('tips') 7 | sns.lineplot(x='day', y='total_bill', data=tips) -------------------------------------------------------------------------------- /01_Introduction/introduction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [] 9 | } 10 | ], 11 | "metadata": { 12 | "language_info": { 13 | "name": "python" 14 | }, 15 | "orig_nbformat": 4 16 | }, 17 | "nbformat": 4, 18 | "nbformat_minor": 2 19 | } 20 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pythonkachilla_version2 2 | This repository contains materials and files for Python ka chilla version 2.0 (version means course ka version not of python). 3 | This is an advance course led by Dr Aammar Tufail (Python for Data Science in Urdu language) 4 | ## youtube channel: [Codanics](https://www.youtube.com/c/Codanics) 5 | 6 | ### Playlist for this course [Click here](https://youtube.com/playlist?list=PL9XvIvvVL50EyRNp6fnYwMve1CJqJCHj8) 7 | 8 | ![Poster](00_sources/poster1.png) -------------------------------------------------------------------------------- /02_pandas_tips&tricks/Excercises/07_grouping.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Ex - GroupBy" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Introduction:\n", 15 | "\n", 16 | "GroupBy can be summarized as Split-Apply-Combine.\n", 17 | "\n", 18 | "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n", 19 | "\n", 20 | "Check out this [Diagram](http://i.imgur.com/yjNkiwL.png) \n", 21 | "### Step 1. Import the necessary libraries" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/drinks.csv). " 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "### Step 3. Assign it to a variable called drinks." 
43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "### Step 4. Which continent drinks more beer on average?" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "### Step 5. For each continent print the statistics for wine consumption." 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "### Step 6. Print the mean alcohol consumption per continent for every column" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "### Step 7. Print the median alcohol consumption per continent for every column" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "### Step 8. 
Print the mean, min and max values for spirit consumption.\n", 113 | "#### This time output a DataFrame" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [] 122 | } 123 | ], 124 | "metadata": { 125 | "kernelspec": { 126 | "display_name": "Python 3.9.7 ('base')", 127 | "language": "python", 128 | "name": "python3" 129 | }, 130 | "language_info": { 131 | "codemirror_mode": { 132 | "name": "ipython", 133 | "version": 2 134 | }, 135 | "file_extension": ".py", 136 | "mimetype": "text/x-python", 137 | "name": "python", 138 | "nbconvert_exporter": "python", 139 | "pygments_lexer": "ipython2", 140 | "version": "3.9.7" 141 | }, 142 | "vscode": { 143 | "interpreter": { 144 | "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6" 145 | } 146 | } 147 | }, 148 | "nbformat": 4, 149 | "nbformat_minor": 0 150 | } 151 | -------------------------------------------------------------------------------- /02_pandas_tips&tricks/Excercises/08_grouping.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Occupation" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Introduction:\n", 15 | "\n", 16 | "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n", 17 | "\n", 18 | "### Step 1. Import the necessary libraries" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [], 28 | "source": [] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/u.user). 
" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "### Step 3. Assign it to a variable called users." 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": { 48 | "collapsed": false 49 | }, 50 | "outputs": [], 51 | "source": [] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "### Step 4. Discover what is the mean age per occupation" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": { 64 | "collapsed": false 65 | }, 66 | "outputs": [], 67 | "source": [] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "### Step 5. Discover the Male ratio per occupation and sort it from the most to the least" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": { 80 | "collapsed": false 81 | }, 82 | "outputs": [], 83 | "source": [] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "### Step 6. For each occupation, calculate the minimum and maximum ages" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": { 96 | "collapsed": false 97 | }, 98 | "outputs": [], 99 | "source": [] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "### Step 7. For each combination of occupation and gender, calculate the mean age" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": { 112 | "collapsed": false 113 | }, 114 | "outputs": [], 115 | "source": [] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "### Step 8. 
For each occupation present the percentage of women and men" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": { 128 | "collapsed": false 129 | }, 130 | "outputs": [], 131 | "source": [] 132 | } 133 | ], 134 | "metadata": { 135 | "kernelspec": { 136 | "display_name": "Python 3.9.7 ('base')", 137 | "language": "python", 138 | "name": "python3" 139 | }, 140 | "language_info": { 141 | "codemirror_mode": { 142 | "name": "ipython", 143 | "version": 2 144 | }, 145 | "file_extension": ".py", 146 | "mimetype": "text/x-python", 147 | "name": "python", 148 | "nbconvert_exporter": "python", 149 | "pygments_lexer": "ipython2", 150 | "version": "3.9.7" 151 | }, 152 | "vscode": { 153 | "interpreter": { 154 | "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6" 155 | } 156 | } 157 | }, 158 | "nbformat": 4, 159 | "nbformat_minor": 0 160 | } 161 | -------------------------------------------------------------------------------- /02_pandas_tips&tricks/Excercises/04_filtering_and_sorting.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Ex1 - Filtering and Sorting Data" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Step 1. Import the necessary libraries" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": { 21 | "collapsed": false 22 | }, 23 | "outputs": [], 24 | "source": [] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). " 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "### Step 3. Assign it to a variable called chipo." 
38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": { 44 | "collapsed": false 45 | }, 46 | "outputs": [], 47 | "source": [] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "### Step 4. How many products cost more than $10.00?" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": { 60 | "collapsed": false 61 | }, 62 | "outputs": [], 63 | "source": [] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "### Step 5. What is the price of each item? \n", 70 | "###### print a data frame with only two columns item_name and item_price" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": { 77 | "collapsed": false 78 | }, 79 | "outputs": [], 80 | "source": [] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "### Step 6. Sort by the name of the item" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": { 93 | "collapsed": false 94 | }, 95 | "outputs": [], 96 | "source": [] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "### Step 7. What was the quantity of the most expensive item ordered?" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": { 109 | "collapsed": false 110 | }, 111 | "outputs": [], 112 | "source": [] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "### Step 8. How many times was a Veggie Salad Bowl ordered?" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": { 125 | "collapsed": false 126 | }, 127 | "outputs": [], 128 | "source": [] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "### Step 9. 
How many times did someone order more than one Canned Soda?" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": { 141 | "collapsed": false 142 | }, 143 | "outputs": [], 144 | "source": [] 145 | } 146 | ], 147 | "metadata": { 148 | "kernelspec": { 149 | "display_name": "Python 3.9.7 ('base')", 150 | "language": "python", 151 | "name": "python3" 152 | }, 153 | "language_info": { 154 | "codemirror_mode": { 155 | "name": "ipython", 156 | "version": 2 157 | }, 158 | "file_extension": ".py", 159 | "mimetype": "text/x-python", 160 | "name": "python", 161 | "nbconvert_exporter": "python", 162 | "pygments_lexer": "ipython2", 163 | "version": "3.9.7" 164 | }, 165 | "vscode": { 166 | "interpreter": { 167 | "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6" 168 | } 169 | } 170 | }, 171 | "nbformat": 4, 172 | "nbformat_minor": 0 173 | } 174 | -------------------------------------------------------------------------------- /02_pandas_tips&tricks/Excercises/11_apply.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# United States - Crime Rates - 1960 - 2014" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Introduction:\n", 15 | "\n", 16 | "This time you will create a data \n", 17 | "\n", 18 | "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n", 19 | "\n", 20 | "### Step 1. Import the necessary libraries" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": { 27 | "collapsed": false 28 | }, 29 | "outputs": [], 30 | "source": [] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "### Step 2. 
Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/04_Apply/US_Crime_Rates/US_Crime_Rates_1960_2014.csv). " 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "### Step 3. Assign it to a variable called crime." 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": { 50 | "collapsed": false 51 | }, 52 | "outputs": [], 53 | "source": [] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "### Step 4. What is the type of the columns?" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": { 66 | "collapsed": false 67 | }, 68 | "outputs": [], 69 | "source": [] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "##### Have you noticed that the type of Year is int64. But pandas has a different type to work with Time Series. Let's see it now.\n", 76 | "\n", 77 | "### Step 5. Convert the type of the column Year to datetime64" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": { 84 | "collapsed": false 85 | }, 86 | "outputs": [], 87 | "source": [] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "### Step 6. Set the Year column as the index of the dataframe" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": { 100 | "collapsed": false 101 | }, 102 | "outputs": [], 103 | "source": [] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "### Step 7. Delete the Total column" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": { 116 | "collapsed": false 117 | }, 118 | "outputs": [], 119 | "source": [] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "### Step 8. 
Group the year by decades and sum the values\n", 126 | "\n", 127 | "#### Pay attention to the Population column number, summing this column is a mistake" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": { 134 | "collapsed": false, 135 | "scrolled": true 136 | }, 137 | "outputs": [], 138 | "source": [] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "### Step 9. What is the most dangerous decade to live in the US?" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": { 151 | "collapsed": false 152 | }, 153 | "outputs": [], 154 | "source": [] 155 | } 156 | ], 157 | "metadata": { 158 | "anaconda-cloud": {}, 159 | "kernelspec": { 160 | "display_name": "Python [default]", 161 | "language": "python", 162 | "name": "python2" 163 | }, 164 | "language_info": { 165 | "codemirror_mode": { 166 | "name": "ipython", 167 | "version": 2 168 | }, 169 | "file_extension": ".py", 170 | "mimetype": "text/x-python", 171 | "name": "python", 172 | "nbconvert_exporter": "python", 173 | "pygments_lexer": "ipython2", 174 | "version": "2.7.12" 175 | } 176 | }, 177 | "nbformat": 4, 178 | "nbformat_minor": 0 179 | } 180 | -------------------------------------------------------------------------------- /02_pandas_tips&tricks/Excercises/03_Know_your_Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Ex3 - Getting and Knowing your Data" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Step 1. Go to https://www.kaggle.com/openfoodfacts/world-food-facts/data" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "### Step 2. Download the dataset to your computer and unzip it." 
22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "### Step 3. Use the tsv file and assign it to a dataframe called food" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": { 35 | "collapsed": true 36 | }, 37 | "outputs": [], 38 | "source": [] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "### Step 4. See the first 5 entries" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": { 51 | "collapsed": true 52 | }, 53 | "outputs": [], 54 | "source": [] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "### Step 5. What is the number of observations in the dataset?" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": { 67 | "collapsed": true 68 | }, 69 | "outputs": [], 70 | "source": [] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "### Step 6. What is the number of columns in the dataset?" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": { 83 | "collapsed": true 84 | }, 85 | "outputs": [], 86 | "source": [] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "### Step 7. Print the name of all the columns." 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": { 99 | "collapsed": true 100 | }, 101 | "outputs": [], 102 | "source": [] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "### Step 8. What is the name of 105th column?" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": { 115 | "collapsed": true 116 | }, 117 | "outputs": [], 118 | "source": [] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "### Step 9. 
What is the type of the observations of the 105th column?" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "metadata": { 131 | "collapsed": true 132 | }, 133 | "outputs": [], 134 | "source": [] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": {}, 139 | "source": [ 140 | "### Step 10. How is the dataset indexed?" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": { 147 | "collapsed": true 148 | }, 149 | "outputs": [], 150 | "source": [] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "metadata": {}, 155 | "source": [ 156 | "### Step 11. What is the product name of the 19th observation?" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": { 163 | "collapsed": true 164 | }, 165 | "outputs": [], 166 | "source": [] 167 | } 168 | ], 169 | "metadata": { 170 | "anaconda-cloud": {}, 171 | "kernelspec": { 172 | "display_name": "Python 3.9.7 ('base')", 173 | "language": "python", 174 | "name": "python3" 175 | }, 176 | "language_info": { 177 | "codemirror_mode": { 178 | "name": "ipython", 179 | "version": 2 180 | }, 181 | "file_extension": ".py", 182 | "mimetype": "text/x-python", 183 | "name": "python", 184 | "nbconvert_exporter": "python", 185 | "pygments_lexer": "ipython2", 186 | "version": "3.9.7" 187 | }, 188 | "vscode": { 189 | "interpreter": { 190 | "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6" 191 | } 192 | } 193 | }, 194 | "nbformat": 4, 195 | "nbformat_minor": 0 196 | } 197 | -------------------------------------------------------------------------------- /02_pandas_tips&tricks/Excercises/10_apply.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Student Alcohol Consumption" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 
13 | "source": [ 14 | "### Introduction:\n", 15 | "\n", 16 | "This time you will download a dataset from the UCI.\n", 17 | "\n", 18 | "### Step 1. Import the necessary libraries" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [], 28 | "source": [] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/04_Apply/Students_Alcohol_Consumption/student-mat.csv)." 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "### Step 3. Assign it to a variable called df." 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": { 48 | "collapsed": false 49 | }, 50 | "outputs": [], 51 | "source": [] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "### Step 4. For the purpose of this exercise slice the dataframe from 'school' until the 'guardian' column" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": { 64 | "collapsed": false 65 | }, 66 | "outputs": [], 67 | "source": [] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "### Step 5. Create a lambda function that will capitalize strings." 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": { 80 | "collapsed": false 81 | }, 82 | "outputs": [], 83 | "source": [] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "### Step 6. 
Capitalize both Mjob and Fjob" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": { 96 | "collapsed": false 97 | }, 98 | "outputs": [], 99 | "source": [] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "### Step 7. Print the last elements of the data set." 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": { 112 | "collapsed": false 113 | }, 114 | "outputs": [], 115 | "source": [] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "### Step 8. Did you notice the original dataframe is still lowercase? Why is that? Fix it and capitalize Mjob and Fjob." 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": { 128 | "collapsed": false 129 | }, 130 | "outputs": [], 131 | "source": [] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "### Step 9. Create a function called majority that returns a boolean value to a new column called legal_drinker (Consider majority as older than 17 years old)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": { 144 | "collapsed": false 145 | }, 146 | "outputs": [], 147 | "source": [] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "metadata": { 153 | "collapsed": false 154 | }, 155 | "outputs": [], 156 | "source": [] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": {}, 161 | "source": [ 162 | "### Step 10. Multiply every number of the dataset by 10. 
\n", 163 | "##### I know this makes no sense, don't forget it is just an exercise" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": { 170 | "collapsed": false 171 | }, 172 | "outputs": [], 173 | "source": [] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": { 179 | "collapsed": false 180 | }, 181 | "outputs": [], 182 | "source": [] 183 | } 184 | ], 185 | "metadata": { 186 | "anaconda-cloud": {}, 187 | "kernelspec": { 188 | "display_name": "Python 3.9.7 ('base')", 189 | "language": "python", 190 | "name": "python3" 191 | }, 192 | "language_info": { 193 | "codemirror_mode": { 194 | "name": "ipython", 195 | "version": 2 196 | }, 197 | "file_extension": ".py", 198 | "mimetype": "text/x-python", 199 | "name": "python", 200 | "nbconvert_exporter": "python", 201 | "pygments_lexer": "ipython2", 202 | "version": "3.9.7" 203 | }, 204 | "vscode": { 205 | "interpreter": { 206 | "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6" 207 | } 208 | } 209 | }, 210 | "nbformat": 4, 211 | "nbformat_minor": 0 212 | } 213 | -------------------------------------------------------------------------------- /02_pandas_tips&tricks/Excercises/09_grouping.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Regiment" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Introduction:\n", 15 | "\n", 16 | "Special thanks to: http://chrisalbon.com/ for sharing the dataset and materials.\n", 17 | "\n", 18 | "### Step 1. 
Import the necessary libraries" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [], 28 | "source": [] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "### Step 2. Create the DataFrame with the following values:" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 51, 40 | "metadata": { 41 | "collapsed": true 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "raw_data = {'regiment': ['Nighthawks', 'Nighthawks', 'Nighthawks', 'Nighthawks', 'Dragoons', 'Dragoons', 'Dragoons', 'Dragoons', 'Scouts', 'Scouts', 'Scouts', 'Scouts'], \n", 46 | " 'company': ['1st', '1st', '2nd', '2nd', '1st', '1st', '2nd', '2nd','1st', '1st', '2nd', '2nd'], \n", 47 | " 'name': ['Miller', 'Jacobson', 'Ali', 'Milner', 'Cooze', 'Jacon', 'Ryaner', 'Sone', 'Sloan', 'Piger', 'Riani', 'Ali'], \n", 48 | " 'preTestScore': [4, 24, 31, 2, 3, 4, 24, 31, 2, 3, 2, 3],\n", 49 | " 'postTestScore': [25, 94, 57, 62, 70, 25, 94, 57, 62, 70, 62, 70]}" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "### Step 3. Assign it to a variable called regiment.\n", 57 | "#### Don't forget to name each column" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": { 64 | "collapsed": false 65 | }, 66 | "outputs": [], 67 | "source": [] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "### Step 4. What is the mean preTestScore from the regiment Nighthawks? " 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": { 80 | "collapsed": false 81 | }, 82 | "outputs": [], 83 | "source": [] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "### Step 5. 
Present general statistics by company" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": { 96 | "collapsed": false 97 | }, 98 | "outputs": [], 99 | "source": [] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "### Step 6. What is the mean of each company's preTestScore?" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": { 112 | "collapsed": false 113 | }, 114 | "outputs": [], 115 | "source": [] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "### Step 7. Present the mean preTestScores grouped by regiment and company" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": { 128 | "collapsed": false 129 | }, 130 | "outputs": [], 131 | "source": [] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "### Step 8. Present the mean preTestScores grouped by regiment and company without heirarchical indexing" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": { 144 | "collapsed": false 145 | }, 146 | "outputs": [], 147 | "source": [] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "### Step 9. Group the entire dataframe by regiment and company" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": { 160 | "collapsed": false 161 | }, 162 | "outputs": [], 163 | "source": [] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "### Step 10. 
What is the number of observations in each regiment and company" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "metadata": { 176 | "collapsed": false 177 | }, 178 | "outputs": [], 179 | "source": [] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "### Step 11. Iterate over a group and print the name and the whole data from the regiment" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": { 192 | "collapsed": false 193 | }, 194 | "outputs": [], 195 | "source": [] 196 | } 197 | ], 198 | "metadata": { 199 | "kernelspec": { 200 | "display_name": "Python 3.9.7 ('base')", 201 | "language": "python", 202 | "name": "python3" 203 | }, 204 | "language_info": { 205 | "codemirror_mode": { 206 | "name": "ipython", 207 | "version": 2 208 | }, 209 | "file_extension": ".py", 210 | "mimetype": "text/x-python", 211 | "name": "python", 212 | "nbconvert_exporter": "python", 213 | "pygments_lexer": "ipython2", 214 | "version": "3.9.7" 215 | }, 216 | "vscode": { 217 | "interpreter": { 218 | "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6" 219 | } 220 | } 221 | }, 222 | "nbformat": 4, 223 | "nbformat_minor": 0 224 | } 225 | -------------------------------------------------------------------------------- /02_pandas_tips&tricks/Excercises/05_filtering_and_sorting.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Ex2 - Filtering and Sorting Data" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "This time we are going to pull data directly from the internet.\n", 15 | "\n", 16 | "### Step 1. 
Import the necessary libraries" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": { 23 | "collapsed": false 24 | }, 25 | "outputs": [], 26 | "source": [] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/02_Filtering_%26_Sorting/Euro12/Euro_2012_stats_TEAM.csv). " 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "### Step 3. Assign it to a variable called euro12." 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": { 46 | "collapsed": false 47 | }, 48 | "outputs": [], 49 | "source": [] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "### Step 4. Select only the Goal column." 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "collapsed": false 63 | }, 64 | "outputs": [], 65 | "source": [] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "### Step 5. How many teams participated in the Euro2012?" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": { 78 | "collapsed": false 79 | }, 80 | "outputs": [], 81 | "source": [] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "### Step 6. What is the number of columns in the dataset?" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": { 94 | "collapsed": false 95 | }, 96 | "outputs": [], 97 | "source": [] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "### Step 7. 
View only the columns Team, Yellow Cards and Red Cards and assign them to a dataframe called discipline" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": { 110 | "collapsed": false 111 | }, 112 | "outputs": [], 113 | "source": [] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "### Step 8. Sort the teams by Red Cards, then to Yellow Cards" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": { 126 | "collapsed": false, 127 | "scrolled": true 128 | }, 129 | "outputs": [], 130 | "source": [] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "### Step 9. Calculate the mean Yellow Cards given per Team" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": { 143 | "collapsed": false 144 | }, 145 | "outputs": [], 146 | "source": [] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "### Step 10. Filter teams that scored more than 6 goals" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": { 159 | "collapsed": false 160 | }, 161 | "outputs": [], 162 | "source": [] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": {}, 167 | "source": [ 168 | "### Step 11. Select the teams that start with G" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": { 175 | "collapsed": false 176 | }, 177 | "outputs": [], 178 | "source": [] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "### Step 12. 
Select the first 7 columns" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": { 191 | "collapsed": false 192 | }, 193 | "outputs": [], 194 | "source": [] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "metadata": {}, 199 | "source": [ 200 | "### Step 13. Select all columns except the last 3." 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "metadata": { 207 | "collapsed": false 208 | }, 209 | "outputs": [], 210 | "source": [] 211 | }, 212 | { 213 | "cell_type": "markdown", 214 | "metadata": {}, 215 | "source": [ 216 | "### Step 14. Present only the Shooting Accuracy from England, Italy and Russia" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": { 223 | "collapsed": false 224 | }, 225 | "outputs": [], 226 | "source": [] 227 | } 228 | ], 229 | "metadata": { 230 | "anaconda-cloud": {}, 231 | "kernelspec": { 232 | "display_name": "Python 3.9.7 ('base')", 233 | "language": "python", 234 | "name": "python3" 235 | }, 236 | "language_info": { 237 | "codemirror_mode": { 238 | "name": "ipython", 239 | "version": 2 240 | }, 241 | "file_extension": ".py", 242 | "mimetype": "text/x-python", 243 | "name": "python", 244 | "nbconvert_exporter": "python", 245 | "pygments_lexer": "ipython2", 246 | "version": "3.9.7" 247 | }, 248 | "vscode": { 249 | "interpreter": { 250 | "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6" 251 | } 252 | } 253 | }, 254 | "nbformat": 4, 255 | "nbformat_minor": 0 256 | } 257 | -------------------------------------------------------------------------------- /02_pandas_tips&tricks/Excercises/02_Know_your_Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Ex2 - Getting and Knowing your Data" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | 
"metadata": {}, 13 | "source": [ 14 | "### Step 1. Import the necessary libraries" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": { 21 | "collapsed": false 22 | }, 23 | "outputs": [], 24 | "source": [] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/u.user). " 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "### Step 3. Assign it to a variable called users and use the 'user_id' as index" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": { 44 | "collapsed": false 45 | }, 46 | "outputs": [], 47 | "source": [] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "### Step 4. See the first 25 entries" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": { 60 | "collapsed": false, 61 | "scrolled": true 62 | }, 63 | "outputs": [], 64 | "source": [] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "### Step 5. See the last 10 entries" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": { 77 | "collapsed": false, 78 | "scrolled": true 79 | }, 80 | "outputs": [], 81 | "source": [] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "### Step 6. What is the number of observations in the dataset?" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": { 94 | "collapsed": false 95 | }, 96 | "outputs": [], 97 | "source": [] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "### Step 7. What is the number of columns in the dataset?" 
104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": { 110 | "collapsed": false 111 | }, 112 | "outputs": [], 113 | "source": [] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "### Step 8. Print the name of all the columns." 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": { 126 | "collapsed": false 127 | }, 128 | "outputs": [], 129 | "source": [] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "### Step 9. How is the dataset indexed?" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": { 142 | "collapsed": false 143 | }, 144 | "outputs": [], 145 | "source": [] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "### Step 10. What is the data type of each column?" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": { 158 | "collapsed": false 159 | }, 160 | "outputs": [], 161 | "source": [] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": {}, 166 | "source": [ 167 | "### Step 11. Print only the occupation column" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": null, 173 | "metadata": { 174 | "collapsed": false 175 | }, 176 | "outputs": [], 177 | "source": [] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "### Step 12. How many different occupations are in this dataset?" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": { 190 | "collapsed": false 191 | }, 192 | "outputs": [], 193 | "source": [] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "### Step 13. What is the most frequent occupation?" 
200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": { 206 | "collapsed": false 207 | }, 208 | "outputs": [], 209 | "source": [] 210 | }, 211 | { 212 | "cell_type": "markdown", 213 | "metadata": {}, 214 | "source": [ 215 | "### Step 14. Summarize the DataFrame." 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": null, 221 | "metadata": { 222 | "collapsed": false 223 | }, 224 | "outputs": [], 225 | "source": [] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": {}, 230 | "source": [ 231 | "### Step 15. Summarize all the columns" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": null, 237 | "metadata": { 238 | "collapsed": false 239 | }, 240 | "outputs": [], 241 | "source": [] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": {}, 246 | "source": [ 247 | "### Step 16. Summarize only the occupation column" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": null, 253 | "metadata": { 254 | "collapsed": false 255 | }, 256 | "outputs": [], 257 | "source": [] 258 | }, 259 | { 260 | "cell_type": "markdown", 261 | "metadata": {}, 262 | "source": [ 263 | "### Step 17. What is the mean age of users?" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": null, 269 | "metadata": { 270 | "collapsed": false 271 | }, 272 | "outputs": [], 273 | "source": [] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "### Step 18. What is the age with least occurrence?" 
280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": null, 285 | "metadata": { 286 | "collapsed": false 287 | }, 288 | "outputs": [], 289 | "source": [] 290 | } 291 | ], 292 | "metadata": { 293 | "anaconda-cloud": {}, 294 | "kernelspec": { 295 | "display_name": "Python 3.9.7 ('base')", 296 | "language": "python", 297 | "name": "python3" 298 | }, 299 | "language_info": { 300 | "codemirror_mode": { 301 | "name": "ipython", 302 | "version": 2 303 | }, 304 | "file_extension": ".py", 305 | "mimetype": "text/x-python", 306 | "name": "python", 307 | "nbconvert_exporter": "python", 308 | "pygments_lexer": "ipython2", 309 | "version": "3.9.7" 310 | }, 311 | "vscode": { 312 | "interpreter": { 313 | "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6" 314 | } 315 | } 316 | }, 317 | "nbformat": 4, 318 | "nbformat_minor": 0 319 | } 320 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Creative Commons Legal Code 2 | 3 | CC0 1.0 Universal 4 | 5 | CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE 6 | LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN 7 | ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS 8 | INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES 9 | REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS 10 | PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM 11 | THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED 12 | HEREUNDER. 13 | 14 | Statement of Purpose 15 | 16 | The laws of most jurisdictions throughout the world automatically confer 17 | exclusive Copyright and Related Rights (defined below) upon the creator 18 | and subsequent owner(s) (each and all, an "owner") of an original work of 19 | authorship and/or a database (each, a "Work"). 
20 | 21 | Certain owners wish to permanently relinquish those rights to a Work for 22 | the purpose of contributing to a commons of creative, cultural and 23 | scientific works ("Commons") that the public can reliably and without fear 24 | of later claims of infringement build upon, modify, incorporate in other 25 | works, reuse and redistribute as freely as possible in any form whatsoever 26 | and for any purposes, including without limitation commercial purposes. 27 | These owners may contribute to the Commons to promote the ideal of a free 28 | culture and the further production of creative, cultural and scientific 29 | works, or to gain reputation or greater distribution for their Work in 30 | part through the use and efforts of others. 31 | 32 | For these and/or other purposes and motivations, and without any 33 | expectation of additional consideration or compensation, the person 34 | associating CC0 with a Work (the "Affirmer"), to the extent that he or she 35 | is an owner of Copyright and Related Rights in the Work, voluntarily 36 | elects to apply CC0 to the Work and publicly distribute the Work under its 37 | terms, with knowledge of his or her Copyright and Related Rights in the 38 | Work and the meaning and intended legal effect of CC0 on those rights. 39 | 40 | 1. Copyright and Related Rights. A Work made available under CC0 may be 41 | protected by copyright and related or neighboring rights ("Copyright and 42 | Related Rights"). Copyright and Related Rights include, but are not 43 | limited to, the following: 44 | 45 | i. the right to reproduce, adapt, distribute, perform, display, 46 | communicate, and translate a Work; 47 | ii. moral rights retained by the original author(s) and/or performer(s); 48 | iii. publicity and privacy rights pertaining to a person's image or 49 | likeness depicted in a Work; 50 | iv. rights protecting against unfair competition in regards to a Work, 51 | subject to the limitations in paragraph 4(a), below; 52 | v. 
rights protecting the extraction, dissemination, use and reuse of data 53 | in a Work; 54 | vi. database rights (such as those arising under Directive 96/9/EC of the 55 | European Parliament and of the Council of 11 March 1996 on the legal 56 | protection of databases, and under any national implementation 57 | thereof, including any amended or successor version of such 58 | directive); and 59 | vii. other similar, equivalent or corresponding rights throughout the 60 | world based on applicable law or treaty, and any national 61 | implementations thereof. 62 | 63 | 2. Waiver. To the greatest extent permitted by, but not in contravention 64 | of, applicable law, Affirmer hereby overtly, fully, permanently, 65 | irrevocably and unconditionally waives, abandons, and surrenders all of 66 | Affirmer's Copyright and Related Rights and associated claims and causes 67 | of action, whether now known or unknown (including existing as well as 68 | future claims and causes of action), in the Work (i) in all territories 69 | worldwide, (ii) for the maximum duration provided by applicable law or 70 | treaty (including future time extensions), (iii) in any current or future 71 | medium and for any number of copies, and (iv) for any purpose whatsoever, 72 | including without limitation commercial, advertising or promotional 73 | purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each 74 | member of the public at large and to the detriment of Affirmer's heirs and 75 | successors, fully intending that such Waiver shall not be subject to 76 | revocation, rescission, cancellation, termination, or any other legal or 77 | equitable action to disrupt the quiet enjoyment of the Work by the public 78 | as contemplated by Affirmer's express Statement of Purpose. 79 | 80 | 3. Public License Fallback. 
Should any part of the Waiver for any reason 81 | be judged legally invalid or ineffective under applicable law, then the 82 | Waiver shall be preserved to the maximum extent permitted taking into 83 | account Affirmer's express Statement of Purpose. In addition, to the 84 | extent the Waiver is so judged Affirmer hereby grants to each affected 85 | person a royalty-free, non transferable, non sublicensable, non exclusive, 86 | irrevocable and unconditional license to exercise Affirmer's Copyright and 87 | Related Rights in the Work (i) in all territories worldwide, (ii) for the 88 | maximum duration provided by applicable law or treaty (including future 89 | time extensions), (iii) in any current or future medium and for any number 90 | of copies, and (iv) for any purpose whatsoever, including without 91 | limitation commercial, advertising or promotional purposes (the 92 | "License"). The License shall be deemed effective as of the date CC0 was 93 | applied by Affirmer to the Work. Should any part of the License for any 94 | reason be judged legally invalid or ineffective under applicable law, such 95 | partial invalidity or ineffectiveness shall not invalidate the remainder 96 | of the License, and in such case Affirmer hereby affirms that he or she 97 | will not (i) exercise any of his or her remaining Copyright and Related 98 | Rights in the Work or (ii) assert any associated claims and causes of 99 | action with respect to the Work, in either case contrary to Affirmer's 100 | express Statement of Purpose. 101 | 102 | 4. Limitations and Disclaimers. 103 | 104 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 105 | surrendered, licensed or otherwise affected by this document. 106 | b. 
Affirmer offers the Work as-is and makes no representations or 107 | warranties of any kind concerning the Work, express, implied, 108 | statutory or otherwise, including without limitation warranties of 109 | title, merchantability, fitness for a particular purpose, non 110 | infringement, or the absence of latent or other defects, accuracy, or 111 | the present or absence of errors, whether or not discoverable, all to 112 | the greatest extent permissible under applicable law. 113 | c. Affirmer disclaims responsibility for clearing rights of other persons 114 | that may apply to the Work or any use thereof, including without 115 | limitation any person's Copyright and Related Rights in the Work. 116 | Further, Affirmer disclaims responsibility for obtaining any necessary 117 | consents, permissions or other rights required for any use of the 118 | Work. 119 | d. Affirmer understands and acknowledges that Creative Commons is not a 120 | party to this document and has no duty or obligation with respect to 121 | this CC0 or use of the Work. 122 | -------------------------------------------------------------------------------- /02_pandas_tips&tricks/Excercises/01_Know_your_Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Ex1 - Know your Data" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Step 1. Import the necessary libraries" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": { 21 | "collapsed": false 22 | }, 23 | "outputs": [], 24 | "source": [] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). 
" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "### Step 3. Assign it to a variable called chipo." 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": { 44 | "collapsed": false 45 | }, 46 | "outputs": [], 47 | "source": [] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "### Step 4. See the first 10 entries" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": { 60 | "collapsed": false, 61 | "scrolled": false 62 | }, 63 | "outputs": [], 64 | "source": [] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "### Step 5. What is the number of observations in the dataset?" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 1, 76 | "metadata": { 77 | "collapsed": false 78 | }, 79 | "outputs": [], 80 | "source": [ 81 | "# Solution 1\n", 82 | "\n" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 2, 88 | "metadata": { 89 | "collapsed": false 90 | }, 91 | "outputs": [], 92 | "source": [ 93 | "# Solution 2\n", 94 | "\n" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "### Step 6. What is the number of columns in the dataset?" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": { 108 | "collapsed": false 109 | }, 110 | "outputs": [], 111 | "source": [] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "### Step 7. Print the name of all the columns." 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": { 124 | "collapsed": false 125 | }, 126 | "outputs": [], 127 | "source": [] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "### Step 8. How is the dataset indexed?" 
134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": { 140 | "collapsed": false 141 | }, 142 | "outputs": [], 143 | "source": [] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": {}, 148 | "source": [ 149 | "### Step 9. Which was the most-ordered item? " 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": { 156 | "collapsed": false 157 | }, 158 | "outputs": [], 159 | "source": [] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | "source": [ 165 | "### Step 10. For the most-ordered item, how many items were ordered?" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": { 172 | "collapsed": false 173 | }, 174 | "outputs": [], 175 | "source": [] 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "metadata": {}, 180 | "source": [ 181 | "### Step 11. What was the most ordered item in the choice_description column?" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "metadata": { 188 | "collapsed": false 189 | }, 190 | "outputs": [], 191 | "source": [] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": {}, 196 | "source": [ 197 | "### Step 12. How many items were ordered in total?" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": null, 203 | "metadata": { 204 | "collapsed": false 205 | }, 206 | "outputs": [], 207 | "source": [] 208 | }, 209 | { 210 | "cell_type": "markdown", 211 | "metadata": {}, 212 | "source": [ 213 | "### Step 13. Turn the item price into a float" 214 | ] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "metadata": {}, 219 | "source": [ 220 | "#### Step 13.a. 
Check the item price type" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": null, 226 | "metadata": { 227 | "collapsed": false 228 | }, 229 | "outputs": [], 230 | "source": [] 231 | }, 232 | { 233 | "cell_type": "markdown", 234 | "metadata": {}, 235 | "source": [ 236 | "#### Step 13.b. Create a lambda function and change the type of item price" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": null, 242 | "metadata": { 243 | "collapsed": true 244 | }, 245 | "outputs": [], 246 | "source": [] 247 | }, 248 | { 249 | "cell_type": "markdown", 250 | "metadata": {}, 251 | "source": [ 252 | "#### Step 13.c. Check the item price type" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": null, 258 | "metadata": { 259 | "collapsed": false 260 | }, 261 | "outputs": [], 262 | "source": [] 263 | }, 264 | { 265 | "cell_type": "markdown", 266 | "metadata": {}, 267 | "source": [ 268 | "### Step 14. How much was the revenue for the period in the dataset?" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": null, 274 | "metadata": { 275 | "collapsed": false 276 | }, 277 | "outputs": [], 278 | "source": [] 279 | }, 280 | { 281 | "cell_type": "markdown", 282 | "metadata": {}, 283 | "source": [ 284 | "### Step 15. How many orders were made in the period?" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": null, 290 | "metadata": { 291 | "collapsed": false 292 | }, 293 | "outputs": [], 294 | "source": [] 295 | }, 296 | { 297 | "cell_type": "markdown", 298 | "metadata": {}, 299 | "source": [ 300 | "### Step 16. What is the average revenue amount per order?" 
301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": 3, 306 | "metadata": { 307 | "collapsed": false 308 | }, 309 | "outputs": [], 310 | "source": [ 311 | "# Solution 1\n", 312 | "\n" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": 4, 318 | "metadata": { 319 | "collapsed": false 320 | }, 321 | "outputs": [], 322 | "source": [ 323 | "# Solution 2\n", 324 | "\n" 325 | ] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | "metadata": {}, 330 | "source": [ 331 | "### Step 17. How many different items are sold?" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": null, 337 | "metadata": { 338 | "collapsed": false 339 | }, 340 | "outputs": [], 341 | "source": [] 342 | } 343 | ], 344 | "metadata": { 345 | "anaconda-cloud": {}, 346 | "kernelspec": { 347 | "display_name": "Python 3.9.7 ('base')", 348 | "language": "python", 349 | "name": "python3" 350 | }, 351 | "language_info": { 352 | "codemirror_mode": { 353 | "name": "ipython", 354 | "version": 2 355 | }, 356 | "file_extension": ".py", 357 | "mimetype": "text/x-python", 358 | "name": "python", 359 | "nbconvert_exporter": "python", 360 | "pygments_lexer": "ipython2", 361 | "version": "3.9.7" 362 | }, 363 | "vscode": { 364 | "interpreter": { 365 | "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6" 366 | } 367 | } 368 | }, 369 | "nbformat": 4, 370 | "nbformat_minor": 0 371 | } 372 | -------------------------------------------------------------------------------- /02_pandas_tips&tricks/Excercises/06_filtering_and_sorting.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Ex3 - Filtering and Sorting Data - Fictional Army" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Introduction:\n", 15 | "\n", 16 | "This exercise was inspired by this 
[page](http://chrisalbon.com/python/)\n", 17 | "\n", 18 | "### Step 1. Import the necessary libraries" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 3, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "import pandas as pd" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "### Step 2. This is the data given as a dictionary" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 4, 40 | "metadata": { 41 | "collapsed": true 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "# Create an example dataframe about a fictional army\n", 46 | "raw_data = {'regiment': ['Nighthawks', 'Nighthawks', 'Nighthawks', 'Nighthawks', 'Dragoons', 'Dragoons', 'Dragoons', 'Dragoons', 'Scouts', 'Scouts', 'Scouts', 'Scouts'],\n", 47 | " 'company': ['1st', '1st', '2nd', '2nd', '1st', '1st', '2nd', '2nd','1st', '1st', '2nd', '2nd'],\n", 48 | " 'deaths': [523, 52, 25, 616, 43, 234, 523, 62, 62, 73, 37, 35],\n", 49 | " 'battles': [5, 42, 2, 2, 4, 7, 8, 3, 4, 7, 8, 9],\n", 50 | " 'size': [1045, 957, 1099, 1400, 1592, 1006, 987, 849, 973, 1005, 1099, 1523],\n", 51 | " 'veterans': [1, 5, 62, 26, 73, 37, 949, 48, 48, 435, 63, 345],\n", 52 | " 'readiness': [1, 2, 3, 3, 2, 1, 2, 3, 2, 1, 2, 3],\n", 53 | " 'armored': [1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1],\n", 54 | " 'deserters': [4, 24, 31, 2, 3, 4, 24, 31, 2, 3, 2, 3],\n", 55 | " 'origin': ['Arizona', 'California', 'Texas', 'Florida', 'Maine', 'Iowa', 'Alaska', 'Washington', 'Oregon', 'Wyoming', 'Louisana', 'Georgia']}" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "### Step 3. Create a dataframe and assign it to a variable called army. \n", 63 | "\n", 64 | "#### Don't forget to include the columns names in the order presented in the dictionary ('regiment', 'company', 'deaths'...) so that the column index order is consistent with the solutions. If omitted, pandas will order the columns alphabetically." 
65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "### Step 4. Set the 'origin' column as the index of the dataframe" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "### Step 5. Print only the column veterans" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "### Step 6. Print the columns 'veterans' and 'deaths'" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "### Step 7. Print the name of all the columns." 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "### Step 8. Select the 'deaths', 'size' and 'deserters' columns from Maine and Alaska" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "### Step 9. 
Select the rows 3 to 7 and the columns 3 to 6" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": {}, 161 | "source": [ 162 | "### Step 10. Select every row after the fourth row and all columns" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "### Step 11. Select every row up to the 4th row and all columns" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "metadata": {}, 183 | "outputs": [], 184 | "source": [] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "### Step 12. Select the 3rd column up to the 7th column" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [] 199 | }, 200 | { 201 | "cell_type": "markdown", 202 | "metadata": {}, 203 | "source": [ 204 | "### Step 13. Select rows where df.deaths is greater than 50" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "metadata": {}, 211 | "outputs": [], 212 | "source": [] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "metadata": {}, 217 | "source": [ 218 | "### Step 14. Select rows where df.deaths is greater than 500 or less than 50" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "metadata": {}, 225 | "outputs": [], 226 | "source": [] 227 | }, 228 | { 229 | "cell_type": "markdown", 230 | "metadata": {}, 231 | "source": [ 232 | "### Step 15. 
Select all the regiments not named \"Dragoons\"" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": null, 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "metadata": {}, 245 | "source": [ 246 | "### Step 16. Select the rows called Texas and Arizona" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": null, 252 | "metadata": {}, 253 | "outputs": [], 254 | "source": [] 255 | }, 256 | { 257 | "cell_type": "markdown", 258 | "metadata": {}, 259 | "source": [ 260 | "### Step 17. Select the third cell in the row named Arizona" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": null, 266 | "metadata": {}, 267 | "outputs": [], 268 | "source": [] 269 | }, 270 | { 271 | "cell_type": "markdown", 272 | "metadata": {}, 273 | "source": [ 274 | "### Step 18. Select the third cell down in the column named deaths" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": null, 280 | "metadata": {}, 281 | "outputs": [], 282 | "source": [] 283 | } 284 | ], 285 | "metadata": { 286 | "kernelspec": { 287 | "display_name": "Python 3.9.7 ('base')", 288 | "language": "python", 289 | "name": "python3" 290 | }, 291 | "language_info": { 292 | "codemirror_mode": { 293 | "name": "ipython", 294 | "version": 3 295 | }, 296 | "file_extension": ".py", 297 | "mimetype": "text/x-python", 298 | "name": "python", 299 | "nbconvert_exporter": "python", 300 | "pygments_lexer": "ipython3", 301 | "version": "3.9.7" 302 | }, 303 | "vscode": { 304 | "interpreter": { 305 | "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6" 306 | } 307 | } 308 | }, 309 | "nbformat": 4, 310 | "nbformat_minor": 1 311 | } 312 | -------------------------------------------------------------------------------- /02_pandas_tips&tricks/Solutions/Food_Ananlysis_Report.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | | Submitted By | Kashif Raza | 4 | |--------|--------------| 5 | 6 |
7 | 8 | # Exploratory Data Analysis on Open Food Facts Data 9 | ## About the Dataset 10 | - Open Food Facts is a non-profit association of volunteers. 11 | 5000+ contributors like you have added 600 000+ products from 150 countries using our Android, iPhone or Windows Phone app or their camera to scan barcodes and upload pictures of products and their labels. 12 | ## Important steps performed in the analysis 13 | ## 1. Data Shape 14 | Shape of the original dataset is: 15 | ``` 16 | food.shape 17 | ``` 18 | (356027,163) 19 | - Total number of observations/rows: $356027$ 20 | - Total number of columns: $163$ 21 | 22 | ## 2. Data Structure 23 | The data is structured in the following way: 24 | ``` 25 | food.info() 26 | ``` 27 | - Data has a range index of: 356027 entries, 0 to 356026 28 | - The columns are: 163, starting from `code` to `water-hardness_100g` 29 | - Total memory usage is: 442.8+ MB 30 | - From $163$ columns: $107$ columns have the data type `float64` and the remaining $56$ have the data type `object` 31 | 32 | ## 3. Finding Missing Data 33 | To find the missing data, we can use the following code: 34 | ``` 35 | nan_per=food.isnull().sum().sort_values(ascending=False)/food.shape[0]*100 36 | ``` 37 | - Plotting the columns with the highest percentage of missing data: 38 | ``` 39 | plt.figure(dpi=600) 40 | nan_per.plot(kind='bar', title='Percentage of missing values per feature', figsize=(15,5), 41 | color='red', fontsize=5) 42 | ``` 43 | 44 | ![](Pictures/food_eda/nan1.png) 45 | 46 | - Plotting the NaN percentage against the feature columns: 47 | ``` 48 | plt.figure(figsize=(10,5)) 49 | plt.figure(dpi=600) 50 | sns.distplot(nan_per, bins=100, kde=False) 51 | plt.xlabel("NaN percentage") 52 | plt.ylabel("Number of columns") 53 | plt.title("Percentage of nans per feature column") 54 | ``` 55 | ![](Pictures/food_eda/nan2.png) 56 | 57 | ### **Key Takeaways** 58 | 1. A large percentage of NaN data is present in the columns with the highest percentage of missing data.
So not all of the information seems to be useful. 59 | 2. Many of the feature columns contain 100% NaN values 60 | 3. There is a group of columns that have about 20% NaN values. 61 | 4. Features consisting entirely of NaN values are not useful for the analysis. 62 | 5. Columns containing only NaN values can be dropped. 63 | 64 | ### **Useless feature columns** 65 | The useless feature columns are: 66 | ``` 67 | useless_features=nan_per[nan_per==100].index 68 | print('Useless features:', useless_features) 69 | ``` 70 | ![](Pictures/food_eda/useless_features.png) 71 | 72 | ### **Length of useless features** 73 | Length of the useless features: 74 | ``` 75 | print('Length of useless features:', len(useless_features)) 76 | ``` 77 | - Length of useless features: 16 78 | 79 | ### **Drop the useless features** 80 | Drop the useless features: 81 | ``` 82 | food.drop(useless_features, axis=1, inplace=True) 83 | print('Shape of the dataset after dropping the useless features:', food.shape) 84 | ``` 85 | Shape of the food dataset is: (356027, 147) 86 | 87 | ### **Features with zero NaN values** 88 | Features with zero NaN values are: 89 | ``` 90 | zero_nan_features=nan_per[nan_per==0].index 91 | print('Features with zero nan values:', zero_nan_features) 92 | ``` 93 | Zero NaN features: Index(['last_modified_datetime', 'last_modified_t'], dtype='object') 94 | 95 | ### **Splitting the data into NaN groups** 96 | Splitting the data into NaN groups: 97 | 1. Columns with low NaN values: (0-20%) 98 | 2. Columns with medium NaN values: (20-50%) 99 | 3.
Columns with High NaN values: (50-100%) 100 | ``` 101 | Low NaN features columns: 102 | sodium_100g 18.631 103 | salt_100g 18.619 104 | proteins_100g 17.377 105 | energy_100g 17.038 106 | brands_tags 8.165 107 | brands 8.159 108 | product_name 4.919 109 | countries 0.077 110 | countries_en 0.077 111 | countries_tags 0.077 112 | states_en 0.015 113 | states_tags 0.015 114 | states 0.015 115 | url 0.007 116 | code 0.007 117 | created_datetime 0.003 118 | created_t 0.001 119 | creator 0.001 120 | last_modified_datetime 0.000 121 | last_modified_t 0.000 122 | dtype: float64 123 | ``` 124 | Medium NaN features columns: 125 | ``` 126 | Medium NaN features: 127 | serving_size 39.156 128 | fiber_100g 38.015 129 | nutrition_grade_fr 28.417 130 | nutrition-score-fr_100g 28.417 131 | nutrition-score-uk_100g 28.417 132 | saturated-fat_100g 25.898 133 | sugars_100g 21.583 134 | carbohydrates_100g 21.573 135 | fat_100g 21.496 136 | additives 20.280 137 | additives_n 20.268 138 | ingredients_from_palm_oil_n 20.268 139 | ingredients_that_may_be_from_palm_oil_n 20.268 140 | ingredients_text 20.261 141 | dtype: float64 142 | ``` 143 | High NaN features columns has the 129 length. 
144 | 145 | ### **Columns with fewest NaN values plot** 146 | ``` 147 | plt.figure(figsize=(20,5)) 148 | lows = sns.barplot(x=low_nan_features.index.values, y=low_nan_features.values, palette="rocket") 149 | lows.set_xticklabels(low_nan_features.index.values,rotation=45) 150 | plt.title("Features with fewest nan-values") 151 | plt.ylabel("% of nans ") 152 | ``` 153 | 154 | ![](Pictures/food_eda/low_nan.png) 155 | 156 | - The plot shows that there are many features that occur multiple times, such as 157 | - countries 158 | - countries_tags 159 | - countries_en 160 | - additives 161 | - additives_n 162 | ### **Columns which have a NaN values percentage between 20-50%** 163 | 164 | ``` 165 | plt.figure(figsize=(20,5)) 166 | lows = sns.barplot(x=med_nan_features.index.values, y=med_nan_features.values, palette="Spectral") 167 | lows.set_xticklabels(med_nan_features.index.values,rotation=45) 168 | plt.title("Features with medium percentage of nan-values") 169 | plt.ylabel("% of nans ") 170 | ``` 171 | ![](Pictures/food_eda/med_nan.png) 172 | 173 | ### **Columns which have a NaN values percentage >50%** 174 | ``` 175 | plt.figure(figsize=(15,30)) 176 | high = sns.barplot(y=high_nan_features.index.values, x=high_nan_features.values, palette="Blues") 177 | plt.title("Features with most nan-values") 178 | plt.ylabel("% of nans ") 179 | ``` 180 | ![](Pictures/food_eda/high_nan.png) 181 | 182 | ### Dropping the columns with high NaN values 183 | - Dropping the columns that have high NaN values: 184 | ``` 185 | for i in high_nan_features.index: 186 | if i in food.columns: 187 | food.drop(i, axis=1, inplace=True) 188 | print('Shape of the dataset after dropping the high NaN features:', food.shape) 189 | ``` 190 | (356027, 34) 191 | - Now we have 34 features.
192 | 193 | ### **Drop the NaN values from the data** 194 | - Drop the NaN values from the data and then print the shape of the data 195 | ``` 196 | food.dropna(inplace=True) 197 | print('Shape of the food dataset after dropping NaN values is:', food.shape) 198 | ``` 199 | Shape of the food dataset after dropping NaN values is: (157157, 34) 200 | ## Data Structure of cleaned data 201 | - The new cleaned data has a total of 157157 entries. 202 | - Total number of features is: 34 203 | - 14 features have the data type of float64. 204 | - 20 features have the data type of object. 205 | - Total memory usage of the cleaned data is: 42.0 MB 206 | 207 | ## Step 4: Type casting/conversion of the data types 208 | - To convert the data type of a specific column: 209 | ``` 210 | food['serving_size'] = food['serving_size'].astype(str) 211 | food['product_name'] = food['product_name'].astype(str) 212 | ``` 213 | - Now both serving_size and product_name have the data type string. 214 | 215 | ## Step 5: Summary Statistics of the data 216 | - Summary Statistics of the data: 217 | ``` 218 | food.describe() 219 | ``` 220 | ![](Pictures/food_eda/summary_statistics.png) 221 | ## Step 6: Value Counts 222 | - Value Counts of the data: 223 | ``` 224 | food.product_name.value_counts() 225 | ``` 226 | - Ice Cream is the most popular product name. It occurs $405$ times, followed by Potato Chips, which occurs $276$ times. 227 | - The most frequently occurring products are: 228 | 229 | ![](Pictures/food_eda/most_occ_prod.png) 230 | - The least frequently occurring products are: 231 | 232 | ![](Pictures/food_eda/least_occ_prod.png) 233 | 234 | ## Step-7: Deal with Duplicates 235 | - Dropping the duplicates 236 | ``` 237 | food.drop_duplicates(inplace=True) 238 | print('Shape of Dataset after dropping the duplicates', food.shape) 239 | ``` 240 | Shape of Dataset after dropping the duplicates (157157, 34) 241 | 242 | ## Step-8: Check the Normality of Data 243 | - To check whether the distribution of the data is normal or not.
244 | - Distribution of data of `nutrition-score-fr_100g` column 245 | ![](Pictures/food_eda/dist_plot.png) 246 | 247 | ## Step-9: Correlation of Dataset 248 | - Correlation plot tells the relation between the column. Withe the increase or decrease in one quantity how much it affect the 2nd quantity. 249 | ![](Pictures/food_eda/corr.png) 250 | 251 | 252 | -------------------------------------------------------------------------------- /02_pandas_tips&tricks/tips.csv: -------------------------------------------------------------------------------- 1 | ,total_bill,tip,sex,smoker,day,time,size 2 | 0,16.99,1.01,Female,No,Sun,Dinner,2 3 | 1,10.34,1.66,Male,No,Sun,Dinner,3 4 | 2,21.01,3.5,Male,No,Sun,Dinner,3 5 | 3,23.68,3.31,Male,No,Sun,Dinner,2 6 | 4,24.59,3.61,Female,No,Sun,Dinner,4 7 | 5,25.29,4.71,Male,No,Sun,Dinner,4 8 | 6,8.77,2.0,Male,No,Sun,Dinner,2 9 | 7,26.88,3.12,Male,No,Sun,Dinner,4 10 | 8,15.04,1.96,Male,No,Sun,Dinner,2 11 | 9,14.78,3.23,Male,No,Sun,Dinner,2 12 | 10,10.27,1.71,Male,No,Sun,Dinner,2 13 | 11,35.26,5.0,Female,No,Sun,Dinner,4 14 | 12,15.42,1.57,Male,No,Sun,Dinner,2 15 | 13,18.43,3.0,Male,No,Sun,Dinner,4 16 | 14,14.83,3.02,Female,No,Sun,Dinner,2 17 | 15,21.58,3.92,Male,No,Sun,Dinner,2 18 | 16,10.33,1.67,Female,No,Sun,Dinner,3 19 | 17,16.29,3.71,Male,No,Sun,Dinner,3 20 | 18,16.97,3.5,Female,No,Sun,Dinner,3 21 | 19,20.65,3.35,Male,No,Sat,Dinner,3 22 | 20,17.92,4.08,Male,No,Sat,Dinner,2 23 | 21,20.29,2.75,Female,No,Sat,Dinner,2 24 | 22,15.77,2.23,Female,No,Sat,Dinner,2 25 | 23,39.42,7.58,Male,No,Sat,Dinner,4 26 | 24,19.82,3.18,Male,No,Sat,Dinner,2 27 | 25,17.81,2.34,Male,No,Sat,Dinner,4 28 | 26,13.37,2.0,Male,No,Sat,Dinner,2 29 | 27,12.69,2.0,Male,No,Sat,Dinner,2 30 | 28,21.7,4.3,Male,No,Sat,Dinner,2 31 | 29,19.65,3.0,Female,No,Sat,Dinner,2 32 | 30,9.55,1.45,Male,No,Sat,Dinner,2 33 | 31,18.35,2.5,Male,No,Sat,Dinner,4 34 | 32,15.06,3.0,Female,No,Sat,Dinner,2 35 | 33,20.69,2.45,Female,No,Sat,Dinner,4 36 | 34,17.78,3.27,Male,No,Sat,Dinner,2 37 | 
35,24.06,3.6,Male,No,Sat,Dinner,3 38 | 36,16.31,2.0,Male,No,Sat,Dinner,3 39 | 37,16.93,3.07,Female,No,Sat,Dinner,3 40 | 38,18.69,2.31,Male,No,Sat,Dinner,3 41 | 39,31.27,5.0,Male,No,Sat,Dinner,3 42 | 40,16.04,2.24,Male,No,Sat,Dinner,3 43 | 41,17.46,2.54,Male,No,Sun,Dinner,2 44 | 42,13.94,3.06,Male,No,Sun,Dinner,2 45 | 43,9.68,1.32,Male,No,Sun,Dinner,2 46 | 44,30.4,5.6,Male,No,Sun,Dinner,4 47 | 45,18.29,3.0,Male,No,Sun,Dinner,2 48 | 46,22.23,5.0,Male,No,Sun,Dinner,2 49 | 47,32.4,6.0,Male,No,Sun,Dinner,4 50 | 48,28.55,2.05,Male,No,Sun,Dinner,3 51 | 49,18.04,3.0,Male,No,Sun,Dinner,2 52 | 50,12.54,2.5,Male,No,Sun,Dinner,2 53 | 51,10.29,2.6,Female,No,Sun,Dinner,2 54 | 52,34.81,5.2,Female,No,Sun,Dinner,4 55 | 53,9.94,1.56,Male,No,Sun,Dinner,2 56 | 54,25.56,4.34,Male,No,Sun,Dinner,4 57 | 55,19.49,3.51,Male,No,Sun,Dinner,2 58 | 56,38.01,3.0,Male,Yes,Sat,Dinner,4 59 | 57,26.41,1.5,Female,No,Sat,Dinner,2 60 | 58,11.24,1.76,Male,Yes,Sat,Dinner,2 61 | 59,48.27,6.73,Male,No,Sat,Dinner,4 62 | 60,20.29,3.21,Male,Yes,Sat,Dinner,2 63 | 61,13.81,2.0,Male,Yes,Sat,Dinner,2 64 | 62,11.02,1.98,Male,Yes,Sat,Dinner,2 65 | 63,18.29,3.76,Male,Yes,Sat,Dinner,4 66 | 64,17.59,2.64,Male,No,Sat,Dinner,3 67 | 65,20.08,3.15,Male,No,Sat,Dinner,3 68 | 66,16.45,2.47,Female,No,Sat,Dinner,2 69 | 67,3.07,1.0,Female,Yes,Sat,Dinner,1 70 | 68,20.23,2.01,Male,No,Sat,Dinner,2 71 | 69,15.01,2.09,Male,Yes,Sat,Dinner,2 72 | 70,12.02,1.97,Male,No,Sat,Dinner,2 73 | 71,17.07,3.0,Female,No,Sat,Dinner,3 74 | 72,26.86,3.14,Female,Yes,Sat,Dinner,2 75 | 73,25.28,5.0,Female,Yes,Sat,Dinner,2 76 | 74,14.73,2.2,Female,No,Sat,Dinner,2 77 | 75,10.51,1.25,Male,No,Sat,Dinner,2 78 | 76,17.92,3.08,Male,Yes,Sat,Dinner,2 79 | 77,27.2,4.0,Male,No,Thur,Lunch,4 80 | 78,22.76,3.0,Male,No,Thur,Lunch,2 81 | 79,17.29,2.71,Male,No,Thur,Lunch,2 82 | 80,19.44,3.0,Male,Yes,Thur,Lunch,2 83 | 81,16.66,3.4,Male,No,Thur,Lunch,2 84 | 82,10.07,1.83,Female,No,Thur,Lunch,1 85 | 83,32.68,5.0,Male,Yes,Thur,Lunch,2 86 | 
84,15.98,2.03,Male,No,Thur,Lunch,2 87 | 85,34.83,5.17,Female,No,Thur,Lunch,4 88 | 86,13.03,2.0,Male,No,Thur,Lunch,2 89 | 87,18.28,4.0,Male,No,Thur,Lunch,2 90 | 88,24.71,5.85,Male,No,Thur,Lunch,2 91 | 89,21.16,3.0,Male,No,Thur,Lunch,2 92 | 90,28.97,3.0,Male,Yes,Fri,Dinner,2 93 | 91,22.49,3.5,Male,No,Fri,Dinner,2 94 | 92,5.75,1.0,Female,Yes,Fri,Dinner,2 95 | 93,16.32,4.3,Female,Yes,Fri,Dinner,2 96 | 94,22.75,3.25,Female,No,Fri,Dinner,2 97 | 95,40.17,4.73,Male,Yes,Fri,Dinner,4 98 | 96,27.28,4.0,Male,Yes,Fri,Dinner,2 99 | 97,12.03,1.5,Male,Yes,Fri,Dinner,2 100 | 98,21.01,3.0,Male,Yes,Fri,Dinner,2 101 | 99,12.46,1.5,Male,No,Fri,Dinner,2 102 | 100,11.35,2.5,Female,Yes,Fri,Dinner,2 103 | 101,15.38,3.0,Female,Yes,Fri,Dinner,2 104 | 102,44.3,2.5,Female,Yes,Sat,Dinner,3 105 | 103,22.42,3.48,Female,Yes,Sat,Dinner,2 106 | 104,20.92,4.08,Female,No,Sat,Dinner,2 107 | 105,15.36,1.64,Male,Yes,Sat,Dinner,2 108 | 106,20.49,4.06,Male,Yes,Sat,Dinner,2 109 | 107,25.21,4.29,Male,Yes,Sat,Dinner,2 110 | 108,18.24,3.76,Male,No,Sat,Dinner,2 111 | 109,14.31,4.0,Female,Yes,Sat,Dinner,2 112 | 110,14.0,3.0,Male,No,Sat,Dinner,2 113 | 111,7.25,1.0,Female,No,Sat,Dinner,1 114 | 112,38.07,4.0,Male,No,Sun,Dinner,3 115 | 113,23.95,2.55,Male,No,Sun,Dinner,2 116 | 114,25.71,4.0,Female,No,Sun,Dinner,3 117 | 115,17.31,3.5,Female,No,Sun,Dinner,2 118 | 116,29.93,5.07,Male,No,Sun,Dinner,4 119 | 117,10.65,1.5,Female,No,Thur,Lunch,2 120 | 118,12.43,1.8,Female,No,Thur,Lunch,2 121 | 119,24.08,2.92,Female,No,Thur,Lunch,4 122 | 120,11.69,2.31,Male,No,Thur,Lunch,2 123 | 121,13.42,1.68,Female,No,Thur,Lunch,2 124 | 122,14.26,2.5,Male,No,Thur,Lunch,2 125 | 123,15.95,2.0,Male,No,Thur,Lunch,2 126 | 124,12.48,2.52,Female,No,Thur,Lunch,2 127 | 125,29.8,4.2,Female,No,Thur,Lunch,6 128 | 126,8.52,1.48,Male,No,Thur,Lunch,2 129 | 127,14.52,2.0,Female,No,Thur,Lunch,2 130 | 128,11.38,2.0,Female,No,Thur,Lunch,2 131 | 129,22.82,2.18,Male,No,Thur,Lunch,3 132 | 130,19.08,1.5,Male,No,Thur,Lunch,2 133 | 
131,20.27,2.83,Female,No,Thur,Lunch,2 134 | 132,11.17,1.5,Female,No,Thur,Lunch,2 135 | 133,12.26,2.0,Female,No,Thur,Lunch,2 136 | 134,18.26,3.25,Female,No,Thur,Lunch,2 137 | 135,8.51,1.25,Female,No,Thur,Lunch,2 138 | 136,10.33,2.0,Female,No,Thur,Lunch,2 139 | 137,14.15,2.0,Female,No,Thur,Lunch,2 140 | 138,16.0,2.0,Male,Yes,Thur,Lunch,2 141 | 139,13.16,2.75,Female,No,Thur,Lunch,2 142 | 140,17.47,3.5,Female,No,Thur,Lunch,2 143 | 141,34.3,6.7,Male,No,Thur,Lunch,6 144 | 142,41.19,5.0,Male,No,Thur,Lunch,5 145 | 143,27.05,5.0,Female,No,Thur,Lunch,6 146 | 144,16.43,2.3,Female,No,Thur,Lunch,2 147 | 145,8.35,1.5,Female,No,Thur,Lunch,2 148 | 146,18.64,1.36,Female,No,Thur,Lunch,3 149 | 147,11.87,1.63,Female,No,Thur,Lunch,2 150 | 148,9.78,1.73,Male,No,Thur,Lunch,2 151 | 149,7.51,2.0,Male,No,Thur,Lunch,2 152 | 150,14.07,2.5,Male,No,Sun,Dinner,2 153 | 151,13.13,2.0,Male,No,Sun,Dinner,2 154 | 152,17.26,2.74,Male,No,Sun,Dinner,3 155 | 153,24.55,2.0,Male,No,Sun,Dinner,4 156 | 154,19.77,2.0,Male,No,Sun,Dinner,4 157 | 155,29.85,5.14,Female,No,Sun,Dinner,5 158 | 156,48.17,5.0,Male,No,Sun,Dinner,6 159 | 157,25.0,3.75,Female,No,Sun,Dinner,4 160 | 158,13.39,2.61,Female,No,Sun,Dinner,2 161 | 159,16.49,2.0,Male,No,Sun,Dinner,4 162 | 160,21.5,3.5,Male,No,Sun,Dinner,4 163 | 161,12.66,2.5,Male,No,Sun,Dinner,2 164 | 162,16.21,2.0,Female,No,Sun,Dinner,3 165 | 163,13.81,2.0,Male,No,Sun,Dinner,2 166 | 164,17.51,3.0,Female,Yes,Sun,Dinner,2 167 | 165,24.52,3.48,Male,No,Sun,Dinner,3 168 | 166,20.76,2.24,Male,No,Sun,Dinner,2 169 | 167,31.71,4.5,Male,No,Sun,Dinner,4 170 | 168,10.59,1.61,Female,Yes,Sat,Dinner,2 171 | 169,10.63,2.0,Female,Yes,Sat,Dinner,2 172 | 170,50.81,10.0,Male,Yes,Sat,Dinner,3 173 | 171,15.81,3.16,Male,Yes,Sat,Dinner,2 174 | 172,7.25,5.15,Male,Yes,Sun,Dinner,2 175 | 173,31.85,3.18,Male,Yes,Sun,Dinner,2 176 | 174,16.82,4.0,Male,Yes,Sun,Dinner,2 177 | 175,32.9,3.11,Male,Yes,Sun,Dinner,2 178 | 176,17.89,2.0,Male,Yes,Sun,Dinner,2 179 | 177,14.48,2.0,Male,Yes,Sun,Dinner,2 180 | 
178,9.6,4.0,Female,Yes,Sun,Dinner,2 181 | 179,34.63,3.55,Male,Yes,Sun,Dinner,2 182 | 180,34.65,3.68,Male,Yes,Sun,Dinner,4 183 | 181,23.33,5.65,Male,Yes,Sun,Dinner,2 184 | 182,45.35,3.5,Male,Yes,Sun,Dinner,3 185 | 183,23.17,6.5,Male,Yes,Sun,Dinner,4 186 | 184,40.55,3.0,Male,Yes,Sun,Dinner,2 187 | 185,20.69,5.0,Male,No,Sun,Dinner,5 188 | 186,20.9,3.5,Female,Yes,Sun,Dinner,3 189 | 187,30.46,2.0,Male,Yes,Sun,Dinner,5 190 | 188,18.15,3.5,Female,Yes,Sun,Dinner,3 191 | 189,23.1,4.0,Male,Yes,Sun,Dinner,3 192 | 190,15.69,1.5,Male,Yes,Sun,Dinner,2 193 | 191,19.81,4.19,Female,Yes,Thur,Lunch,2 194 | 192,28.44,2.56,Male,Yes,Thur,Lunch,2 195 | 193,15.48,2.02,Male,Yes,Thur,Lunch,2 196 | 194,16.58,4.0,Male,Yes,Thur,Lunch,2 197 | 195,7.56,1.44,Male,No,Thur,Lunch,2 198 | 196,10.34,2.0,Male,Yes,Thur,Lunch,2 199 | 197,43.11,5.0,Female,Yes,Thur,Lunch,4 200 | 198,13.0,2.0,Female,Yes,Thur,Lunch,2 201 | 199,13.51,2.0,Male,Yes,Thur,Lunch,2 202 | 200,18.71,4.0,Male,Yes,Thur,Lunch,3 203 | 201,12.74,2.01,Female,Yes,Thur,Lunch,2 204 | 202,13.0,2.0,Female,Yes,Thur,Lunch,2 205 | 203,16.4,2.5,Female,Yes,Thur,Lunch,2 206 | 204,20.53,4.0,Male,Yes,Thur,Lunch,4 207 | 205,16.47,3.23,Female,Yes,Thur,Lunch,3 208 | 206,26.59,3.41,Male,Yes,Sat,Dinner,3 209 | 207,38.73,3.0,Male,Yes,Sat,Dinner,4 210 | 208,24.27,2.03,Male,Yes,Sat,Dinner,2 211 | 209,12.76,2.23,Female,Yes,Sat,Dinner,2 212 | 210,30.06,2.0,Male,Yes,Sat,Dinner,3 213 | 211,25.89,5.16,Male,Yes,Sat,Dinner,4 214 | 212,48.33,9.0,Male,No,Sat,Dinner,4 215 | 213,13.27,2.5,Female,Yes,Sat,Dinner,2 216 | 214,28.17,6.5,Female,Yes,Sat,Dinner,3 217 | 215,12.9,1.1,Female,Yes,Sat,Dinner,2 218 | 216,28.15,3.0,Male,Yes,Sat,Dinner,5 219 | 217,11.59,1.5,Male,Yes,Sat,Dinner,2 220 | 218,7.74,1.44,Male,Yes,Sat,Dinner,2 221 | 219,30.14,3.09,Female,Yes,Sat,Dinner,4 222 | 220,12.16,2.2,Male,Yes,Fri,Lunch,2 223 | 221,13.42,3.48,Female,Yes,Fri,Lunch,2 224 | 222,8.58,1.92,Male,Yes,Fri,Lunch,1 225 | 223,15.98,3.0,Female,No,Fri,Lunch,3 226 | 224,13.42,1.58,Male,Yes,Fri,Lunch,2 
227 | 225,16.27,2.5,Female,Yes,Fri,Lunch,2 228 | 226,10.09,2.0,Female,Yes,Fri,Lunch,2 229 | 227,20.45,3.0,Male,No,Sat,Dinner,4 230 | 228,13.28,2.72,Male,No,Sat,Dinner,2 231 | 229,22.12,2.88,Female,Yes,Sat,Dinner,2 232 | 230,24.01,2.0,Male,Yes,Sat,Dinner,4 233 | 231,15.69,3.0,Male,Yes,Sat,Dinner,3 234 | 232,11.61,3.39,Male,No,Sat,Dinner,2 235 | 233,10.77,1.47,Male,No,Sat,Dinner,2 236 | 234,15.53,3.0,Male,Yes,Sat,Dinner,2 237 | 235,10.07,1.25,Male,No,Sat,Dinner,2 238 | 236,12.6,1.0,Male,Yes,Sat,Dinner,2 239 | 237,32.83,1.17,Male,Yes,Sat,Dinner,2 240 | 238,35.83,4.67,Female,No,Sat,Dinner,3 241 | 239,29.03,5.92,Male,No,Sat,Dinner,3 242 | 240,27.18,2.0,Female,Yes,Sat,Dinner,2 243 | 241,22.67,2.0,Male,Yes,Sat,Dinner,2 244 | 242,17.82,1.75,Male,No,Sat,Dinner,2 245 | 243,18.78,3.0,Female,No,Thur,Dinner,2 246 | -------------------------------------------------------------------------------- /02_pandas_tips&tricks/pandas_tips_and_tricks/tips_save.csv: -------------------------------------------------------------------------------- 1 | ,total_bill,tip,sex,smoker,day,time,size 2 | 0,16.99,1.01,Female,No,Sun,Dinner,2 3 | 1,10.34,1.66,Male,No,Sun,Dinner,3 4 | 2,21.01,3.5,Male,No,Sun,Dinner,3 5 | 3,23.68,3.31,Male,No,Sun,Dinner,2 6 | 4,24.59,3.61,Female,No,Sun,Dinner,4 7 | 5,25.29,4.71,Male,No,Sun,Dinner,4 8 | 6,8.77,2.0,Male,No,Sun,Dinner,2 9 | 7,26.88,3.12,Male,No,Sun,Dinner,4 10 | 8,15.04,1.96,Male,No,Sun,Dinner,2 11 | 9,14.78,3.23,Male,No,Sun,Dinner,2 12 | 10,10.27,1.71,Male,No,Sun,Dinner,2 13 | 11,35.26,5.0,Female,No,Sun,Dinner,4 14 | 12,15.42,1.57,Male,No,Sun,Dinner,2 15 | 13,18.43,3.0,Male,No,Sun,Dinner,4 16 | 14,14.83,3.02,Female,No,Sun,Dinner,2 17 | 15,21.58,3.92,Male,No,Sun,Dinner,2 18 | 16,10.33,1.67,Female,No,Sun,Dinner,3 19 | 17,16.29,3.71,Male,No,Sun,Dinner,3 20 | 18,16.97,3.5,Female,No,Sun,Dinner,3 21 | 19,20.65,3.35,Male,No,Sat,Dinner,3 22 | 20,17.92,4.08,Male,No,Sat,Dinner,2 23 | 21,20.29,2.75,Female,No,Sat,Dinner,2 24 | 22,15.77,2.23,Female,No,Sat,Dinner,2 25 | 
23,39.42,7.58,Male,No,Sat,Dinner,4 26 | 24,19.82,3.18,Male,No,Sat,Dinner,2 27 | 25,17.81,2.34,Male,No,Sat,Dinner,4 28 | 26,13.37,2.0,Male,No,Sat,Dinner,2 29 | 27,12.69,2.0,Male,No,Sat,Dinner,2 30 | 28,21.7,4.3,Male,No,Sat,Dinner,2 31 | 29,19.65,3.0,Female,No,Sat,Dinner,2 32 | 30,9.55,1.45,Male,No,Sat,Dinner,2 33 | 31,18.35,2.5,Male,No,Sat,Dinner,4 34 | 32,15.06,3.0,Female,No,Sat,Dinner,2 35 | 33,20.69,2.45,Female,No,Sat,Dinner,4 36 | 34,17.78,3.27,Male,No,Sat,Dinner,2 37 | 35,24.06,3.6,Male,No,Sat,Dinner,3 38 | 36,16.31,2.0,Male,No,Sat,Dinner,3 39 | 37,16.93,3.07,Female,No,Sat,Dinner,3 40 | 38,18.69,2.31,Male,No,Sat,Dinner,3 41 | 39,31.27,5.0,Male,No,Sat,Dinner,3 42 | 40,16.04,2.24,Male,No,Sat,Dinner,3 43 | 41,17.46,2.54,Male,No,Sun,Dinner,2 44 | 42,13.94,3.06,Male,No,Sun,Dinner,2 45 | 43,9.68,1.32,Male,No,Sun,Dinner,2 46 | 44,30.4,5.6,Male,No,Sun,Dinner,4 47 | 45,18.29,3.0,Male,No,Sun,Dinner,2 48 | 46,22.23,5.0,Male,No,Sun,Dinner,2 49 | 47,32.4,6.0,Male,No,Sun,Dinner,4 50 | 48,28.55,2.05,Male,No,Sun,Dinner,3 51 | 49,18.04,3.0,Male,No,Sun,Dinner,2 52 | 50,12.54,2.5,Male,No,Sun,Dinner,2 53 | 51,10.29,2.6,Female,No,Sun,Dinner,2 54 | 52,34.81,5.2,Female,No,Sun,Dinner,4 55 | 53,9.94,1.56,Male,No,Sun,Dinner,2 56 | 54,25.56,4.34,Male,No,Sun,Dinner,4 57 | 55,19.49,3.51,Male,No,Sun,Dinner,2 58 | 56,38.01,3.0,Male,Yes,Sat,Dinner,4 59 | 57,26.41,1.5,Female,No,Sat,Dinner,2 60 | 58,11.24,1.76,Male,Yes,Sat,Dinner,2 61 | 59,48.27,6.73,Male,No,Sat,Dinner,4 62 | 60,20.29,3.21,Male,Yes,Sat,Dinner,2 63 | 61,13.81,2.0,Male,Yes,Sat,Dinner,2 64 | 62,11.02,1.98,Male,Yes,Sat,Dinner,2 65 | 63,18.29,3.76,Male,Yes,Sat,Dinner,4 66 | 64,17.59,2.64,Male,No,Sat,Dinner,3 67 | 65,20.08,3.15,Male,No,Sat,Dinner,3 68 | 66,16.45,2.47,Female,No,Sat,Dinner,2 69 | 67,3.07,1.0,Female,Yes,Sat,Dinner,1 70 | 68,20.23,2.01,Male,No,Sat,Dinner,2 71 | 69,15.01,2.09,Male,Yes,Sat,Dinner,2 72 | 70,12.02,1.97,Male,No,Sat,Dinner,2 73 | 71,17.07,3.0,Female,No,Sat,Dinner,3 74 | 72,26.86,3.14,Female,Yes,Sat,Dinner,2 75 
| 73,25.28,5.0,Female,Yes,Sat,Dinner,2 76 | 74,14.73,2.2,Female,No,Sat,Dinner,2 77 | 75,10.51,1.25,Male,No,Sat,Dinner,2 78 | 76,17.92,3.08,Male,Yes,Sat,Dinner,2 79 | 77,27.2,4.0,Male,No,Thur,Lunch,4 80 | 78,22.76,3.0,Male,No,Thur,Lunch,2 81 | 79,17.29,2.71,Male,No,Thur,Lunch,2 82 | 80,19.44,3.0,Male,Yes,Thur,Lunch,2 83 | 81,16.66,3.4,Male,No,Thur,Lunch,2 84 | 82,10.07,1.83,Female,No,Thur,Lunch,1 85 | 83,32.68,5.0,Male,Yes,Thur,Lunch,2 86 | 84,15.98,2.03,Male,No,Thur,Lunch,2 87 | 85,34.83,5.17,Female,No,Thur,Lunch,4 88 | 86,13.03,2.0,Male,No,Thur,Lunch,2 89 | 87,18.28,4.0,Male,No,Thur,Lunch,2 90 | 88,24.71,5.85,Male,No,Thur,Lunch,2 91 | 89,21.16,3.0,Male,No,Thur,Lunch,2 92 | 90,28.97,3.0,Male,Yes,Fri,Dinner,2 93 | 91,22.49,3.5,Male,No,Fri,Dinner,2 94 | 92,5.75,1.0,Female,Yes,Fri,Dinner,2 95 | 93,16.32,4.3,Female,Yes,Fri,Dinner,2 96 | 94,22.75,3.25,Female,No,Fri,Dinner,2 97 | 95,40.17,4.73,Male,Yes,Fri,Dinner,4 98 | 96,27.28,4.0,Male,Yes,Fri,Dinner,2 99 | 97,12.03,1.5,Male,Yes,Fri,Dinner,2 100 | 98,21.01,3.0,Male,Yes,Fri,Dinner,2 101 | 99,12.46,1.5,Male,No,Fri,Dinner,2 102 | 100,11.35,2.5,Female,Yes,Fri,Dinner,2 103 | 101,15.38,3.0,Female,Yes,Fri,Dinner,2 104 | 102,44.3,2.5,Female,Yes,Sat,Dinner,3 105 | 103,22.42,3.48,Female,Yes,Sat,Dinner,2 106 | 104,20.92,4.08,Female,No,Sat,Dinner,2 107 | 105,15.36,1.64,Male,Yes,Sat,Dinner,2 108 | 106,20.49,4.06,Male,Yes,Sat,Dinner,2 109 | 107,25.21,4.29,Male,Yes,Sat,Dinner,2 110 | 108,18.24,3.76,Male,No,Sat,Dinner,2 111 | 109,14.31,4.0,Female,Yes,Sat,Dinner,2 112 | 110,14.0,3.0,Male,No,Sat,Dinner,2 113 | 111,7.25,1.0,Female,No,Sat,Dinner,1 114 | 112,38.07,4.0,Male,No,Sun,Dinner,3 115 | 113,23.95,2.55,Male,No,Sun,Dinner,2 116 | 114,25.71,4.0,Female,No,Sun,Dinner,3 117 | 115,17.31,3.5,Female,No,Sun,Dinner,2 118 | 116,29.93,5.07,Male,No,Sun,Dinner,4 119 | 117,10.65,1.5,Female,No,Thur,Lunch,2 120 | 118,12.43,1.8,Female,No,Thur,Lunch,2 121 | 119,24.08,2.92,Female,No,Thur,Lunch,4 122 | 120,11.69,2.31,Male,No,Thur,Lunch,2 123 | 
121,13.42,1.68,Female,No,Thur,Lunch,2 124 | 122,14.26,2.5,Male,No,Thur,Lunch,2 125 | 123,15.95,2.0,Male,No,Thur,Lunch,2 126 | 124,12.48,2.52,Female,No,Thur,Lunch,2 127 | 125,29.8,4.2,Female,No,Thur,Lunch,6 128 | 126,8.52,1.48,Male,No,Thur,Lunch,2 129 | 127,14.52,2.0,Female,No,Thur,Lunch,2 130 | 128,11.38,2.0,Female,No,Thur,Lunch,2 131 | 129,22.82,2.18,Male,No,Thur,Lunch,3 132 | 130,19.08,1.5,Male,No,Thur,Lunch,2 133 | 131,20.27,2.83,Female,No,Thur,Lunch,2 134 | 132,11.17,1.5,Female,No,Thur,Lunch,2 135 | 133,12.26,2.0,Female,No,Thur,Lunch,2 136 | 134,18.26,3.25,Female,No,Thur,Lunch,2 137 | 135,8.51,1.25,Female,No,Thur,Lunch,2 138 | 136,10.33,2.0,Female,No,Thur,Lunch,2 139 | 137,14.15,2.0,Female,No,Thur,Lunch,2 140 | 138,16.0,2.0,Male,Yes,Thur,Lunch,2 141 | 139,13.16,2.75,Female,No,Thur,Lunch,2 142 | 140,17.47,3.5,Female,No,Thur,Lunch,2 143 | 141,34.3,6.7,Male,No,Thur,Lunch,6 144 | 142,41.19,5.0,Male,No,Thur,Lunch,5 145 | 143,27.05,5.0,Female,No,Thur,Lunch,6 146 | 144,16.43,2.3,Female,No,Thur,Lunch,2 147 | 145,8.35,1.5,Female,No,Thur,Lunch,2 148 | 146,18.64,1.36,Female,No,Thur,Lunch,3 149 | 147,11.87,1.63,Female,No,Thur,Lunch,2 150 | 148,9.78,1.73,Male,No,Thur,Lunch,2 151 | 149,7.51,2.0,Male,No,Thur,Lunch,2 152 | 150,14.07,2.5,Male,No,Sun,Dinner,2 153 | 151,13.13,2.0,Male,No,Sun,Dinner,2 154 | 152,17.26,2.74,Male,No,Sun,Dinner,3 155 | 153,24.55,2.0,Male,No,Sun,Dinner,4 156 | 154,19.77,2.0,Male,No,Sun,Dinner,4 157 | 155,29.85,5.14,Female,No,Sun,Dinner,5 158 | 156,48.17,5.0,Male,No,Sun,Dinner,6 159 | 157,25.0,3.75,Female,No,Sun,Dinner,4 160 | 158,13.39,2.61,Female,No,Sun,Dinner,2 161 | 159,16.49,2.0,Male,No,Sun,Dinner,4 162 | 160,21.5,3.5,Male,No,Sun,Dinner,4 163 | 161,12.66,2.5,Male,No,Sun,Dinner,2 164 | 162,16.21,2.0,Female,No,Sun,Dinner,3 165 | 163,13.81,2.0,Male,No,Sun,Dinner,2 166 | 164,17.51,3.0,Female,Yes,Sun,Dinner,2 167 | 165,24.52,3.48,Male,No,Sun,Dinner,3 168 | 166,20.76,2.24,Male,No,Sun,Dinner,2 169 | 167,31.71,4.5,Male,No,Sun,Dinner,4 170 | 
168,10.59,1.61,Female,Yes,Sat,Dinner,2 171 | 169,10.63,2.0,Female,Yes,Sat,Dinner,2 172 | 170,50.81,10.0,Male,Yes,Sat,Dinner,3 173 | 171,15.81,3.16,Male,Yes,Sat,Dinner,2 174 | 172,7.25,5.15,Male,Yes,Sun,Dinner,2 175 | 173,31.85,3.18,Male,Yes,Sun,Dinner,2 176 | 174,16.82,4.0,Male,Yes,Sun,Dinner,2 177 | 175,32.9,3.11,Male,Yes,Sun,Dinner,2 178 | 176,17.89,2.0,Male,Yes,Sun,Dinner,2 179 | 177,14.48,2.0,Male,Yes,Sun,Dinner,2 180 | 178,9.6,4.0,Female,Yes,Sun,Dinner,2 181 | 179,34.63,3.55,Male,Yes,Sun,Dinner,2 182 | 180,34.65,3.68,Male,Yes,Sun,Dinner,4 183 | 181,23.33,5.65,Male,Yes,Sun,Dinner,2 184 | 182,45.35,3.5,Male,Yes,Sun,Dinner,3 185 | 183,23.17,6.5,Male,Yes,Sun,Dinner,4 186 | 184,40.55,3.0,Male,Yes,Sun,Dinner,2 187 | 185,20.69,5.0,Male,No,Sun,Dinner,5 188 | 186,20.9,3.5,Female,Yes,Sun,Dinner,3 189 | 187,30.46,2.0,Male,Yes,Sun,Dinner,5 190 | 188,18.15,3.5,Female,Yes,Sun,Dinner,3 191 | 189,23.1,4.0,Male,Yes,Sun,Dinner,3 192 | 190,15.69,1.5,Male,Yes,Sun,Dinner,2 193 | 191,19.81,4.19,Female,Yes,Thur,Lunch,2 194 | 192,28.44,2.56,Male,Yes,Thur,Lunch,2 195 | 193,15.48,2.02,Male,Yes,Thur,Lunch,2 196 | 194,16.58,4.0,Male,Yes,Thur,Lunch,2 197 | 195,7.56,1.44,Male,No,Thur,Lunch,2 198 | 196,10.34,2.0,Male,Yes,Thur,Lunch,2 199 | 197,43.11,5.0,Female,Yes,Thur,Lunch,4 200 | 198,13.0,2.0,Female,Yes,Thur,Lunch,2 201 | 199,13.51,2.0,Male,Yes,Thur,Lunch,2 202 | 200,18.71,4.0,Male,Yes,Thur,Lunch,3 203 | 201,12.74,2.01,Female,Yes,Thur,Lunch,2 204 | 202,13.0,2.0,Female,Yes,Thur,Lunch,2 205 | 203,16.4,2.5,Female,Yes,Thur,Lunch,2 206 | 204,20.53,4.0,Male,Yes,Thur,Lunch,4 207 | 205,16.47,3.23,Female,Yes,Thur,Lunch,3 208 | 206,26.59,3.41,Male,Yes,Sat,Dinner,3 209 | 207,38.73,3.0,Male,Yes,Sat,Dinner,4 210 | 208,24.27,2.03,Male,Yes,Sat,Dinner,2 211 | 209,12.76,2.23,Female,Yes,Sat,Dinner,2 212 | 210,30.06,2.0,Male,Yes,Sat,Dinner,3 213 | 211,25.89,5.16,Male,Yes,Sat,Dinner,4 214 | 212,48.33,9.0,Male,No,Sat,Dinner,4 215 | 213,13.27,2.5,Female,Yes,Sat,Dinner,2 216 | 
214,28.17,6.5,Female,Yes,Sat,Dinner,3 217 | 215,12.9,1.1,Female,Yes,Sat,Dinner,2 218 | 216,28.15,3.0,Male,Yes,Sat,Dinner,5 219 | 217,11.59,1.5,Male,Yes,Sat,Dinner,2 220 | 218,7.74,1.44,Male,Yes,Sat,Dinner,2 221 | 219,30.14,3.09,Female,Yes,Sat,Dinner,4 222 | 220,12.16,2.2,Male,Yes,Fri,Lunch,2 223 | 221,13.42,3.48,Female,Yes,Fri,Lunch,2 224 | 222,8.58,1.92,Male,Yes,Fri,Lunch,1 225 | 223,15.98,3.0,Female,No,Fri,Lunch,3 226 | 224,13.42,1.58,Male,Yes,Fri,Lunch,2 227 | 225,16.27,2.5,Female,Yes,Fri,Lunch,2 228 | 226,10.09,2.0,Female,Yes,Fri,Lunch,2 229 | 227,20.45,3.0,Male,No,Sat,Dinner,4 230 | 228,13.28,2.72,Male,No,Sat,Dinner,2 231 | 229,22.12,2.88,Female,Yes,Sat,Dinner,2 232 | 230,24.01,2.0,Male,Yes,Sat,Dinner,4 233 | 231,15.69,3.0,Male,Yes,Sat,Dinner,3 234 | 232,11.61,3.39,Male,No,Sat,Dinner,2 235 | 233,10.77,1.47,Male,No,Sat,Dinner,2 236 | 234,15.53,3.0,Male,Yes,Sat,Dinner,2 237 | 235,10.07,1.25,Male,No,Sat,Dinner,2 238 | 236,12.6,1.0,Male,Yes,Sat,Dinner,2 239 | 237,32.83,1.17,Male,Yes,Sat,Dinner,2 240 | 238,35.83,4.67,Female,No,Sat,Dinner,3 241 | 239,29.03,5.92,Male,No,Sat,Dinner,3 242 | 240,27.18,2.0,Female,Yes,Sat,Dinner,2 243 | 241,22.67,2.0,Male,Yes,Sat,Dinner,2 244 | 242,17.82,1.75,Male,No,Sat,Dinner,2 245 | 243,18.78,3.0,Female,No,Thur,Dinner,2 246 | -------------------------------------------------------------------------------- /02_pandas_tips&tricks/Solutions/07_grouping.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Ex - GroupBy" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/drinks.csv). 
" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "### Step 3. Assign it to a variable called drinks." 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 4, 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "data": { 40 | "text/html": [ 41 | "
\n", 42 | "\n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | "
countrybeer_servingsspirit_servingswine_servingstotal_litres_of_pure_alcoholcontinent
0Afghanistan0000.0AS
1Albania89132544.9EU
2Algeria250140.7AF
3Andorra24513831212.4EU
4Angola21757455.9AF
\n", 102 | "
" 103 | ], 104 | "text/plain": [ 105 | " country beer_servings spirit_servings wine_servings \\\n", 106 | "0 Afghanistan 0 0 0 \n", 107 | "1 Albania 89 132 54 \n", 108 | "2 Algeria 25 0 14 \n", 109 | "3 Andorra 245 138 312 \n", 110 | "4 Angola 217 57 45 \n", 111 | "\n", 112 | " total_litres_of_pure_alcohol continent \n", 113 | "0 0.0 AS \n", 114 | "1 4.9 EU \n", 115 | "2 0.7 AF \n", 116 | "3 12.4 EU \n", 117 | "4 5.9 AF " 118 | ] 119 | }, 120 | "execution_count": 4, 121 | "metadata": {}, 122 | "output_type": "execute_result" 123 | } 124 | ], 125 | "source": [ 126 | "drinks = pd.read_csv('https://raw.githubusercontent.com/justmarkham/DAT8/master/data/drinks.csv')\n", 127 | "drinks.head()" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "### Step 4. Which continent drinks more beer on average?" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 6, 140 | "metadata": {}, 141 | "outputs": [ 142 | { 143 | "data": { 144 | "text/plain": [ 145 | "continent\n", 146 | "AF 61.471698\n", 147 | "AS 37.045455\n", 148 | "EU 193.777778\n", 149 | "OC 89.687500\n", 150 | "SA 175.083333\n", 151 | "Name: beer_servings, dtype: float64" 152 | ] 153 | }, 154 | "execution_count": 6, 155 | "metadata": {}, 156 | "output_type": "execute_result" 157 | } 158 | ], 159 | "source": [ 160 | "drinks.groupby('continent').beer_servings.mean()" 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": {}, 166 | "source": [ 167 | "### Step 5. For each continent print the statistics for wine consumption." 
168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 9, 173 | "metadata": {}, 174 | "outputs": [ 175 | { 176 | "data": { 177 | "text/plain": [ 178 | "continent \n", 179 | "AF count 53.000000\n", 180 | " mean 16.264151\n", 181 | " std 38.846419\n", 182 | " min 0.000000\n", 183 | " 25% 1.000000\n", 184 | " 50% 2.000000\n", 185 | " 75% 13.000000\n", 186 | " max 233.000000\n", 187 | "AS count 44.000000\n", 188 | " mean 9.068182\n", 189 | " std 21.667034\n", 190 | " min 0.000000\n", 191 | " 25% 0.000000\n", 192 | " 50% 1.000000\n", 193 | " 75% 8.000000\n", 194 | " max 123.000000\n", 195 | "EU count 45.000000\n", 196 | " mean 142.222222\n", 197 | " std 97.421738\n", 198 | " min 0.000000\n", 199 | " 25% 59.000000\n", 200 | " 50% 128.000000\n", 201 | " 75% 195.000000\n", 202 | " max 370.000000\n", 203 | "OC count 16.000000\n", 204 | " mean 35.625000\n", 205 | " std 64.555790\n", 206 | " min 0.000000\n", 207 | " 25% 1.000000\n", 208 | " 50% 8.500000\n", 209 | " 75% 23.250000\n", 210 | " max 212.000000\n", 211 | "SA count 12.000000\n", 212 | " mean 62.416667\n", 213 | " std 88.620189\n", 214 | " min 1.000000\n", 215 | " 25% 3.000000\n", 216 | " 50% 12.000000\n", 217 | " 75% 98.500000\n", 218 | " max 221.000000\n", 219 | "dtype: float64" 220 | ] 221 | }, 222 | "execution_count": 9, 223 | "metadata": {}, 224 | "output_type": "execute_result" 225 | } 226 | ], 227 | "source": [ 228 | "drinks.groupby('continent').wine_servings.describe()" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 | "source": [ 235 | "### Step 6. Print the mean alcohol consumption per continent for every column" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 10, 241 | "metadata": {}, 242 | "outputs": [ 243 | { 244 | "data": { 245 | "text/html": [ 246 | "
\n", 247 | "\n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | "
beer_servingsspirit_servingswine_servingstotal_litres_of_pure_alcohol
continent
AF61.47169816.33962316.2641513.007547
AS37.04545560.8409099.0681822.170455
EU193.777778132.555556142.2222228.617778
OC89.68750058.43750035.6250003.381250
SA175.083333114.75000062.4166676.308333
\n", 302 | "
" 303 | ], 304 | "text/plain": [ 305 | " beer_servings spirit_servings wine_servings \\\n", 306 | "continent \n", 307 | "AF 61.471698 16.339623 16.264151 \n", 308 | "AS 37.045455 60.840909 9.068182 \n", 309 | "EU 193.777778 132.555556 142.222222 \n", 310 | "OC 89.687500 58.437500 35.625000 \n", 311 | "SA 175.083333 114.750000 62.416667 \n", 312 | "\n", 313 | " total_litres_of_pure_alcohol \n", 314 | "continent \n", 315 | "AF 3.007547 \n", 316 | "AS 2.170455 \n", 317 | "EU 8.617778 \n", 318 | "OC 3.381250 \n", 319 | "SA 6.308333 " 320 | ] 321 | }, 322 | "execution_count": 10, 323 | "metadata": {}, 324 | "output_type": "execute_result" 325 | } 326 | ], 327 | "source": [ 328 | "drinks.groupby('continent').mean()" 329 | ] 330 | }, 331 | { 332 | "cell_type": "markdown", 333 | "metadata": {}, 334 | "source": [ 335 | "### Step 7. Print the median alcohol consumption per continent for every column" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": 14, 341 | "metadata": {}, 342 | "outputs": [ 343 | { 344 | "data": { 345 | "text/html": [ 346 | "
\n", 347 | "\n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | "
beer_servingsspirit_servingswine_servingstotal_litres_of_pure_alcohol
continent
AF32.03.02.02.30
AS17.516.01.01.20
EU219.0122.0128.010.00
OC52.537.08.51.75
SA162.5108.512.06.85
\n", 402 | "
" 403 | ], 404 | "text/plain": [ 405 | " beer_servings spirit_servings wine_servings \\\n", 406 | "continent \n", 407 | "AF 32.0 3.0 2.0 \n", 408 | "AS 17.5 16.0 1.0 \n", 409 | "EU 219.0 122.0 128.0 \n", 410 | "OC 52.5 37.0 8.5 \n", 411 | "SA 162.5 108.5 12.0 \n", 412 | "\n", 413 | " total_litres_of_pure_alcohol \n", 414 | "continent \n", 415 | "AF 2.30 \n", 416 | "AS 1.20 \n", 417 | "EU 10.00 \n", 418 | "OC 1.75 \n", 419 | "SA 6.85 " 420 | ] 421 | }, 422 | "execution_count": 14, 423 | "metadata": {}, 424 | "output_type": "execute_result" 425 | } 426 | ], 427 | "source": [ 428 | "drinks.groupby('continent').median()" 429 | ] 430 | }, 431 | { 432 | "cell_type": "markdown", 433 | "metadata": {}, 434 | "source": [ 435 | "### Step 8. Print the mean, min and max values for spirit consumption.\n", 436 | "#### This time output a DataFrame" 437 | ] 438 | }, 439 | { 440 | "cell_type": "code", 441 | "execution_count": 15, 442 | "metadata": {}, 443 | "outputs": [ 444 | { 445 | "data": { 446 | "text/html": [ 447 | "
\n", 448 | "\n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | "
meanminmax
continent
AF16.3396230152
AS60.8409090326
EU132.5555560373
OC58.4375000254
SA114.75000025302
\n", 496 | "
" 497 | ], 498 | "text/plain": [ 499 | " mean min max\n", 500 | "continent \n", 501 | "AF 16.339623 0 152\n", 502 | "AS 60.840909 0 326\n", 503 | "EU 132.555556 0 373\n", 504 | "OC 58.437500 0 254\n", 505 | "SA 114.750000 25 302" 506 | ] 507 | }, 508 | "execution_count": 15, 509 | "metadata": {}, 510 | "output_type": "execute_result" 511 | } 512 | ], 513 | "source": [ 514 | "drinks.groupby('continent').spirit_servings.agg(['mean', 'min', 'max'])" 515 | ] 516 | } 517 | ], 518 | "metadata": { 519 | "kernelspec": { 520 | "display_name": "Python 3.9.7 ('base')", 521 | "language": "python", 522 | "name": "python3" 523 | }, 524 | "language_info": { 525 | "codemirror_mode": { 526 | "name": "ipython", 527 | "version": 3 528 | }, 529 | "file_extension": ".py", 530 | "mimetype": "text/x-python", 531 | "name": "python", 532 | "nbconvert_exporter": "python", 533 | "pygments_lexer": "ipython3", 534 | "version": "3.9.7" 535 | }, 536 | "toc": { 537 | "base_numbering": 1, 538 | "nav_menu": {}, 539 | "number_sections": true, 540 | "sideBar": true, 541 | "skip_h1_title": false, 542 | "title_cell": "Table of Contents", 543 | "title_sidebar": "Contents", 544 | "toc_cell": false, 545 | "toc_position": {}, 546 | "toc_section_display": true, 547 | "toc_window_display": false 548 | }, 549 | "vscode": { 550 | "interpreter": { 551 | "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6" 552 | } 553 | } 554 | }, 555 | "nbformat": 4, 556 | "nbformat_minor": 1 557 | } 558 | -------------------------------------------------------------------------------- /02_pandas_tips&tricks/Solutions/03_Know_your_Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Ex3 - Getting and knowing your Data" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Step 1. 
Go to https://www.kaggle.com/openfoodfacts/world-food-facts/data" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "### Step 2. Download the dataset to your computer and unzip it." 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 2, 27 | "metadata": { 28 | "collapsed": true 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "import pandas as pd\n", 33 | "import numpy as np" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "### Step 3. Use the tsv file and assign it to a dataframe called food" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 3, 46 | "metadata": {}, 47 | "outputs": [ 48 | { 49 | "name": "stderr", 50 | "output_type": "stream", 51 | "text": [ 52 | "//anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2717: DtypeWarning: Columns (0,3,5,19,20,24,25,26,27,28,36,37,38,39,48) have mixed types. Specify dtype option on import or set low_memory=False.\n", 53 | " interactivity=interactivity, compiler=compiler, result=result)\n" 54 | ] 55 | } 56 | ], 57 | "source": [ 58 | "food = pd.read_csv('~/Desktop/en.openfoodfacts.org.products.tsv', sep='\\t')" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "### Step 4. See the first 5 entries" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 4, 71 | "metadata": {}, 72 | "outputs": [ 73 | { 74 | "data": { 75 | "text/html": [ 76 | "
\n", 77 | "\n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | "
codeurlcreatorcreated_tcreated_datetimelast_modified_tlast_modified_datetimeproduct_namegeneric_namequantity...fruits-vegetables-nuts_100gfruits-vegetables-nuts-estimate_100gcollagen-meat-protein-ratio_100gcocoa_100gchlorophyl_100gcarbon-footprint_100gnutrition-score-fr_100gnutrition-score-uk_100gglycemic-index_100gwater-hardness_100g
03087http://world-en.openfoodfacts.org/product/0000...openfoodfacts-contributors14741038662016-09-17T09:17:46Z14741038932016-09-17T09:18:13ZFarine de blé noirNaN1kg...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
14530http://world-en.openfoodfacts.org/product/0000...usda-ndb-import14890699572017-03-09T14:32:37Z14890699572017-03-09T14:32:37ZBanana Chips Sweetened (Whole)NaNNaN...NaNNaNNaNNaNNaNNaN14.014.0NaNNaN
24559http://world-en.openfoodfacts.org/product/0000...usda-ndb-import14890699572017-03-09T14:32:37Z14890699572017-03-09T14:32:37ZPeanutsNaNNaN...NaNNaNNaNNaNNaNNaN0.00.0NaNNaN
316087http://world-en.openfoodfacts.org/product/0000...usda-ndb-import14890557312017-03-09T10:35:31Z14890557312017-03-09T10:35:31ZOrganic Salted Nut MixNaNNaN...NaNNaNNaNNaNNaNNaN12.012.0NaNNaN
416094http://world-en.openfoodfacts.org/product/0000...usda-ndb-import14890556532017-03-09T10:34:13Z14890556532017-03-09T10:34:13ZOrganic PolentaNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", 227 | "

5 rows × 163 columns

\n", 228 | "
" 229 | ], 230 | "text/plain": [ 231 | " code url \\\n", 232 | "0 3087 http://world-en.openfoodfacts.org/product/0000... \n", 233 | "1 4530 http://world-en.openfoodfacts.org/product/0000... \n", 234 | "2 4559 http://world-en.openfoodfacts.org/product/0000... \n", 235 | "3 16087 http://world-en.openfoodfacts.org/product/0000... \n", 236 | "4 16094 http://world-en.openfoodfacts.org/product/0000... \n", 237 | "\n", 238 | " creator created_t created_datetime \\\n", 239 | "0 openfoodfacts-contributors 1474103866 2016-09-17T09:17:46Z \n", 240 | "1 usda-ndb-import 1489069957 2017-03-09T14:32:37Z \n", 241 | "2 usda-ndb-import 1489069957 2017-03-09T14:32:37Z \n", 242 | "3 usda-ndb-import 1489055731 2017-03-09T10:35:31Z \n", 243 | "4 usda-ndb-import 1489055653 2017-03-09T10:34:13Z \n", 244 | "\n", 245 | " last_modified_t last_modified_datetime product_name \\\n", 246 | "0 1474103893 2016-09-17T09:18:13Z Farine de blé noir \n", 247 | "1 1489069957 2017-03-09T14:32:37Z Banana Chips Sweetened (Whole) \n", 248 | "2 1489069957 2017-03-09T14:32:37Z Peanuts \n", 249 | "3 1489055731 2017-03-09T10:35:31Z Organic Salted Nut Mix \n", 250 | "4 1489055653 2017-03-09T10:34:13Z Organic Polenta \n", 251 | "\n", 252 | " generic_name quantity ... fruits-vegetables-nuts_100g \\\n", 253 | "0 NaN 1kg ... NaN \n", 254 | "1 NaN NaN ... NaN \n", 255 | "2 NaN NaN ... NaN \n", 256 | "3 NaN NaN ... NaN \n", 257 | "4 NaN NaN ... 
NaN \n", 258 | "\n", 259 | " fruits-vegetables-nuts-estimate_100g collagen-meat-protein-ratio_100g \\\n", 260 | "0 NaN NaN \n", 261 | "1 NaN NaN \n", 262 | "2 NaN NaN \n", 263 | "3 NaN NaN \n", 264 | "4 NaN NaN \n", 265 | "\n", 266 | " cocoa_100g chlorophyl_100g carbon-footprint_100g nutrition-score-fr_100g \\\n", 267 | "0 NaN NaN NaN NaN \n", 268 | "1 NaN NaN NaN 14.0 \n", 269 | "2 NaN NaN NaN 0.0 \n", 270 | "3 NaN NaN NaN 12.0 \n", 271 | "4 NaN NaN NaN NaN \n", 272 | "\n", 273 | " nutrition-score-uk_100g glycemic-index_100g water-hardness_100g \n", 274 | "0 NaN NaN NaN \n", 275 | "1 14.0 NaN NaN \n", 276 | "2 0.0 NaN NaN \n", 277 | "3 12.0 NaN NaN \n", 278 | "4 NaN NaN NaN \n", 279 | "\n", 280 | "[5 rows x 163 columns]" 281 | ] 282 | }, 283 | "execution_count": 4, 284 | "metadata": {}, 285 | "output_type": "execute_result" 286 | } 287 | ], 288 | "source": [ 289 | "food.head()" 290 | ] 291 | }, 292 | { 293 | "cell_type": "markdown", 294 | "metadata": {}, 295 | "source": [ 296 | "### Step 5. What is the number of observations in the dataset?" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 5, 302 | "metadata": {}, 303 | "outputs": [ 304 | { 305 | "data": { 306 | "text/plain": [ 307 | "(356027, 163)" 308 | ] 309 | }, 310 | "execution_count": 5, 311 | "metadata": {}, 312 | "output_type": "execute_result" 313 | } 314 | ], 315 | "source": [ 316 | "food.shape #will give you both (observations/rows, columns)" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": 6, 322 | "metadata": {}, 323 | "outputs": [ 324 | { 325 | "data": { 326 | "text/plain": [ 327 | "356027" 328 | ] 329 | }, 330 | "execution_count": 6, 331 | "metadata": {}, 332 | "output_type": "execute_result" 333 | } 334 | ], 335 | "source": [ 336 | "food.shape[0] #will give you only the observations/rows number" 337 | ] 338 | }, 339 | { 340 | "cell_type": "markdown", 341 | "metadata": {}, 342 | "source": [ 343 | "### Step 6. 
What is the number of columns in the dataset?" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": 7, 349 | "metadata": {}, 350 | "outputs": [ 351 | { 352 | "name": "stdout", 353 | "output_type": "stream", 354 | "text": [ 355 | "(356027, 163)\n", 356 | "163\n", 357 | "\n", 358 | "RangeIndex: 356027 entries, 0 to 356026\n", 359 | "Columns: 163 entries, code to water-hardness_100g\n", 360 | "dtypes: float64(107), object(56)\n", 361 | "memory usage: 442.8+ MB\n" 362 | ] 363 | } 364 | ], 365 | "source": [ 366 | "print(food.shape) #will give you both (observations/rows, columns)\n", 367 | "print(food.shape[1]) #will give you only the columns number\n", 368 | "\n", 369 | "#OR\n", 370 | "\n", 371 | "food.info() #Columns: 163 entries" 372 | ] 373 | }, 374 | { 375 | "cell_type": "markdown", 376 | "metadata": {}, 377 | "source": [ 378 | "### Step 7. Print the name of all the columns." 379 | ] 380 | }, 381 | { 382 | "cell_type": "code", 383 | "execution_count": 8, 384 | "metadata": {}, 385 | "outputs": [ 386 | { 387 | "data": { 388 | "text/plain": [ 389 | "Index([u'code', u'url', u'creator', u'created_t', u'created_datetime',\n", 390 | " u'last_modified_t', u'last_modified_datetime', u'product_name',\n", 391 | " u'generic_name', u'quantity',\n", 392 | " ...\n", 393 | " u'fruits-vegetables-nuts_100g', u'fruits-vegetables-nuts-estimate_100g',\n", 394 | " u'collagen-meat-protein-ratio_100g', u'cocoa_100g', u'chlorophyl_100g',\n", 395 | " u'carbon-footprint_100g', u'nutrition-score-fr_100g',\n", 396 | " u'nutrition-score-uk_100g', u'glycemic-index_100g',\n", 397 | " u'water-hardness_100g'],\n", 398 | " dtype='object', length=163)" 399 | ] 400 | }, 401 | "execution_count": 8, 402 | "metadata": {}, 403 | "output_type": "execute_result" 404 | } 405 | ], 406 | "source": [ 407 | "food.columns" 408 | ] 409 | }, 410 | { 411 | "cell_type": "markdown", 412 | "metadata": {}, 413 | "source": [ 414 | "### Step 8. What is the name of 105th column?" 
415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": 9, 420 | "metadata": {}, 421 | "outputs": [ 422 | { 423 | "data": { 424 | "text/plain": [ 425 | "'-glucose_100g'" 426 | ] 427 | }, 428 | "execution_count": 9, 429 | "metadata": {}, 430 | "output_type": "execute_result" 431 | } 432 | ], 433 | "source": [ 434 | "food.columns[104]" 435 | ] 436 | }, 437 | { 438 | "cell_type": "markdown", 439 | "metadata": {}, 440 | "source": [ 441 | "### Step 9. What is the type of the observations of the 105th column?" 442 | ] 443 | }, 444 | { 445 | "cell_type": "code", 446 | "execution_count": 10, 447 | "metadata": {}, 448 | "outputs": [ 449 | { 450 | "data": { 451 | "text/plain": [ 452 | "dtype('float64')" 453 | ] 454 | }, 455 | "execution_count": 10, 456 | "metadata": {}, 457 | "output_type": "execute_result" 458 | } 459 | ], 460 | "source": [ 461 | "food.dtypes['-glucose_100g']" 462 | ] 463 | }, 464 | { 465 | "cell_type": "markdown", 466 | "metadata": {}, 467 | "source": [ 468 | "### Step 10. How is the dataset indexed?" 469 | ] 470 | }, 471 | { 472 | "cell_type": "code", 473 | "execution_count": 11, 474 | "metadata": {}, 475 | "outputs": [ 476 | { 477 | "data": { 478 | "text/plain": [ 479 | "RangeIndex(start=0, stop=356027, step=1)" 480 | ] 481 | }, 482 | "execution_count": 11, 483 | "metadata": {}, 484 | "output_type": "execute_result" 485 | } 486 | ], 487 | "source": [ 488 | "food.index" 489 | ] 490 | }, 491 | { 492 | "cell_type": "markdown", 493 | "metadata": {}, 494 | "source": [ 495 | "### Step 11. What is the product name of the 19th observation?" 
496 | ] 497 | }, 498 | { 499 | "cell_type": "code", 500 | "execution_count": 13, 501 | "metadata": {}, 502 | "outputs": [ 503 | { 504 | "data": { 505 | "text/plain": [ 506 | "'Lotus Organic Brown Jasmine Rice'" 507 | ] 508 | }, 509 | "execution_count": 13, 510 | "metadata": {}, 511 | "output_type": "execute_result" 512 | } 513 | ], 514 | "source": [ 515 | "food.values[18][7]" 516 | ] 517 | } 518 | ], 519 | "metadata": { 520 | "anaconda-cloud": {}, 521 | "kernelspec": { 522 | "display_name": "Python 3.10.5 64-bit (windows store)", 523 | "language": "python", 524 | "name": "python3" 525 | }, 526 | "language_info": { 527 | "codemirror_mode": { 528 | "name": "ipython", 529 | "version": 3 530 | }, 531 | "file_extension": ".py", 532 | "mimetype": "text/x-python", 533 | "name": "python", 534 | "nbconvert_exporter": "python", 535 | "pygments_lexer": "ipython3", 536 | "version": "3.10.5" 537 | }, 538 | "toc": { 539 | "base_numbering": 1, 540 | "nav_menu": {}, 541 | "number_sections": true, 542 | "sideBar": true, 543 | "skip_h1_title": false, 544 | "title_cell": "Table of Contents", 545 | "title_sidebar": "Contents", 546 | "toc_cell": false, 547 | "toc_position": {}, 548 | "toc_section_display": true, 549 | "toc_window_display": false 550 | }, 551 | "vscode": { 552 | "interpreter": { 553 | "hash": "2a50a5f25cb96edfd9254847fbf2f642dc98c240ea8775b6be580f22b3253a59" 554 | } 555 | } 556 | }, 557 | "nbformat": 4, 558 | "nbformat_minor": 1 559 | } 560 | -------------------------------------------------------------------------------- /02_pandas_tips&tricks/Solutions/08_grouping.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Occupation" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 64, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 
21 | "metadata": {}, 22 | "source": [ 23 | "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/u.user). " 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "### Step 3. Assign it to a variable called users." 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 65, 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "data": { 40 | "text/html": [ 41 | "
\n", 42 | "\n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | "
agegenderoccupationzip_code
user_id
124Mtechnician85711
253Fother94043
323Mwriter32067
424Mtechnician43537
533Fother15213
\n", 97 | "
" 98 | ], 99 | "text/plain": [ 100 | " age gender occupation zip_code\n", 101 | "user_id \n", 102 | "1 24 M technician 85711\n", 103 | "2 53 F other 94043\n", 104 | "3 23 M writer 32067\n", 105 | "4 24 M technician 43537\n", 106 | "5 33 F other 15213" 107 | ] 108 | }, 109 | "execution_count": 65, 110 | "metadata": {}, 111 | "output_type": "execute_result" 112 | } 113 | ], 114 | "source": [ 115 | "users = pd.read_table('https://raw.githubusercontent.com/justmarkham/DAT8/master/data/u.user', \n", 116 | " sep='|', index_col='user_id')\n", 117 | "users.head()" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "### Step 4. Discover what is the mean age per occupation" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 66, 130 | "metadata": {}, 131 | "outputs": [ 132 | { 133 | "data": { 134 | "text/plain": [ 135 | "occupation\n", 136 | "administrator 38.746835\n", 137 | "artist 31.392857\n", 138 | "doctor 43.571429\n", 139 | "educator 42.010526\n", 140 | "engineer 36.388060\n", 141 | "entertainment 29.222222\n", 142 | "executive 38.718750\n", 143 | "healthcare 41.562500\n", 144 | "homemaker 32.571429\n", 145 | "lawyer 36.750000\n", 146 | "librarian 40.000000\n", 147 | "marketing 37.615385\n", 148 | "none 26.555556\n", 149 | "other 34.523810\n", 150 | "programmer 33.121212\n", 151 | "retired 63.071429\n", 152 | "salesman 35.666667\n", 153 | "scientist 35.548387\n", 154 | "student 22.081633\n", 155 | "technician 33.148148\n", 156 | "writer 36.311111\n", 157 | "Name: age, dtype: float64" 158 | ] 159 | }, 160 | "execution_count": 66, 161 | "metadata": {}, 162 | "output_type": "execute_result" 163 | } 164 | ], 165 | "source": [ 166 | "users.groupby('occupation').age.mean()" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "### Step 5. 
Discover the Male ratio per occupation and sort it from the most to the least" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 150, 179 | "metadata": {}, 180 | "outputs": [ 181 | { 182 | "data": { 183 | "text/plain": [ 184 | "doctor 100.000000\n", 185 | "engineer 97.014925\n", 186 | "technician 96.296296\n", 187 | "retired 92.857143\n", 188 | "programmer 90.909091\n", 189 | "executive 90.625000\n", 190 | "scientist 90.322581\n", 191 | "entertainment 88.888889\n", 192 | "lawyer 83.333333\n", 193 | "salesman 75.000000\n", 194 | "educator 72.631579\n", 195 | "student 69.387755\n", 196 | "other 65.714286\n", 197 | "marketing 61.538462\n", 198 | "writer 57.777778\n", 199 | "none 55.555556\n", 200 | "administrator 54.430380\n", 201 | "artist 53.571429\n", 202 | "librarian 43.137255\n", 203 | "healthcare 31.250000\n", 204 | "homemaker 14.285714\n", 205 | "dtype: float64" 206 | ] 207 | }, 208 | "execution_count": 150, 209 | "metadata": {}, 210 | "output_type": "execute_result" 211 | } 212 | ], 213 | "source": [ 214 | "# create a function\n", 215 | "def gender_to_numeric(x):\n", 216 | " if x == 'M':\n", 217 | " return 1\n", 218 | " if x == 'F':\n", 219 | " return 0\n", 220 | "\n", 221 | "# apply the function to the gender column and create a new column\n", 222 | "users['gender_n'] = users['gender'].apply(gender_to_numeric)\n", 223 | "\n", 224 | "\n", 225 | "a = users.groupby('occupation').gender_n.sum() / users.occupation.value_counts() * 100 \n", 226 | "\n", 227 | "# sort to the most male \n", 228 | "a.sort_values(ascending = False)" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 | "source": [ 235 | "### Step 6. For each occupation, calculate the minimum and maximum ages" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 151, 241 | "metadata": {}, 242 | "outputs": [ 243 | { 244 | "data": { 245 | "text/html": [ 246 | "
\n", 247 | "\n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | "
minmax
occupation
administrator2170
artist1948
doctor2864
educator2363
engineer2270
entertainment1550
executive2269
healthcare2262
homemaker2050
lawyer2153
librarian2369
marketing2455
none1155
other1364
programmer2063
retired5173
salesman1866
scientist2355
student742
technician2155
writer1860
\n", 368 | "
" 369 | ], 370 | "text/plain": [ 371 | " min max\n", 372 | "occupation \n", 373 | "administrator 21 70\n", 374 | "artist 19 48\n", 375 | "doctor 28 64\n", 376 | "educator 23 63\n", 377 | "engineer 22 70\n", 378 | "entertainment 15 50\n", 379 | "executive 22 69\n", 380 | "healthcare 22 62\n", 381 | "homemaker 20 50\n", 382 | "lawyer 21 53\n", 383 | "librarian 23 69\n", 384 | "marketing 24 55\n", 385 | "none 11 55\n", 386 | "other 13 64\n", 387 | "programmer 20 63\n", 388 | "retired 51 73\n", 389 | "salesman 18 66\n", 390 | "scientist 23 55\n", 391 | "student 7 42\n", 392 | "technician 21 55\n", 393 | "writer 18 60" 394 | ] 395 | }, 396 | "execution_count": 151, 397 | "metadata": {}, 398 | "output_type": "execute_result" 399 | } 400 | ], 401 | "source": [ 402 | "users.groupby('occupation').age.agg(['min', 'max'])" 403 | ] 404 | }, 405 | { 406 | "cell_type": "markdown", 407 | "metadata": {}, 408 | "source": [ 409 | "### Step 7. For each combination of occupation and gender, calculate the mean age" 410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "execution_count": 152, 415 | "metadata": {}, 416 | "outputs": [ 417 | { 418 | "data": { 419 | "text/plain": [ 420 | "occupation gender\n", 421 | "administrator F 40.638889\n", 422 | " M 37.162791\n", 423 | "artist F 30.307692\n", 424 | " M 32.333333\n", 425 | "doctor M 43.571429\n", 426 | "educator F 39.115385\n", 427 | " M 43.101449\n", 428 | "engineer F 29.500000\n", 429 | " M 36.600000\n", 430 | "entertainment F 31.000000\n", 431 | " M 29.000000\n", 432 | "executive F 44.000000\n", 433 | " M 38.172414\n", 434 | "healthcare F 39.818182\n", 435 | " M 45.400000\n", 436 | "homemaker F 34.166667\n", 437 | " M 23.000000\n", 438 | "lawyer F 39.500000\n", 439 | " M 36.200000\n", 440 | "librarian F 40.000000\n", 441 | " M 40.000000\n", 442 | "marketing F 37.200000\n", 443 | " M 37.875000\n", 444 | "none F 36.500000\n", 445 | " M 18.600000\n", 446 | "other F 35.472222\n", 447 | " M 34.028986\n", 448 | "programmer F 
32.166667\n", 449 | " M 33.216667\n", 450 | "retired F 70.000000\n", 451 | " M 62.538462\n", 452 | "salesman F 27.000000\n", 453 | " M 38.555556\n", 454 | "scientist F 28.333333\n", 455 | " M 36.321429\n", 456 | "student F 20.750000\n", 457 | " M 22.669118\n", 458 | "technician F 38.000000\n", 459 | " M 32.961538\n", 460 | "writer F 37.631579\n", 461 | " M 35.346154\n", 462 | "Name: age, dtype: float64" 463 | ] 464 | }, 465 | "execution_count": 152, 466 | "metadata": {}, 467 | "output_type": "execute_result" 468 | } 469 | ], 470 | "source": [ 471 | "users.groupby(['occupation', 'gender']).age.mean()" 472 | ] 473 | }, 474 | { 475 | "cell_type": "markdown", 476 | "metadata": {}, 477 | "source": [ 478 | "### Step 8. For each occupation present the percentage of women and men" 479 | ] 480 | }, 481 | { 482 | "cell_type": "code", 483 | "execution_count": 154, 484 | "metadata": {}, 485 | "outputs": [ 486 | { 487 | "data": { 488 | "text/plain": [ 489 | "occupation gender\n", 490 | "administrator F 45.569620\n", 491 | " M 54.430380\n", 492 | "artist F 46.428571\n", 493 | " M 53.571429\n", 494 | "doctor M 100.000000\n", 495 | "educator F 27.368421\n", 496 | " M 72.631579\n", 497 | "engineer F 2.985075\n", 498 | " M 97.014925\n", 499 | "entertainment F 11.111111\n", 500 | " M 88.888889\n", 501 | "executive F 9.375000\n", 502 | " M 90.625000\n", 503 | "healthcare F 68.750000\n", 504 | " M 31.250000\n", 505 | "homemaker F 85.714286\n", 506 | " M 14.285714\n", 507 | "lawyer F 16.666667\n", 508 | " M 83.333333\n", 509 | "librarian F 56.862745\n", 510 | " M 43.137255\n", 511 | "marketing F 38.461538\n", 512 | " M 61.538462\n", 513 | "none F 44.444444\n", 514 | " M 55.555556\n", 515 | "other F 34.285714\n", 516 | " M 65.714286\n", 517 | "programmer F 9.090909\n", 518 | " M 90.909091\n", 519 | "retired F 7.142857\n", 520 | " M 92.857143\n", 521 | "salesman F 25.000000\n", 522 | " M 75.000000\n", 523 | "scientist F 9.677419\n", 524 | " M 90.322581\n", 525 | "student F 30.612245\n", 
526 | " M 69.387755\n", 527 | "technician F 3.703704\n", 528 | " M 96.296296\n", 529 | "writer F 42.222222\n", 530 | " M 57.777778\n", 531 | "Name: gender, dtype: float64" 532 | ] 533 | }, 534 | "execution_count": 154, 535 | "metadata": {}, 536 | "output_type": "execute_result" 537 | } 538 | ], 539 | "source": [ 540 | "# create a data frame and apply count to gender\n", 541 | "gender_ocup = users.groupby(['occupation', 'gender']).agg({'gender': 'count'})\n", 542 | "\n", 543 | "# create a DataFrame and apply count for each occupation\n", 544 | "occup_count = users.groupby(['occupation']).agg('count')\n", 545 | "\n", 546 | "# divide the gender_ocup per the occup_count and multiply per 100\n", 547 | "occup_gender = gender_ocup.div(occup_count, level = \"occupation\") * 100\n", 548 | "\n", 549 | "# present all rows from the 'gender column'\n", 550 | "occup_gender.loc[: , 'gender']" 551 | ] 552 | } 553 | ], 554 | "metadata": { 555 | "kernelspec": { 556 | "display_name": "Python 3.9.7 ('base')", 557 | "language": "python", 558 | "name": "python3" 559 | }, 560 | "language_info": { 561 | "codemirror_mode": { 562 | "name": "ipython", 563 | "version": 3 564 | }, 565 | "file_extension": ".py", 566 | "mimetype": "text/x-python", 567 | "name": "python", 568 | "nbconvert_exporter": "python", 569 | "pygments_lexer": "ipython3", 570 | "version": "3.9.7" 571 | }, 572 | "toc": { 573 | "base_numbering": 1, 574 | "nav_menu": {}, 575 | "number_sections": true, 576 | "sideBar": true, 577 | "skip_h1_title": false, 578 | "title_cell": "Table of Contents", 579 | "title_sidebar": "Contents", 580 | "toc_cell": false, 581 | "toc_position": {}, 582 | "toc_section_display": true, 583 | "toc_window_display": false 584 | }, 585 | "vscode": { 586 | "interpreter": { 587 | "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6" 588 | } 589 | } 590 | }, 591 | "nbformat": 4, 592 | "nbformat_minor": 1 593 | } 594 | 
-------------------------------------------------------------------------------- /02_pandas_tips&tricks/Solutions/09_grouping.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Regiment" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "### Step 2. Create the DataFrame with the following values:" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 4, 29 | "metadata": { 30 | "collapsed": true 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "raw_data = {'regiment': ['Nighthawks', 'Nighthawks', 'Nighthawks', 'Nighthawks', 'Dragoons', 'Dragoons', 'Dragoons', 'Dragoons', 'Scouts', 'Scouts', 'Scouts', 'Scouts'], \n", 35 | " 'company': ['1st', '1st', '2nd', '2nd', '1st', '1st', '2nd', '2nd','1st', '1st', '2nd', '2nd'], \n", 36 | " 'name': ['Miller', 'Jacobson', 'Ali', 'Milner', 'Cooze', 'Jacon', 'Ryaner', 'Sone', 'Sloan', 'Piger', 'Riani', 'Ali'], \n", 37 | " 'preTestScore': [4, 24, 31, 2, 3, 4, 24, 31, 2, 3, 2, 3],\n", 38 | " 'postTestScore': [25, 94, 57, 62, 70, 25, 94, 57, 62, 70, 62, 70]}" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "### Step 3. Assign it to a variable called regiment.\n", 46 | "#### Don't forget to name each column" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 6, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "data": { 56 | "text/html": [ 57 | "
\n", 58 | "\n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | "
regimentcompanynamepreTestScorepostTestScore
0Nighthawks1stMiller425
1Nighthawks1stJacobson2494
2Nighthawks2ndAli3157
3Nighthawks2ndMilner262
4Dragoons1stCooze370
5Dragoons1stJacon425
6Dragoons2ndRyaner2494
7Dragoons2ndSone3157
8Scouts1stSloan262
9Scouts1stPiger370
10Scouts2ndRiani262
11Scouts2ndAli370
\n", 168 | "
" 169 | ], 170 | "text/plain": [ 171 | " regiment company name preTestScore postTestScore\n", 172 | "0 Nighthawks 1st Miller 4 25\n", 173 | "1 Nighthawks 1st Jacobson 24 94\n", 174 | "2 Nighthawks 2nd Ali 31 57\n", 175 | "3 Nighthawks 2nd Milner 2 62\n", 176 | "4 Dragoons 1st Cooze 3 70\n", 177 | "5 Dragoons 1st Jacon 4 25\n", 178 | "6 Dragoons 2nd Ryaner 24 94\n", 179 | "7 Dragoons 2nd Sone 31 57\n", 180 | "8 Scouts 1st Sloan 2 62\n", 181 | "9 Scouts 1st Piger 3 70\n", 182 | "10 Scouts 2nd Riani 2 62\n", 183 | "11 Scouts 2nd Ali 3 70" 184 | ] 185 | }, 186 | "execution_count": 6, 187 | "metadata": {}, 188 | "output_type": "execute_result" 189 | } 190 | ], 191 | "source": [ 192 | "regiment = pd.DataFrame(raw_data, columns = raw_data.keys())\n", 193 | "regiment" 194 | ] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "metadata": {}, 199 | "source": [ 200 | "### Step 4. What is the mean preTestScore from the regiment Nighthawks? " 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": 26, 206 | "metadata": {}, 207 | "outputs": [ 208 | { 209 | "data": { 210 | "text/html": [ 211 | "
\n", 212 | "\n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | "
preTestScorepostTestScore
regiment
Dragoons15.5061.5
Nighthawks15.2559.5
Scouts2.5066.0
\n", 243 | "
" 244 | ], 245 | "text/plain": [ 246 | " preTestScore postTestScore\n", 247 | "regiment \n", 248 | "Dragoons 15.50 61.5\n", 249 | "Nighthawks 15.25 59.5\n", 250 | "Scouts 2.50 66.0" 251 | ] 252 | }, 253 | "execution_count": 26, 254 | "metadata": {}, 255 | "output_type": "execute_result" 256 | } 257 | ], 258 | "source": [ 259 | "regiment[regiment['regiment'] == 'Nighthawks'].groupby('regiment').mean()" 260 | ] 261 | }, 262 | { 263 | "cell_type": "markdown", 264 | "metadata": {}, 265 | "source": [ 266 | "### Step 5. Present general statistics by company" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": 29, 272 | "metadata": {}, 273 | "outputs": [ 274 | { 275 | "data": { 276 | "text/html": [ 277 | "
\n", 278 | "\n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | "
postTestScorepreTestScore
company
1stcount6.0000006.000000
mean57.6666676.666667
std27.4857548.524475
min25.0000002.000000
25%34.2500003.000000
50%66.0000003.500000
75%70.0000004.000000
max94.00000024.000000
2ndcount6.0000006.000000
mean67.00000015.500000
std14.05702714.652645
min57.0000002.000000
25%58.2500002.250000
50%62.00000013.500000
75%68.00000029.250000
max94.00000031.000000
\n", 378 | "
" 379 | ], 380 | "text/plain": [ 381 | " postTestScore preTestScore\n", 382 | "company \n", 383 | "1st count 6.000000 6.000000\n", 384 | " mean 57.666667 6.666667\n", 385 | " std 27.485754 8.524475\n", 386 | " min 25.000000 2.000000\n", 387 | " 25% 34.250000 3.000000\n", 388 | " 50% 66.000000 3.500000\n", 389 | " 75% 70.000000 4.000000\n", 390 | " max 94.000000 24.000000\n", 391 | "2nd count 6.000000 6.000000\n", 392 | " mean 67.000000 15.500000\n", 393 | " std 14.057027 14.652645\n", 394 | " min 57.000000 2.000000\n", 395 | " 25% 58.250000 2.250000\n", 396 | " 50% 62.000000 13.500000\n", 397 | " 75% 68.000000 29.250000\n", 398 | " max 94.000000 31.000000" 399 | ] 400 | }, 401 | "execution_count": 29, 402 | "metadata": {}, 403 | "output_type": "execute_result" 404 | } 405 | ], 406 | "source": [ 407 | "regiment.groupby('company').describe()" 408 | ] 409 | }, 410 | { 411 | "cell_type": "markdown", 412 | "metadata": {}, 413 | "source": [ 414 | "### Step 6. What is the mean of each company's preTestScore?" 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": 33, 420 | "metadata": {}, 421 | "outputs": [ 422 | { 423 | "data": { 424 | "text/plain": [ 425 | "company\n", 426 | "1st 6.666667\n", 427 | "2nd 15.500000\n", 428 | "Name: preTestScore, dtype: float64" 429 | ] 430 | }, 431 | "execution_count": 33, 432 | "metadata": {}, 433 | "output_type": "execute_result" 434 | } 435 | ], 436 | "source": [ 437 | "regiment.groupby('company').preTestScore.mean()" 438 | ] 439 | }, 440 | { 441 | "cell_type": "markdown", 442 | "metadata": {}, 443 | "source": [ 444 | "### Step 7. 
Present the mean preTestScores grouped by regiment and company" 445 | ] 446 | }, 447 | { 448 | "cell_type": "code", 449 | "execution_count": 35, 450 | "metadata": {}, 451 | "outputs": [ 452 | { 453 | "data": { 454 | "text/plain": [ 455 | "regiment company\n", 456 | "Dragoons 1st 3.5\n", 457 | " 2nd 27.5\n", 458 | "Nighthawks 1st 14.0\n", 459 | " 2nd 16.5\n", 460 | "Scouts 1st 2.5\n", 461 | " 2nd 2.5\n", 462 | "Name: preTestScore, dtype: float64" 463 | ] 464 | }, 465 | "execution_count": 35, 466 | "metadata": {}, 467 | "output_type": "execute_result" 468 | } 469 | ], 470 | "source": [ 471 | "regiment.groupby(['regiment', 'company']).preTestScore.mean()" 472 | ] 473 | }, 474 | { 475 | "cell_type": "markdown", 476 | "metadata": {}, 477 | "source": [ 478 | "### Step 8. Present the mean preTestScores grouped by regiment and company without heirarchical indexing" 479 | ] 480 | }, 481 | { 482 | "cell_type": "code", 483 | "execution_count": 36, 484 | "metadata": {}, 485 | "outputs": [ 486 | { 487 | "data": { 488 | "text/html": [ 489 | "
\n", 490 | "\n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | "
company1st2nd
regiment
Dragoons3.527.5
Nighthawks14.016.5
Scouts2.52.5
\n", 521 | "
" 522 | ], 523 | "text/plain": [ 524 | "company 1st 2nd\n", 525 | "regiment \n", 526 | "Dragoons 3.5 27.5\n", 527 | "Nighthawks 14.0 16.5\n", 528 | "Scouts 2.5 2.5" 529 | ] 530 | }, 531 | "execution_count": 36, 532 | "metadata": {}, 533 | "output_type": "execute_result" 534 | } 535 | ], 536 | "source": [ 537 | "regiment.groupby(['regiment', 'company']).preTestScore.mean().unstack()" 538 | ] 539 | }, 540 | { 541 | "cell_type": "markdown", 542 | "metadata": {}, 543 | "source": [ 544 | "### Step 9. Group the entire dataframe by regiment and company" 545 | ] 546 | }, 547 | { 548 | "cell_type": "code", 549 | "execution_count": 37, 550 | "metadata": {}, 551 | "outputs": [ 552 | { 553 | "data": { 554 | "text/html": [ 555 | "
\n", 556 | "\n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | "
preTestScorepostTestScore
regimentcompany
Dragoons1st3.547.5
2nd27.575.5
Nighthawks1st14.059.5
2nd16.559.5
Scouts1st2.566.0
2nd2.566.0
\n", 607 | "
" 608 | ], 609 | "text/plain": [ 610 | " preTestScore postTestScore\n", 611 | "regiment company \n", 612 | "Dragoons 1st 3.5 47.5\n", 613 | " 2nd 27.5 75.5\n", 614 | "Nighthawks 1st 14.0 59.5\n", 615 | " 2nd 16.5 59.5\n", 616 | "Scouts 1st 2.5 66.0\n", 617 | " 2nd 2.5 66.0" 618 | ] 619 | }, 620 | "execution_count": 37, 621 | "metadata": {}, 622 | "output_type": "execute_result" 623 | } 624 | ], 625 | "source": [ 626 | "regiment.groupby(['regiment', 'company']).mean()" 627 | ] 628 | }, 629 | { 630 | "cell_type": "markdown", 631 | "metadata": {}, 632 | "source": [ 633 | "### Step 10. What is the number of observations in each regiment and company" 634 | ] 635 | }, 636 | { 637 | "cell_type": "code", 638 | "execution_count": 41, 639 | "metadata": {}, 640 | "outputs": [ 641 | { 642 | "data": { 643 | "text/plain": [ 644 | "company regiment \n", 645 | "1st Dragoons 2\n", 646 | " Nighthawks 2\n", 647 | " Scouts 2\n", 648 | "2nd Dragoons 2\n", 649 | " Nighthawks 2\n", 650 | " Scouts 2\n", 651 | "dtype: int64" 652 | ] 653 | }, 654 | "execution_count": 41, 655 | "metadata": {}, 656 | "output_type": "execute_result" 657 | } 658 | ], 659 | "source": [ 660 | "regiment.groupby(['company', 'regiment']).size()" 661 | ] 662 | }, 663 | { 664 | "cell_type": "markdown", 665 | "metadata": {}, 666 | "source": [ 667 | "### Step 11. 
Iterate over a group and print the name and the whole data from the regiment" 668 | ] 669 | }, 670 | { 671 | "cell_type": "code", 672 | "execution_count": 50, 673 | "metadata": {}, 674 | "outputs": [ 675 | { 676 | "name": "stdout", 677 | "output_type": "stream", 678 | "text": [ 679 | "Dragoons\n", 680 | " regiment company name preTestScore postTestScore\n", 681 | "4 Dragoons 1st Cooze 3 70\n", 682 | "5 Dragoons 1st Jacon 4 25\n", 683 | "6 Dragoons 2nd Ryaner 24 94\n", 684 | "7 Dragoons 2nd Sone 31 57\n", 685 | "Nighthawks\n", 686 | " regiment company name preTestScore postTestScore\n", 687 | "0 Nighthawks 1st Miller 4 25\n", 688 | "1 Nighthawks 1st Jacobson 24 94\n", 689 | "2 Nighthawks 2nd Ali 31 57\n", 690 | "3 Nighthawks 2nd Milner 2 62\n", 691 | "Scouts\n", 692 | " regiment company name preTestScore postTestScore\n", 693 | "8 Scouts 1st Sloan 2 62\n", 694 | "9 Scouts 1st Piger 3 70\n", 695 | "10 Scouts 2nd Riani 2 62\n", 696 | "11 Scouts 2nd Ali 3 70\n" 697 | ] 698 | } 699 | ], 700 | "source": [ 701 | "# Group the dataframe by regiment, and for each regiment,\n", 702 | "for name, group in regiment.groupby('regiment'):\n", 703 | " # print the name of the regiment\n", 704 | " print(name)\n", 705 | " # print the data of that regiment\n", 706 | " print(group)" 707 | ] 708 | } 709 | ], 710 | "metadata": { 711 | "kernelspec": { 712 | "display_name": "Python 3.9.7 ('base')", 713 | "language": "python", 714 | "name": "python3" 715 | }, 716 | "language_info": { 717 | "codemirror_mode": { 718 | "name": "ipython", 719 | "version": 3 720 | }, 721 | "file_extension": ".py", 722 | "mimetype": "text/x-python", 723 | "name": "python", 724 | "nbconvert_exporter": "python", 725 | "pygments_lexer": "ipython3", 726 | "version": "3.9.7" 727 | }, 728 | "toc": { 729 | "base_numbering": 1, 730 | "nav_menu": {}, 731 | "number_sections": true, 732 | "sideBar": true, 733 | "skip_h1_title": false, 734 | "title_cell": "Table of Contents", 735 | "title_sidebar": "Contents", 736 | 
"toc_cell": false, 737 | "toc_position": {}, 738 | "toc_section_display": true, 739 | "toc_window_display": false 740 | }, 741 | "vscode": { 742 | "interpreter": { 743 | "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6" 744 | } 745 | } 746 | }, 747 | "nbformat": 4, 748 | "nbformat_minor": 1 749 | } 750 | -------------------------------------------------------------------------------- /02_pandas_tips&tricks/Solutions/01_Know_your_Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Ex1 - Know your Data" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Step 1. Import the necessary libraries" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 34, 20 | "metadata": { 21 | "collapsed": false 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "import pandas as pd\n", 26 | "import numpy as np" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). " 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "### Step 3. Assign it to a variable called chipo." 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 35, 46 | "metadata": { 47 | "collapsed": false 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "url = 'https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv'\n", 52 | "\n", 53 | "chipo = pd.read_csv(url, sep='\\t')" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "### Step 4. 
See the first 10 entries" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 36, 66 | "metadata": { 67 | "collapsed": false, 68 | "scrolled": false 69 | }, 70 | "outputs": [ 71 | { 72 | "data": { 73 | "text/html": [ 74 | "
\n", 75 | "\n", 88 | "\n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | "
order_idquantityitem_namechoice_descriptionitem_price
011Chips and Fresh Tomato SalsaNaN$2.39
111Izze[Clementine]$3.39
211Nantucket Nectar[Apple]$3.39
311Chips and Tomatillo-Green Chili SalsaNaN$2.39
422Chicken Bowl[Tomatillo-Red Chili Salsa (Hot), [Black Beans...$16.98
531Chicken Bowl[Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...$10.98
631Side of ChipsNaN$1.69
741Steak Burrito[Tomatillo Red Chili Salsa, [Fajita Vegetables...$11.75
841Steak Soft Tacos[Tomatillo Green Chili Salsa, [Pinto Beans, Ch...$9.25
951Steak Burrito[Fresh Tomato Salsa, [Rice, Black Beans, Pinto...$9.25
\n", 182 | "
" 183 | ], 184 | "text/plain": [ 185 | " order_id quantity item_name \\\n", 186 | "0 1 1 Chips and Fresh Tomato Salsa \n", 187 | "1 1 1 Izze \n", 188 | "2 1 1 Nantucket Nectar \n", 189 | "3 1 1 Chips and Tomatillo-Green Chili Salsa \n", 190 | "4 2 2 Chicken Bowl \n", 191 | "5 3 1 Chicken Bowl \n", 192 | "6 3 1 Side of Chips \n", 193 | "7 4 1 Steak Burrito \n", 194 | "8 4 1 Steak Soft Tacos \n", 195 | "9 5 1 Steak Burrito \n", 196 | "\n", 197 | " choice_description item_price \n", 198 | "0 NaN $2.39 \n", 199 | "1 [Clementine] $3.39 \n", 200 | "2 [Apple] $3.39 \n", 201 | "3 NaN $2.39 \n", 202 | "4 [Tomatillo-Red Chili Salsa (Hot), [Black Beans... $16.98 \n", 203 | "5 [Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou... $10.98 \n", 204 | "6 NaN $1.69 \n", 205 | "7 [Tomatillo Red Chili Salsa, [Fajita Vegetables... $11.75 \n", 206 | "8 [Tomatillo Green Chili Salsa, [Pinto Beans, Ch... $9.25 \n", 207 | "9 [Fresh Tomato Salsa, [Rice, Black Beans, Pinto... $9.25 " 208 | ] 209 | }, 210 | "execution_count": 36, 211 | "metadata": {}, 212 | "output_type": "execute_result" 213 | } 214 | ], 215 | "source": [ 216 | "chipo.head(10)" 217 | ] 218 | }, 219 | { 220 | "cell_type": "markdown", 221 | "metadata": {}, 222 | "source": [ 223 | "### Step 5. What is the number of observations in the dataset?" 
224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": 37, 229 | "metadata": { 230 | "collapsed": false 231 | }, 232 | "outputs": [ 233 | { 234 | "name": "stdout", 235 | "output_type": "stream", 236 | "text": [ 237 | "The number of observation are: 4622\n" 238 | ] 239 | } 240 | ], 241 | "source": [ 242 | "# Solution 1\n", 243 | "\n", 244 | "print('The number of observation are:' , chipo.shape[0])" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": 38, 250 | "metadata": { 251 | "collapsed": false 252 | }, 253 | "outputs": [ 254 | { 255 | "name": "stdout", 256 | "output_type": "stream", 257 | "text": [ 258 | "\n", 259 | "RangeIndex: 4622 entries, 0 to 4621\n", 260 | "Data columns (total 5 columns):\n", 261 | " # Column Non-Null Count Dtype \n", 262 | "--- ------ -------------- ----- \n", 263 | " 0 order_id 4622 non-null int64 \n", 264 | " 1 quantity 4622 non-null int64 \n", 265 | " 2 item_name 4622 non-null object\n", 266 | " 3 choice_description 3376 non-null object\n", 267 | " 4 item_price 4622 non-null object\n", 268 | "dtypes: int64(2), object(3)\n", 269 | "memory usage: 180.7+ KB\n" 270 | ] 271 | } 272 | ], 273 | "source": [ 274 | "# Solution 2\n", 275 | "chipo.info()\n" 276 | ] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "metadata": {}, 281 | "source": [ 282 | "### Step 6. What is the number of columns in the dataset?" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": 39, 288 | "metadata": { 289 | "collapsed": false 290 | }, 291 | "outputs": [ 292 | { 293 | "data": { 294 | "text/plain": [ 295 | "5" 296 | ] 297 | }, 298 | "execution_count": 39, 299 | "metadata": {}, 300 | "output_type": "execute_result" 301 | } 302 | ], 303 | "source": [ 304 | "chipo.shape[1]" 305 | ] 306 | }, 307 | { 308 | "cell_type": "markdown", 309 | "metadata": {}, 310 | "source": [ 311 | "### Step 7. Print the name of all the columns." 
312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": 40, 317 | "metadata": { 318 | "collapsed": false 319 | }, 320 | "outputs": [ 321 | { 322 | "data": { 323 | "text/plain": [ 324 | "Index(['order_id', 'quantity', 'item_name', 'choice_description',\n", 325 | " 'item_price'],\n", 326 | " dtype='object')" 327 | ] 328 | }, 329 | "execution_count": 40, 330 | "metadata": {}, 331 | "output_type": "execute_result" 332 | } 333 | ], 334 | "source": [ 335 | "chipo.columns" 336 | ] 337 | }, 338 | { 339 | "cell_type": "markdown", 340 | "metadata": {}, 341 | "source": [ 342 | "### Step 8. How is the dataset indexed?" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": 41, 348 | "metadata": { 349 | "collapsed": false 350 | }, 351 | "outputs": [ 352 | { 353 | "data": { 354 | "text/plain": [ 355 | "RangeIndex(start=0, stop=4622, step=1)" 356 | ] 357 | }, 358 | "execution_count": 41, 359 | "metadata": {}, 360 | "output_type": "execute_result" 361 | } 362 | ], 363 | "source": [ 364 | "chipo.index" 365 | ] 366 | }, 367 | { 368 | "cell_type": "markdown", 369 | "metadata": {}, 370 | "source": [ 371 | "### Step 9. Which was the most-ordered item? " 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": 42, 377 | "metadata": { 378 | "collapsed": false 379 | }, 380 | "outputs": [ 381 | { 382 | "data": { 383 | "text/html": [ 384 | "
\n", 385 | "\n", 398 | "\n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | "
order_idquantity
item_name
Chicken Bowl713926761
\n", 419 | "
" 420 | ], 421 | "text/plain": [ 422 | " order_id quantity\n", 423 | "item_name \n", 424 | "Chicken Bowl 713926 761" 425 | ] 426 | }, 427 | "execution_count": 42, 428 | "metadata": {}, 429 | "output_type": "execute_result" 430 | } 431 | ], 432 | "source": [ 433 | "chipo_count = chipo.groupby('item_name').sum()\n", 434 | "chipo_count_more = chipo_count.sort_values(['quantity'], ascending=False)\n", 435 | "chipo_count_more.head(1)" 436 | ] 437 | }, 438 | { 439 | "cell_type": "code", 440 | "execution_count": 43, 441 | "metadata": {}, 442 | "outputs": [ 443 | { 444 | "data": { 445 | "text/html": [ 446 | "
\n", 447 | "\n", 460 | "\n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | "
order_idquantity
item_name
Chicken Bowl713926761
\n", 481 | "
" 482 | ], 483 | "text/plain": [ 484 | " order_id quantity\n", 485 | "item_name \n", 486 | "Chicken Bowl 713926 761" 487 | ] 488 | }, 489 | "execution_count": 43, 490 | "metadata": {}, 491 | "output_type": "execute_result" 492 | } 493 | ], 494 | "source": [ 495 | "# second way\n", 496 | "c = chipo.groupby('item_name').sum()\n", 497 | "c = c.sort_values(['quantity'], ascending=False)\n", 498 | "c.head(1)" 499 | ] 500 | }, 501 | { 502 | "cell_type": "markdown", 503 | "metadata": {}, 504 | "source": [ 505 | "### Step 10. For the most-ordered item, how many items were ordered?" 506 | ] 507 | }, 508 | { 509 | "cell_type": "code", 510 | "execution_count": 44, 511 | "metadata": { 512 | "collapsed": false 513 | }, 514 | "outputs": [ 515 | { 516 | "data": { 517 | "text/html": [ 518 | "
\n", 519 | "\n", 532 | "\n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | "
order_idquantity
item_name
Chicken Bowl713926761
\n", 553 | "
" 554 | ], 555 | "text/plain": [ 556 | " order_id quantity\n", 557 | "item_name \n", 558 | "Chicken Bowl 713926 761" 559 | ] 560 | }, 561 | "execution_count": 44, 562 | "metadata": {}, 563 | "output_type": "execute_result" 564 | } 565 | ], 566 | "source": [ 567 | "c = chipo.groupby('item_name')\n", 568 | "c = c.sum()\n", 569 | "c = c.sort_values(['quantity'], ascending=False)\n", 570 | "c.head(1)" 571 | ] 572 | }, 573 | { 574 | "cell_type": "markdown", 575 | "metadata": {}, 576 | "source": [ 577 | "### Step 11. What was the most ordered item in the choice_description column?" 578 | ] 579 | }, 580 | { 581 | "cell_type": "code", 582 | "execution_count": 45, 583 | "metadata": { 584 | "collapsed": false 585 | }, 586 | "outputs": [ 587 | { 588 | "data": { 589 | "text/html": [ 590 | "
\n", 591 | "\n", 604 | "\n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | "
order_idquantity
choice_description
[Diet Coke]123455159
\n", 625 | "
" 626 | ], 627 | "text/plain": [ 628 | " order_id quantity\n", 629 | "choice_description \n", 630 | "[Diet Coke] 123455 159" 631 | ] 632 | }, 633 | "execution_count": 45, 634 | "metadata": {}, 635 | "output_type": "execute_result" 636 | } 637 | ], 638 | "source": [ 639 | "c = chipo.groupby('choice_description').sum()\n", 640 | "c = c.sort_values(['quantity'], ascending=False)\n", 641 | "c.head(1)" 642 | ] 643 | }, 644 | { 645 | "cell_type": "markdown", 646 | "metadata": {}, 647 | "source": [ 648 | "### Step 12. How many items were orderd in total?" 649 | ] 650 | }, 651 | { 652 | "cell_type": "code", 653 | "execution_count": 46, 654 | "metadata": { 655 | "collapsed": false 656 | }, 657 | "outputs": [ 658 | { 659 | "data": { 660 | "text/plain": [ 661 | "4972" 662 | ] 663 | }, 664 | "execution_count": 46, 665 | "metadata": {}, 666 | "output_type": "execute_result" 667 | } 668 | ], 669 | "source": [ 670 | "total_items_ordered = chipo.quantity.sum()\n", 671 | "total_items_ordered" 672 | ] 673 | }, 674 | { 675 | "cell_type": "markdown", 676 | "metadata": {}, 677 | "source": [ 678 | "### Step 13. Turn the item price into a float" 679 | ] 680 | }, 681 | { 682 | "cell_type": "markdown", 683 | "metadata": {}, 684 | "source": [ 685 | "#### Step 13.a. Check the item price type" 686 | ] 687 | }, 688 | { 689 | "cell_type": "code", 690 | "execution_count": 47, 691 | "metadata": { 692 | "collapsed": false 693 | }, 694 | "outputs": [ 695 | { 696 | "data": { 697 | "text/plain": [ 698 | "dtype('O')" 699 | ] 700 | }, 701 | "execution_count": 47, 702 | "metadata": {}, 703 | "output_type": "execute_result" 704 | } 705 | ], 706 | "source": [ 707 | "chipo.item_price.dtype" 708 | ] 709 | }, 710 | { 711 | "cell_type": "markdown", 712 | "metadata": {}, 713 | "source": [ 714 | "#### Step 13.b. 
Create a lambda function and change the type of item price" 715 | ] 716 | }, 717 | { 718 | "cell_type": "code", 719 | "execution_count": 48, 720 | "metadata": { 721 | "collapsed": true 722 | }, 723 | "outputs": [], 724 | "source": [ 725 | "dollarizer = lambda x: float(x[1:-1])\n", 726 | "chipo.item_price = chipo.item_price.apply(dollarizer)" 727 | ] 728 | }, 729 | { 730 | "cell_type": "markdown", 731 | "metadata": {}, 732 | "source": [ 733 | "#### Step 13.c. Check the item price type" 734 | ] 735 | }, 736 | { 737 | "cell_type": "code", 738 | "execution_count": 49, 739 | "metadata": { 740 | "collapsed": false 741 | }, 742 | "outputs": [ 743 | { 744 | "data": { 745 | "text/plain": [ 746 | "dtype('float64')" 747 | ] 748 | }, 749 | "execution_count": 49, 750 | "metadata": {}, 751 | "output_type": "execute_result" 752 | } 753 | ], 754 | "source": [ 755 | "chipo.item_price.dtype" 756 | ] 757 | }, 758 | { 759 | "cell_type": "markdown", 760 | "metadata": {}, 761 | "source": [ 762 | "### Step 14. How much was the revenue for the period in the dataset?" 763 | ] 764 | }, 765 | { 766 | "cell_type": "code", 767 | "execution_count": 50, 768 | "metadata": { 769 | "collapsed": false 770 | }, 771 | "outputs": [ 772 | { 773 | "name": "stdout", 774 | "output_type": "stream", 775 | "text": [ 776 | "Revenue was: $39237.02\n" 777 | ] 778 | } 779 | ], 780 | "source": [ 781 | "revenue = (chipo['quantity']* chipo['item_price']).sum()\n", 782 | "\n", 783 | "print('Revenue was: $' + str(np.round(revenue,2)))" 784 | ] 785 | }, 786 | { 787 | "cell_type": "markdown", 788 | "metadata": {}, 789 | "source": [ 790 | "### Step 15. How many orders were made in the period?" 
791 | ] 792 | }, 793 | { 794 | "cell_type": "code", 795 | "execution_count": 51, 796 | "metadata": { 797 | "collapsed": false 798 | }, 799 | "outputs": [ 800 | { 801 | "data": { 802 | "text/plain": [ 803 | "1834" 804 | ] 805 | }, 806 | "execution_count": 51, 807 | "metadata": {}, 808 | "output_type": "execute_result" 809 | } 810 | ], 811 | "source": [ 812 | "orders = chipo.order_id.value_counts().count()\n", 813 | "orders" 814 | ] 815 | }, 816 | { 817 | "cell_type": "markdown", 818 | "metadata": {}, 819 | "source": [ 820 | "### Step 16. What is the average revenue amount per order?" 821 | ] 822 | }, 823 | { 824 | "cell_type": "code", 825 | "execution_count": 52, 826 | "metadata": { 827 | "collapsed": false 828 | }, 829 | "outputs": [ 830 | { 831 | "data": { 832 | "text/plain": [ 833 | "21.394231188658654" 834 | ] 835 | }, 836 | "execution_count": 52, 837 | "metadata": {}, 838 | "output_type": "execute_result" 839 | } 840 | ], 841 | "source": [ 842 | "# Solution 1\n", 843 | "# Solution 1\n", 844 | "\n", 845 | "chipo['revenue'] = chipo['quantity'] * chipo['item_price']\n", 846 | "order_grouped = chipo.groupby(by=['order_id']).sum()\n", 847 | "order_grouped.mean()['revenue']\n" 848 | ] 849 | }, 850 | { 851 | "cell_type": "code", 852 | "execution_count": 53, 853 | "metadata": { 854 | "collapsed": false 855 | }, 856 | "outputs": [ 857 | { 858 | "data": { 859 | "text/plain": [ 860 | "21.394231188658654" 861 | ] 862 | }, 863 | "execution_count": 53, 864 | "metadata": {}, 865 | "output_type": "execute_result" 866 | } 867 | ], 868 | "source": [ 869 | "# Solution 2\n", 870 | "\n", 871 | "chipo.groupby(by=['order_id']).sum().mean()['revenue']\n" 872 | ] 873 | }, 874 | { 875 | "cell_type": "markdown", 876 | "metadata": {}, 877 | "source": [ 878 | "### Step 17. How many different items are sold?" 
879 | ] 880 | }, 881 | { 882 | "cell_type": "code", 883 | "execution_count": 54, 884 | "metadata": { 885 | "collapsed": false 886 | }, 887 | "outputs": [ 888 | { 889 | "data": { 890 | "text/plain": [ 891 | "50" 892 | ] 893 | }, 894 | "execution_count": 54, 895 | "metadata": {}, 896 | "output_type": "execute_result" 897 | } 898 | ], 899 | "source": [ 900 | "chipo.item_name.value_counts().count()" 901 | ] 902 | } 903 | ], 904 | "metadata": { 905 | "anaconda-cloud": {}, 906 | "kernelspec": { 907 | "display_name": "Python 3.9.7 ('base')", 908 | "language": "python", 909 | "name": "python3" 910 | }, 911 | "language_info": { 912 | "codemirror_mode": { 913 | "name": "ipython", 914 | "version": 3 915 | }, 916 | "file_extension": ".py", 917 | "mimetype": "text/x-python", 918 | "name": "python", 919 | "nbconvert_exporter": "python", 920 | "pygments_lexer": "ipython3", 921 | "version": "3.9.7" 922 | }, 923 | "vscode": { 924 | "interpreter": { 925 | "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6" 926 | } 927 | } 928 | }, 929 | "nbformat": 4, 930 | "nbformat_minor": 0 931 | } 932 | --------------------------------------------------------------------------------