├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ └── main.yml ├── .gitignore ├── 01_01.ipynb ├── 01_01_begin.ipynb ├── 01_02.ipynb ├── 01_02_begin.ipynb ├── 01_03.ipynb ├── 01_03_begin.ipynb ├── 01_04.ipynb ├── 01_04_begin.ipynb ├── 02_01.ipynb ├── 02_01_begin.ipynb ├── 02_02.ipynb ├── 02_02_begin.ipynb ├── 02_03.ipynb ├── 02_03_begin.ipynb ├── 03_01.ipynb ├── 03_01_begin.ipynb ├── 03_02.ipynb ├── 03_02_begin.ipynb ├── 03_03_begin.ipynb ├── 04_01.ipynb ├── 04_01_begin.ipynb ├── 04_02.ipynb ├── 04_02_begin.ipynb ├── 04_03.ipynb ├── 04_03_begin.ipynb ├── 04_04.ipynb ├── 04_04_begin.ipynb ├── 05_01.ipynb ├── 05_01_begin.ipynb ├── 05_02.ipynb ├── 05_02_begin.ipynb ├── 05_03.ipynb ├── 05_03_begin.ipynb ├── 05_04_begin.ipynb ├── CONTRIBUTING.md ├── LICENSE ├── NOTICE ├── README.md ├── data ├── auto_mpg.csv ├── landslides.csv ├── marketing_campaign.csv └── stock_data.csv └── notebooks ├── .ipynb_checkpoints ├── PCC_Chapter_3-checkpoint.ipynb ├── PCC_Chapter_4-checkpoint.ipynb └── PCC_Chapter_5-checkpoint.ipynb ├── PCC_Chapter_1.ipynb ├── PCC_Chapter_2.ipynb ├── PCC_Chapter_3.ipynb ├── PCC_Chapter_4.ipynb └── PCC_Chapter_5.ipynb /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # Codeowners for these exercise files: 2 | # * (asterisk) denotes "all files and folders" 3 | # Example: * @producer @instructor 4 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 7 | 8 | ## Issue Overview 9 | 10 | 11 | ## Describe your environment 12 | 13 | 14 | ## Steps to Reproduce 15 | 16 | 1. 17 | 2. 18 | 3. 19 | 4. 
20 | 21 | ## Expected Behavior 22 | 23 | 24 | ## Current Behavior 25 | 26 | 27 | ## Possible Solution 28 | 29 | 30 | ## Screenshots / Video 31 | 32 | 33 | ## Related Issues 34 | 35 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: Copy To Branches 2 | on: 3 | workflow_dispatch: 4 | jobs: 5 | copy-to-branches: 6 | runs-on: ubuntu-latest 7 | steps: 8 | - uses: actions/checkout@v2 9 | with: 10 | fetch-depth: 0 11 | - name: Copy To Branches Action 12 | uses: planetoftheweb/copy-to-branches@v1 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | node_modules 3 | .tmp 4 | npm-debug.log 5 | -------------------------------------------------------------------------------- /01_01.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "607e901f", 6 | "metadata": {}, 7 | "source": [ 8 | "## Read data from the CSV file " 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "88ff8385", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "id": "2dea0003", 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/html": [ 30 | "
\n", 31 | "\n", 44 | "\n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | "
MPGCylindersDisplacementHorsepowerWeightAccelerationModel YearOrigin
018.08307.0130.03504.012.070India
115.08350.0165.03693.011.570India
218.08318.0150.03436.011.070India
316.08304.0150.03433.012.070India
417.08302.0140.03449.010.570India
515.08429.0198.04341.010.070India
614.08454.0220.04354.09.070India
714.08440.0215.04312.08.570India
814.08455.0225.04425.010.070India
915.08390.0190.03850.08.570India
\n", 171 | "
" 172 | ], 173 | "text/plain": [ 174 | " MPG Cylinders Displacement Horsepower Weight Acceleration \\\n", 175 | "0 18.0 8 307.0 130.0 3504.0 12.0 \n", 176 | "1 15.0 8 350.0 165.0 3693.0 11.5 \n", 177 | "2 18.0 8 318.0 150.0 3436.0 11.0 \n", 178 | "3 16.0 8 304.0 150.0 3433.0 12.0 \n", 179 | "4 17.0 8 302.0 140.0 3449.0 10.5 \n", 180 | "5 15.0 8 429.0 198.0 4341.0 10.0 \n", 181 | "6 14.0 8 454.0 220.0 4354.0 9.0 \n", 182 | "7 14.0 8 440.0 215.0 4312.0 8.5 \n", 183 | "8 14.0 8 455.0 225.0 4425.0 10.0 \n", 184 | "9 15.0 8 390.0 190.0 3850.0 8.5 \n", 185 | "\n", 186 | " Model Year Origin \n", 187 | "0 70 India \n", 188 | "1 70 India \n", 189 | "2 70 India \n", 190 | "3 70 India \n", 191 | "4 70 India \n", 192 | "5 70 India \n", 193 | "6 70 India \n", 194 | "7 70 India \n", 195 | "8 70 India \n", 196 | "9 70 India " 197 | ] 198 | }, 199 | "execution_count": 2, 200 | "metadata": {}, 201 | "output_type": "execute_result" 202 | } 203 | ], 204 | "source": [ 205 | "df = pd.read_csv(\"./data/auto_mpg.csv\",\n", 206 | " header=0,\n", 207 | " names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower','Weight',\\\n", 208 | " 'Acceleration', 'Model Year', 'Origin'])\n", 209 | "\n", 210 | "df.head(10)" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "id": "658ee65e", 217 | "metadata": {}, 218 | "outputs": [], 219 | "source": [] 220 | } 221 | ], 222 | "metadata": { 223 | "kernelspec": { 224 | "display_name": "Python 3 (ipykernel)", 225 | "language": "python", 226 | "name": "python3" 227 | }, 228 | "language_info": { 229 | "codemirror_mode": { 230 | "name": "ipython", 231 | "version": 3 232 | }, 233 | "file_extension": ".py", 234 | "mimetype": "text/x-python", 235 | "name": "python", 236 | "nbconvert_exporter": "python", 237 | "pygments_lexer": "ipython3", 238 | "version": "3.9.7" 239 | } 240 | }, 241 | "nbformat": 4, 242 | "nbformat_minor": 5 243 | } 244 | -------------------------------------------------------------------------------- 
/01_01_begin.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "607e901f", 6 | "metadata": {}, 7 | "source": [ 8 | "## Read data from the CSV file " 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "88ff8385", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "id": "2b95286f", 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [] 28 | } 29 | ], 30 | "metadata": { 31 | "kernelspec": { 32 | "display_name": "Python 3 (ipykernel)", 33 | "language": "python", 34 | "name": "python3" 35 | }, 36 | "language_info": { 37 | "codemirror_mode": { 38 | "name": "ipython", 39 | "version": 3 40 | }, 41 | "file_extension": ".py", 42 | "mimetype": "text/x-python", 43 | "name": "python", 44 | "nbconvert_exporter": "python", 45 | "pygments_lexer": "ipython3", 46 | "version": "3.9.7" 47 | } 48 | }, 49 | "nbformat": 4, 50 | "nbformat_minor": 5 51 | } 52 | -------------------------------------------------------------------------------- /01_02.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "584cc0c7", 6 | "metadata": {}, 7 | "source": [ 8 | "# Initial exploration of the data" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 3, 14 | "id": "14da7e32", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 4, 24 | "id": "6de6057e", 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/html": [ 30 | "
\n", 31 | "\n", 44 | "\n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | "
MPGCylindersDisplacementHorsepowerWeightAccelerationModel YearOrigin
018.08307.0130.03504.012.070India
115.08350.0165.03693.011.570India
218.08318.0150.03436.011.070India
316.08304.0150.03433.012.070India
417.08302.0140.03449.010.570India
515.08429.0198.04341.010.070India
614.08454.0220.04354.09.070India
714.08440.0215.04312.08.570India
814.08455.0225.04425.010.070India
915.08390.0190.03850.08.570India
\n", 171 | "
" 172 | ], 173 | "text/plain": [ 174 | " MPG Cylinders Displacement Horsepower Weight Acceleration \\\n", 175 | "0 18.0 8 307.0 130.0 3504.0 12.0 \n", 176 | "1 15.0 8 350.0 165.0 3693.0 11.5 \n", 177 | "2 18.0 8 318.0 150.0 3436.0 11.0 \n", 178 | "3 16.0 8 304.0 150.0 3433.0 12.0 \n", 179 | "4 17.0 8 302.0 140.0 3449.0 10.5 \n", 180 | "5 15.0 8 429.0 198.0 4341.0 10.0 \n", 181 | "6 14.0 8 454.0 220.0 4354.0 9.0 \n", 182 | "7 14.0 8 440.0 215.0 4312.0 8.5 \n", 183 | "8 14.0 8 455.0 225.0 4425.0 10.0 \n", 184 | "9 15.0 8 390.0 190.0 3850.0 8.5 \n", 185 | "\n", 186 | " Model Year Origin \n", 187 | "0 70 India \n", 188 | "1 70 India \n", 189 | "2 70 India \n", 190 | "3 70 India \n", 191 | "4 70 India \n", 192 | "5 70 India \n", 193 | "6 70 India \n", 194 | "7 70 India \n", 195 | "8 70 India \n", 196 | "9 70 India " 197 | ] 198 | }, 199 | "execution_count": 4, 200 | "metadata": {}, 201 | "output_type": "execute_result" 202 | } 203 | ], 204 | "source": [ 205 | "df = pd.read_csv(\"./data/auto_mpg.csv\",\n", 206 | " header=0,\n", 207 | " names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower','Weight',\\\n", 208 | " 'Acceleration', 'Model Year', 'Origin'])\n", 209 | "\n", 210 | "df.head(10)" 211 | ] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "id": "2be324e7", 216 | "metadata": {}, 217 | "source": [ 218 | "## Initial exploration of data" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": 5, 224 | "id": "3f63cbd2", 225 | "metadata": {}, 226 | "outputs": [ 227 | { 228 | "data": { 229 | "text/plain": [ 230 | "(398, 8)" 231 | ] 232 | }, 233 | "execution_count": 5, 234 | "metadata": {}, 235 | "output_type": "execute_result" 236 | } 237 | ], 238 | "source": [ 239 | "df.shape" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 7, 245 | "id": "828de12f", 246 | "metadata": {}, 247 | "outputs": [ 248 | { 249 | "data": { 250 | "text/plain": [ 251 | "['MPG',\n", 252 | " 'Cylinders',\n", 253 | " 'Displacement',\n", 254 | " 
'Horsepower',\n", 255 | " 'Weight',\n", 256 | " 'Acceleration',\n", 257 | " 'Model Year',\n", 258 | " 'Origin']" 259 | ] 260 | }, 261 | "execution_count": 7, 262 | "metadata": {}, 263 | "output_type": "execute_result" 264 | } 265 | ], 266 | "source": [ 267 | "list(df.columns)" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": 8, 273 | "id": "f815a4ca", 274 | "metadata": {}, 275 | "outputs": [ 276 | { 277 | "data": { 278 | "text/plain": [ 279 | "RangeIndex(start=0, stop=398, step=1)" 280 | ] 281 | }, 282 | "execution_count": 8, 283 | "metadata": {}, 284 | "output_type": "execute_result" 285 | } 286 | ], 287 | "source": [ 288 | "df.index" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": 9, 294 | "id": "4834ca39", 295 | "metadata": {}, 296 | "outputs": [ 297 | { 298 | "name": "stdout", 299 | "output_type": "stream", 300 | "text": [ 301 | "\n", 302 | "RangeIndex: 398 entries, 0 to 397\n", 303 | "Data columns (total 8 columns):\n", 304 | " # Column Non-Null Count Dtype \n", 305 | "--- ------ -------------- ----- \n", 306 | " 0 MPG 398 non-null float64\n", 307 | " 1 Cylinders 398 non-null int64 \n", 308 | " 2 Displacement 398 non-null float64\n", 309 | " 3 Horsepower 392 non-null float64\n", 310 | " 4 Weight 398 non-null float64\n", 311 | " 5 Acceleration 398 non-null float64\n", 312 | " 6 Model Year 398 non-null int64 \n", 313 | " 7 Origin 398 non-null object \n", 314 | "dtypes: float64(5), int64(2), object(1)\n", 315 | "memory usage: 25.0+ KB\n" 316 | ] 317 | } 318 | ], 319 | "source": [ 320 | "df.info()" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": null, 326 | "id": "c7fe07f9", 327 | "metadata": {}, 328 | "outputs": [], 329 | "source": [] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": null, 334 | "id": "78c15702", 335 | "metadata": {}, 336 | "outputs": [], 337 | "source": [] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": null, 342 | "id": 
"fb5c8bc2", 343 | "metadata": {}, 344 | "outputs": [], 345 | "source": [] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "execution_count": null, 350 | "id": "6d6dcb67", 351 | "metadata": {}, 352 | "outputs": [], 353 | "source": [] 354 | } 355 | ], 356 | "metadata": { 357 | "kernelspec": { 358 | "display_name": "Python 3 (ipykernel)", 359 | "language": "python", 360 | "name": "python3" 361 | }, 362 | "language_info": { 363 | "codemirror_mode": { 364 | "name": "ipython", 365 | "version": 3 366 | }, 367 | "file_extension": ".py", 368 | "mimetype": "text/x-python", 369 | "name": "python", 370 | "nbconvert_exporter": "python", 371 | "pygments_lexer": "ipython3", 372 | "version": "3.9.7" 373 | } 374 | }, 375 | "nbformat": 4, 376 | "nbformat_minor": 5 377 | } 378 | -------------------------------------------------------------------------------- /01_02_begin.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "584cc0c7", 6 | "metadata": {}, 7 | "source": [ 8 | "# Initial exploration of the data" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 3, 14 | "id": "14da7e32", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 4, 24 | "id": "6de6057e", 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/html": [ 30 | "
\n", 31 | "\n", 44 | "\n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | "
MPGCylindersDisplacementHorsepowerWeightAccelerationModel YearOrigin
018.08307.0130.03504.012.070India
115.08350.0165.03693.011.570India
218.08318.0150.03436.011.070India
316.08304.0150.03433.012.070India
417.08302.0140.03449.010.570India
515.08429.0198.04341.010.070India
614.08454.0220.04354.09.070India
714.08440.0215.04312.08.570India
814.08455.0225.04425.010.070India
915.08390.0190.03850.08.570India
\n", 171 | "
" 172 | ], 173 | "text/plain": [ 174 | " MPG Cylinders Displacement Horsepower Weight Acceleration \\\n", 175 | "0 18.0 8 307.0 130.0 3504.0 12.0 \n", 176 | "1 15.0 8 350.0 165.0 3693.0 11.5 \n", 177 | "2 18.0 8 318.0 150.0 3436.0 11.0 \n", 178 | "3 16.0 8 304.0 150.0 3433.0 12.0 \n", 179 | "4 17.0 8 302.0 140.0 3449.0 10.5 \n", 180 | "5 15.0 8 429.0 198.0 4341.0 10.0 \n", 181 | "6 14.0 8 454.0 220.0 4354.0 9.0 \n", 182 | "7 14.0 8 440.0 215.0 4312.0 8.5 \n", 183 | "8 14.0 8 455.0 225.0 4425.0 10.0 \n", 184 | "9 15.0 8 390.0 190.0 3850.0 8.5 \n", 185 | "\n", 186 | " Model Year Origin \n", 187 | "0 70 India \n", 188 | "1 70 India \n", 189 | "2 70 India \n", 190 | "3 70 India \n", 191 | "4 70 India \n", 192 | "5 70 India \n", 193 | "6 70 India \n", 194 | "7 70 India \n", 195 | "8 70 India \n", 196 | "9 70 India " 197 | ] 198 | }, 199 | "execution_count": 4, 200 | "metadata": {}, 201 | "output_type": "execute_result" 202 | } 203 | ], 204 | "source": [ 205 | "df = pd.read_csv(\"./data/auto_mpg.csv\",\n", 206 | " header=0,\n", 207 | " names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower','Weight',\\\n", 208 | " 'Acceleration', 'Model Year', 'Origin'])\n", 209 | "\n", 210 | "df.head(10)" 211 | ] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "id": "2be324e7", 216 | "metadata": {}, 217 | "source": [ 218 | "## Initial exploration of data" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "id": "3e91942d", 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": null, 232 | "id": "124ef9e8", 233 | "metadata": {}, 234 | "outputs": [], 235 | "source": [] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": null, 240 | "id": "718384c7", 241 | "metadata": {}, 242 | "outputs": [], 243 | "source": [] 244 | } 245 | ], 246 | "metadata": { 247 | "kernelspec": { 248 | "display_name": "Python 3 (ipykernel)", 249 | "language": "python", 250 | 
"name": "python3" 251 | }, 252 | "language_info": { 253 | "codemirror_mode": { 254 | "name": "ipython", 255 | "version": 3 256 | }, 257 | "file_extension": ".py", 258 | "mimetype": "text/x-python", 259 | "name": "python", 260 | "nbconvert_exporter": "python", 261 | "pygments_lexer": "ipython3", 262 | "version": "3.9.7" 263 | } 264 | }, 265 | "nbformat": 4, 266 | "nbformat_minor": 5 267 | } 268 | -------------------------------------------------------------------------------- /01_03_begin.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "584cc0c7", 6 | "metadata": {}, 7 | "source": [ 8 | "# Summary Statistics & Distribution" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "14da7e32", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "id": "6de6057e", 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/html": [ 30 | "
\n", 31 | "\n", 44 | "\n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | "
MPGCylindersDisplacementHorsepowerWeightAccelerationModel YearOrigin
018.08307.0130.03504.012.070India
115.08350.0165.03693.011.570India
218.08318.0150.03436.011.070India
316.08304.0150.03433.012.070India
417.08302.0140.03449.010.570India
515.08429.0198.04341.010.070India
614.08454.0220.04354.09.070India
714.08440.0215.04312.08.570India
814.08455.0225.04425.010.070India
915.08390.0190.03850.08.570India
\n", 171 | "
" 172 | ], 173 | "text/plain": [ 174 | " MPG Cylinders Displacement Horsepower Weight Acceleration \\\n", 175 | "0 18.0 8 307.0 130.0 3504.0 12.0 \n", 176 | "1 15.0 8 350.0 165.0 3693.0 11.5 \n", 177 | "2 18.0 8 318.0 150.0 3436.0 11.0 \n", 178 | "3 16.0 8 304.0 150.0 3433.0 12.0 \n", 179 | "4 17.0 8 302.0 140.0 3449.0 10.5 \n", 180 | "5 15.0 8 429.0 198.0 4341.0 10.0 \n", 181 | "6 14.0 8 454.0 220.0 4354.0 9.0 \n", 182 | "7 14.0 8 440.0 215.0 4312.0 8.5 \n", 183 | "8 14.0 8 455.0 225.0 4425.0 10.0 \n", 184 | "9 15.0 8 390.0 190.0 3850.0 8.5 \n", 185 | "\n", 186 | " Model Year Origin \n", 187 | "0 70 India \n", 188 | "1 70 India \n", 189 | "2 70 India \n", 190 | "3 70 India \n", 191 | "4 70 India \n", 192 | "5 70 India \n", 193 | "6 70 India \n", 194 | "7 70 India \n", 195 | "8 70 India \n", 196 | "9 70 India " 197 | ] 198 | }, 199 | "execution_count": 2, 200 | "metadata": {}, 201 | "output_type": "execute_result" 202 | } 203 | ], 204 | "source": [ 205 | "df = pd.read_csv(\"./data/auto_mpg.csv\",\n", 206 | " header=0,\n", 207 | " names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower','Weight',\\\n", 208 | " 'Acceleration', 'Model Year', 'Origin'])\n", 209 | "\n", 210 | "df.head(10)" 211 | ] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "id": "2be324e7", 216 | "metadata": {}, 217 | "source": [ 218 | "## Initial exploration of data" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": 3, 224 | "id": "3f63cbd2", 225 | "metadata": {}, 226 | "outputs": [ 227 | { 228 | "data": { 229 | "text/plain": [ 230 | "(398, 8)" 231 | ] 232 | }, 233 | "execution_count": 3, 234 | "metadata": {}, 235 | "output_type": "execute_result" 236 | } 237 | ], 238 | "source": [ 239 | "df.shape" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 4, 245 | "id": "828de12f", 246 | "metadata": {}, 247 | "outputs": [ 248 | { 249 | "data": { 250 | "text/plain": [ 251 | "['MPG',\n", 252 | " 'Cylinders',\n", 253 | " 'Displacement',\n", 254 | " 
'Horsepower',\n", 255 | " 'Weight',\n", 256 | " 'Acceleration',\n", 257 | " 'Model Year',\n", 258 | " 'Origin']" 259 | ] 260 | }, 261 | "execution_count": 4, 262 | "metadata": {}, 263 | "output_type": "execute_result" 264 | } 265 | ], 266 | "source": [ 267 | "list(df.columns)" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": 5, 273 | "id": "f815a4ca", 274 | "metadata": {}, 275 | "outputs": [ 276 | { 277 | "data": { 278 | "text/plain": [ 279 | "RangeIndex(start=0, stop=398, step=1)" 280 | ] 281 | }, 282 | "execution_count": 5, 283 | "metadata": {}, 284 | "output_type": "execute_result" 285 | } 286 | ], 287 | "source": [ 288 | "df.index" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": 6, 294 | "id": "4834ca39", 295 | "metadata": {}, 296 | "outputs": [ 297 | { 298 | "name": "stdout", 299 | "output_type": "stream", 300 | "text": [ 301 | "\n", 302 | "RangeIndex: 398 entries, 0 to 397\n", 303 | "Data columns (total 8 columns):\n", 304 | " # Column Non-Null Count Dtype \n", 305 | "--- ------ -------------- ----- \n", 306 | " 0 MPG 398 non-null float64\n", 307 | " 1 Cylinders 398 non-null int64 \n", 308 | " 2 Displacement 398 non-null float64\n", 309 | " 3 Horsepower 392 non-null float64\n", 310 | " 4 Weight 398 non-null float64\n", 311 | " 5 Acceleration 398 non-null float64\n", 312 | " 6 Model Year 398 non-null int64 \n", 313 | " 7 Origin 398 non-null object \n", 314 | "dtypes: float64(5), int64(2), object(1)\n", 315 | "memory usage: 25.0+ KB\n" 316 | ] 317 | } 318 | ], 319 | "source": [ 320 | "df.info()" 321 | ] 322 | }, 323 | { 324 | "cell_type": "markdown", 325 | "id": "ab53b08e", 326 | "metadata": {}, 327 | "source": [ 328 | "## Summary Statistics and Distribution" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": null, 334 | "id": "608db0cc", 335 | "metadata": {}, 336 | "outputs": [], 337 | "source": [] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": null, 342 | 
"id": "ca7585dd", 343 | "metadata": {}, 344 | "outputs": [], 345 | "source": [] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "execution_count": null, 350 | "id": "40a24894", 351 | "metadata": {}, 352 | "outputs": [], 353 | "source": [] 354 | } 355 | ], 356 | "metadata": { 357 | "kernelspec": { 358 | "display_name": "Python 3 (ipykernel)", 359 | "language": "python", 360 | "name": "python3" 361 | }, 362 | "language_info": { 363 | "codemirror_mode": { 364 | "name": "ipython", 365 | "version": 3 366 | }, 367 | "file_extension": ".py", 368 | "mimetype": "text/x-python", 369 | "name": "python", 370 | "nbconvert_exporter": "python", 371 | "pygments_lexer": "ipython3", 372 | "version": "3.9.7" 373 | } 374 | }, 375 | "nbformat": 4, 376 | "nbformat_minor": 5 377 | } 378 | -------------------------------------------------------------------------------- /02_01.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "729123f0", 6 | "metadata": {}, 7 | "source": [ 8 | "# Selecting a subset of columns" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "604af38a", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd\n", 19 | "import numpy as np" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "id": "8e197fd2", 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "data": { 30 | "text/html": [ 31 | "
\n", 32 | "\n", 45 | "\n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " 
\n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | "
IDYear_BirthEducationMarital_StatusIncomeKidhomeTeenhomeDt_CustomerRecencyMntWines...NumWebVisitsMonthAcceptedCmp3AcceptedCmp4AcceptedCmp5AcceptedCmp1AcceptedCmp2ComplainZ_CostContactZ_RevenueResponse
055241957GraduationSingle58138.00004-09-201258635...70000003111
121741954GraduationSingle46344.01108-03-20143811...50000003110
241411965GraduationTogether71613.00021-08-201326426...40000003110
361821984GraduationTogether26646.01010-02-20142611...60000003110
453241981PhDMarried58293.01019-01-201494173...50000003110
574461967MasterTogether62513.00109-09-201316520...60000003110
69651971GraduationDivorced55635.00113-11-201234235...60000003110
761771985PhDMarried33454.01008-05-20133276...80000003110
848551974PhDTogether30351.01006-06-20131914...90000003111
958991950PhDTogether5648.01113-03-20146828...201000003110
\n", 315 | "

10 rows × 29 columns

\n", 316 | "
" 317 | ], 318 | "text/plain": [ 319 | " ID Year_Birth Education Marital_Status Income Kidhome Teenhome \\\n", 320 | "0 5524 1957 Graduation Single 58138.0 0 0 \n", 321 | "1 2174 1954 Graduation Single 46344.0 1 1 \n", 322 | "2 4141 1965 Graduation Together 71613.0 0 0 \n", 323 | "3 6182 1984 Graduation Together 26646.0 1 0 \n", 324 | "4 5324 1981 PhD Married 58293.0 1 0 \n", 325 | "5 7446 1967 Master Together 62513.0 0 1 \n", 326 | "6 965 1971 Graduation Divorced 55635.0 0 1 \n", 327 | "7 6177 1985 PhD Married 33454.0 1 0 \n", 328 | "8 4855 1974 PhD Together 30351.0 1 0 \n", 329 | "9 5899 1950 PhD Together 5648.0 1 1 \n", 330 | "\n", 331 | " Dt_Customer Recency MntWines ... NumWebVisitsMonth AcceptedCmp3 \\\n", 332 | "0 04-09-2012 58 635 ... 7 0 \n", 333 | "1 08-03-2014 38 11 ... 5 0 \n", 334 | "2 21-08-2013 26 426 ... 4 0 \n", 335 | "3 10-02-2014 26 11 ... 6 0 \n", 336 | "4 19-01-2014 94 173 ... 5 0 \n", 337 | "5 09-09-2013 16 520 ... 6 0 \n", 338 | "6 13-11-2012 34 235 ... 6 0 \n", 339 | "7 08-05-2013 32 76 ... 8 0 \n", 340 | "8 06-06-2013 19 14 ... 9 0 \n", 341 | "9 13-03-2014 68 28 ... 
20 1 \n", 342 | "\n", 343 | " AcceptedCmp4 AcceptedCmp5 AcceptedCmp1 AcceptedCmp2 Complain \\\n", 344 | "0 0 0 0 0 0 \n", 345 | "1 0 0 0 0 0 \n", 346 | "2 0 0 0 0 0 \n", 347 | "3 0 0 0 0 0 \n", 348 | "4 0 0 0 0 0 \n", 349 | "5 0 0 0 0 0 \n", 350 | "6 0 0 0 0 0 \n", 351 | "7 0 0 0 0 0 \n", 352 | "8 0 0 0 0 0 \n", 353 | "9 0 0 0 0 0 \n", 354 | "\n", 355 | " Z_CostContact Z_Revenue Response \n", 356 | "0 3 11 1 \n", 357 | "1 3 11 0 \n", 358 | "2 3 11 0 \n", 359 | "3 3 11 0 \n", 360 | "4 3 11 0 \n", 361 | "5 3 11 0 \n", 362 | "6 3 11 0 \n", 363 | "7 3 11 0 \n", 364 | "8 3 11 1 \n", 365 | "9 3 11 0 \n", 366 | "\n", 367 | "[10 rows x 29 columns]" 368 | ] 369 | }, 370 | "execution_count": 2, 371 | "metadata": {}, 372 | "output_type": "execute_result" 373 | } 374 | ], 375 | "source": [ 376 | "camp_df = pd.read_csv(\"./data/marketing_campaign.csv\", sep='\\t')\n", 377 | "camp_df.head(10)" 378 | ] 379 | }, 380 | { 381 | "cell_type": "code", 382 | "execution_count": 3, 383 | "id": "cfa03b94", 384 | "metadata": {}, 385 | "outputs": [ 386 | { 387 | "name": "stdout", 388 | "output_type": "stream", 389 | "text": [ 390 | "\n", 391 | "RangeIndex: 2240 entries, 0 to 2239\n", 392 | "Data columns (total 29 columns):\n", 393 | " # Column Non-Null Count Dtype \n", 394 | "--- ------ -------------- ----- \n", 395 | " 0 ID 2240 non-null int64 \n", 396 | " 1 Year_Birth 2240 non-null int64 \n", 397 | " 2 Education 2240 non-null object \n", 398 | " 3 Marital_Status 2240 non-null object \n", 399 | " 4 Income 2216 non-null float64\n", 400 | " 5 Kidhome 2240 non-null int64 \n", 401 | " 6 Teenhome 2240 non-null int64 \n", 402 | " 7 Dt_Customer 2240 non-null object \n", 403 | " 8 Recency 2240 non-null int64 \n", 404 | " 9 MntWines 2240 non-null int64 \n", 405 | " 10 MntFruits 2240 non-null int64 \n", 406 | " 11 MntMeatProducts 2240 non-null int64 \n", 407 | " 12 MntFishProducts 2240 non-null int64 \n", 408 | " 13 MntSweetProducts 2240 non-null int64 \n", 409 | " 14 MntGoldProds 2240 non-null int64 
\n", 410 | " 15 NumDealsPurchases 2240 non-null int64 \n", 411 | " 16 NumWebPurchases 2240 non-null int64 \n", 412 | " 17 NumCatalogPurchases 2240 non-null int64 \n", 413 | " 18 NumStorePurchases 2240 non-null int64 \n", 414 | " 19 NumWebVisitsMonth 2240 non-null int64 \n", 415 | " 20 AcceptedCmp3 2240 non-null int64 \n", 416 | " 21 AcceptedCmp4 2240 non-null int64 \n", 417 | " 22 AcceptedCmp5 2240 non-null int64 \n", 418 | " 23 AcceptedCmp1 2240 non-null int64 \n", 419 | " 24 AcceptedCmp2 2240 non-null int64 \n", 420 | " 25 Complain 2240 non-null int64 \n", 421 | " 26 Z_CostContact 2240 non-null int64 \n", 422 | " 27 Z_Revenue 2240 non-null int64 \n", 423 | " 28 Response 2240 non-null int64 \n", 424 | "dtypes: float64(1), int64(25), object(3)\n", 425 | "memory usage: 507.6+ KB\n" 426 | ] 427 | } 428 | ], 429 | "source": [ 430 | "camp_df.info()" 431 | ] 432 | }, 433 | { 434 | "cell_type": "markdown", 435 | "id": "c0ceb94c", 436 | "metadata": {}, 437 | "source": [ 438 | "## Selecting multiple columns" 439 | ] 440 | }, 441 | { 442 | "cell_type": "code", 443 | "execution_count": 6, 444 | "id": "500dae2d", 445 | "metadata": {}, 446 | "outputs": [], 447 | "source": [ 448 | "mnt_purchases = camp_df[\n", 449 | " [\n", 450 | " 'ID',\n", 451 | " 'MntWines',\n", 452 | " 'MntFruits',\n", 453 | " 'MntMeatProducts',\n", 454 | " 'MntFishProducts',\n", 455 | " 'MntSweetProducts',\n", 456 | " 'MntGoldProds'\n", 457 | " ]\n", 458 | "]" 459 | ] 460 | }, 461 | { 462 | "cell_type": "code", 463 | "execution_count": 7, 464 | "id": "e22535ab", 465 | "metadata": {}, 466 | "outputs": [ 467 | { 468 | "data": { 469 | "text/html": [ 470 | "
\n", 471 | "\n", 484 | "\n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | "
IDMntWinesMntFruitsMntMeatProductsMntFishProductsMntSweetProductsMntGoldProds
05524635885461728888
121741116216
24141426491271112142
36182114201035
4532417343118462715
........................
2235108707094318242118247
22364001406030008
2237727090848217321224
2238823542830214803061
22399405843612121
\n", 610 | "

2240 rows × 7 columns

\n", 611 | "
" 612 | ], 613 | "text/plain": [ 614 | " ID MntWines MntFruits MntMeatProducts MntFishProducts \\\n", 615 | "0 5524 635 88 546 172 \n", 616 | "1 2174 11 1 6 2 \n", 617 | "2 4141 426 49 127 111 \n", 618 | "3 6182 11 4 20 10 \n", 619 | "4 5324 173 43 118 46 \n", 620 | "... ... ... ... ... ... \n", 621 | "2235 10870 709 43 182 42 \n", 622 | "2236 4001 406 0 30 0 \n", 623 | "2237 7270 908 48 217 32 \n", 624 | "2238 8235 428 30 214 80 \n", 625 | "2239 9405 84 3 61 2 \n", 626 | "\n", 627 | " MntSweetProducts MntGoldProds \n", 628 | "0 88 88 \n", 629 | "1 1 6 \n", 630 | "2 21 42 \n", 631 | "3 3 5 \n", 632 | "4 27 15 \n", 633 | "... ... ... \n", 634 | "2235 118 247 \n", 635 | "2236 0 8 \n", 636 | "2237 12 24 \n", 637 | "2238 30 61 \n", 638 | "2239 1 21 \n", 639 | "\n", 640 | "[2240 rows x 7 columns]" 641 | ] 642 | }, 643 | "execution_count": 7, 644 | "metadata": {}, 645 | "output_type": "execute_result" 646 | } 647 | ], 648 | "source": [ 649 | "mnt_purchases" 650 | ] 651 | }, 652 | { 653 | "cell_type": "code", 654 | "execution_count": null, 655 | "id": "c6656273", 656 | "metadata": {}, 657 | "outputs": [], 658 | "source": [] 659 | }, 660 | { 661 | "cell_type": "code", 662 | "execution_count": null, 663 | "id": "967e17f3", 664 | "metadata": {}, 665 | "outputs": [], 666 | "source": [] 667 | }, 668 | { 669 | "cell_type": "code", 670 | "execution_count": null, 671 | "id": "5c817498", 672 | "metadata": {}, 673 | "outputs": [], 674 | "source": [] 675 | }, 676 | { 677 | "cell_type": "code", 678 | "execution_count": null, 679 | "id": "3bce1620", 680 | "metadata": {}, 681 | "outputs": [], 682 | "source": [] 683 | }, 684 | { 685 | "cell_type": "code", 686 | "execution_count": null, 687 | "id": "ca491cf2", 688 | "metadata": {}, 689 | "outputs": [], 690 | "source": [] 691 | } 692 | ], 693 | "metadata": { 694 | "kernelspec": { 695 | "display_name": "Python 3 (ipykernel)", 696 | "language": "python", 697 | "name": "python3" 698 | }, 699 | "language_info": { 700 | "codemirror_mode": { 
701 | "name": "ipython", 702 | "version": 3 703 | }, 704 | "file_extension": ".py", 705 | "mimetype": "text/x-python", 706 | "name": "python", 707 | "nbconvert_exporter": "python", 708 | "pygments_lexer": "ipython3", 709 | "version": "3.9.7" 710 | } 711 | }, 712 | "nbformat": 4, 713 | "nbformat_minor": 5 714 | } 715 | -------------------------------------------------------------------------------- /02_01_begin.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "729123f0", 6 | "metadata": {}, 7 | "source": [ 8 | "# Selecting a subset of columns" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "604af38a", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd\n", 19 | "import numpy as np" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "id": "8e197fd2", 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "data": { 30 | "text/html": [ 31 | "
\n", 32 | "\n", 45 | "\n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " 
\n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | "
IDYear_BirthEducationMarital_StatusIncomeKidhomeTeenhomeDt_CustomerRecencyMntWines...NumWebVisitsMonthAcceptedCmp3AcceptedCmp4AcceptedCmp5AcceptedCmp1AcceptedCmp2ComplainZ_CostContactZ_RevenueResponse
055241957GraduationSingle58138.00004-09-201258635...70000003111
121741954GraduationSingle46344.01108-03-20143811...50000003110
241411965GraduationTogether71613.00021-08-201326426...40000003110
361821984GraduationTogether26646.01010-02-20142611...60000003110
453241981PhDMarried58293.01019-01-201494173...50000003110
574461967MasterTogether62513.00109-09-201316520...60000003110
69651971GraduationDivorced55635.00113-11-201234235...60000003110
761771985PhDMarried33454.01008-05-20133276...80000003110
848551974PhDTogether30351.01006-06-20131914...90000003111
958991950PhDTogether5648.01113-03-20146828...201000003110
\n", 315 | "

10 rows × 29 columns

\n", 316 | "
" 317 | ], 318 | "text/plain": [ 319 | " ID Year_Birth Education Marital_Status Income Kidhome Teenhome \\\n", 320 | "0 5524 1957 Graduation Single 58138.0 0 0 \n", 321 | "1 2174 1954 Graduation Single 46344.0 1 1 \n", 322 | "2 4141 1965 Graduation Together 71613.0 0 0 \n", 323 | "3 6182 1984 Graduation Together 26646.0 1 0 \n", 324 | "4 5324 1981 PhD Married 58293.0 1 0 \n", 325 | "5 7446 1967 Master Together 62513.0 0 1 \n", 326 | "6 965 1971 Graduation Divorced 55635.0 0 1 \n", 327 | "7 6177 1985 PhD Married 33454.0 1 0 \n", 328 | "8 4855 1974 PhD Together 30351.0 1 0 \n", 329 | "9 5899 1950 PhD Together 5648.0 1 1 \n", 330 | "\n", 331 | " Dt_Customer Recency MntWines ... NumWebVisitsMonth AcceptedCmp3 \\\n", 332 | "0 04-09-2012 58 635 ... 7 0 \n", 333 | "1 08-03-2014 38 11 ... 5 0 \n", 334 | "2 21-08-2013 26 426 ... 4 0 \n", 335 | "3 10-02-2014 26 11 ... 6 0 \n", 336 | "4 19-01-2014 94 173 ... 5 0 \n", 337 | "5 09-09-2013 16 520 ... 6 0 \n", 338 | "6 13-11-2012 34 235 ... 6 0 \n", 339 | "7 08-05-2013 32 76 ... 8 0 \n", 340 | "8 06-06-2013 19 14 ... 9 0 \n", 341 | "9 13-03-2014 68 28 ... 
20 1 \n", 342 | "\n", 343 | " AcceptedCmp4 AcceptedCmp5 AcceptedCmp1 AcceptedCmp2 Complain \\\n", 344 | "0 0 0 0 0 0 \n", 345 | "1 0 0 0 0 0 \n", 346 | "2 0 0 0 0 0 \n", 347 | "3 0 0 0 0 0 \n", 348 | "4 0 0 0 0 0 \n", 349 | "5 0 0 0 0 0 \n", 350 | "6 0 0 0 0 0 \n", 351 | "7 0 0 0 0 0 \n", 352 | "8 0 0 0 0 0 \n", 353 | "9 0 0 0 0 0 \n", 354 | "\n", 355 | " Z_CostContact Z_Revenue Response \n", 356 | "0 3 11 1 \n", 357 | "1 3 11 0 \n", 358 | "2 3 11 0 \n", 359 | "3 3 11 0 \n", 360 | "4 3 11 0 \n", 361 | "5 3 11 0 \n", 362 | "6 3 11 0 \n", 363 | "7 3 11 0 \n", 364 | "8 3 11 1 \n", 365 | "9 3 11 0 \n", 366 | "\n", 367 | "[10 rows x 29 columns]" 368 | ] 369 | }, 370 | "execution_count": 2, 371 | "metadata": {}, 372 | "output_type": "execute_result" 373 | } 374 | ], 375 | "source": [ 376 | "camp_df = pd.read_csv(\"./data/marketing_campaign.csv\", sep='\\t')\n", 377 | "camp_df.head(10)" 378 | ] 379 | }, 380 | { 381 | "cell_type": "code", 382 | "execution_count": 3, 383 | "id": "cfa03b94", 384 | "metadata": {}, 385 | "outputs": [ 386 | { 387 | "name": "stdout", 388 | "output_type": "stream", 389 | "text": [ 390 | "\n", 391 | "RangeIndex: 2240 entries, 0 to 2239\n", 392 | "Data columns (total 29 columns):\n", 393 | " # Column Non-Null Count Dtype \n", 394 | "--- ------ -------------- ----- \n", 395 | " 0 ID 2240 non-null int64 \n", 396 | " 1 Year_Birth 2240 non-null int64 \n", 397 | " 2 Education 2240 non-null object \n", 398 | " 3 Marital_Status 2240 non-null object \n", 399 | " 4 Income 2216 non-null float64\n", 400 | " 5 Kidhome 2240 non-null int64 \n", 401 | " 6 Teenhome 2240 non-null int64 \n", 402 | " 7 Dt_Customer 2240 non-null object \n", 403 | " 8 Recency 2240 non-null int64 \n", 404 | " 9 MntWines 2240 non-null int64 \n", 405 | " 10 MntFruits 2240 non-null int64 \n", 406 | " 11 MntMeatProducts 2240 non-null int64 \n", 407 | " 12 MntFishProducts 2240 non-null int64 \n", 408 | " 13 MntSweetProducts 2240 non-null int64 \n", 409 | " 14 MntGoldProds 2240 non-null int64 
\n", 410 | " 15 NumDealsPurchases 2240 non-null int64 \n", 411 | " 16 NumWebPurchases 2240 non-null int64 \n", 412 | " 17 NumCatalogPurchases 2240 non-null int64 \n", 413 | " 18 NumStorePurchases 2240 non-null int64 \n", 414 | " 19 NumWebVisitsMonth 2240 non-null int64 \n", 415 | " 20 AcceptedCmp3 2240 non-null int64 \n", 416 | " 21 AcceptedCmp4 2240 non-null int64 \n", 417 | " 22 AcceptedCmp5 2240 non-null int64 \n", 418 | " 23 AcceptedCmp1 2240 non-null int64 \n", 419 | " 24 AcceptedCmp2 2240 non-null int64 \n", 420 | " 25 Complain 2240 non-null int64 \n", 421 | " 26 Z_CostContact 2240 non-null int64 \n", 422 | " 27 Z_Revenue 2240 non-null int64 \n", 423 | " 28 Response 2240 non-null int64 \n", 424 | "dtypes: float64(1), int64(25), object(3)\n", 425 | "memory usage: 507.6+ KB\n" 426 | ] 427 | } 428 | ], 429 | "source": [ 430 | "camp_df.info()" 431 | ] 432 | }, 433 | { 434 | "cell_type": "markdown", 435 | "id": "c0ceb94c", 436 | "metadata": {}, 437 | "source": [ 438 | "## Selecting multiple columns" 439 | ] 440 | }, 441 | { 442 | "cell_type": "code", 443 | "execution_count": null, 444 | "id": "967e17f3", 445 | "metadata": {}, 446 | "outputs": [], 447 | "source": [] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "execution_count": null, 452 | "id": "5c817498", 453 | "metadata": {}, 454 | "outputs": [], 455 | "source": [] 456 | }, 457 | { 458 | "cell_type": "code", 459 | "execution_count": null, 460 | "id": "3bce1620", 461 | "metadata": {}, 462 | "outputs": [], 463 | "source": [] 464 | }, 465 | { 466 | "cell_type": "code", 467 | "execution_count": null, 468 | "id": "ca491cf2", 469 | "metadata": {}, 470 | "outputs": [], 471 | "source": [] 472 | } 473 | ], 474 | "metadata": { 475 | "kernelspec": { 476 | "display_name": "Python 3 (ipykernel)", 477 | "language": "python", 478 | "name": "python3" 479 | }, 480 | "language_info": { 481 | "codemirror_mode": { 482 | "name": "ipython", 483 | "version": 3 484 | }, 485 | "file_extension": ".py", 486 | "mimetype": 
"text/x-python", 487 | "name": "python", 488 | "nbconvert_exporter": "python", 489 | "pygments_lexer": "ipython3", 490 | "version": "3.9.7" 491 | } 492 | }, 493 | "nbformat": 4, 494 | "nbformat_minor": 5 495 | } 496 | -------------------------------------------------------------------------------- /03_01_begin.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "20588d2a", 6 | "metadata": {}, 7 | "source": [ 8 | "# Handling missing values" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "5a9a92a6", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "id": "910ceb1d", 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/html": [ 30 | "
\n", 31 | "\n", 44 | "\n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | "
iddatetimecountry_namestate/provincepopulationlandslide_typetriggerfatalities
0343/2/07NightUnited StatesVirginia16000LandslideRainNaN
1423/22/07NaNUnited StatesOhio17288LandslideRainNaN
2564/6/07NaNUnited StatesPennsylvania15930LandslideRainNaN
3594/14/07NaNCanadaQuebec42786Riverbank collapseRainNaN
4614/15/07NaNUnited StatesKentucky6903LandslideDownpour0.0
\n", 122 | "
" 123 | ], 124 | "text/plain": [ 125 | " id date time country_name state/province population \\\n", 126 | "0 34 3/2/07 Night United States Virginia 16000 \n", 127 | "1 42 3/22/07 NaN United States Ohio 17288 \n", 128 | "2 56 4/6/07 NaN United States Pennsylvania 15930 \n", 129 | "3 59 4/14/07 NaN Canada Quebec 42786 \n", 130 | "4 61 4/15/07 NaN United States Kentucky 6903 \n", 131 | "\n", 132 | " landslide_type trigger fatalities \n", 133 | "0 Landslide Rain NaN \n", 134 | "1 Landslide Rain NaN \n", 135 | "2 Landslide Rain NaN \n", 136 | "3 Riverbank collapse Rain NaN \n", 137 | "4 Landslide Downpour 0.0 " 138 | ] 139 | }, 140 | "execution_count": 2, 141 | "metadata": {}, 142 | "output_type": "execute_result" 143 | } 144 | ], 145 | "source": [ 146 | "df = pd.read_csv(\"./data/landslides.csv\")\n", 147 | "df.head()" 148 | ] 149 | }, 150 | { 151 | "cell_type": "markdown", 152 | "id": "0524fad8", 153 | "metadata": {}, 154 | "source": [ 155 | "## Check and handle missing values" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "id": "c15ed220", 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "id": "07585864", 170 | "metadata": {}, 171 | "outputs": [], 172 | "source": [] 173 | } 174 | ], 175 | "metadata": { 176 | "kernelspec": { 177 | "display_name": "Python 3 (ipykernel)", 178 | "language": "python", 179 | "name": "python3" 180 | }, 181 | "language_info": { 182 | "codemirror_mode": { 183 | "name": "ipython", 184 | "version": 3 185 | }, 186 | "file_extension": ".py", 187 | "mimetype": "text/x-python", 188 | "name": "python", 189 | "nbconvert_exporter": "python", 190 | "pygments_lexer": "ipython3", 191 | "version": "3.9.6" 192 | } 193 | }, 194 | "nbformat": 4, 195 | "nbformat_minor": 5 196 | } 197 | -------------------------------------------------------------------------------- /04_01_begin.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "2b026fa5", 6 | "metadata": {}, 7 | "source": [ 8 | "# Conditional filtering" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "604af38a", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd\n", 19 | "import numpy as np" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 3, 25 | "id": "8e197fd2", 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "data": { 30 | "text/html": [ 31 | "
\n", 32 | "\n", 45 | "\n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | "
IDYear_BirthEducationMarital_StatusIncomeKidhomeTeenhomeDt_CustomerRecencyMntWines...NumWebVisitsMonthAcceptedCmp3AcceptedCmp4AcceptedCmp5AcceptedCmp1AcceptedCmp2ComplainZ_CostContactZ_RevenueResponse
055241957GraduationSingle58138.00004-09-201258635...70000003111
121741954GraduationSingle46344.01108-03-20143811...50000003110
241411965GraduationTogether71613.00021-08-201326426...40000003110
361821984GraduationTogether26646.01010-02-20142611...60000003110
453241981PhDMarried58293.01019-01-201494173...50000003110
\n", 195 | "

5 rows × 29 columns

\n", 196 | "
" 197 | ], 198 | "text/plain": [ 199 | " ID Year_Birth Education Marital_Status Income Kidhome Teenhome \\\n", 200 | "0 5524 1957 Graduation Single 58138.0 0 0 \n", 201 | "1 2174 1954 Graduation Single 46344.0 1 1 \n", 202 | "2 4141 1965 Graduation Together 71613.0 0 0 \n", 203 | "3 6182 1984 Graduation Together 26646.0 1 0 \n", 204 | "4 5324 1981 PhD Married 58293.0 1 0 \n", 205 | "\n", 206 | " Dt_Customer Recency MntWines ... NumWebVisitsMonth AcceptedCmp3 \\\n", 207 | "0 04-09-2012 58 635 ... 7 0 \n", 208 | "1 08-03-2014 38 11 ... 5 0 \n", 209 | "2 21-08-2013 26 426 ... 4 0 \n", 210 | "3 10-02-2014 26 11 ... 6 0 \n", 211 | "4 19-01-2014 94 173 ... 5 0 \n", 212 | "\n", 213 | " AcceptedCmp4 AcceptedCmp5 AcceptedCmp1 AcceptedCmp2 Complain \\\n", 214 | "0 0 0 0 0 0 \n", 215 | "1 0 0 0 0 0 \n", 216 | "2 0 0 0 0 0 \n", 217 | "3 0 0 0 0 0 \n", 218 | "4 0 0 0 0 0 \n", 219 | "\n", 220 | " Z_CostContact Z_Revenue Response \n", 221 | "0 3 11 1 \n", 222 | "1 3 11 0 \n", 223 | "2 3 11 0 \n", 224 | "3 3 11 0 \n", 225 | "4 3 11 0 \n", 226 | "\n", 227 | "[5 rows x 29 columns]" 228 | ] 229 | }, 230 | "execution_count": 3, 231 | "metadata": {}, 232 | "output_type": "execute_result" 233 | } 234 | ], 235 | "source": [ 236 | "df = pd.read_csv(\"./data/marketing_campaign.csv\", sep='\\t')\n", 237 | "df.head()" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 4, 243 | "id": "2a6ae263", 244 | "metadata": {}, 245 | "outputs": [ 246 | { 247 | "name": "stdout", 248 | "output_type": "stream", 249 | "text": [ 250 | "\n", 251 | "RangeIndex: 2240 entries, 0 to 2239\n", 252 | "Data columns (total 29 columns):\n", 253 | " # Column Non-Null Count Dtype \n", 254 | "--- ------ -------------- ----- \n", 255 | " 0 ID 2240 non-null int64 \n", 256 | " 1 Year_Birth 2240 non-null int64 \n", 257 | " 2 Education 2240 non-null object \n", 258 | " 3 Marital_Status 2240 non-null object \n", 259 | " 4 Income 2216 non-null float64\n", 260 | " 5 Kidhome 2240 non-null int64 \n", 261 | 
" 6 Teenhome 2240 non-null int64 \n", 262 | " 7 Dt_Customer 2240 non-null object \n", 263 | " 8 Recency 2240 non-null int64 \n", 264 | " 9 MntWines 2240 non-null int64 \n", 265 | " 10 MntFruits 2240 non-null int64 \n", 266 | " 11 MntMeatProducts 2240 non-null int64 \n", 267 | " 12 MntFishProducts 2240 non-null int64 \n", 268 | " 13 MntSweetProducts 2240 non-null int64 \n", 269 | " 14 MntGoldProds 2240 non-null int64 \n", 270 | " 15 NumDealsPurchases 2240 non-null int64 \n", 271 | " 16 NumWebPurchases 2240 non-null int64 \n", 272 | " 17 NumCatalogPurchases 2240 non-null int64 \n", 273 | " 18 NumStorePurchases 2240 non-null int64 \n", 274 | " 19 NumWebVisitsMonth 2240 non-null int64 \n", 275 | " 20 AcceptedCmp3 2240 non-null int64 \n", 276 | " 21 AcceptedCmp4 2240 non-null int64 \n", 277 | " 22 AcceptedCmp5 2240 non-null int64 \n", 278 | " 23 AcceptedCmp1 2240 non-null int64 \n", 279 | " 24 AcceptedCmp2 2240 non-null int64 \n", 280 | " 25 Complain 2240 non-null int64 \n", 281 | " 26 Z_CostContact 2240 non-null int64 \n", 282 | " 27 Z_Revenue 2240 non-null int64 \n", 283 | " 28 Response 2240 non-null int64 \n", 284 | "dtypes: float64(1), int64(25), object(3)\n", 285 | "memory usage: 507.6+ KB\n" 286 | ] 287 | } 288 | ], 289 | "source": [ 290 | "df.info()" 291 | ] 292 | }, 293 | { 294 | "cell_type": "markdown", 295 | "id": "02fdc1e9", 296 | "metadata": {}, 297 | "source": [ 298 | "## Conditional filtering" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": null, 304 | "id": "d51e745c", 305 | "metadata": {}, 306 | "outputs": [], 307 | "source": [] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": null, 312 | "id": "2e01675a", 313 | "metadata": {}, 314 | "outputs": [], 315 | "source": [] 316 | } 317 | ], 318 | "metadata": { 319 | "kernelspec": { 320 | "display_name": "Python 3 (ipykernel)", 321 | "language": "python", 322 | "name": "python3" 323 | }, 324 | "language_info": { 325 | "codemirror_mode": { 326 | "name": "ipython", 
327 | "version": 3 328 | }, 329 | "file_extension": ".py", 330 | "mimetype": "text/x-python", 331 | "name": "python", 332 | "nbconvert_exporter": "python", 333 | "pygments_lexer": "ipython3", 334 | "version": "3.9.6" 335 | } 336 | }, 337 | "nbformat": 4, 338 | "nbformat_minor": 5 339 | } 340 | -------------------------------------------------------------------------------- /05_01.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "ee7e7699", 6 | "metadata": {}, 7 | "source": [ 8 | "# Grouping data" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "29452d05", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "id": "dc791129", 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "name": "stdout", 29 | "output_type": "stream", 30 | "text": [ 31 | "\n", 32 | "RangeIndex: 2240 entries, 0 to 2239\n", 33 | "Data columns (total 29 columns):\n", 34 | " # Column Non-Null Count Dtype \n", 35 | "--- ------ -------------- ----- \n", 36 | " 0 ID 2240 non-null int64 \n", 37 | " 1 Year_Birth 2240 non-null int64 \n", 38 | " 2 Education 2240 non-null object \n", 39 | " 3 Marital_Status 2240 non-null object \n", 40 | " 4 Income 2216 non-null float64\n", 41 | " 5 Kidhome 2240 non-null int64 \n", 42 | " 6 Teenhome 2240 non-null int64 \n", 43 | " 7 Dt_Customer 2240 non-null object \n", 44 | " 8 Recency 2240 non-null int64 \n", 45 | " 9 MntWines 2240 non-null int64 \n", 46 | " 10 MntFruits 2240 non-null int64 \n", 47 | " 11 MntMeatProducts 2240 non-null int64 \n", 48 | " 12 MntFishProducts 2240 non-null int64 \n", 49 | " 13 MntSweetProducts 2240 non-null int64 \n", 50 | " 14 MntGoldProds 2240 non-null int64 \n", 51 | " 15 NumDealsPurchases 2240 non-null int64 \n", 52 | " 16 NumWebPurchases 2240 non-null int64 \n", 53 | " 17 
NumCatalogPurchases 2240 non-null int64 \n", 54 | " 18 NumStorePurchases 2240 non-null int64 \n", 55 | " 19 NumWebVisitsMonth 2240 non-null int64 \n", 56 | " 20 AcceptedCmp3 2240 non-null int64 \n", 57 | " 21 AcceptedCmp4 2240 non-null int64 \n", 58 | " 22 AcceptedCmp5 2240 non-null int64 \n", 59 | " 23 AcceptedCmp1 2240 non-null int64 \n", 60 | " 24 AcceptedCmp2 2240 non-null int64 \n", 61 | " 25 Complain 2240 non-null int64 \n", 62 | " 26 Z_CostContact 2240 non-null int64 \n", 63 | " 27 Z_Revenue 2240 non-null int64 \n", 64 | " 28 Response 2240 non-null int64 \n", 65 | "dtypes: float64(1), int64(25), object(3)\n", 66 | "memory usage: 507.6+ KB\n" 67 | ] 68 | } 69 | ], 70 | "source": [ 71 | "df = pd.read_csv(\"./data/marketing_campaign.csv\", sep='\\t')\n", 72 | "df.info()" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "id": "a39d6a11", 78 | "metadata": {}, 79 | "source": [ 80 | "## Average amount of each product bought by each group of customers" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 7, 86 | "id": "178450b4", 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "amt_bought = [\n", 91 | " 'MntWines',\n", 92 | " 'MntFruits',\n", 93 | " 'MntMeatProducts',\n", 94 | " 'MntFishProducts',\n", 95 | " 'MntSweetProducts',\n", 96 | " 'MntGoldProds'\n", 97 | "]" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 8, 103 | "id": "f886b74a", 104 | "metadata": {}, 105 | "outputs": [ 106 | { 107 | "data": { 108 | "text/html": [ 109 | "
\n", 110 | "\n", 123 | "\n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | "
MntWinesMntFruitsMntMeatProductsMntFishProductsMntSweetProductsMntGoldProds
Marital_Status
Absurd355.50000084.500000312.500000205.50000030.500000204.000000
Alone184.6666674.00000026.3333337.6666677.00000027.000000
Divorced324.84482827.426724150.20689735.04310326.81896646.288793
Married299.48032425.734954160.68171335.38078726.70138942.822917
Single288.33125026.835417182.10833338.21666727.26250043.729167
Together306.82586225.350000168.10344838.99137926.12241442.994828
Widow369.27272733.090909189.28571451.38961039.01298756.766234
YOLO322.0000003.00000050.0000004.0000003.00000042.000000
\n", 219 | "
" 220 | ], 221 | "text/plain": [ 222 | " MntWines MntFruits MntMeatProducts MntFishProducts \\\n", 223 | "Marital_Status \n", 224 | "Absurd 355.500000 84.500000 312.500000 205.500000 \n", 225 | "Alone 184.666667 4.000000 26.333333 7.666667 \n", 226 | "Divorced 324.844828 27.426724 150.206897 35.043103 \n", 227 | "Married 299.480324 25.734954 160.681713 35.380787 \n", 228 | "Single 288.331250 26.835417 182.108333 38.216667 \n", 229 | "Together 306.825862 25.350000 168.103448 38.991379 \n", 230 | "Widow 369.272727 33.090909 189.285714 51.389610 \n", 231 | "YOLO 322.000000 3.000000 50.000000 4.000000 \n", 232 | "\n", 233 | " MntSweetProducts MntGoldProds \n", 234 | "Marital_Status \n", 235 | "Absurd 30.500000 204.000000 \n", 236 | "Alone 7.000000 27.000000 \n", 237 | "Divorced 26.818966 46.288793 \n", 238 | "Married 26.701389 42.822917 \n", 239 | "Single 27.262500 43.729167 \n", 240 | "Together 26.122414 42.994828 \n", 241 | "Widow 39.012987 56.766234 \n", 242 | "YOLO 3.000000 42.000000 " 243 | ] 244 | }, 245 | "execution_count": 8, 246 | "metadata": {}, 247 | "output_type": "execute_result" 248 | } 249 | ], 250 | "source": [ 251 | "df.groupby(['Marital_Status']).mean()[amt_bought]" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": null, 257 | "id": "16f357e8", 258 | "metadata": {}, 259 | "outputs": [], 260 | "source": [] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": null, 265 | "id": "a1300044", 266 | "metadata": {}, 267 | "outputs": [], 268 | "source": [] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": null, 273 | "id": "93206f7c", 274 | "metadata": {}, 275 | "outputs": [], 276 | "source": [] 277 | } 278 | ], 279 | "metadata": { 280 | "kernelspec": { 281 | "display_name": "Python 3 (ipykernel)", 282 | "language": "python", 283 | "name": "python3" 284 | }, 285 | "language_info": { 286 | "codemirror_mode": { 287 | "name": "ipython", 288 | "version": 3 289 | }, 290 | "file_extension": ".py", 291 | 
"mimetype": "text/x-python", 292 | "name": "python", 293 | "nbconvert_exporter": "python", 294 | "pygments_lexer": "ipython3", 295 | "version": "3.9.6" 296 | } 297 | }, 298 | "nbformat": 4, 299 | "nbformat_minor": 5 300 | } 301 | -------------------------------------------------------------------------------- /05_01_begin.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "ee7e7699", 6 | "metadata": {}, 7 | "source": [ 8 | "# Grouping data" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "29452d05", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "id": "dc791129", 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "name": "stdout", 29 | "output_type": "stream", 30 | "text": [ 31 | "\n", 32 | "RangeIndex: 2240 entries, 0 to 2239\n", 33 | "Data columns (total 29 columns):\n", 34 | " # Column Non-Null Count Dtype \n", 35 | "--- ------ -------------- ----- \n", 36 | " 0 ID 2240 non-null int64 \n", 37 | " 1 Year_Birth 2240 non-null int64 \n", 38 | " 2 Education 2240 non-null object \n", 39 | " 3 Marital_Status 2240 non-null object \n", 40 | " 4 Income 2216 non-null float64\n", 41 | " 5 Kidhome 2240 non-null int64 \n", 42 | " 6 Teenhome 2240 non-null int64 \n", 43 | " 7 Dt_Customer 2240 non-null object \n", 44 | " 8 Recency 2240 non-null int64 \n", 45 | " 9 MntWines 2240 non-null int64 \n", 46 | " 10 MntFruits 2240 non-null int64 \n", 47 | " 11 MntMeatProducts 2240 non-null int64 \n", 48 | " 12 MntFishProducts 2240 non-null int64 \n", 49 | " 13 MntSweetProducts 2240 non-null int64 \n", 50 | " 14 MntGoldProds 2240 non-null int64 \n", 51 | " 15 NumDealsPurchases 2240 non-null int64 \n", 52 | " 16 NumWebPurchases 2240 non-null int64 \n", 53 | " 17 NumCatalogPurchases 2240 non-null int64 \n", 54 | " 18 NumStorePurchases 
2240 non-null int64 \n", 55 | " 19 NumWebVisitsMonth 2240 non-null int64 \n", 56 | " 20 AcceptedCmp3 2240 non-null int64 \n", 57 | " 21 AcceptedCmp4 2240 non-null int64 \n", 58 | " 22 AcceptedCmp5 2240 non-null int64 \n", 59 | " 23 AcceptedCmp1 2240 non-null int64 \n", 60 | " 24 AcceptedCmp2 2240 non-null int64 \n", 61 | " 25 Complain 2240 non-null int64 \n", 62 | " 26 Z_CostContact 2240 non-null int64 \n", 63 | " 27 Z_Revenue 2240 non-null int64 \n", 64 | " 28 Response 2240 non-null int64 \n", 65 | "dtypes: float64(1), int64(25), object(3)\n", 66 | "memory usage: 507.6+ KB\n" 67 | ] 68 | } 69 | ], 70 | "source": [ 71 | "df = pd.read_csv(\"./data/marketing_campaign.csv\", sep='\\t')\n", 72 | "df.info()" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "id": "a39d6a11", 78 | "metadata": {}, 79 | "source": [ 80 | "## Average amount of each product bought by each group of customers" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 7, 86 | "id": "178450b4", 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "amt_bought = [\n", 91 | " 'MntWines',\n", 92 | " 'MntFruits',\n", 93 | " 'MntMeatProducts',\n", 94 | " 'MntFishProducts',\n", 95 | " 'MntSweetProducts',\n", 96 | " 'MntGoldProds'\n", 97 | "]" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 8, 103 | "id": "f886b74a", 104 | "metadata": {}, 105 | "outputs": [ 106 | { 107 | "data": { 108 | "text/html": [ 109 | "
\n", 110 | "\n", 123 | "\n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | "
MntWinesMntFruitsMntMeatProductsMntFishProductsMntSweetProductsMntGoldProds
Marital_Status
Absurd355.50000084.500000312.500000205.50000030.500000204.000000
Alone184.6666674.00000026.3333337.6666677.00000027.000000
Divorced324.84482827.426724150.20689735.04310326.81896646.288793
Married299.48032425.734954160.68171335.38078726.70138942.822917
Single288.33125026.835417182.10833338.21666727.26250043.729167
Together306.82586225.350000168.10344838.99137926.12241442.994828
Widow369.27272733.090909189.28571451.38961039.01298756.766234
YOLO322.0000003.00000050.0000004.0000003.00000042.000000
\n", 219 | "
" 220 | ], 221 | "text/plain": [ 222 | " MntWines MntFruits MntMeatProducts MntFishProducts \\\n", 223 | "Marital_Status \n", 224 | "Absurd 355.500000 84.500000 312.500000 205.500000 \n", 225 | "Alone 184.666667 4.000000 26.333333 7.666667 \n", 226 | "Divorced 324.844828 27.426724 150.206897 35.043103 \n", 227 | "Married 299.480324 25.734954 160.681713 35.380787 \n", 228 | "Single 288.331250 26.835417 182.108333 38.216667 \n", 229 | "Together 306.825862 25.350000 168.103448 38.991379 \n", 230 | "Widow 369.272727 33.090909 189.285714 51.389610 \n", 231 | "YOLO 322.000000 3.000000 50.000000 4.000000 \n", 232 | "\n", 233 | " MntSweetProducts MntGoldProds \n", 234 | "Marital_Status \n", 235 | "Absurd 30.500000 204.000000 \n", 236 | "Alone 7.000000 27.000000 \n", 237 | "Divorced 26.818966 46.288793 \n", 238 | "Married 26.701389 42.822917 \n", 239 | "Single 27.262500 43.729167 \n", 240 | "Together 26.122414 42.994828 \n", 241 | "Widow 39.012987 56.766234 \n", 242 | "YOLO 3.000000 42.000000 " 243 | ] 244 | }, 245 | "execution_count": 8, 246 | "metadata": {}, 247 | "output_type": "execute_result" 248 | } 249 | ], 250 | "source": [ 251 | "df.groupby(['Marital_Status']).mean()[amt_bought]" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": null, 257 | "id": "16f357e8", 258 | "metadata": {}, 259 | "outputs": [], 260 | "source": [] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": null, 265 | "id": "a1300044", 266 | "metadata": {}, 267 | "outputs": [], 268 | "source": [] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": null, 273 | "id": "93206f7c", 274 | "metadata": {}, 275 | "outputs": [], 276 | "source": [] 277 | } 278 | ], 279 | "metadata": { 280 | "kernelspec": { 281 | "display_name": "Python 3 (ipykernel)", 282 | "language": "python", 283 | "name": "python3" 284 | }, 285 | "language_info": { 286 | "codemirror_mode": { 287 | "name": "ipython", 288 | "version": 3 289 | }, 290 | "file_extension": ".py", 291 | 
"mimetype": "text/x-python", 292 | "name": "python", 293 | "nbconvert_exporter": "python", 294 | "pygments_lexer": "ipython3", 295 | "version": "3.9.6" 296 | } 297 | }, 298 | "nbformat": 4, 299 | "nbformat_minor": 5 300 | } 301 | -------------------------------------------------------------------------------- /05_02.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "ee7e7699", 6 | "metadata": {}, 7 | "source": [ 8 | "# Grouping data" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 2, 14 | "id": "29452d05", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 3, 24 | "id": "dc791129", 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "name": "stdout", 29 | "output_type": "stream", 30 | "text": [ 31 | "\n", 32 | "RangeIndex: 2240 entries, 0 to 2239\n", 33 | "Data columns (total 29 columns):\n", 34 | " # Column Non-Null Count Dtype \n", 35 | "--- ------ -------------- ----- \n", 36 | " 0 ID 2240 non-null int64 \n", 37 | " 1 Year_Birth 2240 non-null int64 \n", 38 | " 2 Education 2240 non-null object \n", 39 | " 3 Marital_Status 2240 non-null object \n", 40 | " 4 Income 2216 non-null float64\n", 41 | " 5 Kidhome 2240 non-null int64 \n", 42 | " 6 Teenhome 2240 non-null int64 \n", 43 | " 7 Dt_Customer 2240 non-null object \n", 44 | " 8 Recency 2240 non-null int64 \n", 45 | " 9 MntWines 2240 non-null int64 \n", 46 | " 10 MntFruits 2240 non-null int64 \n", 47 | " 11 MntMeatProducts 2240 non-null int64 \n", 48 | " 12 MntFishProducts 2240 non-null int64 \n", 49 | " 13 MntSweetProducts 2240 non-null int64 \n", 50 | " 14 MntGoldProds 2240 non-null int64 \n", 51 | " 15 NumDealsPurchases 2240 non-null int64 \n", 52 | " 16 NumWebPurchases 2240 non-null int64 \n", 53 | " 17 NumCatalogPurchases 2240 non-null int64 \n", 54 | " 18 NumStorePurchases 2240 
non-null int64 \n", 55 | " 19 NumWebVisitsMonth 2240 non-null int64 \n", 56 | " 20 AcceptedCmp3 2240 non-null int64 \n", 57 | " 21 AcceptedCmp4 2240 non-null int64 \n", 58 | " 22 AcceptedCmp5 2240 non-null int64 \n", 59 | " 23 AcceptedCmp1 2240 non-null int64 \n", 60 | " 24 AcceptedCmp2 2240 non-null int64 \n", 61 | " 25 Complain 2240 non-null int64 \n", 62 | " 26 Z_CostContact 2240 non-null int64 \n", 63 | " 27 Z_Revenue 2240 non-null int64 \n", 64 | " 28 Response 2240 non-null int64 \n", 65 | "dtypes: float64(1), int64(25), object(3)\n", 66 | "memory usage: 507.6+ KB\n" 67 | ] 68 | } 69 | ], 70 | "source": [ 71 | "df = pd.read_csv(\"./data/marketing_campaign.csv\", sep='\\t')\n", 72 | "df.info()" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "id": "a39d6a11", 78 | "metadata": {}, 79 | "source": [ 80 | "## Average amount of each product bought by each group of customers" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 4, 86 | "id": "178450b4", 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "amt_bought = [\n", 91 | " 'MntWines',\n", 92 | " 'MntFruits',\n", 93 | " 'MntMeatProducts',\n", 94 | " 'MntFishProducts',\n", 95 | " 'MntSweetProducts',\n", 96 | " 'MntGoldProds'\n", 97 | "]" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 5, 103 | "id": "f886b74a", 104 | "metadata": {}, 105 | "outputs": [ 106 | { 107 | "data": { 108 | "text/html": [ 109 | "
\n", 110 | "\n", 123 | "\n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | "
MntWinesMntFruitsMntMeatProductsMntFishProductsMntSweetProductsMntGoldProds
Marital_Status
Absurd355.50000084.500000312.500000205.50000030.500000204.000000
Alone184.6666674.00000026.3333337.6666677.00000027.000000
Divorced324.84482827.426724150.20689735.04310326.81896646.288793
Married299.48032425.734954160.68171335.38078726.70138942.822917
Single288.33125026.835417182.10833338.21666727.26250043.729167
Together306.82586225.350000168.10344838.99137926.12241442.994828
Widow369.27272733.090909189.28571451.38961039.01298756.766234
YOLO322.0000003.00000050.0000004.0000003.00000042.000000
\n", 219 | "
" 220 | ], 221 | "text/plain": [ 222 | " MntWines MntFruits MntMeatProducts MntFishProducts \\\n", 223 | "Marital_Status \n", 224 | "Absurd 355.500000 84.500000 312.500000 205.500000 \n", 225 | "Alone 184.666667 4.000000 26.333333 7.666667 \n", 226 | "Divorced 324.844828 27.426724 150.206897 35.043103 \n", 227 | "Married 299.480324 25.734954 160.681713 35.380787 \n", 228 | "Single 288.331250 26.835417 182.108333 38.216667 \n", 229 | "Together 306.825862 25.350000 168.103448 38.991379 \n", 230 | "Widow 369.272727 33.090909 189.285714 51.389610 \n", 231 | "YOLO 322.000000 3.000000 50.000000 4.000000 \n", 232 | "\n", 233 | " MntSweetProducts MntGoldProds \n", 234 | "Marital_Status \n", 235 | "Absurd 30.500000 204.000000 \n", 236 | "Alone 7.000000 27.000000 \n", 237 | "Divorced 26.818966 46.288793 \n", 238 | "Married 26.701389 42.822917 \n", 239 | "Single 27.262500 43.729167 \n", 240 | "Together 26.122414 42.994828 \n", 241 | "Widow 39.012987 56.766234 \n", 242 | "YOLO 3.000000 42.000000 " 243 | ] 244 | }, 245 | "execution_count": 5, 246 | "metadata": {}, 247 | "output_type": "execute_result" 248 | } 249 | ], 250 | "source": [ 251 | "df.groupby(['Marital_Status']).mean()[amt_bought]" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "id": "e628ab39", 257 | "metadata": {}, 258 | "source": [ 259 | "## Grouping by multiple columns" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": 6, 265 | "id": "93206f7c", 266 | "metadata": {}, 267 | "outputs": [ 268 | { 269 | "data": { 270 | "text/html": [ 271 | "
\n", 272 | "\n", 285 | "\n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " 
\n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | "
medianmean
EducationMarital_Status
2n CycleDivorced49118.049395.130435
Married46462.546201.100000
Single48668.553673.944444
Together45774.044736.410714
Widow47682.051392.200000
BasicDivorced9548.09548.000000
Married22352.021960.500000
Single16383.018238.666667
Together23179.021240.071429
Widow22123.022123.000000
GraduationAbsurd79244.079244.000000
Alone34176.034176.000000
Divorced55635.054526.042017
Married50737.050800.258741
Single49973.551322.182927
Together53977.055758.480702
Widow58275.054976.657143
MasterAbsurd65487.065487.000000
Alone61331.061331.000000
Divorced49476.050331.945946
Married53088.553286.028986
Single49494.053530.560000
Together49736.052109.009804
Widow51529.058401.545455
PhDAlone35860.035860.000000
Divorced50613.553096.615385
Married57081.558138.031579
Single50198.053314.614583
Together56756.056041.422414
Widow57032.060288.083333
YOLO48432.048432.000000
\n", 463 | "
" 464 | ], 465 | "text/plain": [ 466 | " median mean\n", 467 | "Education Marital_Status \n", 468 | "2n Cycle Divorced 49118.0 49395.130435\n", 469 | " Married 46462.5 46201.100000\n", 470 | " Single 48668.5 53673.944444\n", 471 | " Together 45774.0 44736.410714\n", 472 | " Widow 47682.0 51392.200000\n", 473 | "Basic Divorced 9548.0 9548.000000\n", 474 | " Married 22352.0 21960.500000\n", 475 | " Single 16383.0 18238.666667\n", 476 | " Together 23179.0 21240.071429\n", 477 | " Widow 22123.0 22123.000000\n", 478 | "Graduation Absurd 79244.0 79244.000000\n", 479 | " Alone 34176.0 34176.000000\n", 480 | " Divorced 55635.0 54526.042017\n", 481 | " Married 50737.0 50800.258741\n", 482 | " Single 49973.5 51322.182927\n", 483 | " Together 53977.0 55758.480702\n", 484 | " Widow 58275.0 54976.657143\n", 485 | "Master Absurd 65487.0 65487.000000\n", 486 | " Alone 61331.0 61331.000000\n", 487 | " Divorced 49476.0 50331.945946\n", 488 | " Married 53088.5 53286.028986\n", 489 | " Single 49494.0 53530.560000\n", 490 | " Together 49736.0 52109.009804\n", 491 | " Widow 51529.0 58401.545455\n", 492 | "PhD Alone 35860.0 35860.000000\n", 493 | " Divorced 50613.5 53096.615385\n", 494 | " Married 57081.5 58138.031579\n", 495 | " Single 50198.0 53314.614583\n", 496 | " Together 56756.0 56041.422414\n", 497 | " Widow 57032.0 60288.083333\n", 498 | " YOLO 48432.0 48432.000000" 499 | ] 500 | }, 501 | "execution_count": 6, 502 | "metadata": {}, 503 | "output_type": "execute_result" 504 | } 505 | ], 506 | "source": [ 507 | "df.groupby(['Education', 'Marital_Status'])['Income'].agg(['median', 'mean'])" 508 | ] 509 | }, 510 | { 511 | "cell_type": "code", 512 | "execution_count": null, 513 | "id": "f89e98ee", 514 | "metadata": {}, 515 | "outputs": [], 516 | "source": [] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": null, 521 | "id": "8ba0901a", 522 | "metadata": {}, 523 | "outputs": [], 524 | "source": [] 525 | }, 526 | { 527 | "cell_type": "code", 528 | 
"execution_count": null, 529 | "id": "78d4b0c5", 530 | "metadata": {}, 531 | "outputs": [], 532 | "source": [] 533 | } 534 | ], 535 | "metadata": { 536 | "kernelspec": { 537 | "display_name": "Python 3 (ipykernel)", 538 | "language": "python", 539 | "name": "python3" 540 | }, 541 | "language_info": { 542 | "codemirror_mode": { 543 | "name": "ipython", 544 | "version": 3 545 | }, 546 | "file_extension": ".py", 547 | "mimetype": "text/x-python", 548 | "name": "python", 549 | "nbconvert_exporter": "python", 550 | "pygments_lexer": "ipython3", 551 | "version": "3.9.6" 552 | } 553 | }, 554 | "nbformat": 4, 555 | "nbformat_minor": 5 556 | } 557 | -------------------------------------------------------------------------------- /05_02_begin.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "ee7e7699", 6 | "metadata": {}, 7 | "source": [ 8 | "# Grouping data" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 2, 14 | "id": "29452d05", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 3, 24 | "id": "dc791129", 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "name": "stdout", 29 | "output_type": "stream", 30 | "text": [ 31 | "\n", 32 | "RangeIndex: 2240 entries, 0 to 2239\n", 33 | "Data columns (total 29 columns):\n", 34 | " # Column Non-Null Count Dtype \n", 35 | "--- ------ -------------- ----- \n", 36 | " 0 ID 2240 non-null int64 \n", 37 | " 1 Year_Birth 2240 non-null int64 \n", 38 | " 2 Education 2240 non-null object \n", 39 | " 3 Marital_Status 2240 non-null object \n", 40 | " 4 Income 2216 non-null float64\n", 41 | " 5 Kidhome 2240 non-null int64 \n", 42 | " 6 Teenhome 2240 non-null int64 \n", 43 | " 7 Dt_Customer 2240 non-null object \n", 44 | " 8 Recency 2240 non-null int64 \n", 45 | " 9 MntWines 2240 non-null int64 \n", 46 | " 10 
MntFruits 2240 non-null int64 \n", 47 | " 11 MntMeatProducts 2240 non-null int64 \n", 48 | " 12 MntFishProducts 2240 non-null int64 \n", 49 | " 13 MntSweetProducts 2240 non-null int64 \n", 50 | " 14 MntGoldProds 2240 non-null int64 \n", 51 | " 15 NumDealsPurchases 2240 non-null int64 \n", 52 | " 16 NumWebPurchases 2240 non-null int64 \n", 53 | " 17 NumCatalogPurchases 2240 non-null int64 \n", 54 | " 18 NumStorePurchases 2240 non-null int64 \n", 55 | " 19 NumWebVisitsMonth 2240 non-null int64 \n", 56 | " 20 AcceptedCmp3 2240 non-null int64 \n", 57 | " 21 AcceptedCmp4 2240 non-null int64 \n", 58 | " 22 AcceptedCmp5 2240 non-null int64 \n", 59 | " 23 AcceptedCmp1 2240 non-null int64 \n", 60 | " 24 AcceptedCmp2 2240 non-null int64 \n", 61 | " 25 Complain 2240 non-null int64 \n", 62 | " 26 Z_CostContact 2240 non-null int64 \n", 63 | " 27 Z_Revenue 2240 non-null int64 \n", 64 | " 28 Response 2240 non-null int64 \n", 65 | "dtypes: float64(1), int64(25), object(3)\n", 66 | "memory usage: 507.6+ KB\n" 67 | ] 68 | } 69 | ], 70 | "source": [ 71 | "df = pd.read_csv(\"./data/marketing_campaign.csv\", sep='\\t')\n", 72 | "df.info()" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "id": "a39d6a11", 78 | "metadata": {}, 79 | "source": [ 80 | "## Average amount of each product bought by each group of customers" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 4, 86 | "id": "178450b4", 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "amt_bought = [\n", 91 | " 'MntWines',\n", 92 | " 'MntFruits',\n", 93 | " 'MntMeatProducts',\n", 94 | " 'MntFishProducts',\n", 95 | " 'MntSweetProducts',\n", 96 | " 'MntGoldProds'\n", 97 | "]" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 5, 103 | "id": "f886b74a", 104 | "metadata": {}, 105 | "outputs": [ 106 | { 107 | "data": { 108 | "text/html": [ 109 | "
\n", 110 | "\n", 123 | "\n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | "
MntWinesMntFruitsMntMeatProductsMntFishProductsMntSweetProductsMntGoldProds
Marital_Status
Absurd355.50000084.500000312.500000205.50000030.500000204.000000
Alone184.6666674.00000026.3333337.6666677.00000027.000000
Divorced324.84482827.426724150.20689735.04310326.81896646.288793
Married299.48032425.734954160.68171335.38078726.70138942.822917
Single288.33125026.835417182.10833338.21666727.26250043.729167
Together306.82586225.350000168.10344838.99137926.12241442.994828
Widow369.27272733.090909189.28571451.38961039.01298756.766234
YOLO322.0000003.00000050.0000004.0000003.00000042.000000
\n", 219 | "
" 220 | ], 221 | "text/plain": [ 222 | " MntWines MntFruits MntMeatProducts MntFishProducts \\\n", 223 | "Marital_Status \n", 224 | "Absurd 355.500000 84.500000 312.500000 205.500000 \n", 225 | "Alone 184.666667 4.000000 26.333333 7.666667 \n", 226 | "Divorced 324.844828 27.426724 150.206897 35.043103 \n", 227 | "Married 299.480324 25.734954 160.681713 35.380787 \n", 228 | "Single 288.331250 26.835417 182.108333 38.216667 \n", 229 | "Together 306.825862 25.350000 168.103448 38.991379 \n", 230 | "Widow 369.272727 33.090909 189.285714 51.389610 \n", 231 | "YOLO 322.000000 3.000000 50.000000 4.000000 \n", 232 | "\n", 233 | " MntSweetProducts MntGoldProds \n", 234 | "Marital_Status \n", 235 | "Absurd 30.500000 204.000000 \n", 236 | "Alone 7.000000 27.000000 \n", 237 | "Divorced 26.818966 46.288793 \n", 238 | "Married 26.701389 42.822917 \n", 239 | "Single 27.262500 43.729167 \n", 240 | "Together 26.122414 42.994828 \n", 241 | "Widow 39.012987 56.766234 \n", 242 | "YOLO 3.000000 42.000000 " 243 | ] 244 | }, 245 | "execution_count": 5, 246 | "metadata": {}, 247 | "output_type": "execute_result" 248 | } 249 | ], 250 | "source": [ 251 | "df.groupby(['Marital_Status']).mean()[amt_bought]" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "id": "5e1c894c", 257 | "metadata": {}, 258 | "source": [ 259 | "## Grouping by multiple columns" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": null, 265 | "id": "d3bf469b", 266 | "metadata": {}, 267 | "outputs": [], 268 | "source": [] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": null, 273 | "id": "7b9fb703", 274 | "metadata": {}, 275 | "outputs": [], 276 | "source": [] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": null, 281 | "id": "73a62425", 282 | "metadata": {}, 283 | "outputs": [], 284 | "source": [] 285 | } 286 | ], 287 | "metadata": { 288 | "kernelspec": { 289 | "display_name": "Python 3 (ipykernel)", 290 | "language": "python", 291 | "name": 
"python3" 292 | }, 293 | "language_info": { 294 | "codemirror_mode": { 295 | "name": "ipython", 296 | "version": 3 297 | }, 298 | "file_extension": ".py", 299 | "mimetype": "text/x-python", 300 | "name": "python", 301 | "nbconvert_exporter": "python", 302 | "pygments_lexer": "ipython3", 303 | "version": "3.9.6" 304 | } 305 | }, 306 | "nbformat": 4, 307 | "nbformat_minor": 5 308 | } 309 | -------------------------------------------------------------------------------- /05_03_begin.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "ee7e7699", 6 | "metadata": {}, 7 | "source": [ 8 | "# Grouping data" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "29452d05", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "id": "dc791129", 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "name": "stdout", 29 | "output_type": "stream", 30 | "text": [ 31 | "\n", 32 | "RangeIndex: 2240 entries, 0 to 2239\n", 33 | "Data columns (total 29 columns):\n", 34 | " # Column Non-Null Count Dtype \n", 35 | "--- ------ -------------- ----- \n", 36 | " 0 ID 2240 non-null int64 \n", 37 | " 1 Year_Birth 2240 non-null int64 \n", 38 | " 2 Education 2240 non-null object \n", 39 | " 3 Marital_Status 2240 non-null object \n", 40 | " 4 Income 2216 non-null float64\n", 41 | " 5 Kidhome 2240 non-null int64 \n", 42 | " 6 Teenhome 2240 non-null int64 \n", 43 | " 7 Dt_Customer 2240 non-null object \n", 44 | " 8 Recency 2240 non-null int64 \n", 45 | " 9 MntWines 2240 non-null int64 \n", 46 | " 10 MntFruits 2240 non-null int64 \n", 47 | " 11 MntMeatProducts 2240 non-null int64 \n", 48 | " 12 MntFishProducts 2240 non-null int64 \n", 49 | " 13 MntSweetProducts 2240 non-null int64 \n", 50 | " 14 MntGoldProds 2240 non-null int64 \n", 51 | " 15 NumDealsPurchases 
2240 non-null int64 \n", 52 | " 16 NumWebPurchases 2240 non-null int64 \n", 53 | " 17 NumCatalogPurchases 2240 non-null int64 \n", 54 | " 18 NumStorePurchases 2240 non-null int64 \n", 55 | " 19 NumWebVisitsMonth 2240 non-null int64 \n", 56 | " 20 AcceptedCmp3 2240 non-null int64 \n", 57 | " 21 AcceptedCmp4 2240 non-null int64 \n", 58 | " 22 AcceptedCmp5 2240 non-null int64 \n", 59 | " 23 AcceptedCmp1 2240 non-null int64 \n", 60 | " 24 AcceptedCmp2 2240 non-null int64 \n", 61 | " 25 Complain 2240 non-null int64 \n", 62 | " 26 Z_CostContact 2240 non-null int64 \n", 63 | " 27 Z_Revenue 2240 non-null int64 \n", 64 | " 28 Response 2240 non-null int64 \n", 65 | "dtypes: float64(1), int64(25), object(3)\n", 66 | "memory usage: 507.6+ KB\n" 67 | ] 68 | } 69 | ], 70 | "source": [ 71 | "df = pd.read_csv(\"./data/marketing_campaign.csv\", sep='\\t')\n", 72 | "df.info()" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "id": "a39d6a11", 78 | "metadata": {}, 79 | "source": [ 80 | "## Average amount of each product bought by each group of customers" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 3, 86 | "id": "178450b4", 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "amt_bought = [\n", 91 | " 'MntWines',\n", 92 | " 'MntFruits',\n", 93 | " 'MntMeatProducts',\n", 94 | " 'MntFishProducts',\n", 95 | " 'MntSweetProducts',\n", 96 | " 'MntGoldProds'\n", 97 | "]" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 4, 103 | "id": "f886b74a", 104 | "metadata": {}, 105 | "outputs": [ 106 | { 107 | "data": { 108 | "text/html": [ 109 | "
\n", 110 | "\n", 123 | "\n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | "
MntWinesMntFruitsMntMeatProductsMntFishProductsMntSweetProductsMntGoldProds
Marital_Status
Absurd355.50000084.500000312.500000205.50000030.500000204.000000
Alone184.6666674.00000026.3333337.6666677.00000027.000000
Divorced324.84482827.426724150.20689735.04310326.81896646.288793
Married299.48032425.734954160.68171335.38078726.70138942.822917
Single288.33125026.835417182.10833338.21666727.26250043.729167
Together306.82586225.350000168.10344838.99137926.12241442.994828
Widow369.27272733.090909189.28571451.38961039.01298756.766234
YOLO322.0000003.00000050.0000004.0000003.00000042.000000
\n", 219 | "
" 220 | ], 221 | "text/plain": [ 222 | " MntWines MntFruits MntMeatProducts MntFishProducts \\\n", 223 | "Marital_Status \n", 224 | "Absurd 355.500000 84.500000 312.500000 205.500000 \n", 225 | "Alone 184.666667 4.000000 26.333333 7.666667 \n", 226 | "Divorced 324.844828 27.426724 150.206897 35.043103 \n", 227 | "Married 299.480324 25.734954 160.681713 35.380787 \n", 228 | "Single 288.331250 26.835417 182.108333 38.216667 \n", 229 | "Together 306.825862 25.350000 168.103448 38.991379 \n", 230 | "Widow 369.272727 33.090909 189.285714 51.389610 \n", 231 | "YOLO 322.000000 3.000000 50.000000 4.000000 \n", 232 | "\n", 233 | " MntSweetProducts MntGoldProds \n", 234 | "Marital_Status \n", 235 | "Absurd 30.500000 204.000000 \n", 236 | "Alone 7.000000 27.000000 \n", 237 | "Divorced 26.818966 46.288793 \n", 238 | "Married 26.701389 42.822917 \n", 239 | "Single 27.262500 43.729167 \n", 240 | "Together 26.122414 42.994828 \n", 241 | "Widow 39.012987 56.766234 \n", 242 | "YOLO 3.000000 42.000000 " 243 | ] 244 | }, 245 | "execution_count": 4, 246 | "metadata": {}, 247 | "output_type": "execute_result" 248 | } 249 | ], 250 | "source": [ 251 | "df.groupby(['Marital_Status']).mean()[amt_bought]" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "id": "e628ab39", 257 | "metadata": {}, 258 | "source": [ 259 | "## Grouping by multiple columns" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": 5, 265 | "id": "93206f7c", 266 | "metadata": {}, 267 | "outputs": [ 268 | { 269 | "data": { 270 | "text/html": [ 271 | "
\n", 272 | "\n", 285 | "\n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " 
\n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | "
medianmean
EducationMarital_Status
2n CycleDivorced49118.049395.130435
Married46462.546201.100000
Single48668.553673.944444
Together45774.044736.410714
Widow47682.051392.200000
BasicDivorced9548.09548.000000
Married22352.021960.500000
Single16383.018238.666667
Together23179.021240.071429
Widow22123.022123.000000
GraduationAbsurd79244.079244.000000
Alone34176.034176.000000
Divorced55635.054526.042017
Married50737.050800.258741
Single49973.551322.182927
Together53977.055758.480702
Widow58275.054976.657143
MasterAbsurd65487.065487.000000
Alone61331.061331.000000
Divorced49476.050331.945946
Married53088.553286.028986
Single49494.053530.560000
Together49736.052109.009804
Widow51529.058401.545455
PhDAlone35860.035860.000000
Divorced50613.553096.615385
Married57081.558138.031579
Single50198.053314.614583
Together56756.056041.422414
Widow57032.060288.083333
YOLO48432.048432.000000
\n", 463 | "
" 464 | ], 465 | "text/plain": [ 466 | " median mean\n", 467 | "Education Marital_Status \n", 468 | "2n Cycle Divorced 49118.0 49395.130435\n", 469 | " Married 46462.5 46201.100000\n", 470 | " Single 48668.5 53673.944444\n", 471 | " Together 45774.0 44736.410714\n", 472 | " Widow 47682.0 51392.200000\n", 473 | "Basic Divorced 9548.0 9548.000000\n", 474 | " Married 22352.0 21960.500000\n", 475 | " Single 16383.0 18238.666667\n", 476 | " Together 23179.0 21240.071429\n", 477 | " Widow 22123.0 22123.000000\n", 478 | "Graduation Absurd 79244.0 79244.000000\n", 479 | " Alone 34176.0 34176.000000\n", 480 | " Divorced 55635.0 54526.042017\n", 481 | " Married 50737.0 50800.258741\n", 482 | " Single 49973.5 51322.182927\n", 483 | " Together 53977.0 55758.480702\n", 484 | " Widow 58275.0 54976.657143\n", 485 | "Master Absurd 65487.0 65487.000000\n", 486 | " Alone 61331.0 61331.000000\n", 487 | " Divorced 49476.0 50331.945946\n", 488 | " Married 53088.5 53286.028986\n", 489 | " Single 49494.0 53530.560000\n", 490 | " Together 49736.0 52109.009804\n", 491 | " Widow 51529.0 58401.545455\n", 492 | "PhD Alone 35860.0 35860.000000\n", 493 | " Divorced 50613.5 53096.615385\n", 494 | " Married 57081.5 58138.031579\n", 495 | " Single 50198.0 53314.614583\n", 496 | " Together 56756.0 56041.422414\n", 497 | " Widow 57032.0 60288.083333\n", 498 | " YOLO 48432.0 48432.000000" 499 | ] 500 | }, 501 | "execution_count": 5, 502 | "metadata": {}, 503 | "output_type": "execute_result" 504 | } 505 | ], 506 | "source": [ 507 | "df.groupby(['Education', 'Marital_Status'])['Income'].agg(['median', 'mean'])" 508 | ] 509 | }, 510 | { 511 | "cell_type": "markdown", 512 | "id": "bdd1ea6d", 513 | "metadata": {}, 514 | "source": [ 515 | "## Applying a custom aggregate function" 516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": null, 521 | "id": "d19009a8", 522 | "metadata": {}, 523 | "outputs": [], 524 | "source": [] 525 | }, 526 | { 527 | "cell_type": "code", 528 | 
"execution_count": null, 529 | "id": "a898789e", 530 | "metadata": {}, 531 | "outputs": [], 532 | "source": [] 533 | }, 534 | { 535 | "cell_type": "code", 536 | "execution_count": null, 537 | "id": "8f0b486a", 538 | "metadata": {}, 539 | "outputs": [], 540 | "source": [] 541 | }, 542 | { 543 | "cell_type": "code", 544 | "execution_count": null, 545 | "id": "e9d2e3a1", 546 | "metadata": {}, 547 | "outputs": [], 548 | "source": [] 549 | } 550 | ], 551 | "metadata": { 552 | "kernelspec": { 553 | "display_name": "Python 3 (ipykernel)", 554 | "language": "python", 555 | "name": "python3" 556 | }, 557 | "language_info": { 558 | "codemirror_mode": { 559 | "name": "ipython", 560 | "version": 3 561 | }, 562 | "file_extension": ".py", 563 | "mimetype": "text/x-python", 564 | "name": "python", 565 | "nbconvert_exporter": "python", 566 | "pygments_lexer": "ipython3", 567 | "version": "3.9.6" 568 | } 569 | }, 570 | "nbformat": 4, 571 | "nbformat_minor": 5 572 | } 573 | -------------------------------------------------------------------------------- /05_04_begin.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "fba5b622", 6 | "metadata": {}, 7 | "source": [ 8 | "## Calculate yearly stock price returns" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "29452d05", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 4, 24 | "id": "12cd9f70", 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/html": [ 30 | "
\n", 31 | "\n", 44 | "\n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | "
AAPLMSFTXOMSPX
2003-01-027.4021.1129.22909.03
2003-01-037.4521.1429.24908.59
2003-01-067.4521.5229.96929.01
2003-01-077.4321.9328.95922.93
2003-01-087.2821.3128.83909.93
\n", 92 | "
" 93 | ], 94 | "text/plain": [ 95 | " AAPL MSFT XOM SPX\n", 96 | "2003-01-02 7.40 21.11 29.22 909.03\n", 97 | "2003-01-03 7.45 21.14 29.24 908.59\n", 98 | "2003-01-06 7.45 21.52 29.96 929.01\n", 99 | "2003-01-07 7.43 21.93 28.95 922.93\n", 100 | "2003-01-08 7.28 21.31 28.83 909.93" 101 | ] 102 | }, 103 | "execution_count": 4, 104 | "metadata": {}, 105 | "output_type": "execute_result" 106 | } 107 | ], 108 | "source": [ 109 | "stocks = pd.read_csv(\"./data/stock_data.csv\",\n", 110 | " index_col=\"Unnamed: 0\",\n", 111 | " parse_dates=True)\n", 112 | "\n", 113 | "stocks.head()" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 5, 119 | "id": "99bff4b2", 120 | "metadata": {}, 121 | "outputs": [ 122 | { 123 | "data": { 124 | "text/html": [ 125 | "
\n", 126 | "\n", 139 | "\n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | "
AAPLMSFTXOMSPX
2003-01-027.4021.1129.22909.03
2003-01-037.4521.1429.24908.59
2003-01-067.4521.5229.96929.01
2003-01-077.4321.9328.95922.93
2003-01-087.2821.3128.83909.93
...............
2011-10-10388.8126.9476.281194.89
2011-10-11400.2927.0076.271195.54
2011-10-12402.1926.9677.161207.25
2011-10-13408.4327.1876.371203.66
2011-10-14422.0027.2778.111224.58
\n", 229 | "

2214 rows × 4 columns

\n", 230 | "
" 231 | ], 232 | "text/plain": [ 233 | " AAPL MSFT XOM SPX\n", 234 | "2003-01-02 7.40 21.11 29.22 909.03\n", 235 | "2003-01-03 7.45 21.14 29.24 908.59\n", 236 | "2003-01-06 7.45 21.52 29.96 929.01\n", 237 | "2003-01-07 7.43 21.93 28.95 922.93\n", 238 | "2003-01-08 7.28 21.31 28.83 909.93\n", 239 | "... ... ... ... ...\n", 240 | "2011-10-10 388.81 26.94 76.28 1194.89\n", 241 | "2011-10-11 400.29 27.00 76.27 1195.54\n", 242 | "2011-10-12 402.19 26.96 77.16 1207.25\n", 243 | "2011-10-13 408.43 27.18 76.37 1203.66\n", 244 | "2011-10-14 422.00 27.27 78.11 1224.58\n", 245 | "\n", 246 | "[2214 rows x 4 columns]" 247 | ] 248 | }, 249 | "execution_count": 5, 250 | "metadata": {}, 251 | "output_type": "execute_result" 252 | } 253 | ], 254 | "source": [ 255 | "stocks" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 6, 261 | "id": "e4791f69", 262 | "metadata": {}, 263 | "outputs": [ 264 | { 265 | "data": { 266 | "text/html": [ 267 | "
\n", 268 | "\n", 281 | "\n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | "
AAPLMSFTXOMSPX
2003-01-030.0067570.0014210.000684-0.000484
2003-01-060.0000000.0179750.0246240.022474
2003-01-07-0.0026850.019052-0.033712-0.006545
2003-01-08-0.020188-0.028272-0.004145-0.014086
2003-01-090.0082420.0290940.0211590.019386
...............
2011-10-100.0514060.0262860.0369770.034125
2011-10-110.0295260.002227-0.0001310.000544
2011-10-120.004747-0.0014810.0116690.009795
2011-10-130.0155150.008160-0.010238-0.002974
2011-10-140.0332250.0033110.0227840.017380
\n", 371 | "

2213 rows × 4 columns

\n", 372 | "
" 373 | ], 374 | "text/plain": [ 375 | " AAPL MSFT XOM SPX\n", 376 | "2003-01-03 0.006757 0.001421 0.000684 -0.000484\n", 377 | "2003-01-06 0.000000 0.017975 0.024624 0.022474\n", 378 | "2003-01-07 -0.002685 0.019052 -0.033712 -0.006545\n", 379 | "2003-01-08 -0.020188 -0.028272 -0.004145 -0.014086\n", 380 | "2003-01-09 0.008242 0.029094 0.021159 0.019386\n", 381 | "... ... ... ... ...\n", 382 | "2011-10-10 0.051406 0.026286 0.036977 0.034125\n", 383 | "2011-10-11 0.029526 0.002227 -0.000131 0.000544\n", 384 | "2011-10-12 0.004747 -0.001481 0.011669 0.009795\n", 385 | "2011-10-13 0.015515 0.008160 -0.010238 -0.002974\n", 386 | "2011-10-14 0.033225 0.003311 0.022784 0.017380\n", 387 | "\n", 388 | "[2213 rows x 4 columns]" 389 | ] 390 | }, 391 | "execution_count": 6, 392 | "metadata": {}, 393 | "output_type": "execute_result" 394 | } 395 | ], 396 | "source": [ 397 | "##calculate daily return\n", 398 | "\n", 399 | "rets = stocks.pct_change().dropna()\n", 400 | "rets" 401 | ] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "execution_count": 8, 406 | "id": "f2194d0a", 407 | "metadata": {}, 408 | "outputs": [ 409 | { 410 | "data": { 411 | "text/html": [ 412 | "
\n", 413 | "\n", 426 | "\n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | "
AAPLMSFTXOMSPX
200343.5848726.89866418.77167621.552070
2004118.44221810.18502125.9433589.226193
200587.9453290.09516213.8372453.483644
200623.83239616.85925934.84580113.269720
200791.96579521.52935824.5684724.740316
2008-66.939672-47.032987-0.880614-40.145891
200996.23338654.235841-10.09584824.783483
201046.161592-4.33262811.26374213.656375
201129.5849611.78425610.903390-0.663912
\n", 502 | "
" 503 | ], 504 | "text/plain": [ 505 | " AAPL MSFT XOM SPX\n", 506 | "2003 43.584872 6.898664 18.771676 21.552070\n", 507 | "2004 118.442218 10.185021 25.943358 9.226193\n", 508 | "2005 87.945329 0.095162 13.837245 3.483644\n", 509 | "2006 23.832396 16.859259 34.845801 13.269720\n", 510 | "2007 91.965795 21.529358 24.568472 4.740316\n", 511 | "2008 -66.939672 -47.032987 -0.880614 -40.145891\n", 512 | "2009 96.233386 54.235841 -10.095848 24.783483\n", 513 | "2010 46.161592 -4.332628 11.263742 13.656375\n", 514 | "2011 29.584961 1.784256 10.903390 -0.663912" 515 | ] 516 | }, 517 | "execution_count": 8, 518 | "metadata": {}, 519 | "output_type": "execute_result" 520 | } 521 | ], 522 | "source": [ 523 | "get_year = lambda x: x.year\n", 524 | "by_year_stocks = rets.groupby(get_year).sum()*100\n", 525 | "by_year_stocks" 526 | ] 527 | }, 528 | { 529 | "cell_type": "code", 530 | "execution_count": null, 531 | "id": "c6e304d8", 532 | "metadata": {}, 533 | "outputs": [], 534 | "source": [] 535 | }, 536 | { 537 | "cell_type": "code", 538 | "execution_count": null, 539 | "id": "63a611fa", 540 | "metadata": {}, 541 | "outputs": [], 542 | "source": [] 543 | }, 544 | { 545 | "cell_type": "code", 546 | "execution_count": null, 547 | "id": "1e844c8d", 548 | "metadata": {}, 549 | "outputs": [], 550 | "source": [] 551 | }, 552 | { 553 | "cell_type": "code", 554 | "execution_count": null, 555 | "id": "0de20c8c", 556 | "metadata": {}, 557 | "outputs": [], 558 | "source": [] 559 | }, 560 | { 561 | "cell_type": "code", 562 | "execution_count": null, 563 | "id": "06e54a34", 564 | "metadata": {}, 565 | "outputs": [], 566 | "source": [] 567 | }, 568 | { 569 | "cell_type": "code", 570 | "execution_count": null, 571 | "id": "ed650974", 572 | "metadata": {}, 573 | "outputs": [], 574 | "source": [] 575 | }, 576 | { 577 | "cell_type": "code", 578 | "execution_count": null, 579 | "id": "0673c1ed", 580 | "metadata": {}, 581 | "outputs": [], 582 | "source": [] 583 | } 584 | ], 585 | "metadata": { 
586 | "kernelspec": { 587 | "display_name": "Python 3 (ipykernel)", 588 | "language": "python", 589 | "name": "python3" 590 | }, 591 | "language_info": { 592 | "codemirror_mode": { 593 | "name": "ipython", 594 | "version": 3 595 | }, 596 | "file_extension": ".py", 597 | "mimetype": "text/x-python", 598 | "name": "python", 599 | "nbconvert_exporter": "python", 600 | "pygments_lexer": "ipython3", 601 | "version": "3.9.6" 602 | } 603 | }, 604 | "nbformat": 4, 605 | "nbformat_minor": 5 606 | } 607 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 2 | Contribution Agreement 3 | ====================== 4 | 5 | This repository does not accept pull requests (PRs). All pull requests will be closed. 6 | 7 | However, if any contributions (through pull requests, issues, feedback or otherwise) are provided, as a contributor, you represent that the code you submit is your original work or that of your employer (in which case you represent you have the right to bind your employer). By submitting code (or otherwise providing feedback), you (and, if applicable, your employer) are licensing the submitted code (and/or feedback) to LinkedIn and the open source community subject to the BSD 2-Clause license. 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | LinkedIn Learning Exercise Files License Agreement 2 | ================================================== 3 | 4 | This License Agreement (the "Agreement") is a binding legal agreement 5 | between you (as an individual or entity, as applicable) and LinkedIn 6 | Corporation (“LinkedIn”). By downloading or using the LinkedIn Learning 7 | exercise files in this repository (“Licensed Materials”), you agree to 8 | be bound by the terms of this Agreement. 
If you do not agree to these 9 | terms, do not download or use the Licensed Materials. 10 | 11 | 1. License. 12 | - a. Subject to the terms of this Agreement, LinkedIn hereby grants LinkedIn 13 | members during their LinkedIn Learning subscription a non-exclusive, 14 | non-transferable copyright license, for internal use only, to 1) make a 15 | reasonable number of copies of the Licensed Materials, and 2) make 16 | derivative works of the Licensed Materials for the sole purpose of 17 | practicing skills taught in LinkedIn Learning courses. 18 | - b. Distribution. Unless otherwise noted in the Licensed Materials, subject 19 | to the terms of this Agreement, LinkedIn hereby grants LinkedIn members 20 | with a LinkedIn Learning subscription a non-exclusive, non-transferable 21 | copyright license to distribute the Licensed Materials, except the 22 | Licensed Materials may not be included in any product or service (or 23 | otherwise used) to instruct or educate others. 24 | 25 | 2. Restrictions and Intellectual Property. 26 | - a. You may not use, modify, copy, make derivative works of, publish, 27 | distribute, rent, lease, sell, sublicense, assign or otherwise transfer the 28 | Licensed Materials, except as expressly set forth above in Section 1. 29 | - b. LinkedIn (and its licensors) retains its intellectual property rights 30 | in the Licensed Materials. Except as expressly set forth in Section 1, 31 | LinkedIn grants no licenses. 32 | - c. You indemnify LinkedIn and its licensors and affiliates for i) any 33 | alleged infringement or misappropriation of any intellectual property rights 34 | of any third party based on modifications you make to the Licensed Materials, 35 | ii) any claims arising from your use or distribution of all or part of the 36 | Licensed Materials and iii) a breach of this Agreement. 
You will defend, hold 37 | harmless, and indemnify LinkedIn and its affiliates (and our and their 38 | respective employees, shareholders, and directors) from any claim or action 39 | brought by a third party, including all damages, liabilities, costs and 40 | expenses, including reasonable attorneys’ fees, to the extent resulting from, 41 | alleged to have resulted from, or in connection with: (a) your breach of your 42 | obligations herein; or (b) your use or distribution of any Licensed Materials. 43 | 44 | 3. Open source. This code may include open source software, which may be 45 | subject to other license terms as provided in the files. 46 | 47 | 4. Warranty Disclaimer. LINKEDIN PROVIDES THE LICENSED MATERIALS ON AN “AS IS” 48 | AND “AS AVAILABLE” BASIS. LINKEDIN MAKES NO REPRESENTATION OR WARRANTY, 49 | WHETHER EXPRESS OR IMPLIED, ABOUT THE LICENSED MATERIALS, INCLUDING ANY 50 | REPRESENTATION THAT THE LICENSED MATERIALS WILL BE FREE OF ERRORS, BUGS OR 51 | INTERRUPTIONS, OR THAT THE LICENSED MATERIALS ARE ACCURATE, COMPLETE OR 52 | OTHERWISE VALID. TO THE FULLEST EXTENT PERMITTED BY LAW, LINKEDIN AND ITS 53 | AFFILIATES DISCLAIM ANY IMPLIED OR STATUTORY WARRANTY OR CONDITION, INCLUDING 54 | ANY IMPLIED WARRANTY OR CONDITION OF MERCHANTABILITY OR FITNESS FOR A 55 | PARTICULAR PURPOSE, AVAILABILITY, SECURITY, TITLE AND/OR NON-INFRINGEMENT. 56 | YOUR USE OF THE LICENSED MATERIALS IS AT YOUR OWN DISCRETION AND RISK, AND 57 | YOU WILL BE SOLELY RESPONSIBLE FOR ANY DAMAGE THAT RESULTS FROM USE OF THE 58 | LICENSED MATERIALS TO YOUR COMPUTER SYSTEM OR LOSS OF DATA. NO ADVICE OR 59 | INFORMATION, WHETHER ORAL OR WRITTEN, OBTAINED BY YOU FROM US OR THROUGH OR 60 | FROM THE LICENSED MATERIALS WILL CREATE ANY WARRANTY OR CONDITION NOT 61 | EXPRESSLY STATED IN THESE TERMS. 62 | 63 | 5. Limitation of Liability. 
LINKEDIN SHALL NOT BE LIABLE FOR ANY INDIRECT, 64 | INCIDENTAL, SPECIAL, PUNITIVE, CONSEQUENTIAL OR EXEMPLARY DAMAGES, INCLUDING 65 | BUT NOT LIMITED TO, DAMAGES FOR LOSS OF PROFITS, GOODWILL, USE, DATA OR OTHER 66 | INTANGIBLE LOSSES. IN NO EVENT WILL LINKEDIN'S AGGREGATE LIABILITY TO YOU 67 | EXCEED $100. THIS LIMITATION OF LIABILITY SHALL: 68 | - i. APPLY REGARDLESS OF WHETHER (A) YOU BASE YOUR CLAIM ON CONTRACT, TORT, 69 | STATUTE, OR ANY OTHER LEGAL THEORY, (B) WE KNEW OR SHOULD HAVE KNOWN ABOUT 70 | THE POSSIBILITY OF SUCH DAMAGES, OR (C) THE LIMITED REMEDIES PROVIDED IN THIS 71 | SECTION FAIL OF THEIR ESSENTIAL PURPOSE; AND 72 | - ii. NOT APPLY TO ANY DAMAGE THAT LINKEDIN MAY CAUSE YOU INTENTIONALLY OR 73 | KNOWINGLY IN VIOLATION OF THESE TERMS OR APPLICABLE LAW, OR AS OTHERWISE 74 | MANDATED BY APPLICABLE LAW THAT CANNOT BE DISCLAIMED IN THESE TERMS. 75 | 76 | 6. Termination. This Agreement automatically terminates upon your breach of 77 | this Agreement or termination of your LinkedIn Learning subscription. On 78 | termination, all licenses granted under this Agreement will terminate 79 | immediately and you will delete the Licensed Materials. Sections 2-7 of this 80 | Agreement survive any termination of this Agreement. LinkedIn may discontinue 81 | the availability of some or all of the Licensed Materials at any time for any 82 | reason. 83 | 84 | 7. Miscellaneous. This Agreement will be governed by and construed in 85 | accordance with the laws of the State of California without regard to conflict 86 | of laws principles. The exclusive forum for any disputes arising out of or 87 | relating to this Agreement shall be an appropriate federal or state court 88 | sitting in the County of Santa Clara, State of California. If LinkedIn does 89 | not act to enforce a breach of this Agreement, that does not mean that 90 | LinkedIn has waived its right to enforce this Agreement. 
The Agreement does 91 | not create a partnership, agency relationship, or joint venture between the 92 | parties. Neither party has the power or authority to bind the other or to 93 | create any obligation or responsibility on behalf of the other. You may not, 94 | without LinkedIn’s prior written consent, assign or delegate any rights or 95 | obligations under these terms, including in connection with a change of 96 | control. Any purported assignment and delegation shall be ineffective. The 97 | Agreement shall bind and inure to the benefit of the parties, their respective 98 | successors and permitted assigns. If any provision of the Agreement is 99 | unenforceable, that provision will be modified to render it enforceable to the 100 | extent possible to give effect to the parties’ intentions and the remaining 101 | provisions will not be affected. This Agreement is the only agreement between 102 | you and LinkedIn regarding the Licensed Materials, and supersedes all prior 103 | agreements relating to the Licensed Materials. 104 | 105 | Last Updated: March 2019 106 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Copyright 2022 LinkedIn Corporation 2 | All Rights Reserved. 3 | 4 | Licensed under the LinkedIn Learning Exercise File License (the "License"). 5 | See LICENSE in the project root for license information. 6 | 7 | ATTRIBUTIONS: 8 | 9 | Pandas 10 | https://github.com/pandas-dev/pandas 11 | Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team 12 | Copyright (c) 2011-2021, Open source contributors. 13 | License: BSD 3-Clause 14 | https://opensource.org/licenses/BSD-3-Clause 15 | 16 | Please note, this project may automatically load third party code from external 17 | repositories (for example, NPM modules, Composer packages, or other dependencies). 
18 | If so, such third party code may be subject to other license terms than as set 19 | forth above. In addition, such third party code may also depend on and load 20 | multiple tiers of dependencies. Please review the applicable licenses of the 21 | additional dependencies. 22 | 23 | =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- 24 | 25 | BSD 3-Clause License 26 | 27 | Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team 28 | All rights reserved. 29 | 30 | Copyright (c) 2011-2022, Open source contributors. 31 | 32 | Redistribution and use in source and binary forms, with or without 33 | modification, are permitted provided that the following conditions are met: 34 | 35 | * Redistributions of source code must retain the above copyright notice, this 36 | list of conditions and the following disclaimer. 37 | 38 | * Redistributions in binary form must reproduce the above copyright notice, 39 | this list of conditions and the following disclaimer in the documentation 40 | and/or other materials provided with the distribution. 41 | 42 | * Neither the name of the copyright holder nor the names of its 43 | contributors may be used to endorse or promote products derived from 44 | this software without specific prior written permission. 45 | 46 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 47 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 48 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 49 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 50 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 51 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 52 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 53 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 54 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 55 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 56 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pandas Code Challenges 2 | This is the repository for the LinkedIn Learning course pandas Code Challenges. The full course is available from [LinkedIn Learning][lil-course-url]. 3 | 4 | ![pandas Code Challenges][lil-thumbnail-url] 5 | 6 | Want to test your pandas skills? These concise challenges let you stretch your brain and test your talents. Instructor Harshit Tyagi shares over a dozen pandas challenges, as well as his own solutions to each problem. Harshit’s challenges cover: Reading files and initial exploration of data using pandas attributes; data cleaning; creating subsets of data using indexing and slicing; writing queries to filter out rows based on conditional statements and Boolean indexing; and grouping and aggregation to answer categorical questions. Learn to apply statistical functions to groups. And since each challenge is self-contained, you can complete the course in any order—and at your own pace. Tune in to get the hands-on practice you need to keep your skills sharp. 7 | 8 | 9 | ### Instructor 10 | 11 | Harshit Tyagi 12 | 13 | 14 | 15 | 16 | 17 | Check out my other courses on [LinkedIn Learning](https://www.linkedin.com/learning/instructors/harshit-tyagi). 
18 | 19 | [lil-course-url]: https://www.linkedin.com/learning/pandas-code-challenges 20 | [lil-thumbnail-url]: https://cdn.lynda.com/course/3017222/3017222-1648236499141-16x9.jpg 21 | -------------------------------------------------------------------------------- /data/auto_mpg.csv: -------------------------------------------------------------------------------- 1 | 0,1,2,3,4,5,6,7 2 | 18.0,8,307.0,130.0,3504.0,12.0,70,India 3 | 15.0,8,350.0,165.0,3693.0,11.5,70,India 4 | 18.0,8,318.0,150.0,3436.0,11.0,70,India 5 | 16.0,8,304.0,150.0,3433.0,12.0,70,India 6 | 17.0,8,302.0,140.0,3449.0,10.5,70,India 7 | 15.0,8,429.0,198.0,4341.0,10.0,70,India 8 | 14.0,8,454.0,220.0,4354.0,9.0,70,India 9 | 14.0,8,440.0,215.0,4312.0,8.5,70,India 10 | 14.0,8,455.0,225.0,4425.0,10.0,70,India 11 | 15.0,8,390.0,190.0,3850.0,8.5,70,India 12 | 15.0,8,383.0,170.0,3563.0,10.0,70,India 13 | 14.0,8,340.0,160.0,3609.0,8.0,70,India 14 | 15.0,8,400.0,150.0,3761.0,9.5,70,India 15 | 14.0,8,455.0,225.0,3086.0,10.0,70,India 16 | 24.0,4,113.0,95.0,2372.0,15.0,70,Germany 17 | 22.0,6,198.0,95.0,2833.0,15.5,70,India 18 | 18.0,6,199.0,97.0,2774.0,15.5,70,India 19 | 21.0,6,200.0,85.0,2587.0,16.0,70,India 20 | 27.0,4,97.0,88.0,2130.0,14.5,70,Germany 21 | 26.0,4,97.0,46.0,1835.0,20.5,70,USA 22 | 25.0,4,110.0,87.0,2672.0,17.5,70,USA 23 | 24.0,4,107.0,90.0,2430.0,14.5,70,USA 24 | 25.0,4,104.0,95.0,2375.0,17.5,70,USA 25 | 26.0,4,121.0,113.0,2234.0,12.5,70,USA 26 | 21.0,6,199.0,90.0,2648.0,15.0,70,India 27 | 10.0,8,360.0,215.0,4615.0,14.0,70,India 28 | 10.0,8,307.0,200.0,4376.0,15.0,70,India 29 | 11.0,8,318.0,210.0,4382.0,13.5,70,India 30 | 9.0,8,304.0,193.0,4732.0,18.5,70,India 31 | 27.0,4,97.0,88.0,2130.0,14.5,71,Germany 32 | 28.0,4,140.0,90.0,2264.0,15.5,71,India 33 | 25.0,4,113.0,95.0,2228.0,14.0,71,Germany 34 | 25.0,4,98.0,,2046.0,19.0,71,India 35 | 19.0,6,232.0,100.0,2634.0,13.0,71,India 36 | 16.0,6,225.0,105.0,3439.0,15.5,71,India 37 | 17.0,6,250.0,100.0,3329.0,15.5,71,India 38 | 
19.0,6,250.0,88.0,3302.0,15.5,71,India 39 | 18.0,6,232.0,100.0,3288.0,15.5,71,India 40 | 14.0,8,350.0,165.0,4209.0,12.0,71,India 41 | 14.0,8,400.0,175.0,4464.0,11.5,71,India 42 | 14.0,8,351.0,153.0,4154.0,13.5,71,India 43 | 14.0,8,318.0,150.0,4096.0,13.0,71,India 44 | 12.0,8,383.0,180.0,4955.0,11.5,71,India 45 | 13.0,8,400.0,170.0,4746.0,12.0,71,India 46 | 13.0,8,400.0,175.0,5140.0,12.0,71,India 47 | 18.0,6,258.0,110.0,2962.0,13.5,71,India 48 | 22.0,4,140.0,72.0,2408.0,19.0,71,India 49 | 19.0,6,250.0,100.0,3282.0,15.0,71,India 50 | 18.0,6,250.0,88.0,3139.0,14.5,71,India 51 | 23.0,4,122.0,86.0,2220.0,14.0,71,India 52 | 28.0,4,116.0,90.0,2123.0,14.0,71,USA 53 | 30.0,4,79.0,70.0,2074.0,19.5,71,USA 54 | 30.0,4,88.0,76.0,2065.0,14.5,71,USA 55 | 31.0,4,71.0,65.0,1773.0,19.0,71,Germany 56 | 35.0,4,72.0,69.0,1613.0,18.0,71,Germany 57 | 27.0,4,97.0,60.0,1834.0,19.0,71,USA 58 | 26.0,4,91.0,70.0,1955.0,20.5,71,India 59 | 24.0,4,113.0,95.0,2278.0,15.5,72,Germany 60 | 25.0,4,97.5,80.0,2126.0,17.0,72,India 61 | 23.0,4,97.0,54.0,2254.0,23.5,72,USA 62 | 20.0,4,140.0,90.0,2408.0,19.5,72,India 63 | 21.0,4,122.0,86.0,2226.0,16.5,72,India 64 | 13.0,8,350.0,165.0,4274.0,12.0,72,India 65 | 14.0,8,400.0,175.0,4385.0,12.0,72,India 66 | 15.0,8,318.0,150.0,4135.0,13.5,72,India 67 | 14.0,8,351.0,153.0,4129.0,13.0,72,India 68 | 17.0,8,304.0,150.0,3672.0,11.5,72,India 69 | 11.0,8,429.0,208.0,4633.0,11.0,72,India 70 | 13.0,8,350.0,155.0,4502.0,13.5,72,India 71 | 12.0,8,350.0,160.0,4456.0,13.5,72,India 72 | 13.0,8,400.0,190.0,4422.0,12.5,72,India 73 | 19.0,3,70.0,97.0,2330.0,13.5,72,Germany 74 | 15.0,8,304.0,150.0,3892.0,12.5,72,India 75 | 13.0,8,307.0,130.0,4098.0,14.0,72,India 76 | 13.0,8,302.0,140.0,4294.0,16.0,72,India 77 | 14.0,8,318.0,150.0,4077.0,14.0,72,India 78 | 18.0,4,121.0,112.0,2933.0,14.5,72,USA 79 | 22.0,4,121.0,76.0,2511.0,18.0,72,USA 80 | 21.0,4,120.0,87.0,2979.0,19.5,72,USA 81 | 26.0,4,96.0,69.0,2189.0,18.0,72,USA 82 | 22.0,4,122.0,86.0,2395.0,16.0,72,India 83 | 
28.0,4,97.0,92.0,2288.0,17.0,72,Germany 84 | 23.0,4,120.0,97.0,2506.0,14.5,72,Germany 85 | 28.0,4,98.0,80.0,2164.0,15.0,72,India 86 | 27.0,4,97.0,88.0,2100.0,16.5,72,Germany 87 | 13.0,8,350.0,175.0,4100.0,13.0,73,India 88 | 14.0,8,304.0,150.0,3672.0,11.5,73,India 89 | 13.0,8,350.0,145.0,3988.0,13.0,73,India 90 | 14.0,8,302.0,137.0,4042.0,14.5,73,India 91 | 15.0,8,318.0,150.0,3777.0,12.5,73,India 92 | 12.0,8,429.0,198.0,4952.0,11.5,73,India 93 | 13.0,8,400.0,150.0,4464.0,12.0,73,India 94 | 13.0,8,351.0,158.0,4363.0,13.0,73,India 95 | 14.0,8,318.0,150.0,4237.0,14.5,73,India 96 | 13.0,8,440.0,215.0,4735.0,11.0,73,India 97 | 12.0,8,455.0,225.0,4951.0,11.0,73,India 98 | 13.0,8,360.0,175.0,3821.0,11.0,73,India 99 | 18.0,6,225.0,105.0,3121.0,16.5,73,India 100 | 16.0,6,250.0,100.0,3278.0,18.0,73,India 101 | 18.0,6,232.0,100.0,2945.0,16.0,73,India 102 | 18.0,6,250.0,88.0,3021.0,16.5,73,India 103 | 23.0,6,198.0,95.0,2904.0,16.0,73,India 104 | 26.0,4,97.0,46.0,1950.0,21.0,73,USA 105 | 11.0,8,400.0,150.0,4997.0,14.0,73,India 106 | 12.0,8,400.0,167.0,4906.0,12.5,73,India 107 | 13.0,8,360.0,170.0,4654.0,13.0,73,India 108 | 12.0,8,350.0,180.0,4499.0,12.5,73,India 109 | 18.0,6,232.0,100.0,2789.0,15.0,73,India 110 | 20.0,4,97.0,88.0,2279.0,19.0,73,Germany 111 | 21.0,4,140.0,72.0,2401.0,19.5,73,India 112 | 22.0,4,108.0,94.0,2379.0,16.5,73,Germany 113 | 18.0,3,70.0,90.0,2124.0,13.5,73,Germany 114 | 19.0,4,122.0,85.0,2310.0,18.5,73,India 115 | 21.0,6,155.0,107.0,2472.0,14.0,73,India 116 | 26.0,4,98.0,90.0,2265.0,15.5,73,USA 117 | 15.0,8,350.0,145.0,4082.0,13.0,73,India 118 | 16.0,8,400.0,230.0,4278.0,9.5,73,India 119 | 29.0,4,68.0,49.0,1867.0,19.5,73,USA 120 | 24.0,4,116.0,75.0,2158.0,15.5,73,USA 121 | 20.0,4,114.0,91.0,2582.0,14.0,73,USA 122 | 19.0,4,121.0,112.0,2868.0,15.5,73,USA 123 | 15.0,8,318.0,150.0,3399.0,11.0,73,India 124 | 24.0,4,121.0,110.0,2660.0,14.0,73,USA 125 | 20.0,6,156.0,122.0,2807.0,13.5,73,Germany 126 | 11.0,8,350.0,180.0,3664.0,11.0,73,India 127 | 
20.0,6,198.0,95.0,3102.0,16.5,74,India 128 | 21.0,6,200.0,,2875.0,17.0,74,India 129 | 19.0,6,232.0,100.0,2901.0,16.0,74,India 130 | 15.0,6,250.0,100.0,3336.0,17.0,74,India 131 | 31.0,4,79.0,67.0,1950.0,19.0,74,Germany 132 | 26.0,4,122.0,80.0,2451.0,16.5,74,India 133 | 32.0,4,71.0,65.0,1836.0,21.0,74,Germany 134 | 25.0,4,140.0,75.0,2542.0,17.0,74,India 135 | 16.0,6,250.0,100.0,3781.0,17.0,74,India 136 | 16.0,6,258.0,110.0,3632.0,18.0,74,India 137 | 18.0,6,225.0,105.0,3613.0,16.5,74,India 138 | 16.0,8,302.0,140.0,4141.0,14.0,74,India 139 | 13.0,8,350.0,150.0,4699.0,14.5,74,India 140 | 14.0,8,318.0,150.0,4457.0,13.5,74,India 141 | 14.0,8,302.0,140.0,4638.0,16.0,74,India 142 | 14.0,8,304.0,150.0,4257.0,15.5,74,India 143 | 29.0,4,98.0,83.0,2219.0,16.5,74,USA 144 | 26.0,4,79.0,67.0,1963.0,15.5,74,USA 145 | 26.0,4,97.0,78.0,2300.0,14.5,74,USA 146 | 31.0,4,76.0,52.0,1649.0,16.5,74,Germany 147 | 32.0,4,83.0,61.0,2003.0,19.0,74,Germany 148 | 28.0,4,90.0,75.0,2125.0,14.5,74,India 149 | 24.0,4,90.0,75.0,2108.0,15.5,74,USA 150 | 26.0,4,116.0,75.0,2246.0,14.0,74,USA 151 | 24.0,4,120.0,97.0,2489.0,15.0,74,Germany 152 | 26.0,4,108.0,93.0,2391.0,15.5,74,Germany 153 | 31.0,4,79.0,67.0,2000.0,16.0,74,USA 154 | 19.0,6,225.0,95.0,3264.0,16.0,75,India 155 | 18.0,6,250.0,105.0,3459.0,16.0,75,India 156 | 15.0,6,250.0,72.0,3432.0,21.0,75,India 157 | 15.0,6,250.0,72.0,3158.0,19.5,75,India 158 | 16.0,8,400.0,170.0,4668.0,11.5,75,India 159 | 15.0,8,350.0,145.0,4440.0,14.0,75,India 160 | 16.0,8,318.0,150.0,4498.0,14.5,75,India 161 | 14.0,8,351.0,148.0,4657.0,13.5,75,India 162 | 17.0,6,231.0,110.0,3907.0,21.0,75,India 163 | 16.0,6,250.0,105.0,3897.0,18.5,75,India 164 | 15.0,6,258.0,110.0,3730.0,19.0,75,India 165 | 18.0,6,225.0,95.0,3785.0,19.0,75,India 166 | 21.0,6,231.0,110.0,3039.0,15.0,75,India 167 | 20.0,8,262.0,110.0,3221.0,13.5,75,India 168 | 13.0,8,302.0,129.0,3169.0,12.0,75,India 169 | 29.0,4,97.0,75.0,2171.0,16.0,75,Germany 170 | 23.0,4,140.0,83.0,2639.0,17.0,75,India 171 | 
20.0,6,232.0,100.0,2914.0,16.0,75,India 172 | 23.0,4,140.0,78.0,2592.0,18.5,75,India 173 | 24.0,4,134.0,96.0,2702.0,13.5,75,Germany 174 | 25.0,4,90.0,71.0,2223.0,16.5,75,USA 175 | 24.0,4,119.0,97.0,2545.0,17.0,75,Germany 176 | 18.0,6,171.0,97.0,2984.0,14.5,75,India 177 | 29.0,4,90.0,70.0,1937.0,14.0,75,USA 178 | 19.0,6,232.0,90.0,3211.0,17.0,75,India 179 | 23.0,4,115.0,95.0,2694.0,15.0,75,USA 180 | 23.0,4,120.0,88.0,2957.0,17.0,75,USA 181 | 22.0,4,121.0,98.0,2945.0,14.5,75,USA 182 | 25.0,4,121.0,115.0,2671.0,13.5,75,USA 183 | 33.0,4,91.0,53.0,1795.0,17.5,75,Germany 184 | 28.0,4,107.0,86.0,2464.0,15.5,76,USA 185 | 25.0,4,116.0,81.0,2220.0,16.9,76,USA 186 | 25.0,4,140.0,92.0,2572.0,14.9,76,India 187 | 26.0,4,98.0,79.0,2255.0,17.7,76,India 188 | 27.0,4,101.0,83.0,2202.0,15.3,76,USA 189 | 17.5,8,305.0,140.0,4215.0,13.0,76,India 190 | 16.0,8,318.0,150.0,4190.0,13.0,76,India 191 | 15.5,8,304.0,120.0,3962.0,13.9,76,India 192 | 14.5,8,351.0,152.0,4215.0,12.8,76,India 193 | 22.0,6,225.0,100.0,3233.0,15.4,76,India 194 | 22.0,6,250.0,105.0,3353.0,14.5,76,India 195 | 24.0,6,200.0,81.0,3012.0,17.6,76,India 196 | 22.5,6,232.0,90.0,3085.0,17.6,76,India 197 | 29.0,4,85.0,52.0,2035.0,22.2,76,India 198 | 24.5,4,98.0,60.0,2164.0,22.1,76,India 199 | 29.0,4,90.0,70.0,1937.0,14.2,76,USA 200 | 33.0,4,91.0,53.0,1795.0,17.4,76,Germany 201 | 20.0,6,225.0,100.0,3651.0,17.7,76,India 202 | 18.0,6,250.0,78.0,3574.0,21.0,76,India 203 | 18.5,6,250.0,110.0,3645.0,16.2,76,India 204 | 17.5,6,258.0,95.0,3193.0,17.8,76,India 205 | 29.5,4,97.0,71.0,1825.0,12.2,76,USA 206 | 32.0,4,85.0,70.0,1990.0,17.0,76,Germany 207 | 28.0,4,97.0,75.0,2155.0,16.4,76,Germany 208 | 26.5,4,140.0,72.0,2565.0,13.6,76,India 209 | 20.0,4,130.0,102.0,3150.0,15.7,76,USA 210 | 13.0,8,318.0,150.0,3940.0,13.2,76,India 211 | 19.0,4,120.0,88.0,3270.0,21.9,76,USA 212 | 19.0,6,156.0,108.0,2930.0,15.5,76,Germany 213 | 16.5,6,168.0,120.0,3820.0,16.7,76,USA 214 | 16.5,8,350.0,180.0,4380.0,12.1,76,India 215 | 
13.0,8,350.0,145.0,4055.0,12.0,76,India 216 | 13.0,8,302.0,130.0,3870.0,15.0,76,India 217 | 13.0,8,318.0,150.0,3755.0,14.0,76,India 218 | 31.5,4,98.0,68.0,2045.0,18.5,77,Germany 219 | 30.0,4,111.0,80.0,2155.0,14.8,77,India 220 | 36.0,4,79.0,58.0,1825.0,18.6,77,USA 221 | 25.5,4,122.0,96.0,2300.0,15.5,77,India 222 | 33.5,4,85.0,70.0,1945.0,16.8,77,Germany 223 | 17.5,8,305.0,145.0,3880.0,12.5,77,India 224 | 17.0,8,260.0,110.0,4060.0,19.0,77,India 225 | 15.5,8,318.0,145.0,4140.0,13.7,77,India 226 | 15.0,8,302.0,130.0,4295.0,14.9,77,India 227 | 17.5,6,250.0,110.0,3520.0,16.4,77,India 228 | 20.5,6,231.0,105.0,3425.0,16.9,77,India 229 | 19.0,6,225.0,100.0,3630.0,17.7,77,India 230 | 18.5,6,250.0,98.0,3525.0,19.0,77,India 231 | 16.0,8,400.0,180.0,4220.0,11.1,77,India 232 | 15.5,8,350.0,170.0,4165.0,11.4,77,India 233 | 15.5,8,400.0,190.0,4325.0,12.2,77,India 234 | 16.0,8,351.0,149.0,4335.0,14.5,77,India 235 | 29.0,4,97.0,78.0,1940.0,14.5,77,USA 236 | 24.5,4,151.0,88.0,2740.0,16.0,77,India 237 | 26.0,4,97.0,75.0,2265.0,18.2,77,Germany 238 | 25.5,4,140.0,89.0,2755.0,15.8,77,India 239 | 30.5,4,98.0,63.0,2051.0,17.0,77,India 240 | 33.5,4,98.0,83.0,2075.0,15.9,77,India 241 | 30.0,4,97.0,67.0,1985.0,16.4,77,Germany 242 | 30.5,4,97.0,78.0,2190.0,14.1,77,USA 243 | 22.0,6,146.0,97.0,2815.0,14.5,77,Germany 244 | 21.5,4,121.0,110.0,2600.0,12.8,77,USA 245 | 21.5,3,80.0,110.0,2720.0,13.5,77,Germany 246 | 43.1,4,90.0,48.0,1985.0,21.5,78,USA 247 | 36.1,4,98.0,66.0,1800.0,14.4,78,India 248 | 32.8,4,78.0,52.0,1985.0,19.4,78,Germany 249 | 39.4,4,85.0,70.0,2070.0,18.6,78,Germany 250 | 36.1,4,91.0,60.0,1800.0,16.4,78,Germany 251 | 19.9,8,260.0,110.0,3365.0,15.5,78,India 252 | 19.4,8,318.0,140.0,3735.0,13.2,78,India 253 | 20.2,8,302.0,139.0,3570.0,12.8,78,India 254 | 19.2,6,231.0,105.0,3535.0,19.2,78,India 255 | 20.5,6,200.0,95.0,3155.0,18.2,78,India 256 | 20.2,6,200.0,85.0,2965.0,15.8,78,India 257 | 25.1,4,140.0,88.0,2720.0,15.4,78,India 258 | 20.5,6,225.0,100.0,3430.0,17.2,78,India 259 | 
19.4,6,232.0,90.0,3210.0,17.2,78,India 260 | 20.6,6,231.0,105.0,3380.0,15.8,78,India 261 | 20.8,6,200.0,85.0,3070.0,16.7,78,India 262 | 18.6,6,225.0,110.0,3620.0,18.7,78,India 263 | 18.1,6,258.0,120.0,3410.0,15.1,78,India 264 | 19.2,8,305.0,145.0,3425.0,13.2,78,India 265 | 17.7,6,231.0,165.0,3445.0,13.4,78,India 266 | 18.1,8,302.0,139.0,3205.0,11.2,78,India 267 | 17.5,8,318.0,140.0,4080.0,13.7,78,India 268 | 30.0,4,98.0,68.0,2155.0,16.5,78,India 269 | 27.5,4,134.0,95.0,2560.0,14.2,78,Germany 270 | 27.2,4,119.0,97.0,2300.0,14.7,78,Germany 271 | 30.9,4,105.0,75.0,2230.0,14.5,78,India 272 | 21.1,4,134.0,95.0,2515.0,14.8,78,Germany 273 | 23.2,4,156.0,105.0,2745.0,16.7,78,India 274 | 23.8,4,151.0,85.0,2855.0,17.6,78,India 275 | 23.9,4,119.0,97.0,2405.0,14.9,78,Germany 276 | 20.3,5,131.0,103.0,2830.0,15.9,78,USA 277 | 17.0,6,163.0,125.0,3140.0,13.6,78,USA 278 | 21.6,4,121.0,115.0,2795.0,15.7,78,USA 279 | 16.2,6,163.0,133.0,3410.0,15.8,78,USA 280 | 31.5,4,89.0,71.0,1990.0,14.9,78,USA 281 | 29.5,4,98.0,68.0,2135.0,16.6,78,Germany 282 | 21.5,6,231.0,115.0,3245.0,15.4,79,India 283 | 19.8,6,200.0,85.0,2990.0,18.2,79,India 284 | 22.3,4,140.0,88.0,2890.0,17.3,79,India 285 | 20.2,6,232.0,90.0,3265.0,18.2,79,India 286 | 20.6,6,225.0,110.0,3360.0,16.6,79,India 287 | 17.0,8,305.0,130.0,3840.0,15.4,79,India 288 | 17.6,8,302.0,129.0,3725.0,13.4,79,India 289 | 16.5,8,351.0,138.0,3955.0,13.2,79,India 290 | 18.2,8,318.0,135.0,3830.0,15.2,79,India 291 | 16.9,8,350.0,155.0,4360.0,14.9,79,India 292 | 15.5,8,351.0,142.0,4054.0,14.3,79,India 293 | 19.2,8,267.0,125.0,3605.0,15.0,79,India 294 | 18.5,8,360.0,150.0,3940.0,13.0,79,India 295 | 31.9,4,89.0,71.0,1925.0,14.0,79,USA 296 | 34.1,4,86.0,65.0,1975.0,15.2,79,Germany 297 | 35.7,4,98.0,80.0,1915.0,14.4,79,India 298 | 27.4,4,121.0,80.0,2670.0,15.0,79,India 299 | 25.4,5,183.0,77.0,3530.0,20.1,79,USA 300 | 23.0,8,350.0,125.0,3900.0,17.4,79,India 301 | 27.2,4,141.0,71.0,3190.0,24.8,79,USA 302 | 23.9,8,260.0,90.0,3420.0,22.2,79,India 303 | 
34.2,4,105.0,70.0,2200.0,13.2,79,India 304 | 34.5,4,105.0,70.0,2150.0,14.9,79,India 305 | 31.8,4,85.0,65.0,2020.0,19.2,79,Germany 306 | 37.3,4,91.0,69.0,2130.0,14.7,79,USA 307 | 28.4,4,151.0,90.0,2670.0,16.0,79,India 308 | 28.8,6,173.0,115.0,2595.0,11.3,79,India 309 | 26.8,6,173.0,115.0,2700.0,12.9,79,India 310 | 33.5,4,151.0,90.0,2556.0,13.2,79,India 311 | 41.5,4,98.0,76.0,2144.0,14.7,80,USA 312 | 38.1,4,89.0,60.0,1968.0,18.8,80,Germany 313 | 32.1,4,98.0,70.0,2120.0,15.5,80,India 314 | 37.2,4,86.0,65.0,2019.0,16.4,80,Germany 315 | 28.0,4,151.0,90.0,2678.0,16.5,80,India 316 | 26.4,4,140.0,88.0,2870.0,18.1,80,India 317 | 24.3,4,151.0,90.0,3003.0,20.1,80,India 318 | 19.1,6,225.0,90.0,3381.0,18.7,80,India 319 | 34.3,4,97.0,78.0,2188.0,15.8,80,USA 320 | 29.8,4,134.0,90.0,2711.0,15.5,80,Germany 321 | 31.3,4,120.0,75.0,2542.0,17.5,80,Germany 322 | 37.0,4,119.0,92.0,2434.0,15.0,80,Germany 323 | 32.2,4,108.0,75.0,2265.0,15.2,80,Germany 324 | 46.6,4,86.0,65.0,2110.0,17.9,80,Germany 325 | 27.9,4,156.0,105.0,2800.0,14.4,80,India 326 | 40.8,4,85.0,65.0,2110.0,19.2,80,Germany 327 | 44.3,4,90.0,48.0,2085.0,21.7,80,USA 328 | 43.4,4,90.0,48.0,2335.0,23.7,80,USA 329 | 36.4,5,121.0,67.0,2950.0,19.9,80,USA 330 | 30.0,4,146.0,67.0,3250.0,21.8,80,USA 331 | 44.6,4,91.0,67.0,1850.0,13.8,80,Germany 332 | 40.9,4,85.0,,1835.0,17.3,80,USA 333 | 33.8,4,97.0,67.0,2145.0,18.0,80,Germany 334 | 29.8,4,89.0,62.0,1845.0,15.3,80,USA 335 | 32.7,6,168.0,132.0,2910.0,11.4,80,Germany 336 | 23.7,3,70.0,100.0,2420.0,12.5,80,Germany 337 | 35.0,4,122.0,88.0,2500.0,15.1,80,USA 338 | 23.6,4,140.0,,2905.0,14.3,80,India 339 | 32.4,4,107.0,72.0,2290.0,17.0,80,Germany 340 | 27.2,4,135.0,84.0,2490.0,15.7,81,India 341 | 26.6,4,151.0,84.0,2635.0,16.4,81,India 342 | 25.8,4,156.0,92.0,2620.0,14.4,81,India 343 | 23.5,6,173.0,110.0,2725.0,12.6,81,India 344 | 30.0,4,135.0,84.0,2385.0,12.9,81,India 345 | 39.1,4,79.0,58.0,1755.0,16.9,81,Germany 346 | 39.0,4,86.0,64.0,1875.0,16.4,81,India 347 | 
35.1,4,81.0,60.0,1760.0,16.1,81,Germany 348 | 32.3,4,97.0,67.0,2065.0,17.8,81,Germany 349 | 37.0,4,85.0,65.0,1975.0,19.4,81,Germany 350 | 37.7,4,89.0,62.0,2050.0,17.3,81,Germany 351 | 34.1,4,91.0,68.0,1985.0,16.0,81,Germany 352 | 34.7,4,105.0,63.0,2215.0,14.9,81,India 353 | 34.4,4,98.0,65.0,2045.0,16.2,81,India 354 | 29.9,4,98.0,65.0,2380.0,20.7,81,India 355 | 33.0,4,105.0,74.0,2190.0,14.2,81,USA 356 | 34.5,4,100.0,,2320.0,15.8,81,USA 357 | 33.7,4,107.0,75.0,2210.0,14.4,81,Germany 358 | 32.4,4,108.0,75.0,2350.0,16.8,81,Germany 359 | 32.9,4,119.0,100.0,2615.0,14.8,81,Germany 360 | 31.6,4,120.0,74.0,2635.0,18.3,81,Germany 361 | 28.1,4,141.0,80.0,3230.0,20.4,81,USA 362 | 30.7,6,145.0,76.0,3160.0,19.6,81,USA 363 | 25.4,6,168.0,116.0,2900.0,12.6,81,Germany 364 | 24.2,6,146.0,120.0,2930.0,13.8,81,Germany 365 | 22.4,6,231.0,110.0,3415.0,15.8,81,India 366 | 26.6,8,350.0,105.0,3725.0,19.0,81,India 367 | 20.2,6,200.0,88.0,3060.0,17.1,81,India 368 | 17.6,6,225.0,85.0,3465.0,16.6,81,India 369 | 28.0,4,112.0,88.0,2605.0,19.6,82,India 370 | 27.0,4,112.0,88.0,2640.0,18.6,82,India 371 | 34.0,4,112.0,88.0,2395.0,18.0,82,India 372 | 31.0,4,112.0,85.0,2575.0,16.2,82,India 373 | 29.0,4,135.0,84.0,2525.0,16.0,82,India 374 | 27.0,4,151.0,90.0,2735.0,18.0,82,India 375 | 24.0,4,140.0,92.0,2865.0,16.4,82,India 376 | 23.0,4,151.0,,3035.0,20.5,82,India 377 | 36.0,4,105.0,74.0,1980.0,15.3,82,USA 378 | 37.0,4,91.0,68.0,2025.0,18.2,82,Germany 379 | 31.0,4,91.0,68.0,1970.0,17.6,82,Germany 380 | 38.0,4,105.0,63.0,2125.0,14.7,82,India 381 | 36.0,4,98.0,70.0,2125.0,17.3,82,India 382 | 36.0,4,120.0,88.0,2160.0,14.5,82,Germany 383 | 36.0,4,107.0,75.0,2205.0,14.5,82,Germany 384 | 34.0,4,108.0,70.0,2245.0,16.9,82,Germany 385 | 38.0,4,91.0,67.0,1965.0,15.0,82,Germany 386 | 32.0,4,91.0,67.0,1965.0,15.7,82,Germany 387 | 38.0,4,91.0,67.0,1995.0,16.2,82,Germany 388 | 25.0,6,181.0,110.0,2945.0,16.4,82,India 389 | 38.0,6,262.0,85.0,3015.0,17.0,82,India 390 | 26.0,4,156.0,92.0,2585.0,14.5,82,India 391 | 
22.0,6,232.0,112.0,2835.0,14.7,82,India 392 | 32.0,4,144.0,96.0,2665.0,13.9,82,Germany 393 | 36.0,4,135.0,84.0,2370.0,13.0,82,India 394 | 27.0,4,151.0,90.0,2950.0,17.3,82,India 395 | 27.0,4,140.0,86.0,2790.0,15.6,82,India 396 | 44.0,4,97.0,52.0,2130.0,24.6,82,USA 397 | 32.0,4,135.0,84.0,2295.0,11.6,82,India 398 | 28.0,4,120.0,79.0,2625.0,18.6,82,India 399 | 31.0,4,119.0,82.0,2720.0,19.4,82,India 400 | --------------------------------------------------------------------------------