├── .devcontainer
    └── devcontainer.json
├── .github
    ├── CODEOWNERS
    ├── ISSUE_TEMPLATE.md
    ├── PULL_REQUEST_TEMPLATE.md
    └── workflows
    │   └── main.yml
├── .gitignore
├── .vscode
    └── settings.json
├── CONTRIBUTING.md
├── LICENSE
├── NOTICE
├── README.md
├── data
    ├── Superstore-Sales.csv
    ├── iris.csv
    ├── iris.data.csv
    ├── mtcars.csv
    └── test
├── notebooks
    ├── 02_03b.ipynb
    ├── 02_03e.ipynb
    ├── 02_04b.ipynb
    ├── 02_04e.ipynb
    ├── 02_05b.ipynb
    ├── 02_05e.ipynb
    ├── 02_06b.ipynb
    ├── 02_06e.ipynb
    ├── 02_07b.ipynb
    ├── 02_07e.ipynb
    ├── 04_01b.ipynb
    ├── 04_01e.ipynb
    ├── 04_02b.ipynb
    ├── 04_02e.ipynb
    ├── 04_03b.ipynb
    ├── 04_03e.ipynb
    ├── 04_04b.ipynb
    ├── 04_04e.ipynb
    ├── 04_05b.ipynb
    ├── 04_05e.ipynb
    ├── 04_06b.ipynb
    ├── 04_06e.ipynb
    ├── 04_07b.ipynb
    ├── 04_07e.ipynb
    ├── 05_01b.ipynb
    ├── 05_01e.ipynb
    ├── 05_02b.ipynb
    ├── 05_02e.ipynb
    ├── 05_03b.ipynb
    ├── 05_03e.ipynb
    ├── 05_04b.ipynb
    ├── 05_04e.ipynb
    ├── 05_05b.ipynb
    ├── 05_05e.ipynb
    ├── 05_06b.ipynb
    ├── 05_06e.ipynb
    ├── 05_07b.ipynb
    ├── 05_07e.ipynb
    ├── 06_01b.ipynb
    ├── 06_01e.ipynb
    ├── 06_02b.ipynb
    ├── 06_02e.ipynb
    ├── 06_03b.ipynb
    ├── 06_03e.ipynb
    ├── 07_02b.ipynb
    ├── 07_02e.ipynb
    ├── 07_03b.ipynb
    ├── 07_03e.ipynb
    ├── 07_04b.ipynb
    ├── 07_04e.ipynb
    ├── 07_05b.ipynb
    ├── 07_05e.ipynb
    ├── 07_06b.ipynb
    ├── 07_06e.ipynb
    ├── 07_07b.ipynb
    ├── 07_07e.ipynb
    ├── csv_file
    ├── parsed_data.txt
    ├── pie_chart.png
    └── test
├── requirements.txt
├── streamlit
    ├── 08_02b.py
    ├── 08_02e.py
    ├── 08_03b.py
    ├── 08_03e.py
    ├── 08_04b.py
    ├── 08_04e.py
    ├── 08_05b.py
    ├── 08_05e.py
    ├── 08_06b.py
    └── 08_06e.py
└── test


/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "extensions": [
 3 |     "GitHub.github-vscode-theme",
 4 |     "ms-toolsai.jupyter",
 5 |     "ms-python.python"
 6 |     // Additional Extensions Here
 7 |   ],
 8 |   "onCreateCommand" : "[ -f requirements.txt ] && pip install -r requirements.txt; echo PS1='\"$ \"' >> ~/.bashrc", //Set Terminal Prompt to $
 9 | }
10 | 
11 | // DevContainer Reference: https://code.visualstudio.com/docs/remote/devcontainerjson-reference
12 | 


--------------------------------------------------------------------------------
/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | # Codeowners for these exercise files:
2 | # * (asterisk) denotes "all files and folders"
3 | # Example: * @producer @instructor
4 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | <!--
 2 | BEFORE POSTING YOUR ISSUE:
 3 | - These comments won't show up when you submit the issue.
 4 | - Please use the sections below to provide information about the issue.
 5 | - Be specific: Add as much detail as possible.
 6 | -->
 7 | 
 8 | ## Issue Overview
 9 | <!-- A brief overview of the issue -->
10 | 
11 | ## Describe your environment
12 | <!-- Provide details about your environment: what editor, browser, and other software you are using and any other specifics to your setup -->
13 | 
14 | ## Steps to Reproduce
15 | <!-- Provide an unambiguous set of steps to reproduce this bug. Include code to reproduce, if relevant. Include a live link if available. -->
16 | 1.
17 | 2.
18 | 3.
19 | 4.
20 | 
21 | ## Expected Behavior
22 | <!-- What behavior did you expect? -->
23 | 
24 | ## Current Behavior
25 | <!-- What happened instead of the expected behavior? Describe the difference. -->
26 | 
27 | ## Possible Solution
28 | <!-- Optional: Do you have a fix or a suggestion on how to fix the issue? -->
29 | 
30 | ## Screenshots / Video
31 | <!-- Optional: Add any screenshots or video of the issue if available. -->
32 | 
33 | ## Related Issues
34 | <!-- List related issues -->
35 | 


--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | <!-- This repository *does not* accept pull requests (PRs). All pull requests will be closed. See CONTRIBUTING.md for further details. -->
2 | 


--------------------------------------------------------------------------------
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
 1 | name: Copy To Branches
 2 | on:
 3 |   workflow_dispatch:
 4 | jobs:
 5 |   copy-to-branches:
 6 |     runs-on: ubuntu-latest
 7 |     steps:
 8 |       - uses: actions/checkout@v2
 9 |         with:
10 |           fetch-depth: 0
11 |       - name: Copy To Branches Action
12 |         uses: planetoftheweb/copy-to-branches@v1.2
13 |         env:
14 |           key: main
15 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | node_modules
3 | .tmp
4 | npm-debug.log
5 | 


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "editor.bracketPairColorization.enabled": true,
 3 |   "editor.cursorBlinking": "solid",
 4 |   "editor.fontFamily": "ui-monospace, Menlo, Monaco, 'Cascadia Mono', 'Segoe UI Mono', 'Roboto Mono', 'Oxygen Mono', 'Ubuntu Monospace', 'Source Code Pro', 'Fira Mono', 'Droid Sans Mono', 'Courier New', monospace",
 5 |   "editor.fontLigatures": false,
 6 |   "editor.fontSize": 22,
 7 |   "editor.formatOnPaste": true,
 8 |   "editor.formatOnSave": true,
 9 |   "editor.lineNumbers": "on",
10 |   "editor.matchBrackets": "always",
11 |   "editor.minimap.enabled": false,
12 |   "editor.smoothScrolling": true,
13 |   "editor.tabSize": 2,
14 |   "editor.useTabStops": true,
15 |   "emmet.triggerExpansionOnTab": true,
16 |   "explorer.openEditors.visible": 0,
17 |   "files.autoSave": "afterDelay",
18 |   "screencastMode.onlyKeyboardShortcuts": true,
19 |   "terminal.integrated.fontSize": 18,
20 |   "workbench.colorTheme": "Visual Studio Dark",
21 |   "workbench.fontAliasing": "antialiased",
22 |   "workbench.statusBar.visible": true
23 | }
24 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | 
2 | Contribution Agreement
3 | ======================
4 | 
5 | This repository does not accept pull requests (PRs). All pull requests will be closed.
6 | 
7 | However, if any contributions (through pull requests, issues, feedback or otherwise) are provided, as a contributor, you represent that the code you submit is your original work or that of your employer (in which case you represent you have the right to bind your employer). By submitting code (or otherwise providing feedback), you (and, if applicable, your employer) are licensing the submitted code (and/or feedback) to LinkedIn and the open source community subject to the BSD 2-Clause license.
8 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | LinkedIn Learning Exercise Files License Agreement
  2 | ==================================================
  3 | 
  4 | This License Agreement (the "Agreement") is a binding legal agreement
  5 | between you (as an individual or entity, as applicable) and LinkedIn
  6 | Corporation (“LinkedIn”). By downloading or using the LinkedIn Learning
  7 | exercise files in this repository (“Licensed Materials”), you agree to
  8 | be bound by the terms of this Agreement. If you do not agree to these
  9 | terms, do not download or use the Licensed Materials. 
 10 | 
 11 | 1. License.
 12 | - a. Subject to the terms of this Agreement, LinkedIn hereby grants LinkedIn
 13 | members during their LinkedIn Learning subscription a non-exclusive,
 14 | non-transferable copyright license, for internal use only, to 1) make a
 15 | reasonable number of copies of the Licensed Materials, and 2) make
 16 | derivative works of the Licensed Materials for the sole purpose of
 17 | practicing skills taught in LinkedIn Learning courses.
 18 | - b. Distribution. Unless otherwise noted in the Licensed Materials, subject
 19 | to the terms of this Agreement, LinkedIn hereby grants LinkedIn members
 20 | with a LinkedIn Learning subscription a non-exclusive, non-transferable
 21 | copyright license to distribute the Licensed Materials, except the
 22 | Licensed Materials may not be included in any product or service (or
 23 | otherwise used) to instruct or educate others.
 24 | 
 25 | 2. Restrictions and Intellectual Property. 
 26 | - a. You may not use, modify, copy, make derivative works of, publish,
 27 | distribute, rent, lease, sell, sublicense, assign or otherwise transfer the
 28 | Licensed Materials, except as expressly set forth above in Section 1. 
 29 | - b. LinkedIn (and its licensors) retains its intellectual property rights
 30 | in the Licensed Materials. Except as expressly set forth in Section 1,
 31 | LinkedIn grants no licenses.
 32 | - c. You indemnify LinkedIn and its licensors and affiliates for i) any
 33 | alleged infringement or misappropriation of any intellectual property rights
 34 | of any third party based on modifications you make to the Licensed Materials,
 35 | ii) any claims arising from your use or distribution of all or part of the
 36 | Licensed Materials and iii) a breach of this Agreement. You will defend, hold
 37 | harmless, and indemnify LinkedIn and its affiliates (and our and their
 38 | respective employees, shareholders, and directors) from any claim or action
 39 | brought by a third party, including all damages, liabilities, costs and
 40 | expenses, including reasonable attorneys’ fees, to the extent resulting from,
 41 | alleged to have resulted from, or in connection with: (a) your breach of your
 42 | obligations herein; or (b) your use or distribution of any Licensed Materials.
 43 | 
 44 | 3. Open source. This code may include open source software, which may be
 45 | subject to other license terms as provided in the files. 
 46 |  
 47 | 4. Warranty Disclaimer. LINKEDIN PROVIDES THE LICENSED MATERIALS ON AN “AS IS”
 48 | AND “AS AVAILABLE” BASIS. LINKEDIN MAKES NO REPRESENTATION OR WARRANTY,
 49 | WHETHER EXPRESS OR IMPLIED, ABOUT THE LICENSED MATERIALS, INCLUDING ANY
 50 | REPRESENTATION THAT THE LICENSED MATERIALS WILL BE FREE OF ERRORS, BUGS OR
 51 | INTERRUPTIONS, OR THAT THE LICENSED MATERIALS ARE ACCURATE, COMPLETE OR
 52 | OTHERWISE VALID. TO THE FULLEST EXTENT PERMITTED BY LAW, LINKEDIN AND ITS
 53 | AFFILIATES DISCLAIM ANY IMPLIED OR STATUTORY WARRANTY OR CONDITION, INCLUDING
 54 | ANY IMPLIED WARRANTY OR CONDITION OF MERCHANTABILITY OR FITNESS FOR A
 55 | PARTICULAR PURPOSE, AVAILABILITY, SECURITY, TITLE AND/OR NON-INFRINGEMENT.
 56 | YOUR USE OF THE LICENSED MATERIALS IS AT YOUR OWN DISCRETION AND RISK, AND
 57 | YOU WILL BE SOLELY RESPONSIBLE FOR ANY DAMAGE THAT RESULTS FROM USE OF THE
 58 | LICENSED MATERIALS TO YOUR COMPUTER SYSTEM OR LOSS OF DATA.  NO ADVICE OR
 59 | INFORMATION, WHETHER ORAL OR WRITTEN, OBTAINED BY YOU FROM US OR THROUGH OR
 60 | FROM THE LICENSED MATERIALS WILL CREATE ANY WARRANTY OR CONDITION NOT
 61 | EXPRESSLY STATED IN THESE TERMS.
 62 | 
 63 | 5. Limitation of Liability. LINKEDIN SHALL NOT BE LIABLE FOR ANY INDIRECT,
 64 | INCIDENTAL, SPECIAL, PUNITIVE, CONSEQUENTIAL OR EXEMPLARY DAMAGES, INCLUDING
 65 | BUT NOT LIMITED TO, DAMAGES FOR LOSS OF PROFITS, GOODWILL, USE, DATA OR OTHER
 66 | INTANGIBLE LOSSES. IN NO EVENT WILL LINKEDIN'S AGGREGATE LIABILITY TO YOU
 67 | EXCEED $100. THIS LIMITATION OF LIABILITY SHALL:
 68 | - i. APPLY REGARDLESS OF WHETHER (A) YOU BASE YOUR CLAIM ON CONTRACT, TORT,
 69 | STATUTE, OR ANY OTHER LEGAL THEORY, (B) WE KNEW OR SHOULD HAVE KNOWN ABOUT
 70 | THE POSSIBILITY OF SUCH DAMAGES, OR (C) THE LIMITED REMEDIES PROVIDED IN THIS
 71 | SECTION FAIL OF THEIR ESSENTIAL PURPOSE; AND
 72 | - ii. NOT APPLY TO ANY DAMAGE THAT LINKEDIN MAY CAUSE YOU INTENTIONALLY OR
 73 | KNOWINGLY IN VIOLATION OF THESE TERMS OR APPLICABLE LAW, OR AS OTHERWISE
 74 | MANDATED BY APPLICABLE LAW THAT CANNOT BE DISCLAIMED IN THESE TERMS.
 75 | 
 76 | 6. Termination. This Agreement automatically terminates upon your breach of
 77 | this Agreement or termination of your LinkedIn Learning subscription. On
 78 | termination, all licenses granted under this Agreement will terminate
 79 | immediately and you will delete the Licensed Materials. Sections 2-7 of this
 80 | Agreement survive any termination of this Agreement. LinkedIn may discontinue
 81 | the availability of some or all of the Licensed Materials at any time for any
 82 | reason.
 83 | 
 84 | 7. Miscellaneous. This Agreement will be governed by and construed in
 85 | accordance with the laws of the State of California without regard to conflict
 86 | of laws principles. The exclusive forum for any disputes arising out of or
 87 | relating to this Agreement shall be an appropriate federal or state court
 88 | sitting in the County of Santa Clara, State of California. If LinkedIn does
 89 | not act to enforce a breach of this Agreement, that does not mean that
 90 | LinkedIn has waived its right to enforce this Agreement. The Agreement does
 91 | not create a partnership, agency relationship, or joint venture between the
 92 | parties.  Neither party has the power or authority to bind the other or to
 93 | create any obligation or responsibility on behalf of the other. You may not,
 94 | without LinkedIn’s prior written consent, assign or delegate any rights or
 95 | obligations under these terms, including in connection with a change of
 96 | control. Any purported assignment and delegation shall be ineffective. The
 97 | Agreement shall bind and inure to the benefit of the parties, their respective
 98 | successors and permitted assigns. If any provision of the Agreement is
 99 | unenforceable, that provision will be modified to render it enforceable to the
100 | extent possible to give effect to the parties’ intentions and the remaining
101 | provisions will not be affected. This Agreement is the only agreement between
102 | you and LinkedIn regarding the Licensed Materials, and supersedes all prior
103 | agreements relating to the Licensed Materials.  
104 | 
105 | Last Updated: March 2019
106 | 


--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
 1 | Copyright 2024 LinkedIn Corporation
 2 | All Rights Reserved.
 3 | 
 4 | Licensed under the LinkedIn Learning Exercise File License (the "License").
 5 | See LICENSE in the project root for license information.
 6 | 
 7 | Please note, this project may automatically load third party code from external 
 8 | repositories (for example, NPM modules, Composer packages, or other dependencies). 
 9 | If so, such third party code may be subject to other license terms than as set 
10 | forth above. In addition, such third party code may also depend on and load 
11 | multiple tiers of dependencies. Please review the applicable licenses of the 
12 | additional dependencies.
13 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Python for Data Science and Machine Learning Essential Training
 2 | This is the repository for the LinkedIn Learning course Python for Data Science and Machine Learning Essential Training. The full course is available from [LinkedIn Learning][lil-course-url].
 3 | 
 4 | ![lil-thumbnail-url]
 5 | 
 6 | Python for Data Science and Machine Learning Essential Training is one of the most popular data science courses at LinkedIn Learning. It has now been updated and expanded to two parts—giving you even more hands-on, real-world Python experience. In part one, instructor Lillian Pierson takes you step by step through a data science and machine learning project: a web scraper that downloads and analyzes data from the web. Along the way, she introduces techniques to clean, reformat, transform, and describe raw data; generate visualizations; remove outliers; perform simple data analysis; and generate web-based graphs using Streamlit. By the end of this course, you'll have acquired basic coding experience that you can take to your organization and quickly apply to your own custom data science and machine learning projects.
 7 | 
 8 | This course is integrated with GitHub Codespaces, an instant cloud developer environment that offers all the functionality of your favorite IDE without the need for any local machine setup. With GitHub Codespaces, you can get hands-on practice from any machine, at any time—all while using a tool that you'll likely encounter in the workplace. Check out the "Using GitHub Codespaces with this course" video to learn how to get started.
 9 | 
10 | ### Instructor
11 | 
12 | Lillian Pierson, P.E.
13 | 
14 | Engineer, CEO, and Head of Product at Data-Mania
15 | 
16 |                             
17 | 
18 | Check out my other courses on [LinkedIn Learning](https://www.linkedin.com/learning/instructors/lillian-pierson-p-e?u=104).
19 | 
20 | [0]: # (Replace these placeholder URLs with actual course URLs)
21 | 
22 | [lil-course-url]: https://www.linkedin.com/learning/python-for-data-science-and-machine-learning-essential-training-part-1
23 | [lil-thumbnail-url]: https://media.licdn.com/dms/image/D560DAQHjOZJ6XUrh3Q/learning-public-crop_675_1200/0/1709944557924?e=2147483647&v=beta&t=M7Tpw3XXS2hMAb2QXpJ73m7Bp6awqE82A8jReKHbkPk
24 | 
25 | 


--------------------------------------------------------------------------------
/data/Superstore-Sales.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LinkedInLearning/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/ba4a06149dcd9e5592eae5c662a97787aab43880/data/Superstore-Sales.csv


--------------------------------------------------------------------------------
/data/iris.csv:
--------------------------------------------------------------------------------
  1 | "","Sepal.Length","Sepal.Width","Petal.Length","Petal.Width","Species"
  2 | "1",5.1,3.5,1.4,0.2,"setosa"
  3 | "2",4.9,3,1.4,0.2,"setosa"
  4 | "3",4.7,3.2,1.3,0.2,"setosa"
  5 | "4",4.6,3.1,1.5,0.2,"setosa"
  6 | "5",5,3.6,1.4,0.2,"setosa"
  7 | "6",5.4,3.9,1.7,0.4,"setosa"
  8 | "7",4.6,3.4,1.4,0.3,"setosa"
  9 | "8",5,3.4,1.5,0.2,"setosa"
 10 | "9",4.4,2.9,1.4,0.2,"setosa"
 11 | "10",4.9,3.1,1.5,0.1,"setosa"
 12 | "11",5.4,3.7,1.5,0.2,"setosa"
 13 | "12",4.8,3.4,1.6,0.2,"setosa"
 14 | "13",4.8,3,1.4,0.1,"setosa"
 15 | "14",4.3,3,1.1,0.1,"setosa"
 16 | "15",5.8,4,1.2,0.2,"setosa"
 17 | "16",5.7,4.4,1.5,0.4,"setosa"
 18 | "17",5.4,3.9,1.3,0.4,"setosa"
 19 | "18",5.1,3.5,1.4,0.3,"setosa"
 20 | "19",5.7,3.8,1.7,0.3,"setosa"
 21 | "20",5.1,3.8,1.5,0.3,"setosa"
 22 | "21",5.4,3.4,1.7,0.2,"setosa"
 23 | "22",5.1,3.7,1.5,0.4,"setosa"
 24 | "23",4.6,3.6,1,0.2,"setosa"
 25 | "24",5.1,3.3,1.7,0.5,"setosa"
 26 | "25",4.8,3.4,1.9,0.2,"setosa"
 27 | "26",5,3,1.6,0.2,"setosa"
 28 | "27",5,3.4,1.6,0.4,"setosa"
 29 | "28",5.2,3.5,1.5,0.2,"setosa"
 30 | "29",5.2,3.4,1.4,0.2,"setosa"
 31 | "30",4.7,3.2,1.6,0.2,"setosa"
 32 | "31",4.8,3.1,1.6,0.2,"setosa"
 33 | "32",5.4,3.4,1.5,0.4,"setosa"
 34 | "33",5.2,4.1,1.5,0.1,"setosa"
 35 | "34",5.5,4.2,1.4,0.2,"setosa"
 36 | "35",4.9,3.1,1.5,0.2,"setosa"
 37 | "36",5,3.2,1.2,0.2,"setosa"
 38 | "37",5.5,3.5,1.3,0.2,"setosa"
 39 | "38",4.9,3.6,1.4,0.1,"setosa"
 40 | "39",4.4,3,1.3,0.2,"setosa"
 41 | "40",5.1,3.4,1.5,0.2,"setosa"
 42 | "41",5,3.5,1.3,0.3,"setosa"
 43 | "42",4.5,2.3,1.3,0.3,"setosa"
 44 | "43",4.4,3.2,1.3,0.2,"setosa"
 45 | "44",5,3.5,1.6,0.6,"setosa"
 46 | "45",5.1,3.8,1.9,0.4,"setosa"
 47 | "46",4.8,3,1.4,0.3,"setosa"
 48 | "47",5.1,3.8,1.6,0.2,"setosa"
 49 | "48",4.6,3.2,1.4,0.2,"setosa"
 50 | "49",5.3,3.7,1.5,0.2,"setosa"
 51 | "50",5,3.3,1.4,0.2,"setosa"
 52 | "51",7,3.2,4.7,1.4,"versicolor"
 53 | "52",6.4,3.2,4.5,1.5,"versicolor"
 54 | "53",6.9,3.1,4.9,1.5,"versicolor"
 55 | "54",5.5,2.3,4,1.3,"versicolor"
 56 | "55",6.5,2.8,4.6,1.5,"versicolor"
 57 | "56",5.7,2.8,4.5,1.3,"versicolor"
 58 | "57",6.3,3.3,4.7,1.6,"versicolor"
 59 | "58",4.9,2.4,3.3,1,"versicolor"
 60 | "59",6.6,2.9,4.6,1.3,"versicolor"
 61 | "60",5.2,2.7,3.9,1.4,"versicolor"
 62 | "61",5,2,3.5,1,"versicolor"
 63 | "62",5.9,3,4.2,1.5,"versicolor"
 64 | "63",6,2.2,4,1,"versicolor"
 65 | "64",6.1,2.9,4.7,1.4,"versicolor"
 66 | "65",5.6,2.9,3.6,1.3,"versicolor"
 67 | "66",6.7,3.1,4.4,1.4,"versicolor"
 68 | "67",5.6,3,4.5,1.5,"versicolor"
 69 | "68",5.8,2.7,4.1,1,"versicolor"
 70 | "69",6.2,2.2,4.5,1.5,"versicolor"
 71 | "70",5.6,2.5,3.9,1.1,"versicolor"
 72 | "71",5.9,3.2,4.8,1.8,"versicolor"
 73 | "72",6.1,2.8,4,1.3,"versicolor"
 74 | "73",6.3,2.5,4.9,1.5,"versicolor"
 75 | "74",6.1,2.8,4.7,1.2,"versicolor"
 76 | "75",6.4,2.9,4.3,1.3,"versicolor"
 77 | "76",6.6,3,4.4,1.4,"versicolor"
 78 | "77",6.8,2.8,4.8,1.4,"versicolor"
 79 | "78",6.7,3,5,1.7,"versicolor"
 80 | "79",6,2.9,4.5,1.5,"versicolor"
 81 | "80",5.7,2.6,3.5,1,"versicolor"
 82 | "81",5.5,2.4,3.8,1.1,"versicolor"
 83 | "82",5.5,2.4,3.7,1,"versicolor"
 84 | "83",5.8,2.7,3.9,1.2,"versicolor"
 85 | "84",6,2.7,5.1,1.6,"versicolor"
 86 | "85",5.4,3,4.5,1.5,"versicolor"
 87 | "86",6,3.4,4.5,1.6,"versicolor"
 88 | "87",6.7,3.1,4.7,1.5,"versicolor"
 89 | "88",6.3,2.3,4.4,1.3,"versicolor"
 90 | "89",5.6,3,4.1,1.3,"versicolor"
 91 | "90",5.5,2.5,4,1.3,"versicolor"
 92 | "91",5.5,2.6,4.4,1.2,"versicolor"
 93 | "92",6.1,3,4.6,1.4,"versicolor"
 94 | "93",5.8,2.6,4,1.2,"versicolor"
 95 | "94",5,2.3,3.3,1,"versicolor"
 96 | "95",5.6,2.7,4.2,1.3,"versicolor"
 97 | "96",5.7,3,4.2,1.2,"versicolor"
 98 | "97",5.7,2.9,4.2,1.3,"versicolor"
 99 | "98",6.2,2.9,4.3,1.3,"versicolor"
100 | "99",5.1,2.5,3,1.1,"versicolor"
101 | "100",5.7,2.8,4.1,1.3,"versicolor"
102 | "101",6.3,3.3,6,2.5,"virginica"
103 | "102",5.8,2.7,5.1,1.9,"virginica"
104 | "103",7.1,3,5.9,2.1,"virginica"
105 | "104",6.3,2.9,5.6,1.8,"virginica"
106 | "105",6.5,3,5.8,2.2,"virginica"
107 | "106",7.6,3,6.6,2.1,"virginica"
108 | "107",4.9,2.5,4.5,1.7,"virginica"
109 | "108",7.3,2.9,6.3,1.8,"virginica"
110 | "109",6.7,2.5,5.8,1.8,"virginica"
111 | "110",7.2,3.6,6.1,2.5,"virginica"
112 | "111",6.5,3.2,5.1,2,"virginica"
113 | "112",6.4,2.7,5.3,1.9,"virginica"
114 | "113",6.8,3,5.5,2.1,"virginica"
115 | "114",5.7,2.5,5,2,"virginica"
116 | "115",5.8,2.8,5.1,2.4,"virginica"
117 | "116",6.4,3.2,5.3,2.3,"virginica"
118 | "117",6.5,3,5.5,1.8,"virginica"
119 | "118",7.7,3.8,6.7,2.2,"virginica"
120 | "119",7.7,2.6,6.9,2.3,"virginica"
121 | "120",6,2.2,5,1.5,"virginica"
122 | "121",6.9,3.2,5.7,2.3,"virginica"
123 | "122",5.6,2.8,4.9,2,"virginica"
124 | "123",7.7,2.8,6.7,2,"virginica"
125 | "124",6.3,2.7,4.9,1.8,"virginica"
126 | "125",6.7,3.3,5.7,2.1,"virginica"
127 | "126",7.2,3.2,6,1.8,"virginica"
128 | "127",6.2,2.8,4.8,1.8,"virginica"
129 | "128",6.1,3,4.9,1.8,"virginica"
130 | "129",6.4,2.8,5.6,2.1,"virginica"
131 | "130",7.2,3,5.8,1.6,"virginica"
132 | "131",7.4,2.8,6.1,1.9,"virginica"
133 | "132",7.9,3.8,6.4,2,"virginica"
134 | "133",6.4,2.8,5.6,2.2,"virginica"
135 | "134",6.3,2.8,5.1,1.5,"virginica"
136 | "135",6.1,2.6,5.6,1.4,"virginica"
137 | "136",7.7,3,6.1,2.3,"virginica"
138 | "137",6.3,3.4,5.6,2.4,"virginica"
139 | "138",6.4,3.1,5.5,1.8,"virginica"
140 | "139",6,3,4.8,1.8,"virginica"
141 | "140",6.9,3.1,5.4,2.1,"virginica"
142 | "141",6.7,3.1,5.6,2.4,"virginica"
143 | "142",6.9,3.1,5.1,2.3,"virginica"
144 | "143",5.8,2.7,5.1,1.9,"virginica"
145 | "144",6.8,3.2,5.9,2.3,"virginica"
146 | "145",6.7,3.3,5.7,2.5,"virginica"
147 | "146",6.7,3,5.2,2.3,"virginica"
148 | "147",6.3,2.5,5,1.9,"virginica"
149 | "148",6.5,3,5.2,2,"virginica"
150 | "149",6.2,3.4,5.4,2.3,"virginica"
151 | "150",5.9,3,5.1,1.8,"virginica"
152 | 


--------------------------------------------------------------------------------
/data/iris.data.csv:
--------------------------------------------------------------------------------
  1 | 5.1,3.5,1.4,0.2,setosa
  2 | 4.9,3.0,1.4,0.2,setosa
  3 | 4.7,3.2,1.3,0.2,setosa
  4 | 4.6,3.1,1.5,0.2,setosa
  5 | 5.0,3.6,1.4,0.2,setosa
  6 | 5.4,3.9,1.7,0.4,setosa
  7 | 4.6,3.4,1.4,0.3,setosa
  8 | 5.0,3.4,1.5,0.2,setosa
  9 | 4.4,2.9,1.4,0.2,setosa
 10 | 4.9,3.1,1.5,0.1,setosa
 11 | 5.4,3.7,1.5,0.2,setosa
 12 | 4.8,3.4,1.6,0.2,setosa
 13 | 4.8,3.0,1.4,0.1,setosa
 14 | 4.3,3.0,1.1,0.1,setosa
 15 | 5.8,4.0,1.2,0.2,setosa
 16 | 5.7,4.4,1.5,0.4,setosa
 17 | 5.4,3.9,1.3,0.4,setosa
 18 | 5.1,3.5,1.4,0.3,setosa
 19 | 5.7,3.8,1.7,0.3,setosa
 20 | 5.1,3.8,1.5,0.3,setosa
 21 | 5.4,3.4,1.7,0.2,setosa
 22 | 5.1,3.7,1.5,0.4,setosa
 23 | 4.6,3.6,1.0,0.2,setosa
 24 | 5.1,3.3,1.7,0.5,setosa
 25 | 4.8,3.4,1.9,0.2,setosa
 26 | 5.0,3.0,1.6,0.2,setosa
 27 | 5.0,3.4,1.6,0.4,setosa
 28 | 5.2,3.5,1.5,0.2,setosa
 29 | 5.2,3.4,1.4,0.2,setosa
 30 | 4.7,3.2,1.6,0.2,setosa
 31 | 4.8,3.1,1.6,0.2,setosa
 32 | 5.4,3.4,1.5,0.4,setosa
 33 | 5.2,4.1,1.5,0.1,setosa
 34 | 5.5,4.2,1.4,0.2,setosa
 35 | 4.9,3.1,1.5,0.2,setosa
 36 | 5.0,3.2,1.2,0.2,setosa
 37 | 5.5,3.5,1.3,0.2,setosa
 38 | 4.9,3.6,1.4,0.1,setosa
 39 | 4.4,3.0,1.3,0.2,setosa
 40 | 5.1,3.4,1.5,0.2,setosa
 41 | 5.0,3.5,1.3,0.3,setosa
 42 | 4.5,2.3,1.3,0.3,setosa
 43 | 4.4,3.2,1.3,0.2,setosa
 44 | 5.0,3.5,1.6,0.6,setosa
 45 | 5.1,3.8,1.9,0.4,setosa
 46 | 4.8,3.0,1.4,0.3,setosa
 47 | 5.1,3.8,1.6,0.2,setosa
 48 | 4.6,3.2,1.4,0.2,setosa
 49 | 5.3,3.7,1.5,0.2,setosa
 50 | 5.0,3.3,1.4,0.2,setosa
 51 | 7.0,3.2,4.7,1.4,versicolor
 52 | 6.4,3.2,4.5,1.5,versicolor
 53 | 6.9,3.1,4.9,1.5,versicolor
 54 | 5.5,2.3,4.0,1.3,versicolor
 55 | 6.5,2.8,4.6,1.5,versicolor
 56 | 5.7,2.8,4.5,1.3,versicolor
 57 | 6.3,3.3,4.7,1.6,versicolor
 58 | 4.9,2.4,3.3,1.0,versicolor
 59 | 6.6,2.9,4.6,1.3,versicolor
 60 | 5.2,2.7,3.9,1.4,versicolor
 61 | 5.0,2.0,3.5,1.0,versicolor
 62 | 5.9,3.0,4.2,1.5,versicolor
 63 | 6.0,2.2,4.0,1.0,versicolor
 64 | 6.1,2.9,4.7,1.4,versicolor
 65 | 5.6,2.9,3.6,1.3,versicolor
 66 | 6.7,3.1,4.4,1.4,versicolor
 67 | 5.6,3.0,4.5,1.5,versicolor
 68 | 5.8,2.7,4.1,1.0,versicolor
 69 | 6.2,2.2,4.5,1.5,versicolor
 70 | 5.6,2.5,3.9,1.1,versicolor
 71 | 5.9,3.2,4.8,1.8,versicolor
 72 | 6.1,2.8,4.0,1.3,versicolor
 73 | 6.3,2.5,4.9,1.5,versicolor
 74 | 6.1,2.8,4.7,1.2,versicolor
 75 | 6.4,2.9,4.3,1.3,versicolor
 76 | 6.6,3.0,4.4,1.4,versicolor
 77 | 6.8,2.8,4.8,1.4,versicolor
 78 | 6.7,3.0,5.0,1.7,versicolor
 79 | 6.0,2.9,4.5,1.5,versicolor
 80 | 5.7,2.6,3.5,1.0,versicolor
 81 | 5.5,2.4,3.8,1.1,versicolor
 82 | 5.5,2.4,3.7,1.0,versicolor
 83 | 5.8,2.7,3.9,1.2,versicolor
 84 | 6.0,2.7,5.1,1.6,versicolor
 85 | 5.4,3.0,4.5,1.5,versicolor
 86 | 6.0,3.4,4.5,1.6,versicolor
 87 | 6.7,3.1,4.7,1.5,versicolor
 88 | 6.3,2.3,4.4,1.3,versicolor
 89 | 5.6,3.0,4.1,1.3,versicolor
 90 | 5.5,2.5,4.0,1.3,versicolor
 91 | 5.5,2.6,4.4,1.2,versicolor
 92 | 6.1,3.0,4.6,1.4,versicolor
 93 | 5.8,2.6,4.0,1.2,versicolor
 94 | 5.0,2.3,3.3,1.0,versicolor
 95 | 5.6,2.7,4.2,1.3,versicolor
 96 | 5.7,3.0,4.2,1.2,versicolor
 97 | 5.7,2.9,4.2,1.3,versicolor
 98 | 6.2,2.9,4.3,1.3,versicolor
 99 | 5.1,2.5,3.0,1.1,versicolor
100 | 5.7,2.8,4.1,1.3,versicolor
101 | 6.3,3.3,6.0,2.5,virginica
102 | 5.8,2.7,5.1,1.9,virginica
103 | 7.1,3.0,5.9,2.1,virginica
104 | 6.3,2.9,5.6,1.8,virginica
105 | 6.5,3.0,5.8,2.2,virginica
106 | 7.6,3.0,6.6,2.1,virginica
107 | 4.9,2.5,4.5,1.7,virginica
108 | 7.3,2.9,6.3,1.8,virginica
109 | 6.7,2.5,5.8,1.8,virginica
110 | 7.2,3.6,6.1,2.5,virginica
111 | 6.5,3.2,5.1,2.0,virginica
112 | 6.4,2.7,5.3,1.9,virginica
113 | 6.8,3.0,5.5,2.1,virginica
114 | 5.7,2.5,5.0,2.0,virginica
115 | 5.8,2.8,5.1,2.4,virginica
116 | 6.4,3.2,5.3,2.3,virginica
117 | 6.5,3.0,5.5,1.8,virginica
118 | 7.7,3.8,6.7,2.2,virginica
119 | 7.7,2.6,6.9,2.3,virginica
120 | 6.0,2.2,5.0,1.5,virginica
121 | 6.9,3.2,5.7,2.3,virginica
122 | 5.6,2.8,4.9,2.0,virginica
123 | 7.7,2.8,6.7,2.0,virginica
124 | 6.3,2.7,4.9,1.8,virginica
125 | 6.7,3.3,5.7,2.1,virginica
126 | 7.2,3.2,6.0,1.8,virginica
127 | 6.2,2.8,4.8,1.8,virginica
128 | 6.1,3.0,4.9,1.8,virginica
129 | 6.4,2.8,5.6,2.1,virginica
130 | 7.2,3.0,5.8,1.6,virginica
131 | 7.4,2.8,6.1,1.9,virginica
132 | 7.9,3.8,6.4,2.0,virginica
133 | 6.4,2.8,5.6,2.2,virginica
134 | 6.3,2.8,5.1,1.5,virginica
135 | 6.1,2.6,5.6,1.4,virginica
136 | 7.7,3.0,6.1,2.3,virginica
137 | 6.3,3.4,5.6,2.4,virginica
138 | 6.4,3.1,5.5,1.8,virginica
139 | 6.0,3.0,4.8,1.8,virginica
140 | 6.9,3.1,5.4,2.1,virginica
141 | 6.7,3.1,5.6,2.4,virginica
142 | 6.9,3.1,5.1,2.3,virginica
143 | 5.8,2.7,5.1,1.9,virginica
144 | 6.8,3.2,5.9,2.3,virginica
145 | 6.7,3.3,5.7,2.5,virginica
146 | 6.7,3.0,5.2,2.3,virginica
147 | 6.3,2.5,5.0,1.9,virginica
148 | 6.5,3.0,5.2,2.0,virginica
149 | 6.2,3.4,5.4,2.3,virginica
150 | 5.9,3.0,5.1,1.8,virginica
151 | 


--------------------------------------------------------------------------------
/data/mtcars.csv:
--------------------------------------------------------------------------------
 1 | "","mpg","cyl","disp","hp","drat","wt","qsec","vs","am","gear","carb"
 2 | "Mazda RX4",21,6,160,110,3.9,2.62,16.46,0,1,4,4
 3 | "Mazda RX4 Wag",21,6,160,110,3.9,2.875,17.02,0,1,4,4
 4 | "Datsun 710",22.8,4,108,93,3.85,2.32,18.61,1,1,4,1
 5 | "Hornet 4 Drive",21.4,6,258,110,3.08,3.215,19.44,1,0,3,1
 6 | "Hornet Sportabout",18.7,8,360,175,3.15,3.44,17.02,0,0,3,2
 7 | "Valiant",18.1,6,225,105,2.76,3.46,20.22,1,0,3,1
 8 | "Duster 360",14.3,8,360,245,3.21,3.57,15.84,0,0,3,4
 9 | "Merc 240D",24.4,4,146.7,62,3.69,3.19,20,1,0,4,2
10 | "Merc 230",22.8,4,140.8,95,3.92,3.15,22.9,1,0,4,2
11 | "Merc 280",19.2,6,167.6,123,3.92,3.44,18.3,1,0,4,4
12 | "Merc 280C",17.8,6,167.6,123,3.92,3.44,18.9,1,0,4,4
13 | "Merc 450SE",16.4,8,275.8,180,3.07,4.07,17.4,0,0,3,3
14 | "Merc 450SL",17.3,8,275.8,180,3.07,3.73,17.6,0,0,3,3
15 | "Merc 450SLC",15.2,8,275.8,180,3.07,3.78,18,0,0,3,3
16 | "Cadillac Fleetwood",10.4,8,472,205,2.93,5.25,17.98,0,0,3,4
17 | "Lincoln Continental",10.4,8,460,215,3,5.424,17.82,0,0,3,4
18 | "Chrysler Imperial",14.7,8,440,230,3.23,5.345,17.42,0,0,3,4
19 | "Fiat 128",32.4,4,78.7,66,4.08,2.2,19.47,1,1,4,1
20 | "Honda Civic",30.4,4,75.7,52,4.93,1.615,18.52,1,1,4,2
21 | "Toyota Corolla",33.9,4,71.1,65,4.22,1.835,19.9,1,1,4,1
22 | "Toyota Corona",21.5,4,120.1,97,3.7,2.465,20.01,1,0,3,1
23 | "Dodge Challenger",15.5,8,318,150,2.76,3.52,16.87,0,0,3,2
24 | "AMC Javelin",15.2,8,304,150,3.15,3.435,17.3,0,0,3,2
25 | "Camaro Z28",13.3,8,350,245,3.73,3.84,15.41,0,0,3,4
26 | "Pontiac Firebird",19.2,8,400,175,3.08,3.845,17.05,0,0,3,2
27 | "Fiat X1-9",27.3,4,79,66,4.08,1.935,18.9,1,1,4,1
28 | "Porsche 914-2",26,4,120.3,91,4.43,2.14,16.7,0,1,5,2
29 | "Lotus Europa",30.4,4,95.1,113,3.77,1.513,16.9,1,1,5,2
30 | "Ford Pantera L",15.8,8,351,264,4.22,3.17,14.5,0,1,5,4
31 | "Ferrari Dino",19.7,6,145,175,3.62,2.77,15.5,0,1,5,6
32 | "Maserati Bora",15,8,301,335,3.54,3.57,14.6,0,1,5,8
33 | "Volvo 142E",21.4,4,121,109,4.11,2.78,18.6,1,1,4,2
34 | 


--------------------------------------------------------------------------------
/data/test:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/notebooks/02_03b.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "## Data Filtering and Selection"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 2,
 13 |    "id": "acd063a4",
 14 |    "metadata": {},
 15 |    "outputs": [
 16 |     {
 17 |      "name": "stdout",
 18 |      "output_type": "stream",
 19 |      "text": [
 20 |       "Requirement already satisfied: numpy in /home/codespace/.local/lib/python3.10/site-packages (1.26.1)\n",
 21 |       "\n",
 22 |       "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.1\u001b[0m\n",
 23 |       "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython -m pip install --upgrade pip\u001b[0m\n",
 24 |       "Requirement already satisfied: pandas in /home/codespace/.local/lib/python3.10/site-packages (2.1.1)\n",
 25 |       "Requirement already satisfied: numpy>=1.22.4 in /home/codespace/.local/lib/python3.10/site-packages (from pandas) (1.26.1)\n",
 26 |       "Requirement already satisfied: python-dateutil>=2.8.2 in /home/codespace/.local/lib/python3.10/site-packages (from pandas) (2.8.2)\n",
 27 |       "Requirement already satisfied: pytz>=2020.1 in /home/codespace/.local/lib/python3.10/site-packages (from pandas) (2023.3.post1)\n",
 28 |       "Requirement already satisfied: tzdata>=2022.1 in /home/codespace/.local/lib/python3.10/site-packages (from pandas) (2023.3)\n",
 29 |       "Requirement already satisfied: six>=1.5 in /home/codespace/.local/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n",
 30 |       "\n",
 31 |       "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.1\u001b[0m\n",
 32 |       "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython -m pip install --upgrade pip\u001b[0m\n"
 33 |      ]
 34 |     }
 35 |    ],
 36 |    "source": [
 37 |     "!pip install numpy\n",
 38 |     "!pip install pandas"
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "code",
 43 |    "execution_count": 3,
 44 |    "id": "0b48db56-f3a4-47b5-998e-37d64588ae49",
 45 |    "metadata": {},
 46 |    "outputs": [],
 47 |    "source": [
 48 |     "import numpy as np\n",
 49 |     "import pandas as pd\n",
 50 |     "\n",
 51 |     "from pandas import DataFrame"
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "code",
 56 |    "execution_count": 13,
 57 |    "id": "bbba446d",
 58 |    "metadata": {},
 59 |    "outputs": [
 60 |     {
 61 |      "data": {
 62 |       "text/html": [
 63 |        "<div>\n",
 64 |        "<style scoped>\n",
 65 |        "    .dataframe tbody tr th:only-of-type {\n",
 66 |        "        vertical-align: middle;\n",
 67 |        "    }\n",
 68 |        "\n",
 69 |        "    .dataframe tbody tr th {\n",
 70 |        "        vertical-align: top;\n",
 71 |        "    }\n",
 72 |        "\n",
 73 |        "    .dataframe thead th {\n",
 74 |        "        text-align: right;\n",
 75 |        "    }\n",
 76 |        "</style>\n",
 77 |        "<table border=\"1\" class=\"dataframe\">\n",
 78 |        "  <thead>\n",
 79 |        "    <tr style=\"text-align: right;\">\n",
 80 |        "      <th></th>\n",
 81 |        "      <th>column 1</th>\n",
 82 |        "      <th>column 2</th>\n",
 83 |        "      <th>column 3</th>\n",
 84 |        "    </tr>\n",
 85 |        "  </thead>\n",
 86 |        "  <tbody>\n",
 87 |        "    <tr>\n",
 88 |        "      <th>row 1</th>\n",
 89 |        "      <td>0</td>\n",
 90 |        "      <td>3</td>\n",
 91 |        "      <td>6</td>\n",
 92 |        "    </tr>\n",
 93 |        "    <tr>\n",
 94 |        "      <th>row 2</th>\n",
 95 |        "      <td>9</td>\n",
 96 |        "      <td>12</td>\n",
 97 |        "      <td>15</td>\n",
 98 |        "    </tr>\n",
 99 |        "    <tr>\n",
100 |        "      <th>row 3</th>\n",
101 |        "      <td>18</td>\n",
102 |        "      <td>21</td>\n",
103 |        "      <td>24</td>\n",
104 |        "    </tr>\n",
105 |        "    <tr>\n",
106 |        "      <th>row 4</th>\n",
107 |        "      <td>27</td>\n",
108 |        "      <td>30</td>\n",
109 |        "      <td>33</td>\n",
110 |        "    </tr>\n",
111 |        "    <tr>\n",
112 |        "      <th>row 5</th>\n",
113 |        "      <td>36</td>\n",
114 |        "      <td>39</td>\n",
115 |        "      <td>42</td>\n",
116 |        "    </tr>\n",
117 |        "    <tr>\n",
118 |        "      <th>row 6</th>\n",
119 |        "      <td>45</td>\n",
120 |        "      <td>48</td>\n",
121 |        "      <td>51</td>\n",
122 |        "    </tr>\n",
123 |        "    <tr>\n",
124 |        "      <th>row 7</th>\n",
125 |        "      <td>54</td>\n",
126 |        "      <td>57</td>\n",
127 |        "      <td>60</td>\n",
128 |        "    </tr>\n",
129 |        "    <tr>\n",
130 |        "      <th>row 8</th>\n",
131 |        "      <td>63</td>\n",
132 |        "      <td>66</td>\n",
133 |        "      <td>69</td>\n",
134 |        "    </tr>\n",
135 |        "    <tr>\n",
136 |        "      <th>row 9</th>\n",
137 |        "      <td>72</td>\n",
138 |        "      <td>75</td>\n",
139 |        "      <td>78</td>\n",
140 |        "    </tr>\n",
141 |        "    <tr>\n",
142 |        "      <th>row 10</th>\n",
143 |        "      <td>81</td>\n",
144 |        "      <td>84</td>\n",
145 |        "      <td>87</td>\n",
146 |        "    </tr>\n",
147 |        "  </tbody>\n",
148 |        "</table>\n",
149 |        "</div>"
150 |       ],
151 |       "text/plain": [
152 |        "        column 1  column 2  column 3\n",
153 |        "row 1          0         3         6\n",
154 |        "row 2          9        12        15\n",
155 |        "row 3         18        21        24\n",
156 |        "row 4         27        30        33\n",
157 |        "row 5         36        39        42\n",
158 |        "row 6         45        48        51\n",
159 |        "row 7         54        57        60\n",
160 |        "row 8         63        66        69\n",
161 |        "row 9         72        75        78\n",
162 |        "row 10        81        84        87"
163 |       ]
164 |      },
165 |      "execution_count": 13,
166 |      "metadata": {},
167 |      "output_type": "execute_result"
168 |     }
169 |    ],
170 |    "source": [
171 |     "numbers_df = DataFrame(np.arange(0,90,3).reshape(10,3), index = ['row 1','row 2','row 3','row 4','row 5','row 6','row 7','row 8','row 9','row 10'],columns=['column 1','column 2','column 3'])\n",
172 |     "numbers_df"
173 |    ]
174 |   },
175 |   {
176 |    "cell_type": "markdown",
177 |    "id": "68b700fd-5320-47e2-852c-a13aab49a486",
178 |    "metadata": {},
179 |    "source": [
180 |     "#### Comparison operators (> < >= <= == !=) and Masking."
181 |    ]
182 |   }
183 |  ],
184 |  "metadata": {
185 |   "kernelspec": {
186 |    "display_name": "Python 3",
187 |    "language": "python",
188 |    "name": "python3"
189 |   },
190 |   "language_info": {
191 |    "codemirror_mode": {
192 |     "name": "ipython",
193 |     "version": 3
194 |    },
195 |    "file_extension": ".py",
196 |    "mimetype": "text/x-python",
197 |    "name": "python",
198 |    "nbconvert_exporter": "python",
199 |    "pygments_lexer": "ipython3",
200 |    "version": "3.10.8"
201 |   }
202 |  },
203 |  "nbformat": 4,
204 |  "nbformat_minor": 5
205 | }
206 | 


--------------------------------------------------------------------------------
/notebooks/02_04b.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "id": "c9327e33-5afa-4ffb-aca9-6bdc963ef9ff",
 6 |    "metadata": {},
 7 |    "source": [
 8 |     "## Working with Missing Data in Pandas"
 9 |    ]
10 |   },
11 |   {
12 |    "cell_type": "code",
13 |    "execution_count": 1,
14 |    "id": "5be0cfbf-e779-42b3-8bd6-f3dd46888ebb",
15 |    "metadata": {},
16 |    "outputs": [],
17 |    "source": [
18 |     "import numpy as np\n",
19 |     "import pandas as pd\n",
20 |     "\n",
21 |     "from pandas import DataFrame"
22 |    ]
23 |   },
24 |   {
25 |    "cell_type": "markdown",
26 |    "id": "61bbc7e0-5198-4609-a66a-d4b437606dac",
27 |    "metadata": {},
28 |    "source": [
29 |     "### Filling missing values using fillna(), replace() and interpolate()"
30 |    ]
31 |   }
32 |  ],
33 |  "metadata": {
34 |   "kernelspec": {
35 |    "display_name": "Python 3",
36 |    "language": "python",
37 |    "name": "python3"
38 |   },
39 |   "language_info": {
40 |    "codemirror_mode": {
41 |     "name": "ipython",
42 |     "version": 3
43 |    },
44 |    "file_extension": ".py",
45 |    "mimetype": "text/x-python",
46 |    "name": "python",
47 |    "nbconvert_exporter": "python",
48 |    "pygments_lexer": "ipython3",
49 |    "version": "3.10.8"
50 |   }
51 |  },
52 |  "nbformat": 4,
53 |  "nbformat_minor": 5
54 | }
55 | 


--------------------------------------------------------------------------------
/notebooks/02_05b.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": 1,
 6 |    "metadata": {},
 7 |    "outputs": [],
 8 |    "source": [
 9 |     "import numpy as np\n",
10 |     "import pandas as pd\n",
11 |     "\n",
12 |     "from pandas import Series, DataFrame"
13 |    ]
14 |   },
15 |   {
16 |    "cell_type": "markdown",
17 |    "metadata": {},
18 |    "source": [
19 |     "### Removing duplicates"
20 |    ]
21 |   },
22 |   {
23 |    "cell_type": "code",
24 |    "execution_count": null,
25 |    "metadata": {},
26 |    "outputs": [],
27 |    "source": []
28 |   }
29 |  ],
30 |  "metadata": {
31 |   "anaconda-cloud": {},
32 |   "kernelspec": {
33 |    "display_name": "Python 3",
34 |    "language": "python",
35 |    "name": "python3"
36 |   },
37 |   "language_info": {
38 |    "codemirror_mode": {
39 |     "name": "ipython",
40 |     "version": 3
41 |    },
42 |    "file_extension": ".py",
43 |    "mimetype": "text/x-python",
44 |    "name": "python",
45 |    "nbconvert_exporter": "python",
46 |    "pygments_lexer": "ipython3",
47 |    "version": "3.8.8"
48 |   }
49 |  },
50 |  "nbformat": 4,
51 |  "nbformat_minor": 1
52 | }
53 | 


--------------------------------------------------------------------------------
/notebooks/02_05e.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import numpy as np\n",
 10 |     "import pandas as pd\n",
 11 |     "\n",
 12 |     "from pandas import Series, DataFrame"
 13 |    ]
 14 |   },
 15 |   {
 16 |    "cell_type": "markdown",
 17 |    "metadata": {},
 18 |    "source": [
 19 |     "### Removing duplicates"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "code",
 24 |    "execution_count": 2,
 25 |    "metadata": {},
 26 |    "outputs": [
 27 |     {
 28 |      "data": {
 29 |       "text/html": [
 30 |        "<div>\n",
 31 |        "<style scoped>\n",
 32 |        "    .dataframe tbody tr th:only-of-type {\n",
 33 |        "        vertical-align: middle;\n",
 34 |        "    }\n",
 35 |        "\n",
 36 |        "    .dataframe tbody tr th {\n",
 37 |        "        vertical-align: top;\n",
 38 |        "    }\n",
 39 |        "\n",
 40 |        "    .dataframe thead th {\n",
 41 |        "        text-align: right;\n",
 42 |        "    }\n",
 43 |        "</style>\n",
 44 |        "<table border=\"1\" class=\"dataframe\">\n",
 45 |        "  <thead>\n",
 46 |        "    <tr style=\"text-align: right;\">\n",
 47 |        "      <th></th>\n",
 48 |        "      <th>column 1</th>\n",
 49 |        "      <th>column 2</th>\n",
 50 |        "      <th>column 3</th>\n",
 51 |        "    </tr>\n",
 52 |        "  </thead>\n",
 53 |        "  <tbody>\n",
 54 |        "    <tr>\n",
 55 |        "      <th>0</th>\n",
 56 |        "      <td>1</td>\n",
 57 |        "      <td>a</td>\n",
 58 |        "      <td>A</td>\n",
 59 |        "    </tr>\n",
 60 |        "    <tr>\n",
 61 |        "      <th>1</th>\n",
 62 |        "      <td>1</td>\n",
 63 |        "      <td>a</td>\n",
 64 |        "      <td>A</td>\n",
 65 |        "    </tr>\n",
 66 |        "    <tr>\n",
 67 |        "      <th>2</th>\n",
 68 |        "      <td>2</td>\n",
 69 |        "      <td>b</td>\n",
 70 |        "      <td>B</td>\n",
 71 |        "    </tr>\n",
 72 |        "    <tr>\n",
 73 |        "      <th>3</th>\n",
 74 |        "      <td>2</td>\n",
 75 |        "      <td>b</td>\n",
 76 |        "      <td>B</td>\n",
 77 |        "    </tr>\n",
 78 |        "    <tr>\n",
 79 |        "      <th>4</th>\n",
 80 |        "      <td>3</td>\n",
 81 |        "      <td>c</td>\n",
 82 |        "      <td>C</td>\n",
 83 |        "    </tr>\n",
 84 |        "    <tr>\n",
 85 |        "      <th>5</th>\n",
 86 |        "      <td>3</td>\n",
 87 |        "      <td>c</td>\n",
 88 |        "      <td>C</td>\n",
 89 |        "    </tr>\n",
 90 |        "    <tr>\n",
 91 |        "      <th>6</th>\n",
 92 |        "      <td>3</td>\n",
 93 |        "      <td>c</td>\n",
 94 |        "      <td>C</td>\n",
 95 |        "    </tr>\n",
 96 |        "  </tbody>\n",
 97 |        "</table>\n",
 98 |        "</div>"
 99 |       ],
100 |       "text/plain": [
101 |        "   column 1 column 2 column 3\n",
102 |        "0         1        a        A\n",
103 |        "1         1        a        A\n",
104 |        "2         2        b        B\n",
105 |        "3         2        b        B\n",
106 |        "4         3        c        C\n",
107 |        "5         3        c        C\n",
108 |        "6         3        c        C"
109 |       ]
110 |      },
111 |      "execution_count": 2,
112 |      "metadata": {},
113 |      "output_type": "execute_result"
114 |     }
115 |    ],
116 |    "source": [
117 |     "DF_obj = DataFrame({'column 1': [1,1,2,2,3,3,3],\n",
118 |     "                    'column 2':['a', 'a', 'b', 'b', 'c', 'c', 'c'],\n",
119 |     "                    'column 3': ['A', 'A', 'B', 'B', 'C', 'C', 'C']})\n",
120 |     "DF_obj"
121 |    ]
122 |   },
123 |   {
124 |    "cell_type": "code",
125 |    "execution_count": 3,
126 |    "metadata": {},
127 |    "outputs": [
128 |     {
129 |      "data": {
130 |       "text/plain": [
131 |        "0    False\n",
132 |        "1     True\n",
133 |        "2    False\n",
134 |        "3     True\n",
135 |        "4    False\n",
136 |        "5     True\n",
137 |        "6     True\n",
138 |        "dtype: bool"
139 |       ]
140 |      },
141 |      "execution_count": 3,
142 |      "metadata": {},
143 |      "output_type": "execute_result"
144 |     }
145 |    ],
146 |    "source": [
147 |     "DF_obj.duplicated()"
148 |    ]
149 |   },
150 |   {
151 |    "cell_type": "code",
152 |    "execution_count": 4,
153 |    "metadata": {},
154 |    "outputs": [
155 |     {
156 |      "data": {
157 |       "text/html": [
158 |        "<div>\n",
159 |        "<style scoped>\n",
160 |        "    .dataframe tbody tr th:only-of-type {\n",
161 |        "        vertical-align: middle;\n",
162 |        "    }\n",
163 |        "\n",
164 |        "    .dataframe tbody tr th {\n",
165 |        "        vertical-align: top;\n",
166 |        "    }\n",
167 |        "\n",
168 |        "    .dataframe thead th {\n",
169 |        "        text-align: right;\n",
170 |        "    }\n",
171 |        "</style>\n",
172 |        "<table border=\"1\" class=\"dataframe\">\n",
173 |        "  <thead>\n",
174 |        "    <tr style=\"text-align: right;\">\n",
175 |        "      <th></th>\n",
176 |        "      <th>column 1</th>\n",
177 |        "      <th>column 2</th>\n",
178 |        "      <th>column 3</th>\n",
179 |        "    </tr>\n",
180 |        "  </thead>\n",
181 |        "  <tbody>\n",
182 |        "    <tr>\n",
183 |        "      <th>0</th>\n",
184 |        "      <td>1</td>\n",
185 |        "      <td>a</td>\n",
186 |        "      <td>A</td>\n",
187 |        "    </tr>\n",
188 |        "    <tr>\n",
189 |        "      <th>2</th>\n",
190 |        "      <td>2</td>\n",
191 |        "      <td>b</td>\n",
192 |        "      <td>B</td>\n",
193 |        "    </tr>\n",
194 |        "    <tr>\n",
195 |        "      <th>4</th>\n",
196 |        "      <td>3</td>\n",
197 |        "      <td>c</td>\n",
198 |        "      <td>C</td>\n",
199 |        "    </tr>\n",
200 |        "  </tbody>\n",
201 |        "</table>\n",
202 |        "</div>"
203 |       ],
204 |       "text/plain": [
205 |        "   column 1 column 2 column 3\n",
206 |        "0         1        a        A\n",
207 |        "2         2        b        B\n",
208 |        "4         3        c        C"
209 |       ]
210 |      },
211 |      "execution_count": 4,
212 |      "metadata": {},
213 |      "output_type": "execute_result"
214 |     }
215 |    ],
216 |    "source": [
217 |     "DF_obj.drop_duplicates()"
218 |    ]
219 |   },
220 |   {
221 |    "cell_type": "code",
222 |    "execution_count": 5,
223 |    "metadata": {},
224 |    "outputs": [],
225 |    "source": [
226 |     "DF_obj = DataFrame({'column 1': [1,1,2,2,3,3,3],\n",
227 |     "                    'column 2':['a', 'a', 'b', 'b', 'c', 'c', 'c'],\n",
228 |     "                    'column 3': ['A', 'A', 'B', 'B', 'C', 'D', 'C']})"
229 |    ]
230 |   },
231 |   {
232 |    "cell_type": "code",
233 |    "execution_count": 6,
234 |    "metadata": {},
235 |    "outputs": [
236 |     {
237 |      "data": {
238 |       "text/html": [
239 |        "<div>\n",
240 |        "<style scoped>\n",
241 |        "    .dataframe tbody tr th:only-of-type {\n",
242 |        "        vertical-align: middle;\n",
243 |        "    }\n",
244 |        "\n",
245 |        "    .dataframe tbody tr th {\n",
246 |        "        vertical-align: top;\n",
247 |        "    }\n",
248 |        "\n",
249 |        "    .dataframe thead th {\n",
250 |        "        text-align: right;\n",
251 |        "    }\n",
252 |        "</style>\n",
253 |        "<table border=\"1\" class=\"dataframe\">\n",
254 |        "  <thead>\n",
255 |        "    <tr style=\"text-align: right;\">\n",
256 |        "      <th></th>\n",
257 |        "      <th>column 1</th>\n",
258 |        "      <th>column 2</th>\n",
259 |        "      <th>column 3</th>\n",
260 |        "    </tr>\n",
261 |        "  </thead>\n",
262 |        "  <tbody>\n",
263 |        "    <tr>\n",
264 |        "      <th>0</th>\n",
265 |        "      <td>1</td>\n",
266 |        "      <td>a</td>\n",
267 |        "      <td>A</td>\n",
268 |        "    </tr>\n",
269 |        "    <tr>\n",
270 |        "      <th>1</th>\n",
271 |        "      <td>1</td>\n",
272 |        "      <td>a</td>\n",
273 |        "      <td>A</td>\n",
274 |        "    </tr>\n",
275 |        "    <tr>\n",
276 |        "      <th>2</th>\n",
277 |        "      <td>2</td>\n",
278 |        "      <td>b</td>\n",
279 |        "      <td>B</td>\n",
280 |        "    </tr>\n",
281 |        "    <tr>\n",
282 |        "      <th>3</th>\n",
283 |        "      <td>2</td>\n",
284 |        "      <td>b</td>\n",
285 |        "      <td>B</td>\n",
286 |        "    </tr>\n",
287 |        "    <tr>\n",
288 |        "      <th>4</th>\n",
289 |        "      <td>3</td>\n",
290 |        "      <td>c</td>\n",
291 |        "      <td>C</td>\n",
292 |        "    </tr>\n",
293 |        "    <tr>\n",
294 |        "      <th>5</th>\n",
295 |        "      <td>3</td>\n",
296 |        "      <td>c</td>\n",
297 |        "      <td>D</td>\n",
298 |        "    </tr>\n",
299 |        "    <tr>\n",
300 |        "      <th>6</th>\n",
301 |        "      <td>3</td>\n",
302 |        "      <td>c</td>\n",
303 |        "      <td>C</td>\n",
304 |        "    </tr>\n",
305 |        "  </tbody>\n",
306 |        "</table>\n",
307 |        "</div>"
308 |       ],
309 |       "text/plain": [
310 |        "   column 1 column 2 column 3\n",
311 |        "0         1        a        A\n",
312 |        "1         1        a        A\n",
313 |        "2         2        b        B\n",
314 |        "3         2        b        B\n",
315 |        "4         3        c        C\n",
316 |        "5         3        c        D\n",
317 |        "6         3        c        C"
318 |       ]
319 |      },
320 |      "execution_count": 6,
321 |      "metadata": {},
322 |      "output_type": "execute_result"
323 |     }
324 |    ],
325 |    "source": [
326 |     "DF_obj"
327 |    ]
328 |   },
329 |   {
330 |    "cell_type": "code",
331 |    "execution_count": 7,
332 |    "metadata": {},
333 |    "outputs": [
334 |     {
335 |      "data": {
336 |       "text/html": [
337 |        "<div>\n",
338 |        "<style scoped>\n",
339 |        "    .dataframe tbody tr th:only-of-type {\n",
340 |        "        vertical-align: middle;\n",
341 |        "    }\n",
342 |        "\n",
343 |        "    .dataframe tbody tr th {\n",
344 |        "        vertical-align: top;\n",
345 |        "    }\n",
346 |        "\n",
347 |        "    .dataframe thead th {\n",
348 |        "        text-align: right;\n",
349 |        "    }\n",
350 |        "</style>\n",
351 |        "<table border=\"1\" class=\"dataframe\">\n",
352 |        "  <thead>\n",
353 |        "    <tr style=\"text-align: right;\">\n",
354 |        "      <th></th>\n",
355 |        "      <th>column 1</th>\n",
356 |        "      <th>column 2</th>\n",
357 |        "      <th>column 3</th>\n",
358 |        "    </tr>\n",
359 |        "  </thead>\n",
360 |        "  <tbody>\n",
361 |        "    <tr>\n",
362 |        "      <th>0</th>\n",
363 |        "      <td>1</td>\n",
364 |        "      <td>a</td>\n",
365 |        "      <td>A</td>\n",
366 |        "    </tr>\n",
367 |        "    <tr>\n",
368 |        "      <th>2</th>\n",
369 |        "      <td>2</td>\n",
370 |        "      <td>b</td>\n",
371 |        "      <td>B</td>\n",
372 |        "    </tr>\n",
373 |        "    <tr>\n",
374 |        "      <th>4</th>\n",
375 |        "      <td>3</td>\n",
376 |        "      <td>c</td>\n",
377 |        "      <td>C</td>\n",
378 |        "    </tr>\n",
379 |        "    <tr>\n",
380 |        "      <th>5</th>\n",
381 |        "      <td>3</td>\n",
382 |        "      <td>c</td>\n",
383 |        "      <td>D</td>\n",
384 |        "    </tr>\n",
385 |        "  </tbody>\n",
386 |        "</table>\n",
387 |        "</div>"
388 |       ],
389 |       "text/plain": [
390 |        "   column 1 column 2 column 3\n",
391 |        "0         1        a        A\n",
392 |        "2         2        b        B\n",
393 |        "4         3        c        C\n",
394 |        "5         3        c        D"
395 |       ]
396 |      },
397 |      "execution_count": 7,
398 |      "metadata": {},
399 |      "output_type": "execute_result"
400 |     }
401 |    ],
402 |    "source": [
403 |     "DF_obj.drop_duplicates(['column 3'])"
404 |    ]
405 |   },
406 |   {
407 |    "cell_type": "code",
408 |    "execution_count": null,
409 |    "metadata": {},
410 |    "outputs": [],
411 |    "source": []
412 |   }
413 |  ],
414 |  "metadata": {
415 |   "anaconda-cloud": {},
416 |   "kernelspec": {
417 |    "display_name": "Python 3",
418 |    "language": "python",
419 |    "name": "python3"
420 |   },
421 |   "language_info": {
422 |    "codemirror_mode": {
423 |     "name": "ipython",
424 |     "version": 3
425 |    },
426 |    "file_extension": ".py",
427 |    "mimetype": "text/x-python",
428 |    "name": "python",
429 |    "nbconvert_exporter": "python",
430 |    "pygments_lexer": "ipython3",
431 |    "version": "3.10.13"
432 |   }
433 |  },
434 |  "nbformat": 4,
435 |  "nbformat_minor": 1
436 | }
437 | 


--------------------------------------------------------------------------------
/notebooks/02_06b.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": 1,
 6 |    "metadata": {},
 7 |    "outputs": [],
 8 |    "source": [
 9 |     "import numpy as np\n",
10 |     "import pandas as pd\n",
11 |     "\n",
12 |     "from pandas import Series, DataFrame"
13 |    ]
14 |   },
15 |   {
16 |    "cell_type": "markdown",
17 |    "metadata": {},
18 |    "source": [
19 |     "### Concatenating data"
20 |    ]
21 |   },
22 |   {
23 |    "cell_type": "markdown",
24 |    "metadata": {},
25 |    "source": [
26 |     "### Transforming data\n",
27 |     "#### Dropping data"
28 |    ]
29 |   },
30 |   {
31 |    "cell_type": "markdown",
32 |    "metadata": {},
33 |    "source": [
34 |     "#### Adding data"
35 |    ]
36 |   },
37 |   {
38 |    "cell_type": "markdown",
39 |    "metadata": {},
40 |    "source": [
41 |     "#### Sorting data"
42 |    ]
43 |   },
44 |   {
45 |    "cell_type": "code",
46 |    "execution_count": null,
47 |    "metadata": {},
48 |    "outputs": [],
49 |    "source": []
50 |   }
51 |  ],
52 |  "metadata": {
53 |   "kernelspec": {
54 |    "display_name": "Python 3",
55 |    "language": "python",
56 |    "name": "python3"
57 |   },
58 |   "language_info": {
59 |    "codemirror_mode": {
60 |     "name": "ipython",
61 |     "version": 3
62 |    },
63 |    "file_extension": ".py",
64 |    "mimetype": "text/x-python",
65 |    "name": "python",
66 |    "nbconvert_exporter": "python",
67 |    "pygments_lexer": "ipython3",
68 |    "version": "3.8.8"
69 |   }
70 |  },
71 |  "nbformat": 4,
72 |  "nbformat_minor": 1
73 | }
74 | 


--------------------------------------------------------------------------------
/notebooks/02_07b.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": 1,
 6 |    "metadata": {},
 7 |    "outputs": [],
 8 |    "source": [
 9 |     "import numpy as np\n",
10 |     "import pandas as pd\n",
11 |     "from pandas import Series, DataFrame"
12 |    ]
13 |   },
14 |   {
15 |    "cell_type": "markdown",
16 |    "metadata": {},
17 |    "source": [
18 |     "### Grouping data by column index"
19 |    ]
20 |   }
21 |  ],
22 |  "metadata": {
23 |   "kernelspec": {
24 |    "display_name": "Python 3",
25 |    "language": "python",
26 |    "name": "python3"
27 |   },
28 |   "language_info": {
29 |    "codemirror_mode": {
30 |     "name": "ipython",
31 |     "version": 3
32 |    },
33 |    "file_extension": ".py",
34 |    "mimetype": "text/x-python",
35 |    "name": "python",
36 |    "nbconvert_exporter": "python",
37 |    "pygments_lexer": "ipython3",
38 |    "version": "3.7.1"
39 |   }
40 |  },
41 |  "nbformat": 4,
42 |  "nbformat_minor": 1
43 | }
44 | 


--------------------------------------------------------------------------------
/notebooks/02_07e.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 10,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import numpy as np\n",
 10 |     "import pandas as pd\n",
 11 |     "from pandas import Series, DataFrame"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "markdown",
 16 |    "metadata": {},
 17 |    "source": [
 18 |     "### Grouping data by column index"
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "code",
 23 |    "execution_count": 11,
 24 |    "metadata": {},
 25 |    "outputs": [
 26 |     {
 27 |      "data": {
 28 |       "text/html": [
 29 |        "<div>\n",
 30 |        "<style scoped>\n",
 31 |        "    .dataframe tbody tr th:only-of-type {\n",
 32 |        "        vertical-align: middle;\n",
 33 |        "    }\n",
 34 |        "\n",
 35 |        "    .dataframe tbody tr th {\n",
 36 |        "        vertical-align: top;\n",
 37 |        "    }\n",
 38 |        "\n",
 39 |        "    .dataframe thead th {\n",
 40 |        "        text-align: right;\n",
 41 |        "    }\n",
 42 |        "</style>\n",
 43 |        "<table border=\"1\" class=\"dataframe\">\n",
 44 |        "  <thead>\n",
 45 |        "    <tr style=\"text-align: right;\">\n",
 46 |        "      <th></th>\n",
 47 |        "      <th>car_names</th>\n",
 48 |        "      <th>mpg</th>\n",
 49 |        "      <th>cyl</th>\n",
 50 |        "      <th>disp</th>\n",
 51 |        "      <th>hp</th>\n",
 52 |        "      <th>drat</th>\n",
 53 |        "      <th>wt</th>\n",
 54 |        "      <th>qsec</th>\n",
 55 |        "      <th>vs</th>\n",
 56 |        "      <th>am</th>\n",
 57 |        "      <th>gear</th>\n",
 58 |        "      <th>carb</th>\n",
 59 |        "    </tr>\n",
 60 |        "  </thead>\n",
 61 |        "  <tbody>\n",
 62 |        "    <tr>\n",
 63 |        "      <th>0</th>\n",
 64 |        "      <td>Mazda RX4</td>\n",
 65 |        "      <td>21.0</td>\n",
 66 |        "      <td>6</td>\n",
 67 |        "      <td>160.0</td>\n",
 68 |        "      <td>110</td>\n",
 69 |        "      <td>3.90</td>\n",
 70 |        "      <td>2.620</td>\n",
 71 |        "      <td>16.46</td>\n",
 72 |        "      <td>0</td>\n",
 73 |        "      <td>1</td>\n",
 74 |        "      <td>4</td>\n",
 75 |        "      <td>4</td>\n",
 76 |        "    </tr>\n",
 77 |        "    <tr>\n",
 78 |        "      <th>1</th>\n",
 79 |        "      <td>Mazda RX4 Wag</td>\n",
 80 |        "      <td>21.0</td>\n",
 81 |        "      <td>6</td>\n",
 82 |        "      <td>160.0</td>\n",
 83 |        "      <td>110</td>\n",
 84 |        "      <td>3.90</td>\n",
 85 |        "      <td>2.875</td>\n",
 86 |        "      <td>17.02</td>\n",
 87 |        "      <td>0</td>\n",
 88 |        "      <td>1</td>\n",
 89 |        "      <td>4</td>\n",
 90 |        "      <td>4</td>\n",
 91 |        "    </tr>\n",
 92 |        "    <tr>\n",
 93 |        "      <th>2</th>\n",
 94 |        "      <td>Datsun 710</td>\n",
 95 |        "      <td>22.8</td>\n",
 96 |        "      <td>4</td>\n",
 97 |        "      <td>108.0</td>\n",
 98 |        "      <td>93</td>\n",
 99 |        "      <td>3.85</td>\n",
100 |        "      <td>2.320</td>\n",
101 |        "      <td>18.61</td>\n",
102 |        "      <td>1</td>\n",
103 |        "      <td>1</td>\n",
104 |        "      <td>4</td>\n",
105 |        "      <td>1</td>\n",
106 |        "    </tr>\n",
107 |        "    <tr>\n",
108 |        "      <th>3</th>\n",
109 |        "      <td>Hornet 4 Drive</td>\n",
110 |        "      <td>21.4</td>\n",
111 |        "      <td>6</td>\n",
112 |        "      <td>258.0</td>\n",
113 |        "      <td>110</td>\n",
114 |        "      <td>3.08</td>\n",
115 |        "      <td>3.215</td>\n",
116 |        "      <td>19.44</td>\n",
117 |        "      <td>1</td>\n",
118 |        "      <td>0</td>\n",
119 |        "      <td>3</td>\n",
120 |        "      <td>1</td>\n",
121 |        "    </tr>\n",
122 |        "    <tr>\n",
123 |        "      <th>4</th>\n",
124 |        "      <td>Hornet Sportabout</td>\n",
125 |        "      <td>18.7</td>\n",
126 |        "      <td>8</td>\n",
127 |        "      <td>360.0</td>\n",
128 |        "      <td>175</td>\n",
129 |        "      <td>3.15</td>\n",
130 |        "      <td>3.440</td>\n",
131 |        "      <td>17.02</td>\n",
132 |        "      <td>0</td>\n",
133 |        "      <td>0</td>\n",
134 |        "      <td>3</td>\n",
135 |        "      <td>2</td>\n",
136 |        "    </tr>\n",
137 |        "  </tbody>\n",
138 |        "</table>\n",
139 |        "</div>"
140 |       ],
141 |       "text/plain": [
142 |        "           car_names   mpg  cyl   disp   hp  drat     wt   qsec  vs  am  gear  \\\n",
143 |        "0          Mazda RX4  21.0    6  160.0  110  3.90  2.620  16.46   0   1     4   \n",
144 |        "1      Mazda RX4 Wag  21.0    6  160.0  110  3.90  2.875  17.02   0   1     4   \n",
145 |        "2         Datsun 710  22.8    4  108.0   93  3.85  2.320  18.61   1   1     4   \n",
146 |        "3     Hornet 4 Drive  21.4    6  258.0  110  3.08  3.215  19.44   1   0     3   \n",
147 |        "4  Hornet Sportabout  18.7    8  360.0  175  3.15  3.440  17.02   0   0     3   \n",
148 |        "\n",
149 |        "   carb  \n",
150 |        "0     4  \n",
151 |        "1     4  \n",
152 |        "2     1  \n",
153 |        "3     1  \n",
154 |        "4     2  "
155 |       ]
156 |      },
157 |      "execution_count": 11,
158 |      "metadata": {},
159 |      "output_type": "execute_result"
160 |     }
161 |    ],
162 |    "source": [
163 |     "address = '/workspaces/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/data/mtcars.csv'\n",
164 |     "\n",
165 |     "cars = pd.read_csv(address)\n",
166 |     "\n",
167 |     "cars.columns = ['car_names','mpg','cyl','disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb']\n",
168 |     "\n",
169 |     "cars.head()"
170 |    ]
171 |   },
172 |   {
173 |    "cell_type": "code",
174 |    "execution_count": 13,
175 |    "metadata": {},
176 |    "outputs": [
177 |     {
178 |      "data": {
179 |       "text/html": [
180 |        "<div>\n",
181 |        "<style scoped>\n",
182 |        "    .dataframe tbody tr th:only-of-type {\n",
183 |        "        vertical-align: middle;\n",
184 |        "    }\n",
185 |        "\n",
186 |        "    .dataframe tbody tr th {\n",
187 |        "        vertical-align: top;\n",
188 |        "    }\n",
189 |        "\n",
190 |        "    .dataframe thead th {\n",
191 |        "        text-align: right;\n",
192 |        "    }\n",
193 |        "</style>\n",
194 |        "<table border=\"1\" class=\"dataframe\">\n",
195 |        "  <thead>\n",
196 |        "    <tr style=\"text-align: right;\">\n",
197 |        "      <th></th>\n",
198 |        "      <th>mpg</th>\n",
199 |        "      <th>disp</th>\n",
200 |        "      <th>hp</th>\n",
201 |        "      <th>drat</th>\n",
202 |        "      <th>wt</th>\n",
203 |        "      <th>qsec</th>\n",
204 |        "      <th>vs</th>\n",
205 |        "      <th>am</th>\n",
206 |        "      <th>gear</th>\n",
207 |        "      <th>carb</th>\n",
208 |        "    </tr>\n",
209 |        "    <tr>\n",
210 |        "      <th>cyl</th>\n",
211 |        "      <th></th>\n",
212 |        "      <th></th>\n",
213 |        "      <th></th>\n",
214 |        "      <th></th>\n",
215 |        "      <th></th>\n",
216 |        "      <th></th>\n",
217 |        "      <th></th>\n",
218 |        "      <th></th>\n",
219 |        "      <th></th>\n",
220 |        "      <th></th>\n",
221 |        "    </tr>\n",
222 |        "  </thead>\n",
223 |        "  <tbody>\n",
224 |        "    <tr>\n",
225 |        "      <th>4</th>\n",
226 |        "      <td>26.663636</td>\n",
227 |        "      <td>105.136364</td>\n",
228 |        "      <td>82.636364</td>\n",
229 |        "      <td>4.070909</td>\n",
230 |        "      <td>2.285727</td>\n",
231 |        "      <td>19.137273</td>\n",
232 |        "      <td>0.909091</td>\n",
233 |        "      <td>0.727273</td>\n",
234 |        "      <td>4.090909</td>\n",
235 |        "      <td>1.545455</td>\n",
236 |        "    </tr>\n",
237 |        "    <tr>\n",
238 |        "      <th>6</th>\n",
239 |        "      <td>19.742857</td>\n",
240 |        "      <td>183.314286</td>\n",
241 |        "      <td>122.285714</td>\n",
242 |        "      <td>3.585714</td>\n",
243 |        "      <td>3.117143</td>\n",
244 |        "      <td>17.977143</td>\n",
245 |        "      <td>0.571429</td>\n",
246 |        "      <td>0.428571</td>\n",
247 |        "      <td>3.857143</td>\n",
248 |        "      <td>3.428571</td>\n",
249 |        "    </tr>\n",
250 |        "    <tr>\n",
251 |        "      <th>8</th>\n",
252 |        "      <td>15.100000</td>\n",
253 |        "      <td>353.100000</td>\n",
254 |        "      <td>209.214286</td>\n",
255 |        "      <td>3.229286</td>\n",
256 |        "      <td>3.999214</td>\n",
257 |        "      <td>16.772143</td>\n",
258 |        "      <td>0.000000</td>\n",
259 |        "      <td>0.142857</td>\n",
260 |        "      <td>3.285714</td>\n",
261 |        "      <td>3.500000</td>\n",
262 |        "    </tr>\n",
263 |        "  </tbody>\n",
264 |        "</table>\n",
265 |        "</div>"
266 |       ],
267 |       "text/plain": [
268 |        "           mpg        disp          hp      drat        wt       qsec  \\\n",
269 |        "cyl                                                                     \n",
270 |        "4    26.663636  105.136364   82.636364  4.070909  2.285727  19.137273   \n",
271 |        "6    19.742857  183.314286  122.285714  3.585714  3.117143  17.977143   \n",
272 |        "8    15.100000  353.100000  209.214286  3.229286  3.999214  16.772143   \n",
273 |        "\n",
274 |        "           vs        am      gear      carb  \n",
275 |        "cyl                                          \n",
276 |        "4    0.909091  0.727273  4.090909  1.545455  \n",
277 |        "6    0.571429  0.428571  3.857143  3.428571  \n",
278 |        "8    0.000000  0.142857  3.285714  3.500000  "
279 |       ]
280 |      },
281 |      "execution_count": 13,
282 |      "metadata": {},
283 |      "output_type": "execute_result"
284 |     }
285 |    ],
286 |    "source": [
287 |     "cars_groups = cars.groupby(cars['cyl'])\n",
288 |     "cars_groups.mean(numeric_only=True)"
289 |    ]
290 |   },
291 |   {
292 |    "cell_type": "markdown",
293 |    "metadata": {},
294 |    "source": []
295 |   }
296 |  ],
297 |  "metadata": {
298 |   "kernelspec": {
299 |    "display_name": "Python 3",
300 |    "language": "python",
301 |    "name": "python3"
302 |   },
303 |   "language_info": {
304 |    "codemirror_mode": {
305 |     "name": "ipython",
306 |     "version": 3
307 |    },
308 |    "file_extension": ".py",
309 |    "mimetype": "text/x-python",
310 |    "name": "python",
311 |    "nbconvert_exporter": "python",
312 |    "pygments_lexer": "ipython3",
313 |    "version": "3.10.13"
314 |   }
315 |  },
316 |  "nbformat": 4,
317 |  "nbformat_minor": 1
318 | }
319 | 


--------------------------------------------------------------------------------
/notebooks/04_01b.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "id": "51445501-72bb-48f0-bfb8-715edb50087e",
 6 |    "metadata": {},
 7 |    "source": []
 8 |   },
 9 |   {
10 |    "cell_type": "code",
11 |    "execution_count": null,
12 |    "metadata": {},
13 |    "outputs": [],
14 |    "source": []
15 |   },
16 |   {
17 |    "cell_type": "code",
18 |    "execution_count": null,
19 |    "metadata": {},
20 |    "outputs": [],
21 |    "source": []
22 |   },
23 |   {
24 |    "cell_type": "markdown",
25 |    "metadata": {},
26 |    "source": [
27 |     "### Matplotlib's Bar Chart"
28 |    ]
29 |   },
30 |   {
31 |    "cell_type": "markdown",
32 |    "id": "7359f9c0-0051-48c5-91fc-5185eae9bfd0",
33 |    "metadata": {},
34 |    "source": [
35 |     "### Line Plot Matplotlib"
36 |    ]
37 |   },
38 |   {
39 |    "cell_type": "markdown",
40 |    "id": "f1c8460d-7689-406f-a6ac-2720d8cb867d",
41 |    "metadata": {},
42 |    "source": [
43 |     "### Line Plot Seaborn"
44 |    ]
45 |   },
46 |   {
47 |    "cell_type": "markdown",
48 |    "id": "16d65d0b-34a9-4a98-8550-4cdc0dc66c25",
49 |    "metadata": {},
50 |    "source": [
51 |     "### Pie Chart Matplotlib"
52 |    ]
53 |   },
54 |   {
55 |    "cell_type": "markdown",
56 |    "id": "4be3dfcc-1d4a-4b7e-bc57-427280da980e",
57 |    "metadata": {},
58 |    "source": [
59 |     "### Pie Chart Seaborn"
60 |    ]
61 |   }
62 |  ],
63 |  "metadata": {
64 |   "kernelspec": {
65 |    "display_name": "Python 3",
66 |    "language": "python",
67 |    "name": "python3"
68 |   },
69 |   "language_info": {
70 |    "codemirror_mode": {
71 |     "name": "ipython",
72 |     "version": 3
73 |    },
74 |    "file_extension": ".py",
75 |    "mimetype": "text/x-python",
76 |    "name": "python",
77 |    "nbconvert_exporter": "python",
78 |    "pygments_lexer": "ipython3",
79 |    "version": "3.8.8"
80 |   }
81 |  },
82 |  "nbformat": 4,
83 |  "nbformat_minor": 5
84 | }
85 | 


--------------------------------------------------------------------------------
/notebooks/04_02b.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": null,
 6 |    "metadata": {},
 7 |    "outputs": [],
 8 |    "source": []
 9 |   },
10 |   {
11 |    "cell_type": "markdown",
12 |    "metadata": {},
13 |    "source": [
14 |     "### Creating a line chart from a list object"
15 |    ]
16 |   },
17 |   {
18 |    "cell_type": "markdown",
19 |    "metadata": {},
20 |    "source": [
21 |     "#### Plotting a line chart in matplotlib"
22 |    ]
23 |   },
24 |   {
25 |    "cell_type": "markdown",
26 |    "metadata": {},
27 |    "source": [
28 |     "#### Plotting a line chart from a Pandas object"
29 |    ]
30 |   },
31 |   {
32 |    "cell_type": "code",
33 |    "execution_count": null,
34 |    "metadata": {},
35 |    "outputs": [],
36 |    "source": [
37 |     "address = '/workspaces/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/data/mtcars.csv'\n",
38 |     "\n",
39 |     "cars = pd.read_csv(address)\n",
40 |     "cars.columns = ['car_names','mpg','cyl','disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb']\n",
41 |     "mpg = cars['mpg']"
42 |    ]
43 |   },
44 |   {
45 |    "cell_type": "markdown",
46 |    "metadata": {},
47 |    "source": [
48 |     "### Creating bar charts\n",
49 |     "#### Creating a bar chart from a list"
50 |    ]
51 |   },
52 |   {
53 |    "cell_type": "markdown",
54 |    "metadata": {},
55 |    "source": [
56 |     "#### Creating bar charts from Pandas objects"
57 |    ]
58 |   },
59 |   {
60 |    "cell_type": "markdown",
61 |    "metadata": {},
62 |    "source": [
63 |     "### Creating a pie chart"
64 |    ]
65 |   },
66 |   {
67 |    "cell_type": "markdown",
68 |    "metadata": {},
69 |    "source": [
70 |     "### Saving a plot"
71 |    ]
72 |   }
73 |  ],
74 |  "metadata": {
75 |   "anaconda-cloud": {},
76 |   "kernelspec": {
77 |    "display_name": "Python 3",
78 |    "language": "python",
79 |    "name": "python3"
80 |   },
81 |   "language_info": {
82 |    "codemirror_mode": {
83 |     "name": "ipython",
84 |     "version": 3
85 |    },
86 |    "file_extension": ".py",
87 |    "mimetype": "text/x-python",
88 |    "name": "python",
89 |    "nbconvert_exporter": "python",
90 |    "pygments_lexer": "ipython3",
91 |    "version": "3.8.8"
92 |   }
93 |  },
94 |  "nbformat": 4,
95 |  "nbformat_minor": 1
96 | }
97 | 


--------------------------------------------------------------------------------
/notebooks/04_03b.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": 1,
 6 |    "metadata": {},
 7 |    "outputs": [],
 8 |    "source": [
 9 |     "import numpy as np\n",
10 |     "from numpy.random import randn\n",
11 |     "import pandas as pd\n",
12 |     "from pandas import Series, DataFrame\n",
13 |     "\n",
14 |     "import matplotlib.pyplot as plt\n",
15 |     "from matplotlib import rcParams"
16 |    ]
17 |   },
18 |   {
19 |    "cell_type": "markdown",
20 |    "metadata": {},
21 |    "source": [
22 |     "### Defining axes, ticks, and grids"
23 |    ]
24 |   },
25 |   {
26 |    "cell_type": "markdown",
27 |    "metadata": {},
28 |    "source": [
29 |     "### Generating multiple plots in one figure with subplots "
30 |    ]
31 |   }
32 |  ],
33 |  "metadata": {
34 |   "anaconda-cloud": {},
35 |   "kernelspec": {
36 |    "display_name": "Python 3",
37 |    "language": "python",
38 |    "name": "python3"
39 |   },
40 |   "language_info": {
41 |    "codemirror_mode": {
42 |     "name": "ipython",
43 |     "version": 3
44 |    },
45 |    "file_extension": ".py",
46 |    "mimetype": "text/x-python",
47 |    "name": "python",
48 |    "nbconvert_exporter": "python",
49 |    "pygments_lexer": "ipython3",
50 |    "version": "3.8.8"
51 |   }
52 |  },
53 |  "nbformat": 4,
54 |  "nbformat_minor": 1
55 | }
56 | 


--------------------------------------------------------------------------------
/notebooks/04_04b.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 6,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import numpy as np\n",
 10 |     "import pandas as pd\n",
 11 |     "from pandas import Series, DataFrame\n",
 12 |     "\n",
 13 |     "import matplotlib.pyplot as plt\n",
 14 |     "from pylab import rcParams\n",
 15 |     "\n",
 16 |     "import seaborn as sb"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": null,
 22 |    "metadata": {},
 23 |    "outputs": [],
 24 |    "source": [
 25 |     "%matplotlib inline\n",
 26 |     "rcParams['figure.figsize'] = 5, 4\n",
 27 |     "sb.set_style('whitegrid')"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "markdown",
 32 |    "metadata": {},
 33 |    "source": [
 34 |     "### Defining plot color"
 35 |    ]
 36 |   },
 37 |   {
 38 |    "cell_type": "code",
 39 |    "execution_count": null,
 40 |    "metadata": {},
 41 |    "outputs": [],
 42 |    "source": []
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": null,
 47 |    "metadata": {},
 48 |    "outputs": [],
 49 |    "source": []
 50 |   },
 51 |   {
 52 |    "cell_type": "code",
 53 |    "execution_count": null,
 54 |    "metadata": {},
 55 |    "outputs": [],
 56 |    "source": [
 57 |     "address = '/workspaces/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/data/mtcars.csv'\n",
 58 |     "\n",
 59 |     "cars = pd.read_csv(address)\n",
 60 |     "cars.columns = ['car_names','mpg','cyl','disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb']\n"
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "markdown",
 65 |    "metadata": {},
 66 |    "source": [
 67 |     "### Customizing line styles"
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "markdown",
 72 |    "metadata": {},
 73 |    "source": [
 74 |     "### Setting plot markers"
 75 |    ]
 76 |   }
 77 |  ],
 78 |  "metadata": {
 79 |   "anaconda-cloud": {},
 80 |   "kernelspec": {
 81 |    "display_name": "Python 3",
 82 |    "language": "python",
 83 |    "name": "python3"
 84 |   },
 85 |   "language_info": {
 86 |    "codemirror_mode": {
 87 |     "name": "ipython",
 88 |     "version": 3
 89 |    },
 90 |    "file_extension": ".py",
 91 |    "mimetype": "text/x-python",
 92 |    "name": "python",
 93 |    "nbconvert_exporter": "python",
 94 |    "pygments_lexer": "ipython3",
 95 |    "version": "3.8.8"
 96 |   }
 97 |  },
 98 |  "nbformat": 4,
 99 |  "nbformat_minor": 1
100 | }
101 | 


--------------------------------------------------------------------------------
/notebooks/04_05b.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "## Creating labels and annotations"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 6,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import numpy as np\n",
 17 |     "import pandas as pd\n",
 18 |     "from pandas import Series, DataFrame\n",
 19 |     "\n",
 20 |     "import matplotlib.pyplot as plt\n",
 21 |     "from pylab import rcParams\n",
 22 |     "import seaborn as sb"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "code",
 27 |    "execution_count": 7,
 28 |    "metadata": {},
 29 |    "outputs": [],
 30 |    "source": [
 31 |     "%matplotlib inline\n",
 32 |     "rcParams['figure.figsize'] = 8,4\n",
 33 |     "sb.set_style('whitegrid')"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "markdown",
 38 |    "metadata": {
 39 |     "collapsed": true
 40 |    },
 41 |    "source": [
 42 |     "### Labeling plot features\n",
 43 |     "#### The functional method"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "markdown",
 48 |    "metadata": {},
 49 |    "source": [
 50 |     "#### The object-oriented method"
 51 |    ]
 52 |   },
 53 |   {
 54 |    "cell_type": "markdown",
 55 |    "metadata": {},
 56 |    "source": [
 57 |     "### Adding a legend to your plot\n",
 58 |     "#### The functional method"
 59 |    ]
 60 |   },
 61 |   {
 62 |    "cell_type": "markdown",
 63 |    "metadata": {},
 64 |    "source": [
 65 |     "#### The object-oriented method"
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "markdown",
 70 |    "metadata": {
 71 |     "collapsed": true
 72 |    },
 73 |    "source": [
 74 |     "### Annotating your plot"
 75 |    ]
 76 |   }
 77 |  ],
 78 |  "metadata": {
 79 |   "anaconda-cloud": {},
 80 |   "kernelspec": {
 81 |    "display_name": "Python 3",
 82 |    "language": "python",
 83 |    "name": "python3"
 84 |   },
 85 |   "language_info": {
 86 |    "codemirror_mode": {
 87 |     "name": "ipython",
 88 |     "version": 3
 89 |    },
 90 |    "file_extension": ".py",
 91 |    "mimetype": "text/x-python",
 92 |    "name": "python",
 93 |    "nbconvert_exporter": "python",
 94 |    "pygments_lexer": "ipython3",
 95 |    "version": "3.8.8"
 96 |   }
 97 |  },
 98 |  "nbformat": 4,
 99 |  "nbformat_minor": 1
100 | }
101 | 


--------------------------------------------------------------------------------
/notebooks/04_06b.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "metadata": {},
 6 |    "source": [
 7 |     "## Visualizing time series"
 8 |    ]
 9 |   },
10 |   {
11 |    "cell_type": "code",
12 |    "execution_count": 1,
13 |    "metadata": {},
14 |    "outputs": [],
15 |    "source": [
16 |     "import numpy as np\n",
17 |     "from numpy.random import randn\n",
18 |     "import pandas as pd\n",
19 |     "from pandas import Series, DataFrame\n",
20 |     "\n",
21 |     "import matplotlib.pyplot as plt\n",
22 |     "from pylab import rcParams\n",
23 |     "import seaborn as sb"
24 |    ]
25 |   },
26 |   {
27 |    "cell_type": "code",
28 |    "execution_count": 2,
29 |    "metadata": {},
30 |    "outputs": [],
31 |    "source": [
32 |     "%matplotlib inline\n",
33 |     "rcParams['figure.figsize'] = 5, 4\n",
34 |     "sb.set_style('whitegrid')"
35 |    ]
36 |   },
37 |   {
38 |    "cell_type": "markdown",
39 |    "metadata": {},
40 |    "source": [
41 |     "### The simplest time series plot"
42 |    ]
43 |   }
44 |  ],
45 |  "metadata": {
46 |   "anaconda-cloud": {},
47 |   "kernelspec": {
48 |    "display_name": "Python 3",
49 |    "language": "python",
50 |    "name": "python3"
51 |   },
52 |   "language_info": {
53 |    "codemirror_mode": {
54 |     "name": "ipython",
55 |     "version": 3
56 |    },
57 |    "file_extension": ".py",
58 |    "mimetype": "text/x-python",
59 |    "name": "python",
60 |    "nbconvert_exporter": "python",
61 |    "pygments_lexer": "ipython3",
62 |    "version": "3.8.8"
63 |   }
64 |  },
65 |  "nbformat": 4,
66 |  "nbformat_minor": 1
67 | }
68 | 


--------------------------------------------------------------------------------
/notebooks/04_07b.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 4,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import numpy as np\n",
 10 |     "import pandas as pd\n",
 11 |     "from pandas import Series, DataFrame\n",
 12 |     "\n",
 13 |     "from pandas.plotting import scatter_matrix\n",
 14 |     "\n",
 15 |     "import matplotlib.pyplot as plt\n",
 16 |     "from pylab import rcParams\n",
 17 |     "import seaborn as sns"
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "code",
 22 |    "execution_count": 5,
 23 |    "metadata": {},
 24 |    "outputs": [],
 25 |    "source": [
 26 |     "%matplotlib inline\n",
 27 |     "rcParams['figure.figsize'] = 5, 4\n",
 28 |     "sns.set_style('whitegrid')"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "markdown",
 33 |    "metadata": {},
 34 |    "source": [
 35 |     "### Eyeballing dataset distributions with histograms"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "code",
 40 |    "execution_count": 6,
 41 |    "metadata": {},
 42 |    "outputs": [],
 43 |    "source": [
 44 |     "address = '/workspaces/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/data/mtcars.csv'\n",
 45 |     "\n",
 46 |     "cars = pd.read_csv(address)\n",
 47 |     "\n",
 48 |     "cars.columns = ['car_names','mpg','cyl','disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb']\n",
 49 |     "cars.index = cars.car_names\n"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "code",
 54 |    "execution_count": null,
 55 |    "metadata": {},
 56 |    "outputs": [],
 57 |    "source": []
 58 |   },
 59 |   {
 60 |    "cell_type": "code",
 61 |    "execution_count": null,
 62 |    "metadata": {},
 63 |    "outputs": [],
 64 |    "source": []
 65 |   },
 66 |   {
 67 |    "cell_type": "markdown",
 68 |    "metadata": {},
 69 |    "source": [
 70 |     "### Seeing scatterplots in action"
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "code",
 75 |    "execution_count": null,
 76 |    "metadata": {},
 77 |    "outputs": [],
 78 |    "source": []
 79 |   },
 80 |   {
 81 |    "cell_type": "code",
 82 |    "execution_count": null,
 83 |    "metadata": {},
 84 |    "outputs": [],
 85 |    "source": []
 86 |   },
 87 |   {
 88 |    "cell_type": "markdown",
 89 |    "metadata": {},
 90 |    "source": [
 91 |     "### Generating a scatter plot matrix\n"
 92 |    ]
 93 |   },
 94 |   {
 95 |    "cell_type": "code",
 96 |    "execution_count": null,
 97 |    "metadata": {},
 98 |    "outputs": [],
 99 |    "source": []
100 |   },
101 |   {
102 |    "cell_type": "code",
103 |    "execution_count": null,
104 |    "metadata": {},
105 |    "outputs": [],
106 |    "source": []
107 |   },
108 |   {
109 |    "cell_type": "markdown",
110 |    "metadata": {},
111 |    "source": [
112 |     "### Building boxplots\n"
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "code",
117 |    "execution_count": null,
118 |    "metadata": {},
119 |    "outputs": [],
120 |    "source": []
121 |   },
122 |   {
123 |    "cell_type": "code",
124 |    "execution_count": null,
125 |    "metadata": {},
126 |    "outputs": [],
127 |    "source": []
128 |   },
129 |   {
130 |    "cell_type": "code",
131 |    "execution_count": null,
132 |    "metadata": {},
133 |    "outputs": [],
134 |    "source": []
135 |   }
136 |  ],
137 |  "metadata": {
138 |   "anaconda-cloud": {},
139 |   "kernelspec": {
140 |    "display_name": "Python 3",
141 |    "language": "python",
142 |    "name": "python3"
143 |   },
144 |   "language_info": {
145 |    "codemirror_mode": {
146 |     "name": "ipython",
147 |     "version": 3
148 |    },
149 |    "file_extension": ".py",
150 |    "mimetype": "text/x-python",
151 |    "name": "python",
152 |    "nbconvert_exporter": "python",
153 |    "pygments_lexer": "ipython3",
154 |    "version": "3.10.13"
155 |   }
156 |  },
157 |  "nbformat": 4,
158 |  "nbformat_minor": 1
159 | }
160 | 


--------------------------------------------------------------------------------
/notebooks/05_01b.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "metadata": {},
 6 |    "source": [
 7 |     "## Using NumPy to perform arithmetic operations on data"
 8 |    ]
 9 |   },
10 |   {
11 |    "cell_type": "code",
12 |    "execution_count": 1,
13 |    "metadata": {},
14 |    "outputs": [],
15 |    "source": [
16 |     "import numpy as np\n",
17 |     "from numpy.random import randn"
18 |    ]
19 |   },
20 |   {
21 |    "cell_type": "markdown",
22 |    "metadata": {},
23 |    "source": [
24 |     "## Creating arrays\n",
25 |     "### Creating arrays using a list"
26 |    ]
27 |   },
28 |   {
29 |    "cell_type": "markdown",
30 |    "metadata": {},
31 |    "source": [
32 |     "### Creating arrays via assignment"
33 |    ]
34 |   },
35 |   {
36 |    "cell_type": "markdown",
37 |    "metadata": {
38 |     "collapsed": true
39 |    },
40 |    "source": [
41 |     "### Multiplying matrices and basic linear algebra"
42 |    ]
43 |   }
44 |  ],
45 |  "metadata": {
46 |   "kernelspec": {
47 |    "display_name": "Python 3",
48 |    "language": "python",
49 |    "name": "python3"
50 |   },
51 |   "language_info": {
52 |    "codemirror_mode": {
53 |     "name": "ipython",
54 |     "version": 3
55 |    },
56 |    "file_extension": ".py",
57 |    "mimetype": "text/x-python",
58 |    "name": "python",
59 |    "nbconvert_exporter": "python",
60 |    "pygments_lexer": "ipython3",
61 |    "version": "3.8.8"
62 |   }
63 |  },
64 |  "nbformat": 4,
65 |  "nbformat_minor": 1
66 | }
67 | 


--------------------------------------------------------------------------------
/notebooks/05_01e.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "## Using NumPy to perform arithmetic operations on data"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import numpy as np\n",
 17 |     "from numpy.random import randn"
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "code",
 22 |    "execution_count": 2,
 23 |    "metadata": {},
 24 |    "outputs": [],
 25 |    "source": [
 26 |     "np.set_printoptions(precision=2)"
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "markdown",
 31 |    "metadata": {},
 32 |    "source": [
 33 |     "## Creating arrays\n",
 34 |     "### Creating arrays using a list"
 35 |    ]
 36 |   },
 37 |   {
 38 |    "cell_type": "code",
 39 |    "execution_count": 3,
 40 |    "metadata": {},
 41 |    "outputs": [
 42 |     {
 43 |      "data": {
 44 |       "text/plain": [
 45 |        "array([1, 2, 3, 4, 5, 6])"
 46 |       ]
 47 |      },
 48 |      "execution_count": 3,
 49 |      "metadata": {},
 50 |      "output_type": "execute_result"
 51 |     }
 52 |    ],
 53 |    "source": [
 54 |     "a = np.array([1,2,3,4,5,6])\n",
 55 |     "a"
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "code",
 60 |    "execution_count": 4,
 61 |    "metadata": {},
 62 |    "outputs": [
 63 |     {
 64 |      "data": {
 65 |       "text/plain": [
 66 |        "array([[10, 20, 30],\n",
 67 |        "       [40, 50, 60]])"
 68 |       ]
 69 |      },
 70 |      "execution_count": 4,
 71 |      "metadata": {},
 72 |      "output_type": "execute_result"
 73 |     }
 74 |    ],
 75 |    "source": [
 76 |     "b = np.array([[10,20,30],[40,50,60]])\n",
 77 |     "b"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "markdown",
 82 |    "metadata": {},
 83 |    "source": [
 84 |     "### Creating arrays via assignment"
 85 |    ]
 86 |   },
 87 |   {
 88 |    "cell_type": "code",
 89 |    "execution_count": 5,
 90 |    "metadata": {},
 91 |    "outputs": [
 92 |     {
 93 |      "data": {
 94 |       "text/plain": [
 95 |        "array([  8.22,  36.97, -30.23, -21.28, -34.45,  -8.  ])"
 96 |       ]
 97 |      },
 98 |      "execution_count": 5,
 99 |      "metadata": {},
100 |      "output_type": "execute_result"
101 |     }
102 |    ],
103 |    "source": [
104 |     "np.random.seed(25)\n",
105 |     "c = 36*np.random.randn(6)\n",
106 |     "c"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "code",
111 |    "execution_count": 6,
112 |    "metadata": {},
113 |    "outputs": [
114 |     {
115 |      "data": {
116 |       "text/plain": [
117 |        "array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,\n",
118 |        "       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34])"
119 |       ]
120 |      },
121 |      "execution_count": 6,
122 |      "metadata": {},
123 |      "output_type": "execute_result"
124 |     }
125 |    ],
126 |    "source": [
127 |     "d= np.arange(1,35)\n",
128 |     "d"
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "code",
133 |    "execution_count": 7,
134 |    "metadata": {},
135 |    "outputs": [
136 |     {
137 |      "data": {
138 |       "text/plain": [
139 |        "array([10, 20, 30, 40, 50, 60])"
140 |       ]
141 |      },
142 |      "execution_count": 7,
143 |      "metadata": {},
144 |      "output_type": "execute_result"
145 |     }
146 |    ],
147 |    "source": [
148 |     "a*10"
149 |    ]
150 |   },
151 |   {
152 |    "cell_type": "code",
153 |    "execution_count": 8,
154 |    "metadata": {},
155 |    "outputs": [
156 |     {
157 |      "data": {
158 |       "text/plain": [
159 |        "array([  9.22,  38.97, -27.23, -17.28, -29.45,  -2.  ])"
160 |       ]
161 |      },
162 |      "execution_count": 8,
163 |      "metadata": {},
164 |      "output_type": "execute_result"
165 |     }
166 |    ],
167 |    "source": [
168 |     "c+a"
169 |    ]
170 |   },
171 |   {
172 |    "cell_type": "code",
173 |    "execution_count": 9,
174 |    "metadata": {},
175 |    "outputs": [
176 |     {
177 |      "data": {
178 |       "text/plain": [
179 |        "array([  7.22,  34.97, -33.23, -25.28, -39.45, -14.  ])"
180 |       ]
181 |      },
182 |      "execution_count": 9,
183 |      "metadata": {},
184 |      "output_type": "execute_result"
185 |     }
186 |    ],
187 |    "source": [
188 |     "c-a"
189 |    ]
190 |   },
191 |   {
192 |    "cell_type": "code",
193 |    "execution_count": 11,
194 |    "metadata": {},
195 |    "outputs": [
196 |     {
197 |      "data": {
198 |       "text/plain": [
199 |        "array([   8.22,   73.94,  -90.68,  -85.13, -172.24,  -48.02])"
200 |       ]
201 |      },
202 |      "execution_count": 11,
203 |      "metadata": {},
204 |      "output_type": "execute_result"
205 |     }
206 |    ],
207 |    "source": [
208 |     "c*a"
209 |    ]
210 |   },
211 |   {
212 |    "cell_type": "code",
213 |    "execution_count": 12,
214 |    "metadata": {},
215 |    "outputs": [
216 |     {
217 |      "data": {
218 |       "text/plain": [
219 |        "array([  8.22,  18.48, -10.08,  -5.32,  -6.89,  -1.33])"
220 |       ]
221 |      },
222 |      "execution_count": 12,
223 |      "metadata": {},
224 |      "output_type": "execute_result"
225 |     }
226 |    ],
227 |    "source": [
228 |     "c/a"
229 |    ]
230 |   },
231 |   {
232 |    "cell_type": "markdown",
233 |    "metadata": {
234 |     "collapsed": true
235 |    },
236 |    "source": [
237 |     "### Multiplying matrices and basic linear algebra"
238 |    ]
239 |   },
240 |   {
241 |    "cell_type": "code",
242 |    "execution_count": null,
243 |    "metadata": {},
244 |    "outputs": [],
245 |    "source": []
246 |   }
247 |  ],
248 |  "metadata": {
249 |   "kernelspec": {
250 |    "display_name": "Python 3",
251 |    "language": "python",
252 |    "name": "python3"
253 |   },
254 |   "language_info": {
255 |    "codemirror_mode": {
256 |     "name": "ipython",
257 |     "version": 3
258 |    },
259 |    "file_extension": ".py",
260 |    "mimetype": "text/x-python",
261 |    "name": "python",
262 |    "nbconvert_exporter": "python",
263 |    "pygments_lexer": "ipython3",
264 |    "version": "3.10.13"
265 |   }
266 |  },
267 |  "nbformat": 4,
268 |  "nbformat_minor": 1
269 | }
270 | 


--------------------------------------------------------------------------------
/notebooks/05_02b.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "metadata": {},
 6 |    "source": [
 7 |     "## Generating summary statistics using pandas and scipy"
 8 |    ]
 9 |   },
10 |   {
11 |    "cell_type": "code",
12 |    "execution_count": 3,
13 |    "metadata": {},
14 |    "outputs": [],
15 |    "source": [
16 |     "import numpy as np\n",
17 |     "import pandas as pd\n",
18 |     "from pandas import Series, DataFrame\n",
19 |     "\n",
20 |     "import scipy\n",
21 |     "from scipy import stats"
22 |    ]
23 |   },
24 |   {
25 |    "cell_type": "code",
26 |    "execution_count": null,
27 |    "metadata": {},
28 |    "outputs": [],
29 |    "source": [
30 |     "address = '/workspaces/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/data/mtcars.csv'\n",
31 |     "\n",
32 |     "cars = pd.read_csv(address)\n",
33 |     "cars.columns = ['car_names','mpg','cyl','disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb']\n"
34 |    ]
35 |   },
36 |   {
37 |    "cell_type": "markdown",
38 |    "metadata": {},
39 |    "source": [
40 |     "### Looking at summary statistics that describe a variable's numeric values"
41 |    ]
42 |   },
43 |   {
44 |    "cell_type": "markdown",
45 |    "metadata": {},
46 |    "source": [
47 |     "### Looking at summary statistics that describe variable distribution"
48 |    ]
49 |   }
50 |  ],
51 |  "metadata": {
52 |   "anaconda-cloud": {},
53 |   "kernelspec": {
54 |    "display_name": "Python 3",
55 |    "language": "python",
56 |    "name": "python3"
57 |   },
58 |   "language_info": {
59 |    "codemirror_mode": {
60 |     "name": "ipython",
61 |     "version": 3
62 |    },
63 |    "file_extension": ".py",
64 |    "mimetype": "text/x-python",
65 |    "name": "python",
66 |    "nbconvert_exporter": "python",
67 |    "pygments_lexer": "ipython3",
68 |    "version": "3.8.8"
69 |   }
70 |  },
71 |  "nbformat": 4,
72 |  "nbformat_minor": 1
73 | }
74 | 


--------------------------------------------------------------------------------
/notebooks/05_02e.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "## Generating summary statistics using pandas and scipy"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import numpy as np\n",
 17 |     "import pandas as pd\n",
 18 |     "from pandas import Series, DataFrame\n",
 19 |     "\n",
 20 |     "import scipy\n",
 21 |     "from scipy import stats"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": 3,
 27 |    "metadata": {},
 28 |    "outputs": [
 29 |     {
 30 |      "data": {
 31 |       "text/html": [
 32 |        "<div>\n",
 33 |        "<style scoped>\n",
 34 |        "    .dataframe tbody tr th:only-of-type {\n",
 35 |        "        vertical-align: middle;\n",
 36 |        "    }\n",
 37 |        "\n",
 38 |        "    .dataframe tbody tr th {\n",
 39 |        "        vertical-align: top;\n",
 40 |        "    }\n",
 41 |        "\n",
 42 |        "    .dataframe thead th {\n",
 43 |        "        text-align: right;\n",
 44 |        "    }\n",
 45 |        "</style>\n",
 46 |        "<table border=\"1\" class=\"dataframe\">\n",
 47 |        "  <thead>\n",
 48 |        "    <tr style=\"text-align: right;\">\n",
 49 |        "      <th></th>\n",
 50 |        "      <th>car_names</th>\n",
 51 |        "      <th>mpg</th>\n",
 52 |        "      <th>cyl</th>\n",
 53 |        "      <th>disp</th>\n",
 54 |        "      <th>hp</th>\n",
 55 |        "      <th>drat</th>\n",
 56 |        "      <th>wt</th>\n",
 57 |        "      <th>qsec</th>\n",
 58 |        "      <th>vs</th>\n",
 59 |        "      <th>am</th>\n",
 60 |        "      <th>gear</th>\n",
 61 |        "      <th>carb</th>\n",
 62 |        "    </tr>\n",
 63 |        "  </thead>\n",
 64 |        "  <tbody>\n",
 65 |        "    <tr>\n",
 66 |        "      <th>0</th>\n",
 67 |        "      <td>Mazda RX4</td>\n",
 68 |        "      <td>21.0</td>\n",
 69 |        "      <td>6</td>\n",
 70 |        "      <td>160.0</td>\n",
 71 |        "      <td>110</td>\n",
 72 |        "      <td>3.90</td>\n",
 73 |        "      <td>2.620</td>\n",
 74 |        "      <td>16.46</td>\n",
 75 |        "      <td>0</td>\n",
 76 |        "      <td>1</td>\n",
 77 |        "      <td>4</td>\n",
 78 |        "      <td>4</td>\n",
 79 |        "    </tr>\n",
 80 |        "    <tr>\n",
 81 |        "      <th>1</th>\n",
 82 |        "      <td>Mazda RX4 Wag</td>\n",
 83 |        "      <td>21.0</td>\n",
 84 |        "      <td>6</td>\n",
 85 |        "      <td>160.0</td>\n",
 86 |        "      <td>110</td>\n",
 87 |        "      <td>3.90</td>\n",
 88 |        "      <td>2.875</td>\n",
 89 |        "      <td>17.02</td>\n",
 90 |        "      <td>0</td>\n",
 91 |        "      <td>1</td>\n",
 92 |        "      <td>4</td>\n",
 93 |        "      <td>4</td>\n",
 94 |        "    </tr>\n",
 95 |        "    <tr>\n",
 96 |        "      <th>2</th>\n",
 97 |        "      <td>Datsun 710</td>\n",
 98 |        "      <td>22.8</td>\n",
 99 |        "      <td>4</td>\n",
100 |        "      <td>108.0</td>\n",
101 |        "      <td>93</td>\n",
102 |        "      <td>3.85</td>\n",
103 |        "      <td>2.320</td>\n",
104 |        "      <td>18.61</td>\n",
105 |        "      <td>1</td>\n",
106 |        "      <td>1</td>\n",
107 |        "      <td>4</td>\n",
108 |        "      <td>1</td>\n",
109 |        "    </tr>\n",
110 |        "    <tr>\n",
111 |        "      <th>3</th>\n",
112 |        "      <td>Hornet 4 Drive</td>\n",
113 |        "      <td>21.4</td>\n",
114 |        "      <td>6</td>\n",
115 |        "      <td>258.0</td>\n",
116 |        "      <td>110</td>\n",
117 |        "      <td>3.08</td>\n",
118 |        "      <td>3.215</td>\n",
119 |        "      <td>19.44</td>\n",
120 |        "      <td>1</td>\n",
121 |        "      <td>0</td>\n",
122 |        "      <td>3</td>\n",
123 |        "      <td>1</td>\n",
124 |        "    </tr>\n",
125 |        "    <tr>\n",
126 |        "      <th>4</th>\n",
127 |        "      <td>Hornet Sportabout</td>\n",
128 |        "      <td>18.7</td>\n",
129 |        "      <td>8</td>\n",
130 |        "      <td>360.0</td>\n",
131 |        "      <td>175</td>\n",
132 |        "      <td>3.15</td>\n",
133 |        "      <td>3.440</td>\n",
134 |        "      <td>17.02</td>\n",
135 |        "      <td>0</td>\n",
136 |        "      <td>0</td>\n",
137 |        "      <td>3</td>\n",
138 |        "      <td>2</td>\n",
139 |        "    </tr>\n",
140 |        "  </tbody>\n",
141 |        "</table>\n",
142 |        "</div>"
143 |       ],
144 |       "text/plain": [
145 |        "           car_names   mpg  cyl   disp   hp  drat     wt   qsec  vs  am  gear  \\\n",
146 |        "0          Mazda RX4  21.0    6  160.0  110  3.90  2.620  16.46   0   1     4   \n",
147 |        "1      Mazda RX4 Wag  21.0    6  160.0  110  3.90  2.875  17.02   0   1     4   \n",
148 |        "2         Datsun 710  22.8    4  108.0   93  3.85  2.320  18.61   1   1     4   \n",
149 |        "3     Hornet 4 Drive  21.4    6  258.0  110  3.08  3.215  19.44   1   0     3   \n",
150 |        "4  Hornet Sportabout  18.7    8  360.0  175  3.15  3.440  17.02   0   0     3   \n",
151 |        "\n",
152 |        "   carb  \n",
153 |        "0     4  \n",
154 |        "1     4  \n",
155 |        "2     1  \n",
156 |        "3     1  \n",
157 |        "4     2  "
158 |       ]
159 |      },
160 |      "execution_count": 3,
161 |      "metadata": {},
162 |      "output_type": "execute_result"
163 |     }
164 |    ],
165 |    "source": [
166 |     "address = '/workspaces/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/data/mtcars.csv'\n",
167 |     "\n",
168 |     "cars = pd.read_csv(address)\n",
169 |     "cars.columns = ['car_names','mpg','cyl','disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb']\n",
170 |     "cars.head()\n"
171 |    ]
172 |   },
173 |   {
174 |    "cell_type": "markdown",
175 |    "metadata": {},
176 |    "source": [
177 |     "### Looking at summary statistics that describe a variable's numeric values"
178 |    ]
179 |   },
180 |   {
181 |    "cell_type": "code",
182 |    "execution_count": 4,
183 |    "metadata": {},
184 |    "outputs": [
185 |     {
186 |      "data": {
187 |       "text/plain": [
188 |        "car_names    Mazda RX4Mazda RX4 WagDatsun 710Hornet 4 Drive...\n",
189 |        "mpg                                                      642.9\n",
190 |        "cyl                                                        198\n",
191 |        "disp                                                    7383.1\n",
192 |        "hp                                                        4694\n",
193 |        "drat                                                    115.09\n",
194 |        "wt                                                     102.952\n",
195 |        "qsec                                                    571.16\n",
196 |        "vs                                                          14\n",
197 |        "am                                                          13\n",
198 |        "gear                                                       118\n",
199 |        "carb                                                        90\n",
200 |        "dtype: object"
201 |       ]
202 |      },
203 |      "execution_count": 4,
204 |      "metadata": {},
205 |      "output_type": "execute_result"
206 |     }
207 |    ],
208 |    "source": [
209 |     "cars.sum()"
210 |    ]
211 |   },
212 |   {
213 |    "cell_type": "code",
214 |    "execution_count": 5,
215 |    "metadata": {},
216 |    "outputs": [
217 |     {
218 |      "data": {
219 |       "text/plain": [
220 |        "0     328.980\n",
221 |        "1     329.795\n",
222 |        "2     259.580\n",
223 |        "3     426.135\n",
224 |        "4     590.310\n",
225 |        "5     385.540\n",
226 |        "6     656.920\n",
227 |        "7     270.980\n",
228 |        "8     299.570\n",
229 |        "9     350.460\n",
230 |        "10    349.660\n",
231 |        "11    510.740\n",
232 |        "12    511.500\n",
233 |        "13    509.850\n",
234 |        "14    728.560\n",
235 |        "15    726.644\n",
236 |        "16    725.695\n",
237 |        "17    213.850\n",
238 |        "18    195.165\n",
239 |        "19    206.955\n",
240 |        "20    273.775\n",
241 |        "21    519.650\n",
242 |        "22    506.085\n",
243 |        "23    646.280\n",
244 |        "24    631.175\n",
245 |        "25    208.215\n",
246 |        "26    272.570\n",
247 |        "27    273.683\n",
248 |        "28    670.690\n",
249 |        "29    379.590\n",
250 |        "30    694.710\n",
251 |        "31    288.890\n",
252 |        "dtype: float64"
253 |       ]
254 |      },
255 |      "execution_count": 5,
256 |      "metadata": {},
257 |      "output_type": "execute_result"
258 |     }
259 |    ],
260 |    "source": [
261 |     "cars.sum(axis=1, numeric_only=True)"
262 |    ]
263 |   },
264 |   {
265 |    "cell_type": "code",
266 |    "execution_count": 6,
267 |    "metadata": {},
268 |    "outputs": [
269 |     {
270 |      "data": {
271 |       "text/plain": [
272 |        "mpg      19.200\n",
273 |        "cyl       6.000\n",
274 |        "disp    196.300\n",
275 |        "hp      123.000\n",
276 |        "drat      3.695\n",
277 |        "wt        3.325\n",
278 |        "qsec     17.710\n",
279 |        "vs        0.000\n",
280 |        "am        0.000\n",
281 |        "gear      4.000\n",
282 |        "carb      2.000\n",
283 |        "dtype: float64"
284 |       ]
285 |      },
286 |      "execution_count": 6,
287 |      "metadata": {},
288 |      "output_type": "execute_result"
289 |     }
290 |    ],
291 |    "source": [
292 |     "cars.median(numeric_only='True')"
293 |    ]
294 |   },
295 |   {
296 |    "cell_type": "code",
297 |    "execution_count": 7,
298 |    "metadata": {},
299 |    "outputs": [
300 |     {
301 |      "data": {
302 |       "text/plain": [
303 |        "mpg      20.090625\n",
304 |        "cyl       6.187500\n",
305 |        "disp    230.721875\n",
306 |        "hp      146.687500\n",
307 |        "drat      3.596563\n",
308 |        "wt        3.217250\n",
309 |        "qsec     17.848750\n",
310 |        "vs        0.437500\n",
311 |        "am        0.406250\n",
312 |        "gear      3.687500\n",
313 |        "carb      2.812500\n",
314 |        "dtype: float64"
315 |       ]
316 |      },
317 |      "execution_count": 7,
318 |      "metadata": {},
319 |      "output_type": "execute_result"
320 |     }
321 |    ],
322 |    "source": [
323 |     "cars.mean(numeric_only='True')"
324 |    ]
325 |   },
326 |   {
327 |    "cell_type": "code",
328 |    "execution_count": 8,
329 |    "metadata": {},
330 |    "outputs": [
331 |     {
332 |      "data": {
333 |       "text/plain": [
334 |        "car_names    Volvo 142E\n",
335 |        "mpg                33.9\n",
336 |        "cyl                   8\n",
337 |        "disp              472.0\n",
338 |        "hp                  335\n",
339 |        "drat               4.93\n",
340 |        "wt                5.424\n",
341 |        "qsec               22.9\n",
342 |        "vs                    1\n",
343 |        "am                    1\n",
344 |        "gear                  5\n",
345 |        "carb                  8\n",
346 |        "dtype: object"
347 |       ]
348 |      },
349 |      "execution_count": 8,
350 |      "metadata": {},
351 |      "output_type": "execute_result"
352 |     }
353 |    ],
354 |    "source": [
355 |     "cars.max()"
356 |    ]
357 |   },
358 |   {
359 |    "cell_type": "code",
360 |    "execution_count": 9,
361 |    "metadata": {},
362 |    "outputs": [
363 |     {
364 |      "data": {
365 |       "text/plain": [
366 |        "19"
367 |       ]
368 |      },
369 |      "execution_count": 9,
370 |      "metadata": {},
371 |      "output_type": "execute_result"
372 |     }
373 |    ],
374 |    "source": [
375 |     "mpg = cars.mpg\n",
376 |     "mpg.idxmax()"
377 |    ]
378 |   },
379 |   {
380 |    "cell_type": "markdown",
381 |    "metadata": {},
382 |    "source": [
383 |     "### Looking at summary statistics that describe variable distribution"
384 |    ]
385 |   },
386 |   {
387 |    "cell_type": "code",
388 |    "execution_count": 10,
389 |    "metadata": {},
390 |    "outputs": [
391 |     {
392 |      "data": {
393 |       "text/plain": [
394 |        "mpg       6.026948\n",
395 |        "cyl       1.785922\n",
396 |        "disp    123.938694\n",
397 |        "hp       68.562868\n",
398 |        "drat      0.534679\n",
399 |        "wt        0.978457\n",
400 |        "qsec      1.786943\n",
401 |        "vs        0.504016\n",
402 |        "am        0.498991\n",
403 |        "gear      0.737804\n",
404 |        "carb      1.615200\n",
405 |        "dtype: float64"
406 |       ]
407 |      },
408 |      "execution_count": 10,
409 |      "metadata": {},
410 |      "output_type": "execute_result"
411 |     }
412 |    ],
413 |    "source": [
414 |     "cars.std(numeric_only='True')"
415 |    ]
416 |   },
417 |   {
418 |    "cell_type": "code",
419 |    "execution_count": 11,
420 |    "metadata": {},
421 |    "outputs": [
422 |     {
423 |      "data": {
424 |       "text/plain": [
425 |        "mpg        36.324103\n",
426 |        "cyl         3.189516\n",
427 |        "disp    15360.799829\n",
428 |        "hp       4700.866935\n",
429 |        "drat        0.285881\n",
430 |        "wt          0.957379\n",
431 |        "qsec        3.193166\n",
432 |        "vs          0.254032\n",
433 |        "am          0.248992\n",
434 |        "gear        0.544355\n",
435 |        "carb        2.608871\n",
436 |        "dtype: float64"
437 |       ]
438 |      },
439 |      "execution_count": 11,
440 |      "metadata": {},
441 |      "output_type": "execute_result"
442 |     }
443 |    ],
444 |    "source": [
445 |     "cars.var(numeric_only='True')"
446 |    ]
447 |   },
448 |   {
449 |    "cell_type": "code",
450 |    "execution_count": 12,
451 |    "metadata": {},
452 |    "outputs": [
453 |     {
454 |      "data": {
455 |       "text/plain": [
456 |        "gear\n",
457 |        "3    15\n",
458 |        "4    12\n",
459 |        "5     5\n",
460 |        "Name: count, dtype: int64"
461 |       ]
462 |      },
463 |      "execution_count": 12,
464 |      "metadata": {},
465 |      "output_type": "execute_result"
466 |     }
467 |    ],
468 |    "source": [
469 |     "gear = cars.gear\n",
470 |     "gear.value_counts()"
471 |    ]
472 |   },
473 |   {
474 |    "cell_type": "code",
475 |    "execution_count": 13,
476 |    "metadata": {},
477 |    "outputs": [
478 |     {
479 |      "data": {
480 |       "text/html": [
481 |        "<div>\n",
482 |        "<style scoped>\n",
483 |        "    .dataframe tbody tr th:only-of-type {\n",
484 |        "        vertical-align: middle;\n",
485 |        "    }\n",
486 |        "\n",
487 |        "    .dataframe tbody tr th {\n",
488 |        "        vertical-align: top;\n",
489 |        "    }\n",
490 |        "\n",
491 |        "    .dataframe thead th {\n",
492 |        "        text-align: right;\n",
493 |        "    }\n",
494 |        "</style>\n",
495 |        "<table border=\"1\" class=\"dataframe\">\n",
496 |        "  <thead>\n",
497 |        "    <tr style=\"text-align: right;\">\n",
498 |        "      <th></th>\n",
499 |        "      <th>mpg</th>\n",
500 |        "      <th>cyl</th>\n",
501 |        "      <th>disp</th>\n",
502 |        "      <th>hp</th>\n",
503 |        "      <th>drat</th>\n",
504 |        "      <th>wt</th>\n",
505 |        "      <th>qsec</th>\n",
506 |        "      <th>vs</th>\n",
507 |        "      <th>am</th>\n",
508 |        "      <th>gear</th>\n",
509 |        "      <th>carb</th>\n",
510 |        "    </tr>\n",
511 |        "  </thead>\n",
512 |        "  <tbody>\n",
513 |        "    <tr>\n",
514 |        "      <th>count</th>\n",
515 |        "      <td>32.000000</td>\n",
516 |        "      <td>32.000000</td>\n",
517 |        "      <td>32.000000</td>\n",
518 |        "      <td>32.000000</td>\n",
519 |        "      <td>32.000000</td>\n",
520 |        "      <td>32.000000</td>\n",
521 |        "      <td>32.000000</td>\n",
522 |        "      <td>32.000000</td>\n",
523 |        "      <td>32.000000</td>\n",
524 |        "      <td>32.000000</td>\n",
525 |        "      <td>32.0000</td>\n",
526 |        "    </tr>\n",
527 |        "    <tr>\n",
528 |        "      <th>mean</th>\n",
529 |        "      <td>20.090625</td>\n",
530 |        "      <td>6.187500</td>\n",
531 |        "      <td>230.721875</td>\n",
532 |        "      <td>146.687500</td>\n",
533 |        "      <td>3.596563</td>\n",
534 |        "      <td>3.217250</td>\n",
535 |        "      <td>17.848750</td>\n",
536 |        "      <td>0.437500</td>\n",
537 |        "      <td>0.406250</td>\n",
538 |        "      <td>3.687500</td>\n",
539 |        "      <td>2.8125</td>\n",
540 |        "    </tr>\n",
541 |        "    <tr>\n",
542 |        "      <th>std</th>\n",
543 |        "      <td>6.026948</td>\n",
544 |        "      <td>1.785922</td>\n",
545 |        "      <td>123.938694</td>\n",
546 |        "      <td>68.562868</td>\n",
547 |        "      <td>0.534679</td>\n",
548 |        "      <td>0.978457</td>\n",
549 |        "      <td>1.786943</td>\n",
550 |        "      <td>0.504016</td>\n",
551 |        "      <td>0.498991</td>\n",
552 |        "      <td>0.737804</td>\n",
553 |        "      <td>1.6152</td>\n",
554 |        "    </tr>\n",
555 |        "    <tr>\n",
556 |        "      <th>min</th>\n",
557 |        "      <td>10.400000</td>\n",
558 |        "      <td>4.000000</td>\n",
559 |        "      <td>71.100000</td>\n",
560 |        "      <td>52.000000</td>\n",
561 |        "      <td>2.760000</td>\n",
562 |        "      <td>1.513000</td>\n",
563 |        "      <td>14.500000</td>\n",
564 |        "      <td>0.000000</td>\n",
565 |        "      <td>0.000000</td>\n",
566 |        "      <td>3.000000</td>\n",
567 |        "      <td>1.0000</td>\n",
568 |        "    </tr>\n",
569 |        "    <tr>\n",
570 |        "      <th>25%</th>\n",
571 |        "      <td>15.425000</td>\n",
572 |        "      <td>4.000000</td>\n",
573 |        "      <td>120.825000</td>\n",
574 |        "      <td>96.500000</td>\n",
575 |        "      <td>3.080000</td>\n",
576 |        "      <td>2.581250</td>\n",
577 |        "      <td>16.892500</td>\n",
578 |        "      <td>0.000000</td>\n",
579 |        "      <td>0.000000</td>\n",
580 |        "      <td>3.000000</td>\n",
581 |        "      <td>2.0000</td>\n",
582 |        "    </tr>\n",
583 |        "    <tr>\n",
584 |        "      <th>50%</th>\n",
585 |        "      <td>19.200000</td>\n",
586 |        "      <td>6.000000</td>\n",
587 |        "      <td>196.300000</td>\n",
588 |        "      <td>123.000000</td>\n",
589 |        "      <td>3.695000</td>\n",
590 |        "      <td>3.325000</td>\n",
591 |        "      <td>17.710000</td>\n",
592 |        "      <td>0.000000</td>\n",
593 |        "      <td>0.000000</td>\n",
594 |        "      <td>4.000000</td>\n",
595 |        "      <td>2.0000</td>\n",
596 |        "    </tr>\n",
597 |        "    <tr>\n",
598 |        "      <th>75%</th>\n",
599 |        "      <td>22.800000</td>\n",
600 |        "      <td>8.000000</td>\n",
601 |        "      <td>326.000000</td>\n",
602 |        "      <td>180.000000</td>\n",
603 |        "      <td>3.920000</td>\n",
604 |        "      <td>3.610000</td>\n",
605 |        "      <td>18.900000</td>\n",
606 |        "      <td>1.000000</td>\n",
607 |        "      <td>1.000000</td>\n",
608 |        "      <td>4.000000</td>\n",
609 |        "      <td>4.0000</td>\n",
610 |        "    </tr>\n",
611 |        "    <tr>\n",
612 |        "      <th>max</th>\n",
613 |        "      <td>33.900000</td>\n",
614 |        "      <td>8.000000</td>\n",
615 |        "      <td>472.000000</td>\n",
616 |        "      <td>335.000000</td>\n",
617 |        "      <td>4.930000</td>\n",
618 |        "      <td>5.424000</td>\n",
619 |        "      <td>22.900000</td>\n",
620 |        "      <td>1.000000</td>\n",
621 |        "      <td>1.000000</td>\n",
622 |        "      <td>5.000000</td>\n",
623 |        "      <td>8.0000</td>\n",
624 |        "    </tr>\n",
625 |        "  </tbody>\n",
626 |        "</table>\n",
627 |        "</div>"
628 |       ],
629 |       "text/plain": [
630 |        "             mpg        cyl        disp          hp       drat         wt  \\\n",
631 |        "count  32.000000  32.000000   32.000000   32.000000  32.000000  32.000000   \n",
632 |        "mean   20.090625   6.187500  230.721875  146.687500   3.596563   3.217250   \n",
633 |        "std     6.026948   1.785922  123.938694   68.562868   0.534679   0.978457   \n",
634 |        "min    10.400000   4.000000   71.100000   52.000000   2.760000   1.513000   \n",
635 |        "25%    15.425000   4.000000  120.825000   96.500000   3.080000   2.581250   \n",
636 |        "50%    19.200000   6.000000  196.300000  123.000000   3.695000   3.325000   \n",
637 |        "75%    22.800000   8.000000  326.000000  180.000000   3.920000   3.610000   \n",
638 |        "max    33.900000   8.000000  472.000000  335.000000   4.930000   5.424000   \n",
639 |        "\n",
640 |        "            qsec         vs         am       gear     carb  \n",
641 |        "count  32.000000  32.000000  32.000000  32.000000  32.0000  \n",
642 |        "mean   17.848750   0.437500   0.406250   3.687500   2.8125  \n",
643 |        "std     1.786943   0.504016   0.498991   0.737804   1.6152  \n",
644 |        "min    14.500000   0.000000   0.000000   3.000000   1.0000  \n",
645 |        "25%    16.892500   0.000000   0.000000   3.000000   2.0000  \n",
646 |        "50%    17.710000   0.000000   0.000000   4.000000   2.0000  \n",
647 |        "75%    18.900000   1.000000   1.000000   4.000000   4.0000  \n",
648 |        "max    22.900000   1.000000   1.000000   5.000000   8.0000  "
649 |       ]
650 |      },
651 |      "execution_count": 13,
652 |      "metadata": {},
653 |      "output_type": "execute_result"
654 |     }
655 |    ],
656 |    "source": [
657 |     "cars.describe()"
658 |    ]
659 |   },
660 |   {
661 |    "cell_type": "code",
662 |    "execution_count": null,
663 |    "metadata": {},
664 |    "outputs": [],
665 |    "source": []
666 |   }
667 |  ],
668 |  "metadata": {
669 |   "anaconda-cloud": {},
670 |   "kernelspec": {
671 |    "display_name": "Python 3",
672 |    "language": "python",
673 |    "name": "python3"
674 |   },
675 |   "language_info": {
676 |    "codemirror_mode": {
677 |     "name": "ipython",
678 |     "version": 3
679 |    },
680 |    "file_extension": ".py",
681 |    "mimetype": "text/x-python",
682 |    "name": "python",
683 |    "nbconvert_exporter": "python",
684 |    "pygments_lexer": "ipython3",
685 |    "version": "3.10.13"
686 |   }
687 |  },
688 |  "nbformat": 4,
689 |  "nbformat_minor": 1
690 | }
691 | 


--------------------------------------------------------------------------------
/notebooks/05_04b.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "metadata": {},
 6 |    "source": [
 7 |     "## Starting with parametric methods in pandas and scipy"
 8 |    ]
 9 |   },
10 |   {
11 |    "cell_type": "code",
12 |    "execution_count": 1,
13 |    "metadata": {},
14 |    "outputs": [],
15 |    "source": [
16 |     "import pandas as pd\n",
17 |     "import numpy as np\n",
18 |     "\n",
19 |     "import matplotlib.pyplot as plt\n",
20 |     "import seaborn as sns\n",
21 |     "from pylab import rcParams\n",
22 |     "\n",
23 |     "import scipy\n",
24 |     "from scipy.stats import pearsonr"
25 |    ]
26 |   },
27 |   {
28 |    "cell_type": "code",
29 |    "execution_count": 2,
30 |    "metadata": {},
31 |    "outputs": [],
32 |    "source": [
33 |     "%matplotlib inline\n",
34 |     "rcParams['figure.figsize'] = 8, 4\n",
35 |     "sns.set_style(\"whitegrid\")"
36 |    ]
37 |   },
38 |   {
39 |    "cell_type": "markdown",
40 |    "metadata": {},
41 |    "source": [
42 |     "### The Pearson Correlation"
43 |    ]
44 |   },
45 |   {
46 |    "cell_type": "code",
47 |    "execution_count": null,
48 |    "metadata": {},
49 |    "outputs": [],
50 |    "source": [
51 |     "address = '/workspaces/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/data/mtcars.csv'\n",
52 |     "\n",
53 |     "cars = pd.read_csv(address)\n",
54 |     "cars.columns = ['car_names','mpg','cyl','disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb']\n"
55 |    ]
56 |   },
57 |   {
58 |    "cell_type": "markdown",
59 |    "metadata": {},
60 |    "source": [
61 |     "### Using scipy to calculate the Pearson correlation coefficient"
62 |    ]
63 |   },
64 |   {
65 |    "cell_type": "markdown",
66 |    "metadata": {},
67 |    "source": [
68 |     "### Using Seaborn to visualize the Pearson correlation coefficient"
69 |    ]
70 |   }
71 |  ],
72 |  "metadata": {
73 |   "anaconda-cloud": {},
74 |   "kernelspec": {
75 |    "display_name": "Python 3",
76 |    "language": "python",
77 |    "name": "python3"
78 |   },
79 |   "language_info": {
80 |    "codemirror_mode": {
81 |     "name": "ipython",
82 |     "version": 3
83 |    },
84 |    "file_extension": ".py",
85 |    "mimetype": "text/x-python",
86 |    "name": "python",
87 |    "nbconvert_exporter": "python",
88 |    "pygments_lexer": "ipython3",
89 |    "version": "3.8.8"
90 |   }
91 |  },
92 |  "nbformat": 4,
93 |  "nbformat_minor": 1
94 | }
95 | 


--------------------------------------------------------------------------------
/notebooks/05_05b.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "metadata": {},
 6 |    "source": [
 7 |     "## Delving into non-parametric methods using pandas and scipy"
 8 |    ]
 9 |   },
10 |   {
11 |    "cell_type": "code",
12 |    "execution_count": 1,
13 |    "metadata": {},
14 |    "outputs": [],
15 |    "source": [
16 |     "import numpy as np\n",
17 |     "import pandas as pd\n",
18 |     "\n",
19 |     "import matplotlib.pyplot as plt\n",
20 |     "import seaborn as sns\n",
21 |     "from pylab import rcParams\n",
22 |     "\n",
23 |     "import scipy\n",
24 |     "from scipy.stats import spearmanr"
25 |    ]
26 |   },
27 |   {
28 |    "cell_type": "code",
29 |    "execution_count": 2,
30 |    "metadata": {},
31 |    "outputs": [],
32 |    "source": [
33 |     "%matplotlib inline\n",
34 |     "rcParams['figure.figsize'] = 14, 7\n",
35 |     "sns.set_style(\"whitegrid\")"
36 |    ]
37 |   },
38 |   {
39 |    "cell_type": "markdown",
40 |    "metadata": {},
41 |    "source": [
42 |     "### The Spearman Rank Correlation"
43 |    ]
44 |   },
45 |   {
46 |    "cell_type": "code",
47 |    "execution_count": null,
48 |    "metadata": {},
49 |    "outputs": [],
50 |    "source": [
51 |     "address = '/workspaces/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/data/mtcars.csv'\n",
52 |     "\n",
53 |     "cars = pd.read_csv(address)\n",
54 |     "cars.columns = ['car_names','mpg','cyl','disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb']\n",
55 |     "cars.head()"
56 |    ]
57 |   },
58 |   {
59 |    "cell_type": "markdown",
60 |    "metadata": {},
61 |    "source": [
62 |     "### Chi-square test for independence"
63 |    ]
64 |   }
65 |  ],
66 |  "metadata": {
67 |   "kernelspec": {
68 |    "display_name": "Python 3",
69 |    "language": "python",
70 |    "name": "python3"
71 |   },
72 |   "language_info": {
73 |    "codemirror_mode": {
74 |     "name": "ipython",
75 |     "version": 3
76 |    },
77 |    "file_extension": ".py",
78 |    "mimetype": "text/x-python",
79 |    "name": "python",
80 |    "nbconvert_exporter": "python",
81 |    "pygments_lexer": "ipython3",
82 |    "version": "3.8.8"
83 |   }
84 |  },
85 |  "nbformat": 4,
86 |  "nbformat_minor": 1
87 | }
88 | 


--------------------------------------------------------------------------------
/notebooks/05_06b.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "metadata": {},
 6 |    "source": [
 7 |     "## Extreme value analysis using univariate methods"
 8 |    ]
 9 |   },
10 |   {
11 |    "cell_type": "code",
12 |    "execution_count": 1,
13 |    "metadata": {},
14 |    "outputs": [],
15 |    "source": [
16 |     "import numpy as np\n",
17 |     "import pandas as pd\n",
18 |     "\n",
19 |     "import matplotlib.pyplot as plt\n",
20 |     "from pylab import rcParams"
21 |    ]
22 |   },
23 |   {
24 |    "cell_type": "code",
25 |    "execution_count": 2,
26 |    "metadata": {},
27 |    "outputs": [],
28 |    "source": [
29 |     "%matplotlib inline\n",
30 |     "rcParams['figure.figsize'] = 5,4"
31 |    ]
32 |   },
33 |   {
34 |    "cell_type": "markdown",
35 |    "metadata": {},
36 |    "source": [
37 |     "### Identifying outliers from Tukey boxplots"
38 |    ]
39 |   },
40 |   {
41 |    "cell_type": "markdown",
42 |    "metadata": {},
43 |    "source": [
44 |     "### Applying Tukey outlier labeling"
45 |    ]
46 |   }
47 |  ],
48 |  "metadata": {
49 |   "kernelspec": {
50 |    "display_name": "Python 3",
51 |    "language": "python",
52 |    "name": "python3"
53 |   },
54 |   "language_info": {
55 |    "codemirror_mode": {
56 |     "name": "ipython",
57 |     "version": 3
58 |    },
59 |    "file_extension": ".py",
60 |    "mimetype": "text/x-python",
61 |    "name": "python",
62 |    "nbconvert_exporter": "python",
63 |    "pygments_lexer": "ipython3",
64 |    "version": "3.8.8"
65 |   }
66 |  },
67 |  "nbformat": 4,
68 |  "nbformat_minor": 1
69 | }
70 | 


--------------------------------------------------------------------------------
/notebooks/05_07b.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "metadata": {},
 6 |    "source": [
 7 |     "## Multivariate analysis for outlier detection"
 8 |    ]
 9 |   },
10 |   {
11 |    "cell_type": "code",
12 |    "execution_count": 1,
13 |    "metadata": {},
14 |    "outputs": [],
15 |    "source": [
16 |     "import pandas as pd\n",
17 |     "\n",
18 |     "import matplotlib.pyplot as plt\n",
19 |     "from pylab import rcParams\n",
20 |     "import seaborn as sns"
21 |    ]
22 |   },
23 |   {
24 |    "cell_type": "code",
25 |    "execution_count": 2,
26 |    "metadata": {},
27 |    "outputs": [],
28 |    "source": [
29 |     "%matplotlib inline\n",
30 |     "rcParams['figure.figsize'] = 5, 4\n",
31 |     "sns.set_style('whitegrid')"
32 |    ]
33 |   },
34 |   {
35 |    "cell_type": "markdown",
36 |    "metadata": {},
37 |    "source": [
38 |     "### Visually inspecting boxplots"
39 |    ]
40 |   },
41 |   {
42 |    "cell_type": "code",
43 |    "execution_count": null,
44 |    "metadata": {},
45 |    "outputs": [],
46 |    "source": [
47 |     "address = '/workspaces/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/data/iris.data.csv'\n",
48 |     "df= pd.read_csv(filepath_or_buffer=address, header=None, sep=',')\n",
49 |     "\n",
50 |     "df.columns = ['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width', 'Species']\n",
51 |     "x = df.iloc[:,0:4].values\n",
52 |     "y = df.iloc[:,4].values\n",
53 |     "df[:5]"
54 |    ]
55 |   },
56 |   {
57 |    "cell_type": "code",
58 |    "execution_count": null,
59 |    "metadata": {},
60 |    "outputs": [],
61 |    "source": []
62 |   },
63 |   {
64 |    "cell_type": "markdown",
65 |    "metadata": {
66 |     "collapsed": true
67 |    },
68 |    "source": [
69 |     "### Looking at the scatterplot matrix"
70 |    ]
71 |   }
72 |  ],
73 |  "metadata": {
74 |   "kernelspec": {
75 |    "display_name": "Python 3",
76 |    "language": "python",
77 |    "name": "python3"
78 |   },
79 |   "language_info": {
80 |    "codemirror_mode": {
81 |     "name": "ipython",
82 |     "version": 3
83 |    },
84 |    "file_extension": ".py",
85 |    "mimetype": "text/x-python",
86 |    "name": "python",
87 |    "nbconvert_exporter": "python",
88 |    "pygments_lexer": "ipython3",
89 |    "version": "3.8.8"
90 |   }
91 |  },
92 |  "nbformat": 4,
93 |  "nbformat_minor": 1
94 | }
95 | 


--------------------------------------------------------------------------------
/notebooks/06_01b.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "id": "8ab9487e-00e3-47e2-9368-9a5a6f112a65",
 6 |    "metadata": {},
 7 |    "source": [
 8 |     "## Cleaning and treating categorical variables"
 9 |    ]
10 |   },
11 |   {
12 |    "cell_type": "code",
13 |    "execution_count": 1,
14 |    "id": "6ddc4c2e-8b01-449d-b823-9394f1f96b1f",
15 |    "metadata": {},
16 |    "outputs": [],
17 |    "source": [
18 |     "import numpy as np\n",
19 |     "from pandas import DataFrame\n",
20 |     "\n",
21 |     "from sklearn.preprocessing import LabelEncoder, OneHotEncoder"
22 |    ]
23 |   },
24 |   {
25 |    "cell_type": "code",
26 |    "execution_count": null,
27 |    "id": "1bef47b2",
28 |    "metadata": {},
29 |    "outputs": [],
30 |    "source": [
31 |     "data = {'names':['steve', 'john', 'richard', 'sarah', 'randy', 'micheal', 'julie'],\n",
32 |     "'age':[20, 22, 20, 21, 24, 23, 22],\n",
33 |     "'gender':['Male', 'Male', np.nan, 'Female', np.nan, 'Male', np.nan],\n",
34 |     "'rank':[2, 1, 4, 5, 3, 7, 6]}"
35 |    ]
36 |   },
37 |   {
38 |    "cell_type": "code",
39 |    "execution_count": null,
40 |    "id": "385b7880",
41 |    "metadata": {},
42 |    "outputs": [],
43 |    "source": []
44 |   },
45 |   {
46 |    "cell_type": "markdown",
47 |    "id": "1321636f-e6e6-4c6e-9756-e0b47dcc56a9",
48 |    "metadata": {},
49 |    "source": [
50 |     "### Label Encoding"
51 |    ]
52 |   },
53 |   {
54 |    "cell_type": "markdown",
55 |    "id": "46c8b54e-bc23-4ef7-95a3-8b3dac69b324",
56 |    "metadata": {},
57 |    "source": [
58 |     "### One Hot Encoder"
59 |    ]
60 |   }
61 |  ],
62 |  "metadata": {
63 |   "kernelspec": {
64 |    "display_name": "Python 3",
65 |    "language": "python",
66 |    "name": "python3"
67 |   },
68 |   "language_info": {
69 |    "codemirror_mode": {
70 |     "name": "ipython",
71 |     "version": 3
72 |    },
73 |    "file_extension": ".py",
74 |    "mimetype": "text/x-python",
75 |    "name": "python",
76 |    "nbconvert_exporter": "python",
77 |    "pygments_lexer": "ipython3",
78 |    "version": "3.8.8"
79 |   }
80 |  },
81 |  "nbformat": 4,
82 |  "nbformat_minor": 5
83 | }
84 | 


--------------------------------------------------------------------------------
/notebooks/06_02b.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "id": "5d519b84-6889-4827-83d8-e99dbb2ec5df",
 6 |    "metadata": {},
 7 |    "source": [
 8 |     "## Transforming Dataset Distributions"
 9 |    ]
10 |   },
11 |   {
12 |    "cell_type": "code",
13 |    "execution_count": 3,
14 |    "id": "868ec360-501a-4032-a6b7-c7fac40a78df",
15 |    "metadata": {},
16 |    "outputs": [],
17 |    "source": [
18 |     "import numpy as np\n",
19 |     "import pandas as pd\n",
20 |     "\n",
21 |     "import matplotlib.pyplot as plt\n",
22 |     "from sklearn.preprocessing import MinMaxScaler, scale"
23 |    ]
24 |   },
25 |   {
26 |    "cell_type": "code",
27 |    "execution_count": null,
28 |    "id": "a67f7690",
29 |    "metadata": {},
30 |    "outputs": [],
31 |    "source": [
32 |     "address = '/workspaces/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/data/mtcars.csv'\n",
33 |     "\n",
34 |     "dataset = pd.read_csv(address)\n",
35 |     "dataset.head()"
36 |    ]
37 |   },
38 |   {
39 |    "cell_type": "markdown",
40 |    "id": "4c753f3f-ea35-4502-a099-168880862c3f",
41 |    "metadata": {},
42 |    "source": [
43 |     "### Normalization"
44 |    ]
45 |   },
46 |   {
47 |    "cell_type": "markdown",
48 |    "id": "496a3803-34bb-4ddc-9dc5-5fca9b9b0fd1",
49 |    "metadata": {},
50 |    "source": [
51 |     "### Standardization"
52 |    ]
53 |   }
54 |  ],
55 |  "metadata": {
56 |   "kernelspec": {
57 |    "display_name": "Python 3",
58 |    "language": "python",
59 |    "name": "python3"
60 |   },
61 |   "language_info": {
62 |    "codemirror_mode": {
63 |     "name": "ipython",
64 |     "version": 3
65 |    },
66 |    "file_extension": ".py",
67 |    "mimetype": "text/x-python",
68 |    "name": "python",
69 |    "nbconvert_exporter": "python",
70 |    "pygments_lexer": "ipython3",
71 |    "version": "3.8.8"
72 |   }
73 |  },
74 |  "nbformat": 4,
75 |  "nbformat_minor": 5
76 | }
77 | 


--------------------------------------------------------------------------------
/notebooks/06_03b.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "metadata": {},
 6 |    "source": [
 7 |     "## Applied Machine Learning: Starter Problem"
 8 |    ]
 9 |   },
10 |   {
11 |    "cell_type": "code",
12 |    "execution_count": 108,
13 |    "metadata": {},
14 |    "outputs": [],
15 |    "source": [
16 |     "import pandas as pd\n",
17 |     "from sklearn.model_selection import train_test_split\n",
18 |     "from sklearn.tree import DecisionTreeClassifier\n",
19 |     "from sklearn import metrics"
20 |    ]
21 |   },
22 |   {
23 |    "cell_type": "code",
24 |    "execution_count": null,
25 |    "id": "6b20b117",
26 |    "metadata": {},
27 |    "outputs": [],
28 |    "source": [
29 |     "address = '/workspaces/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/data/iris.csv'\n",
30 |     "\n",
31 |     "dataset = pd.read_csv(address)\n",
32 |     "dataset.head()"
33 |    ]
34 |   },
35 |   {
36 |    "cell_type": "markdown",
37 |    "metadata": {},
38 |    "source": [
39 |     "### Separating features and labels"
40 |    ]
41 |   },
42 |   {
43 |    "cell_type": "markdown",
44 |    "metadata": {},
45 |    "source": [
46 |     "### Train Test Split"
47 |    ]
48 |   },
49 |   {
50 |    "cell_type": "markdown",
51 |    "metadata": {},
52 |    "source": [
53 |     "### Training Decision Tree Classifier"
54 |    ]
55 |   },
56 |   {
57 |    "cell_type": "markdown",
58 |    "metadata": {},
59 |    "source": [
60 |     "### Evaluation metric"
61 |    ]
62 |   }
63 |  ],
64 |  "metadata": {
65 |   "kernelspec": {
66 |    "display_name": "Python 3",
67 |    "language": "python",
68 |    "name": "python3"
69 |   },
70 |   "language_info": {
71 |    "codemirror_mode": {
72 |     "name": "ipython",
73 |     "version": 3
74 |    },
75 |    "file_extension": ".py",
76 |    "mimetype": "text/x-python",
77 |    "name": "python",
78 |    "nbconvert_exporter": "python",
79 |    "pygments_lexer": "ipython3",
80 |    "version": "3.7.1"
81 |   }
82 |  },
83 |  "nbformat": 4,
84 |  "nbformat_minor": 5
85 | }
86 | 


--------------------------------------------------------------------------------
/notebooks/06_03e.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "## Applied Machine Learning: Starter Problem"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import pandas as pd\n",
 17 |     "from sklearn.model_selection import train_test_split\n",
 18 |     "from sklearn.tree import DecisionTreeClassifier\n",
 19 |     "from sklearn import metrics"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "code",
 24 |    "execution_count": 2,
 25 |    "id": "4e1cf7e5",
 26 |    "metadata": {},
 27 |    "outputs": [
 28 |     {
 29 |      "data": {
 30 |       "text/html": [
 31 |        "<div>\n",
 32 |        "<style scoped>\n",
 33 |        "    .dataframe tbody tr th:only-of-type {\n",
 34 |        "        vertical-align: middle;\n",
 35 |        "    }\n",
 36 |        "\n",
 37 |        "    .dataframe tbody tr th {\n",
 38 |        "        vertical-align: top;\n",
 39 |        "    }\n",
 40 |        "\n",
 41 |        "    .dataframe thead th {\n",
 42 |        "        text-align: right;\n",
 43 |        "    }\n",
 44 |        "</style>\n",
 45 |        "<table border=\"1\" class=\"dataframe\">\n",
 46 |        "  <thead>\n",
 47 |        "    <tr style=\"text-align: right;\">\n",
 48 |        "      <th></th>\n",
 49 |        "      <th>Unnamed: 0</th>\n",
 50 |        "      <th>Sepal.Length</th>\n",
 51 |        "      <th>Sepal.Width</th>\n",
 52 |        "      <th>Petal.Length</th>\n",
 53 |        "      <th>Petal.Width</th>\n",
 54 |        "      <th>Species</th>\n",
 55 |        "    </tr>\n",
 56 |        "  </thead>\n",
 57 |        "  <tbody>\n",
 58 |        "    <tr>\n",
 59 |        "      <th>0</th>\n",
 60 |        "      <td>1</td>\n",
 61 |        "      <td>5.1</td>\n",
 62 |        "      <td>3.5</td>\n",
 63 |        "      <td>1.4</td>\n",
 64 |        "      <td>0.2</td>\n",
 65 |        "      <td>setosa</td>\n",
 66 |        "    </tr>\n",
 67 |        "    <tr>\n",
 68 |        "      <th>1</th>\n",
 69 |        "      <td>2</td>\n",
 70 |        "      <td>4.9</td>\n",
 71 |        "      <td>3.0</td>\n",
 72 |        "      <td>1.4</td>\n",
 73 |        "      <td>0.2</td>\n",
 74 |        "      <td>setosa</td>\n",
 75 |        "    </tr>\n",
 76 |        "    <tr>\n",
 77 |        "      <th>2</th>\n",
 78 |        "      <td>3</td>\n",
 79 |        "      <td>4.7</td>\n",
 80 |        "      <td>3.2</td>\n",
 81 |        "      <td>1.3</td>\n",
 82 |        "      <td>0.2</td>\n",
 83 |        "      <td>setosa</td>\n",
 84 |        "    </tr>\n",
 85 |        "    <tr>\n",
 86 |        "      <th>3</th>\n",
 87 |        "      <td>4</td>\n",
 88 |        "      <td>4.6</td>\n",
 89 |        "      <td>3.1</td>\n",
 90 |        "      <td>1.5</td>\n",
 91 |        "      <td>0.2</td>\n",
 92 |        "      <td>setosa</td>\n",
 93 |        "    </tr>\n",
 94 |        "    <tr>\n",
 95 |        "      <th>4</th>\n",
 96 |        "      <td>5</td>\n",
 97 |        "      <td>5.0</td>\n",
 98 |        "      <td>3.6</td>\n",
 99 |        "      <td>1.4</td>\n",
100 |        "      <td>0.2</td>\n",
101 |        "      <td>setosa</td>\n",
102 |        "    </tr>\n",
103 |        "  </tbody>\n",
104 |        "</table>\n",
105 |        "</div>"
106 |       ],
107 |       "text/plain": [
108 |        "   Unnamed: 0  Sepal.Length  Sepal.Width  Petal.Length  Petal.Width Species\n",
109 |        "0           1           5.1          3.5           1.4          0.2  setosa\n",
110 |        "1           2           4.9          3.0           1.4          0.2  setosa\n",
111 |        "2           3           4.7          3.2           1.3          0.2  setosa\n",
112 |        "3           4           4.6          3.1           1.5          0.2  setosa\n",
113 |        "4           5           5.0          3.6           1.4          0.2  setosa"
114 |       ]
115 |      },
116 |      "execution_count": 2,
117 |      "metadata": {},
118 |      "output_type": "execute_result"
119 |     }
120 |    ],
121 |    "source": [
122 |     "address = '/workspaces/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/data/iris.csv'\n",
123 |     "\n",
124 |     "dataset = pd.read_csv(address)\n",
125 |     "dataset.head()"
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "code",
130 |    "execution_count": 3,
131 |    "id": "a10a8e24",
132 |    "metadata": {},
133 |    "outputs": [
134 |     {
135 |      "data": {
136 |       "text/plain": [
137 |        "array(['setosa', 'versicolor', 'virginica'], dtype=object)"
138 |       ]
139 |      },
140 |      "execution_count": 3,
141 |      "metadata": {},
142 |      "output_type": "execute_result"
143 |     }
144 |    ],
145 |    "source": [
146 |     "dataset.Species.unique()"
147 |    ]
148 |   },
149 |   {
150 |    "cell_type": "markdown",
151 |    "metadata": {},
152 |    "source": [
153 |     "### Separating features and labels"
154 |    ]
155 |   },
156 |   {
157 |    "cell_type": "code",
158 |    "execution_count": 4,
159 |    "id": "0ba71df9",
160 |    "metadata": {},
161 |    "outputs": [
162 |     {
163 |      "data": {
164 |       "text/html": [
165 |        "<div>\n",
166 |        "<style scoped>\n",
167 |        "    .dataframe tbody tr th:only-of-type {\n",
168 |        "        vertical-align: middle;\n",
169 |        "    }\n",
170 |        "\n",
171 |        "    .dataframe tbody tr th {\n",
172 |        "        vertical-align: top;\n",
173 |        "    }\n",
174 |        "\n",
175 |        "    .dataframe thead th {\n",
176 |        "        text-align: right;\n",
177 |        "    }\n",
178 |        "</style>\n",
179 |        "<table border=\"1\" class=\"dataframe\">\n",
180 |        "  <thead>\n",
181 |        "    <tr style=\"text-align: right;\">\n",
182 |        "      <th></th>\n",
183 |        "      <th>Sepal.Length</th>\n",
184 |        "      <th>Sepal.Width</th>\n",
185 |        "      <th>Petal.Length</th>\n",
186 |        "      <th>Petal.Width</th>\n",
187 |        "    </tr>\n",
188 |        "  </thead>\n",
189 |        "  <tbody>\n",
190 |        "    <tr>\n",
191 |        "      <th>0</th>\n",
192 |        "      <td>5.1</td>\n",
193 |        "      <td>3.5</td>\n",
194 |        "      <td>1.4</td>\n",
195 |        "      <td>0.2</td>\n",
196 |        "    </tr>\n",
197 |        "    <tr>\n",
198 |        "      <th>1</th>\n",
199 |        "      <td>4.9</td>\n",
200 |        "      <td>3.0</td>\n",
201 |        "      <td>1.4</td>\n",
202 |        "      <td>0.2</td>\n",
203 |        "    </tr>\n",
204 |        "    <tr>\n",
205 |        "      <th>2</th>\n",
206 |        "      <td>4.7</td>\n",
207 |        "      <td>3.2</td>\n",
208 |        "      <td>1.3</td>\n",
209 |        "      <td>0.2</td>\n",
210 |        "    </tr>\n",
211 |        "    <tr>\n",
212 |        "      <th>3</th>\n",
213 |        "      <td>4.6</td>\n",
214 |        "      <td>3.1</td>\n",
215 |        "      <td>1.5</td>\n",
216 |        "      <td>0.2</td>\n",
217 |        "    </tr>\n",
218 |        "    <tr>\n",
219 |        "      <th>4</th>\n",
220 |        "      <td>5.0</td>\n",
221 |        "      <td>3.6</td>\n",
222 |        "      <td>1.4</td>\n",
223 |        "      <td>0.2</td>\n",
224 |        "    </tr>\n",
225 |        "    <tr>\n",
226 |        "      <th>...</th>\n",
227 |        "      <td>...</td>\n",
228 |        "      <td>...</td>\n",
229 |        "      <td>...</td>\n",
230 |        "      <td>...</td>\n",
231 |        "    </tr>\n",
232 |        "    <tr>\n",
233 |        "      <th>145</th>\n",
234 |        "      <td>6.7</td>\n",
235 |        "      <td>3.0</td>\n",
236 |        "      <td>5.2</td>\n",
237 |        "      <td>2.3</td>\n",
238 |        "    </tr>\n",
239 |        "    <tr>\n",
240 |        "      <th>146</th>\n",
241 |        "      <td>6.3</td>\n",
242 |        "      <td>2.5</td>\n",
243 |        "      <td>5.0</td>\n",
244 |        "      <td>1.9</td>\n",
245 |        "    </tr>\n",
246 |        "    <tr>\n",
247 |        "      <th>147</th>\n",
248 |        "      <td>6.5</td>\n",
249 |        "      <td>3.0</td>\n",
250 |        "      <td>5.2</td>\n",
251 |        "      <td>2.0</td>\n",
252 |        "    </tr>\n",
253 |        "    <tr>\n",
254 |        "      <th>148</th>\n",
255 |        "      <td>6.2</td>\n",
256 |        "      <td>3.4</td>\n",
257 |        "      <td>5.4</td>\n",
258 |        "      <td>2.3</td>\n",
259 |        "    </tr>\n",
260 |        "    <tr>\n",
261 |        "      <th>149</th>\n",
262 |        "      <td>5.9</td>\n",
263 |        "      <td>3.0</td>\n",
264 |        "      <td>5.1</td>\n",
265 |        "      <td>1.8</td>\n",
266 |        "    </tr>\n",
267 |        "  </tbody>\n",
268 |        "</table>\n",
269 |        "<p>150 rows × 4 columns</p>\n",
270 |        "</div>"
271 |       ],
272 |       "text/plain": [
273 |        "     Sepal.Length  Sepal.Width  Petal.Length  Petal.Width\n",
274 |        "0             5.1          3.5           1.4          0.2\n",
275 |        "1             4.9          3.0           1.4          0.2\n",
276 |        "2             4.7          3.2           1.3          0.2\n",
277 |        "3             4.6          3.1           1.5          0.2\n",
278 |        "4             5.0          3.6           1.4          0.2\n",
279 |        "..            ...          ...           ...          ...\n",
280 |        "145           6.7          3.0           5.2          2.3\n",
281 |        "146           6.3          2.5           5.0          1.9\n",
282 |        "147           6.5          3.0           5.2          2.0\n",
283 |        "148           6.2          3.4           5.4          2.3\n",
284 |        "149           5.9          3.0           5.1          1.8\n",
285 |        "\n",
286 |        "[150 rows x 4 columns]"
287 |       ]
288 |      },
289 |      "execution_count": 4,
290 |      "metadata": {},
291 |      "output_type": "execute_result"
292 |     }
293 |    ],
294 |    "source": [
295 |     "X = dataset.iloc[:,1:5]\n",
296 |     "X"
297 |    ]
298 |   },
299 |   {
300 |    "cell_type": "code",
301 |    "execution_count": 5,
302 |    "id": "b7fdb1a8",
303 |    "metadata": {},
304 |    "outputs": [
305 |     {
306 |      "data": {
307 |       "text/plain": [
308 |        "0         setosa\n",
309 |        "1         setosa\n",
310 |        "2         setosa\n",
311 |        "3         setosa\n",
312 |        "4         setosa\n",
313 |        "         ...    \n",
314 |        "145    virginica\n",
315 |        "146    virginica\n",
316 |        "147    virginica\n",
317 |        "148    virginica\n",
318 |        "149    virginica\n",
319 |        "Name: Species, Length: 150, dtype: object"
320 |       ]
321 |      },
322 |      "execution_count": 5,
323 |      "metadata": {},
324 |      "output_type": "execute_result"
325 |     }
326 |    ],
327 |    "source": [
328 |     "y = dataset.iloc[:,5]\n",
329 |     "y"
330 |    ]
331 |   },
332 |   {
333 |    "cell_type": "markdown",
334 |    "metadata": {},
335 |    "source": [
336 |     "### Train Test Split"
337 |    ]
338 |   },
339 |   {
340 |    "cell_type": "code",
341 |    "execution_count": 6,
342 |    "id": "61c24f24",
343 |    "metadata": {},
344 |    "outputs": [],
345 |    "source": [
346 |     "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)"
347 |    ]
348 |   },
349 |   {
350 |    "cell_type": "markdown",
351 |    "metadata": {},
352 |    "source": [
353 |     "### Training Decision Tree Classifier"
354 |    ]
355 |   },
356 |   {
357 |    "cell_type": "code",
358 |    "execution_count": 7,
359 |    "id": "994db922",
360 |    "metadata": {},
361 |    "outputs": [
362 |     {
363 |      "data": {
364 |       "text/html": [
365 |        "<style>#sk-container-id-1 {color: black;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>DecisionTreeClassifier()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">DecisionTreeClassifier</label><div class=\"sk-toggleable__content\"><pre>DecisionTreeClassifier()</pre></div></div></div></div></div>"
366 |       ],
367 |       "text/plain": [
368 |        "DecisionTreeClassifier()"
369 |       ]
370 |      },
371 |      "execution_count": 7,
372 |      "metadata": {},
373 |      "output_type": "execute_result"
374 |     }
375 |    ],
376 |    "source": [
377 |     "clf = DecisionTreeClassifier()\n",
378 |     "clf.fit(X_train, y_train)"
379 |    ]
380 |   },
381 |   {
382 |    "cell_type": "code",
383 |    "execution_count": 8,
384 |    "id": "5d06c2ac",
385 |    "metadata": {},
386 |    "outputs": [
387 |     {
388 |      "data": {
389 |       "text/plain": [
390 |        "array(['virginica', 'versicolor', 'setosa', 'virginica', 'setosa',\n",
391 |        "       'virginica', 'setosa', 'versicolor', 'versicolor', 'versicolor',\n",
392 |        "       'virginica', 'versicolor', 'versicolor', 'versicolor',\n",
393 |        "       'versicolor', 'setosa', 'versicolor', 'versicolor', 'setosa',\n",
394 |        "       'setosa', 'virginica', 'versicolor', 'setosa', 'setosa',\n",
395 |        "       'virginica', 'setosa', 'setosa', 'versicolor', 'versicolor',\n",
396 |        "       'setosa', 'virginica', 'versicolor', 'setosa', 'virginica',\n",
397 |        "       'virginica', 'versicolor', 'setosa', 'virginica', 'versicolor',\n",
398 |        "       'versicolor', 'virginica', 'setosa', 'virginica', 'setosa',\n",
399 |        "       'setosa'], dtype=object)"
400 |       ]
401 |      },
402 |      "execution_count": 8,
403 |      "metadata": {},
404 |      "output_type": "execute_result"
405 |     }
406 |    ],
407 |    "source": [
408 |     "y_predict = clf.predict(X_test)\n",
409 |     "y_predict"
410 |    ]
411 |   },
412 |   {
413 |    "cell_type": "markdown",
414 |    "metadata": {},
415 |    "source": [
416 |     "### Evaluation metric"
417 |    ]
418 |   },
419 |   {
420 |    "cell_type": "code",
421 |    "execution_count": 9,
422 |    "id": "8415063f",
423 |    "metadata": {},
424 |    "outputs": [
425 |     {
426 |      "name": "stdout",
427 |      "output_type": "stream",
428 |      "text": [
429 |       "Accuracy: 0.9777777777777777\n"
430 |      ]
431 |     }
432 |    ],
433 |    "source": [
434 |     "accuracy = metrics.accuracy_score(y_test, y_predict)\n",
435 |     "print(\"Accuracy:\", accuracy)"
436 |    ]
437 |   },
438 |   {
439 |    "cell_type": "code",
440 |    "execution_count": null,
441 |    "id": "ecc7f83e",
442 |    "metadata": {},
443 |    "outputs": [],
444 |    "source": []
445 |   }
446 |  ],
447 |  "metadata": {
448 |   "kernelspec": {
449 |    "display_name": "Python 3",
450 |    "language": "python",
451 |    "name": "python3"
452 |   },
453 |   "language_info": {
454 |    "codemirror_mode": {
455 |     "name": "ipython",
456 |     "version": 3
457 |    },
458 |    "file_extension": ".py",
459 |    "mimetype": "text/x-python",
460 |    "name": "python",
461 |    "nbconvert_exporter": "python",
462 |    "pygments_lexer": "ipython3",
463 |    "version": "3.10.13"
464 |   }
465 |  },
466 |  "nbformat": 4,
467 |  "nbformat_minor": 5
468 | }
469 | 


--------------------------------------------------------------------------------
/notebooks/07_02b.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "id": "d73346ef-024b-4e66-b8bb-8521e36d8aa0",
 6 |    "metadata": {},
 7 |    "source": [
 8 |     "## Python requests for automating data collection"
 9 |    ]
10 |   },
11 |   {
12 |    "cell_type": "code",
13 |    "execution_count": 1,
14 |    "id": "3731af81-600b-4560-8e71-92693df00f7f",
15 |    "metadata": {},
16 |    "outputs": [],
17 |    "source": []
18 |   },
19 |   {
20 |    "cell_type": "markdown",
21 |    "id": "29d169cb-fe50-41c3-a145-3af10473fcc4",
22 |    "metadata": {},
23 |    "source": [
24 |     "### Headers"
25 |    ]
26 |   },
27 |   {
28 |    "cell_type": "markdown",
29 |    "id": "799bc8a5-5b9f-4c93-ad74-2d56a4de8bc5",
30 |    "metadata": {},
31 |    "source": [
32 |     "### Content Types"
33 |    ]
34 |   },
35 |   {
36 |    "cell_type": "markdown",
37 |    "id": "cd63c2df-ecca-48e6-a84d-1275d5df3d55",
38 |    "metadata": {},
39 |    "source": [
40 |     "### Body/Content"
41 |    ]
42 |   }
43 |  ],
44 |  "metadata": {
45 |   "kernelspec": {
46 |    "display_name": "Python 3",
47 |    "language": "python",
48 |    "name": "python3"
49 |   },
50 |   "language_info": {
51 |    "codemirror_mode": {
52 |     "name": "ipython",
53 |     "version": 3
54 |    },
55 |    "file_extension": ".py",
56 |    "mimetype": "text/x-python",
57 |    "name": "python",
58 |    "nbconvert_exporter": "python",
59 |    "pygments_lexer": "ipython3",
60 |    "version": "3.8.8"
61 |   }
62 |  },
63 |  "nbformat": 4,
64 |  "nbformat_minor": 5
65 | }
66 | 


--------------------------------------------------------------------------------
/notebooks/07_03b.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "## Part 1 - Objects in BeautifulSoup"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {
 13 |     "collapsed": true
 14 |    },
 15 |    "source": [
 16 |     "### BeautifulSoup objects"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": null,
 22 |    "metadata": {},
 23 |    "outputs": [],
 24 |    "source": [
 25 |     "our_html_document = '''\n",
 26 |     "<html><head><title>IoT Articles</title></head>\n",
 27 |     "<body>\n",
 28 |     "<p class='title'><b>2018 Trends: Best New IoT Device Ideas for Data Scientists and Engineers</b></p>\n",
 29 |     "\n",
 30 |     "<p class='description'>It’s almost 2018 and IoT is on the cusp of an explosive expansion. In this article, I offer you a listing of new IoT device ideas that you can use...\n",
 31 |     "<br>\n",
 32 |     "<br>\n",
 33 |     "It’s almost 2018 and IoT is on the cusp of an explosive expansion. In this article, I offer you a listing of new IoT device ideas that you can use to get practice in designing your first IoT applications.\n",
 34 |     "<h1>Looking Back at My Coolest IoT Find in 2017</h1>\n",
 35 |     "Before going into detail about best new IoT device ideas, here’s the backstory. <span style=\"text-decoration: underline;\"><strong><a href=\"http://bit.ly/LPlNDJj\">Last month Ericsson Digital invited me</a></strong></span> to tour the Ericsson Studio in Kista, Sweden. Up until that visit, <a href=\"http://www.data-mania.com/blog/m2m-vs-iot/\">IoT</a> had been largely theoretical to me. Of course, I know the usual mumbo-jumbo about wearables and IoT-connected fitness trackers. That stuff is all well and good, but it’s somewhat old hat – plus I am not sure we are really benefiting so much from those, so I’m not that impressed.\n",
 36 |     "\n",
 37 |     "It wasn’t until I got to the Ericsson Studio that I became extremely impressed by how far IoT has really come. Relying on the promise of the 5g network expansion, IoT-powered smart devices are on the cusp of an explosive growth in adoption. It was Ericsson’s Smart Car that sent me reeling:<a href=\"bit.ly/LPlNDJj\"><img class=\"aligncenter size-full wp-image-3802\" src=\"http://www.data-mania.com/blog/wp-content/uploads/2017/12/new-IoT-device-ideas.jpg\" alt=\"Get your new iot device ideas here\" width=\"1024\" height=\"683\" /></a>\n",
 38 |     "\n",
 39 |     "This car is connected to Ericsson’s Connected Vehicle Cloud, an IoT platform that manages services for the Smart Cars to which it’s connected. The Volvo pictured above acts as a drop-off location for groceries that have been ordered by its owner.\n",
 40 |     "\n",
 41 |     "To understand how it works, imagine you’re pulling your normal 9-to-5 and you know you need to grab some groceries on your way home. Well, since you’re smart you’ve used Ericsson IoT platform to connect your car to the local grocery delivery service (<a href=\"http://mat.se/\">Mat.se</a>), so all you need to do is open the Mat.se app and make your usual order. Mat.se automatically handles the payment, grocery selection, delivery, and delivery scheduling. Since your car is IoT-enabled, Mat.se issues its trusted delivery agent a 1-time token to use for opening your car in order to place your groceries in your car for you at 4:40 pm (just before you get off from work).\n",
 42 |     "\n",
 43 |     "To watch some of the amazing IoT device demos I witnessed at Ericsson Studio, make sure to go <span style=\"text-decoration: underline;\"><strong><a href=\"http://bit.ly/LPlNDJj\">watch the videos on this page</a></strong></span>.\n",
 44 |     "<h1>Future Trends for IoT in 2018</h1>\n",
 45 |     "New IoT device ideas won’t do you much good unless you at least know the basic technology trends that are set to impact IoT over the next year(s). These include:\n",
 46 |     "<ol>\n",
 47 |     " \t<li><strong>Big Data</strong> &amp; Data Engineering: Sensors that are embedded within IoT devices spin off machine-generated data like it’s going out of style. For IoT to function, the platform must be solidly engineered to handle big data. Be assured, that requires some serious data engineering.</li>\n",
 48 |     " \t<li><strong>Machine Learning</strong> Data Science: While a lot of IoT devices are still operated according to rules-based decision criteria, the age of artificial intelligence is upon us. IoT will increasingly depend on machine learning algorithms to control device operations so that devices are able to autonomously respond to a complex set of overlapping stimuli.</li>\n",
 49 |     " \t<li><strong>Blockchain</strong>-Enabled Security: Above all else, IoT networks must be secure. Blockchain technology is primed to meet the security demands that come along with building and expanding the IoT.</li>\n",
 50 |     "</ol>\n",
 51 |     "<h1>Best New IoT Device Ideas</h1>\n",
 52 |     "This listing of new IoT device ideas has been sub-divided according to the main technology upon which the IoT devices are built. Below I’m providing a list of new IoT device ideas, but for detailed instructions on how to build these IoT applications, I recommend the <a href=\"https://click.linksynergy.com/deeplink?id=*JDLXjeE*wk&amp;mid=39197&amp;murl=https%3A%2F%2Fwww.udemy.com%2Ftopic%2Finternet-of-things%2F%3Fsort%3Dhighest-rated\">IoT courses on Udemy</a> (ß Please note: if you purchase a Udemy course through this link, I may receive a small commission), or courses that are available at <a href=\"http://www.skyfilabs.com/iot-online-courses\">SkyFi</a> and <a href=\"https://www.coursera.org/specializations/iot\">Coursera</a>.\n",
 53 |     "<h2>Raspberry Pi IoT Ideas</h2>\n",
 54 |     "Using Raspberry Pi as open-source hardware, you can build IoT applications that offer any one of the following benefits:\n",
 55 |     "<ol>\n",
 56 |     " \t<li>Enable built-in sensing to build a weather station that measures ambient temperature and humidity</li>\n",
 57 |     " \t<li>Build a system that detects discrepancies in electrical readings to identify electricity theft</li>\n",
 58 |     " \t<li>Use IoT to build a Servo that is controlled by motion detection readings</li>\n",
 59 |     " \t<li>Build a smart control switch that operates devices based on external stimuli. Use this for home automation.</li>\n",
 60 |     " \t<li>Build a music playing application that enables music for each room in your house</li>\n",
 61 |     " \t<li>Implement biometrics on IoT-connected devices</li>\n",
 62 |     "</ol>\n",
 63 |     "<h2>Arduino IoT Ideas</h2>\n",
 64 |     "There are a number of new IoT device ideas that deploy Arduino as a microcontroller. These include:\n",
 65 |     "<ol>\n",
 66 |     " \t<li>Integrate Arduino with Android to build a remote-control RGB LED device.</li>\n",
 67 |     " \t<li>Connect PIR sensors across the IoT to implement a smart building.</li>\n",
 68 |     " \t<li>Build a temperature and sunlight sensor system to remotely monitor and control the conditions of your garden.</li>\n",
 69 |     " \t<li>Deploy Arduino and IoT to automate your neighborhood streetlights.</li>\n",
 70 |     " \t<li>Build a smart irrigation system based on IoT-connected temperature and moisture sensors built-in to your agricultural plants.</li>\n",
 71 |     "</ol>\n",
 72 |     "[caption id=\"attachment_3807\" align=\"aligncenter\" width=\"300\"]<a href=\"bit.ly/LPlNDJj\"><img class=\"wp-image-3807 size-medium\" src=\"http://www.data-mania.com/blog/wp-content/uploads/2017/12/IMG_3058-300x295.jpg\" alt=\"\" width=\"300\" height=\"295\" /></a> An IoT Chatbot Tree at the Ericsson Studio[/caption]\n",
 73 |     "<h2>Wireless (GSM) IoT Ideas</h2>\n",
 74 |     "Several new IoT device ideas are developed around the GSM wireless network. Those are:\n",
 75 |     "<ol>\n",
 76 |     " \t<li>Monitor soil moisture to automate agricultural irrigation cycles.</li>\n",
 77 |     " \t<li>Automate and control the conditions of a greenhouse.</li>\n",
 78 |     " \t<li>Enable bio-metrics to build a smart security system for your home or office building</li>\n",
 79 |     " \t<li>Build an autonomously operating fitness application that automatically makes recommendations based on motion detection and heart rate sensors that are embedded on wearable fitness trackers.</li>\n",
 80 |     " \t<li>Build a healthcare monitoring system that tracks, informs, and automatically alerts healthcare providers based on sensor readings that describe a patients vital statistics (like temperature, pulse, blood pressure, etc).</li>\n",
 81 |     "</ol>\n",
 82 |     "<h2>IoT Automation Ideas</h2>\n",
 83 |     "Almost all new IoT device ideas offer automation benefits, but to outline a few more ideas:\n",
 84 |     "<ol>\n",
 85 |     " \t<li>Build an IoT device that automatically locates and reports the closest nearby parking spot.</li>\n",
 86 |     " \t<li>Build a motion detection system that automatically issues emails or sms messages to alert home owners of a likely home invasion.</li>\n",
 87 |     " \t<li>Use temperature sensors connected across the IoT to automatically alert you if your home windows or doors have been left open.</li>\n",
 88 |     " \t<li>Use bio-metric sensors to build a smart system that automate security for your home or office building</li>\n",
 89 |     "</ol>\n",
 90 |     "To learn more about IoT and what’s happening on the leading edge, be sure to pop over to Ericsson’s Studio Tour recap and <span style=\"text-decoration: underline;\"><strong><a href=\"http://bit.ly/LPlNDJj\">watch these videos</a></strong></span>.\n",
 91 |     "\n",
 92 |     "<em>(I captured some of this content on behalf of DevMode Strategies during an invite-only tour of the Ericsson Studio in Kista. Rest assure, the text and opinions are my own</em>)\n",
 93 |     "<p class='description'>...</p>\n",
 94 |     "'''"
 95 |    ]
 96 |   },
 97 |   {
 98 |    "cell_type": "markdown",
 99 |    "metadata": {
100 |     "collapsed": true
101 |    },
102 |    "source": [
103 |     "### Tag objects"
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "markdown",
108 |    "metadata": {
109 |     "collapsed": true
110 |    },
111 |    "source": [
112 |     "#### Tag names"
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "markdown",
117 |    "metadata": {
118 |     "collapsed": true
119 |    },
120 |    "source": [
121 |     "#### Tag attributes"
122 |    ]
123 |   },
124 |   {
125 |    "cell_type": "markdown",
126 |    "metadata": {},
127 |    "source": [
128 |     "#### Navigating a parse tree using tags"
129 |    ]
130 |   }
131 |  ],
132 |  "metadata": {
133 |   "kernelspec": {
134 |    "display_name": "Python 3",
135 |    "language": "python",
136 |    "name": "python3"
137 |   },
138 |   "language_info": {
139 |    "codemirror_mode": {
140 |     "name": "ipython",
141 |     "version": 3
142 |    },
143 |    "file_extension": ".py",
144 |    "mimetype": "text/x-python",
145 |    "name": "python",
146 |    "nbconvert_exporter": "python",
147 |    "pygments_lexer": "ipython3",
148 |    "version": "3.8.8"
149 |   }
150 |  },
151 |  "nbformat": 4,
152 |  "nbformat_minor": 1
153 | }
154 | 


--------------------------------------------------------------------------------
/notebooks/07_04b.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "## NavigableString Objects"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "### NavigableString objects"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {
 20 |     "collapsed": true
 21 |    },
 22 |    "source": [
 23 |     "#### Utilizing NavigableString objects"
 24 |    ]
 25 |   },
 26 |   {
 27 |    "cell_type": "code",
 28 |    "execution_count": null,
 29 |    "metadata": {},
 30 |    "outputs": [],
 31 |    "source": [
 32 |     "our_html_document = '''\n",
 33 |     "<html><head><title>IoT Articles</title></head>\n",
 34 |     "<body>\n",
 35 |     "<p class='title'><b>2018 Trends: Best New IoT Device Ideas for Data Scientists and Engineers</b></p>\n",
 36 |     "\n",
 37 |     "<p class='description'>It’s almost 2018 and IoT is on the cusp of an explosive expansion. In this article, I offer you a listing of new IoT device ideas that you can use...\n",
 38 |     "<br>\n",
 39 |     "<br>\n",
 40 |     "It’s almost 2018 and IoT is on the cusp of an explosive expansion. In this article, I offer you a listing of new IoT device ideas that you can use to get practice in designing your first IoT applications.\n",
 41 |     "<h1>Looking Back at My Coolest IoT Find in 2017</h1>\n",
 42 |     "Before going into detail about best new IoT device ideas, here’s the backstory. <span style=\"text-decoration: underline;\"><strong><a href=\"http://bit.ly/LPlNDJj\">Last month Ericsson Digital invited me</a></strong></span> to tour the Ericsson Studio in Kista, Sweden. Up until that visit, <a href=\"http://www.data-mania.com/blog/m2m-vs-iot/\">IoT</a> had been largely theoretical to me. Of course, I know the usual mumbo-jumbo about wearables and IoT-connected fitness trackers. That stuff is all well and good, but it’s somewhat old hat – plus I am not sure we are really benefiting so much from those, so I’m not that impressed.\n",
 43 |     "\n",
 44 |     "It wasn’t until I got to the Ericsson Studio that I became extremely impressed by how far IoT has really come. Relying on the promise of the 5g network expansion, IoT-powered smart devices are on the cusp of an explosive growth in adoption. It was Ericsson’s Smart Car that sent me reeling:<a href=\"bit.ly/LPlNDJj\"><img class=\"aligncenter size-full wp-image-3802\" src=\"http://www.data-mania.com/blog/wp-content/uploads/2017/12/new-IoT-device-ideas.jpg\" alt=\"Get your new iot device ideas here\" width=\"1024\" height=\"683\" /></a>\n",
 45 |     "\n",
 46 |     "This car is connected to Ericsson’s Connected Vehicle Cloud, an IoT platform that manages services for the Smart Cars to which it’s connected. The Volvo pictured above acts as a drop-off location for groceries that have been ordered by its owner.\n",
 47 |     "\n",
 48 |     "To understand how it works, imagine you’re pulling your normal 9-to-5 and you know you need to grab some groceries on your way home. Well, since you’re smart you’ve used Ericsson IoT platform to connect your car to the local grocery delivery service (<a href=\"http://mat.se/\">Mat.se</a>), so all you need to do is open the Mat.se app and make your usual order. Mat.se automatically handles the payment, grocery selection, delivery, and delivery scheduling. Since your car is IoT-enabled, Mat.se issues its trusted delivery agent a 1-time token to use for opening your car in order to place your groceries in your car for you at 4:40 pm (just before you get off from work).\n",
 49 |     "\n",
 50 |     "To watch some of the amazing IoT device demos I witnessed at Ericsson Studio, make sure to go <span style=\"text-decoration: underline;\"><strong><a href=\"http://bit.ly/LPlNDJj\">watch the videos on this page</a></strong></span>.\n",
 51 |     "<h1>Future Trends for IoT in 2018</h1>\n",
 52 |     "New IoT device ideas won’t do you much good unless you at least know the basic technology trends that are set to impact IoT over the next year(s). These include:\n",
 53 |     "<ol>\n",
 54 |     " \t<li><strong>Big Data</strong> &amp; Data Engineering: Sensors that are embedded within IoT devices spin off machine-generated data like it’s going out of style. For IoT to function, the platform must be solidly engineered to handle big data. Be assured, that requires some serious data engineering.</li>\n",
 55 |     " \t<li><strong>Machine Learning</strong> Data Science: While a lot of IoT devices are still operated according to rules-based decision criteria, the age of artificial intelligence is upon us. IoT will increasingly depend on machine learning algorithms to control device operations so that devices are able to autonomously respond to a complex set of overlapping stimuli.</li>\n",
 56 |     " \t<li><strong>Blockchain</strong>-Enabled Security: Above all else, IoT networks must be secure. Blockchain technology is primed to meet the security demands that come along with building and expanding the IoT.</li>\n",
 57 |     "</ol>\n",
 58 |     "<h1>Best New IoT Device Ideas</h1>\n",
 59 |     "This listing of new IoT device ideas has been sub-divided according to the main technology upon which the IoT devices are built. Below I’m providing a list of new IoT device ideas, but for detailed instructions on how to build these IoT applications, I recommend the <a href=\"https://click.linksynergy.com/deeplink?id=*JDLXjeE*wk&amp;mid=39197&amp;murl=https%3A%2F%2Fwww.udemy.com%2Ftopic%2Finternet-of-things%2F%3Fsort%3Dhighest-rated\">IoT courses on Udemy</a> (ß Please note: if you purchase a Udemy course through this link, I may receive a small commission), or courses that are available at <a href=\"http://www.skyfilabs.com/iot-online-courses\">SkyFi</a> and <a href=\"https://www.coursera.org/specializations/iot\">Coursera</a>.\n",
 60 |     "<h2>Raspberry Pi IoT Ideas</h2>\n",
 61 |     "Using Raspberry Pi as open-source hardware, you can build IoT applications that offer any one of the following benefits:\n",
 62 |     "<ol>\n",
 63 |     " \t<li>Enable built-in sensing to build a weather station that measures ambient temperature and humidity</li>\n",
 64 |     " \t<li>Build a system that detects discrepancies in electrical readings to identify electricity theft</li>\n",
 65 |     " \t<li>Use IoT to build a Servo that is controlled by motion detection readings</li>\n",
 66 |     " \t<li>Build a smart control switch that operates devices based on external stimuli. Use this for home automation.</li>\n",
 67 |     " \t<li>Build a music playing application that enables music for each room in your house</li>\n",
 68 |     " \t<li>Implement biometrics on IoT-connected devices</li>\n",
 69 |     "</ol>\n",
 70 |     "<h2>Arduino IoT Ideas</h2>\n",
 71 |     "There are a number of new IoT device ideas that deploy Arduino as a microcontroller. These include:\n",
 72 |     "<ol>\n",
 73 |     " \t<li>Integrate Arduino with Android to build a remote-control RGB LED device.</li>\n",
 74 |     " \t<li>Connect PIR sensors across the IoT to implement a smart building.</li>\n",
 75 |     " \t<li>Build a temperature and sunlight sensor system to remotely monitor and control the conditions of your garden.</li>\n",
 76 |     " \t<li>Deploy Arduino and IoT to automate your neighborhood streetlights.</li>\n",
 77 |     " \t<li>Build a smart irrigation system based on IoT-connected temperature and moisture sensors built-in to your agricultural plants.</li>\n",
 78 |     "</ol>\n",
 79 |     "[caption id=\"attachment_3807\" align=\"aligncenter\" width=\"300\"]<a href=\"bit.ly/LPlNDJj\"><img class=\"wp-image-3807 size-medium\" src=\"http://www.data-mania.com/blog/wp-content/uploads/2017/12/IMG_3058-300x295.jpg\" alt=\"\" width=\"300\" height=\"295\" /></a> An IoT Chatbot Tree at the Ericsson Studio[/caption]\n",
 80 |     "<h2>Wireless (GSM) IoT Ideas</h2>\n",
 81 |     "Several new IoT device ideas are developed around the GSM wireless network. Those are:\n",
 82 |     "<ol>\n",
 83 |     " \t<li>Monitor soil moisture to automate agricultural irrigation cycles.</li>\n",
 84 |     " \t<li>Automate and control the conditions of a greenhouse.</li>\n",
 85 |     " \t<li>Enable bio-metrics to build a smart security system for your home or office building</li>\n",
 86 |     " \t<li>Build an autonomously operating fitness application that automatically makes recommendations based on motion detection and heart rate sensors that are embedded on wearable fitness trackers.</li>\n",
 87 |     " \t<li>Build a healthcare monitoring system that tracks, informs, and automatically alerts healthcare providers based on sensor readings that describe a patients vital statistics (like temperature, pulse, blood pressure, etc).</li>\n",
 88 |     "</ol>\n",
 89 |     "<h2>IoT Automation Ideas</h2>\n",
 90 |     "Almost all new IoT device ideas offer automation benefits, but to outline a few more ideas:\n",
 91 |     "<ol>\n",
 92 |     " \t<li>Build an IoT device that automatically locates and reports the closest nearby parking spot.</li>\n",
 93 |     " \t<li>Build a motion detection system that automatically issues emails or sms messages to alert home owners of a likely home invasion.</li>\n",
 94 |     " \t<li>Use temperature sensors connected across the IoT to automatically alert you if your home windows or doors have been left open.</li>\n",
 95 |     " \t<li>Use bio-metric sensors to build a smart system that automate security for your home or office building</li>\n",
 96 |     "</ol>\n",
 97 |     "To learn more about IoT and what’s happening on the leading edge, be sure to pop over to Ericsson’s Studio Tour recap and <span style=\"text-decoration: underline;\"><strong><a href=\"http://bit.ly/LPlNDJj\">watch these videos</a></strong></span>.\n",
 98 |     "\n",
 99 |     "<em>(I captured some of this content on behalf of DevMode Strategies during an invite-only tour of the Ericsson Studio in Kista. Rest assure, the text and opinions are my own</em>)\n",
100 |     "<p class='description'>...</p>\n",
101 |     "'''"
102 |    ]
103 |   },
104 |   {
105 |    "cell_type": "code",
106 |    "execution_count": null,
107 |    "metadata": {},
108 |    "outputs": [],
109 |    "source": []
110 |   }
111 |  ],
112 |  "metadata": {
113 |   "kernelspec": {
114 |    "display_name": "Python 3",
115 |    "language": "python",
116 |    "name": "python3"
117 |   },
118 |   "language_info": {
119 |    "codemirror_mode": {
120 |     "name": "ipython",
121 |     "version": 3
122 |    },
123 |    "file_extension": ".py",
124 |    "mimetype": "text/x-python",
125 |    "name": "python",
126 |    "nbconvert_exporter": "python",
127 |    "pygments_lexer": "ipython3",
128 |    "version": "3.8.8"
129 |   }
130 |  },
131 |  "nbformat": 4,
132 |  "nbformat_minor": 1
133 | }
134 | 


--------------------------------------------------------------------------------
/notebooks/07_05b.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "## Data parsing"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "from bs4 import BeautifulSoup\n",
 17 |     "\n",
 18 |     "import urllib\n",
 19 |     "import urllib.request\n",
 20 |     "import re"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "markdown",
 25 |    "metadata": {},
 26 |    "source": [
 27 |     "### Parsing your data"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "markdown",
 32 |    "metadata": {},
 33 |    "source": [
 34 |     "### Getting data from a parse tree"
 35 |    ]
 36 |   },
 37 |   {
 38 |    "cell_type": "markdown",
 39 |    "metadata": {},
 40 |    "source": [
 41 |     "### Searching and retrieving data from a parse tree\n",
 42 |     "\n",
 43 |     "#### Introducing the 'find_all()' method\n",
 44 |     "The find_all() method searches a tag and its descendants to retrieve tags or strings that match your filters. \n",
 45 |     "\n",
 46 |     "#### Search and filtering a parse tree\n",
 47 |     "There are many different ways to access tags and strings within a parse tree. In this segment I am going to show you the following methods:\n",
 48 |     "- Name argument - Search for tags by filtering based on tag name\n",
 49 |     "- Keyword argument - Search for tags by filtering based on tag attribute\n",
 50 |     "- String argument - Search for tags by filtering based on an exact string\n",
 51 |     "- Lists - Search for tags by filtering based on lists\n",
 52 |     "- Boolean values - Search for tags by filtering based on a Boolean value\n",
 53 |     "- Strings - Search for weblinks by filtering based on string objects\n",
 54 |     "- Regular expressions - Search for tags and strings by filtering based on regular expression\n",
 55 |     "\n",
 56 |     "You can pass any of these arguments into the find_all() method to use as filters and return either strings or tags.\n",
 57 |     "\n",
 58 |     "##### Retrieving tags by filtering with name arguments\n",
 59 |     "- Name argument - Search for tags by filtering based on tag name"
 60 |    ]
 61 |   },
 62 |   {
 63 |    "cell_type": "markdown",
 64 |    "metadata": {},
 65 |    "source": [
 66 |     "##### Retrieving tags by filtering with keyword arguments\n",
 67 |     "- Keyword argument - Search for tags by filtering based on tag attribute"
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "markdown",
 72 |    "metadata": {},
 73 |    "source": [
 74 |     "##### Retrieving tags by filtering with string arguments\n",
 75 |     "- String argument - Search for tags by filtering based on an exact string"
 76 |    ]
 77 |   },
 78 |   {
 79 |    "cell_type": "markdown",
 80 |    "metadata": {},
 81 |    "source": [
 82 |     "##### Retrieving tags by filtering with list objects\n",
 83 |     "- Lists - Search for tags by filtering based on lists"
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "markdown",
 88 |    "metadata": {},
 89 |    "source": [
 90 |     "##### Retrieving tags by filtering with regular expressions\n",
 91 |     "- Regular expressions - Search for tags and strings by filtering based on regular expression"
 92 |    ]
 93 |   },
 94 |   {
 95 |    "cell_type": "markdown",
 96 |    "metadata": {},
 97 |    "source": [
 98 |     "##### Retrieving tags by filtering with a Boolean value\n",
 99 |     "- Boolean values - Search for tags by filtering based on a Boolean value"
100 |    ]
101 |   },
102 |   {
103 |    "cell_type": "markdown",
104 |    "metadata": {},
105 |    "source": [
106 |     "##### Retrieving weblinks by filtering with string objects\n",
107 |     "- Strings - Search for weblinks by filtering based on string objects"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "markdown",
112 |    "metadata": {},
113 |    "source": [
114 |     "##### Retrieving strings by filtering with regular expressions\n",
115 |     "- Regular expressions - Search for tags and strings by filtering based on regular expression"
116 |    ]
117 |   }
118 |  ],
119 |  "metadata": {
120 |   "kernelspec": {
121 |    "display_name": "Python 3",
122 |    "language": "python",
123 |    "name": "python3"
124 |   },
125 |   "language_info": {
126 |    "codemirror_mode": {
127 |     "name": "ipython",
128 |     "version": 3
129 |    },
130 |    "file_extension": ".py",
131 |    "mimetype": "text/x-python",
132 |    "name": "python",
133 |    "nbconvert_exporter": "python",
134 |    "pygments_lexer": "ipython3",
135 |    "version": "3.8.8"
136 |   }
137 |  },
138 |  "nbformat": 4,
139 |  "nbformat_minor": 1
140 | }
141 | 


--------------------------------------------------------------------------------
/notebooks/07_06b.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "metadata": {},
 6 |    "source": [
 7 |     "## Web scraping"
 8 |    ]
 9 |   },
10 |   {
11 |    "cell_type": "code",
12 |    "execution_count": 1,
13 |    "metadata": {},
14 |    "outputs": [],
15 |    "source": [
16 |     "from bs4 import BeautifulSoup\n",
17 |     "import urllib.request\n",
18 |     "from IPython.display import HTML\n",
19 |     "import re"
20 |    ]
21 |   },
22 |   {
23 |    "cell_type": "markdown",
24 |    "metadata": {},
25 |    "source": [
26 |     "### Scraping a webpage and saving your results\n"
27 |    ]
28 |   }
29 |  ],
30 |  "metadata": {
31 |   "kernelspec": {
32 |    "display_name": "Python 3",
33 |    "language": "python",
34 |    "name": "python3"
35 |   },
36 |   "language_info": {
37 |    "codemirror_mode": {
38 |     "name": "ipython",
39 |     "version": 3
40 |    },
41 |    "file_extension": ".py",
42 |    "mimetype": "text/x-python",
43 |    "name": "python",
44 |    "nbconvert_exporter": "python",
45 |    "pygments_lexer": "ipython3",
46 |    "version": "3.8.8"
47 |   }
48 |  },
49 |  "nbformat": 4,
50 |  "nbformat_minor": 1
51 | }
52 | 


--------------------------------------------------------------------------------
/notebooks/07_06e.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "## Web scraping"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "from bs4 import BeautifulSoup\n",
 17 |     "import urllib.request\n",
 18 |     "from IPython.display import HTML\n",
 19 |     "import re"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "code",
 24 |    "execution_count": 2,
 25 |    "metadata": {},
 26 |    "outputs": [
 27 |     {
 28 |      "data": {
 29 |       "text/plain": [
 30 |        "bs4.BeautifulSoup"
 31 |       ]
 32 |      },
 33 |      "execution_count": 2,
 34 |      "metadata": {},
 35 |      "output_type": "execute_result"
 36 |     }
 37 |    ],
 38 |    "source": [
 39 |     "r = urllib.request.urlopen('https://analytics.usa.gov').read()\n",
 40 |     "soup = BeautifulSoup(r, 'html.parser')\n",
 41 |     "type(soup)"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "markdown",
 46 |    "metadata": {},
 47 |    "source": [
 48 |     "### Scraping a webpage and saving your results\n"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "code",
 53 |    "execution_count": 3,
 54 |    "metadata": {},
 55 |    "outputs": [
 56 |     {
 57 |      "name": "stdout",
 58 |      "output_type": "stream",
 59 |      "text": [
 60 |       "<!DOCTYPE html>\n",
 61 |       "<html lang=\"en\">\n",
 62 |       " <!-- Initalize title and data source variables -->\n",
 63 |       " <head>\n",
 64 |       "  <!--\n",
 65 |       "\n"
 66 |      ]
 67 |     }
 68 |    ],
 69 |    "source": [
 70 |     "print(soup.prettify()[:100])"
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "code",
 75 |    "execution_count": 4,
 76 |    "metadata": {},
 77 |    "outputs": [
 78 |     {
 79 |      "name": "stdout",
 80 |      "output_type": "stream",
 81 |      "text": [
 82 |       "/\n",
 83 |       "#explanation\n",
 84 |       "/data/\n",
 85 |       "https://open.gsa.gov/api/dap/\n",
 86 |       "data/\n",
 87 |       "#top-pages-realtime\n",
 88 |       "#top-pages-7-days\n",
 89 |       "#top-pages-30-days\n",
 90 |       "https://analytics.usa.gov/data/live/all-pages-realtime.csv\n",
 91 |       "https://analytics.usa.gov/data/live/all-domains-30-days.csv\n",
 92 |       "https://digital.gov/services/dap/\n",
 93 |       "https://digital.gov/services/dap/common-questions-about-dap-faq/#part-4\n",
 94 |       "https://support.google.com/analytics/answer/2763052?hl=en\n",
 95 |       "https://analytics.usa.gov/data/live/second-level-domains.csv\n",
 96 |       "https://analytics.usa.gov/data/live/sites.csv\n",
 97 |       "mailto:dap@gsa.gov\n",
 98 |       "/data/\n",
 99 |       "https://open.gsa.gov/api/dap/\n",
100 |       "mailto:dap@gsa.gov\n",
101 |       "https://github.com/18F/analytics.usa.gov/issues\n",
102 |       "https://github.com/18F/analytics.usa.gov\n",
103 |       "https://github.com/18F/analytics-reporter\n",
104 |       "https://www.gsa.gov/\n",
105 |       "https://digital.gov/guides/dap/\n",
106 |       "https://cloud.gov/\n"
107 |      ]
108 |     }
109 |    ],
110 |    "source": [
111 |     "for link in soup.find_all('a'):\n",
112 |     "  print(link.get('href'))"
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "code",
117 |    "execution_count": 5,
118 |    "metadata": {},
119 |    "outputs": [
120 |     {
121 |      "name": "stdout",
122 |      "output_type": "stream",
123 |      "text": [
124 |       "\n",
125 |       "\n",
126 |       "\n",
127 |       "\n",
128 |       "\n",
129 |       "\n",
130 |       "\n",
131 |       "\n",
132 |       "\n",
133 |       "\n",
134 |       "\n",
135 |       "\n",
136 |       "\n",
137 |       "\n",
138 |       "\n",
139 |       "\n",
140 |       "analytics.usa.gov | The US government's web traffic. Analytics.usa.gov is migrating to a new web analytics platform.\n",
141 |       "\n",
142 |       "\n",
143 |       "\n",
144 |       "\n",
145 |       "\n",
146 |       "\n",
147 |       "\n",
148 |       "\n",
149 |       "\n",
150 |       "\n",
151 |       "\n",
152 |       "\n",
153 |       "\n",
154 |       "\n",
155 |       "\n",
156 |       "\n",
157 |       "\n",
158 |       "\n",
159 |       "\n",
160 |       "\n",
161 |       "\n",
162 |       "\n",
163 |       "\n",
164 |       "analytics.usa.gov\n",
165 |       "              \n",
166 |       "\n",
167 |       "\n",
168 |       "About this site\n",
169 |       "\n",
170 |       "Data ·\n",
171 |       "                API\n",
172 |       "\n",
173 |       "\n",
174 |       "\n",
175 |       "\n",
176 |       "Select an agency\n",
177 |       "\n",
178 |       "All Participating Websites\n",
179 |       "Agency for International Development\n",
180 |       "Department of Agriculture\n",
181 |       "Department of Commerce\n",
182 |       "Department of Defense\n",
183 |       "Department of Education\n",
184 |       "Department of Energy\n",
185 |       "Department of Health and Human Services\n",
186 |       "Department of Homeland Security\n",
187 |       "Department of Justice\n",
188 |       "Department of Labor\n",
189 |       "Department of State\n",
190 |       "Department of Transportation\n",
191 |       "Department of Veterans Affairs\n",
192 |       "Department of the Interior\n",
193 |       "Department of the Treasury\n",
194 |       "Environmental Protection Agency\n",
195 |       "Executive Office of the President\n",
196 |       "General Services Administration\n",
197 |       "National Aeronautics and Space Administration\n",
198 |       "National Archives and Records Administration\n",
199 |       "National Science Foundation\n",
200 |       "Nuclear Regulatory Commission\n",
201 |       "Office of Personnel Management\n",
202 |       "Postal Service\n",
203 |       "Small Business Administration\n",
204 |       "Social Security Administration\n",
205 |       "\n",
206 |       "\n",
207 |       "\n",
208 |       "\n",
209 |       "\n",
210 |       "\n",
211 |       "\n",
212 |       "\n",
213 |       "\n",
214 |       "        Analytics.usa.gov is migrating to a new web analytics platform.\n",
215 |       "      \n",
216 |       "\n",
217 |       "        Real time data is currently unavailable.\n",
218 |       "      \n",
219 |       "\n",
220 |       "\n",
221 |       "\n",
222 |       "\n",
223 |       "\n",
224 |       "...\n",
225 |       "people on government websites  and apps in last 30 minutes\n",
226 |       "\n",
227 |       "\n",
228 |       "Sessions Today\n",
229 |       "Eastern Time\n",
230 |       "\n",
231 |       "\n",
232 |       "\n",
233 |       "\n",
234 |       "\n",
235 |       "\n",
236 |       "Sessions in the Past 90 Days\n",
237 |       "\n",
238 |       "\n",
239 |       "          There were ... sessions over the past 90 days.\n",
240 |       "        \n",
241 |       "\n",
242 |       "Devices\n",
243 |       "\n",
244 |       "\n",
245 |       "\n",
246 |       "\n",
247 |       "\n",
248 |       "            Based on rough network segmentation data, we estimate that less than 5% of all traffic across all agencies comes from US federal government networks.\n",
249 |       "          \n",
250 |       "\n",
251 |       "            Much more detailed data is available in downloadable CSV and JSON. This includes data on combined browser and OS usage.\n",
252 |       "          \n",
253 |       "\n",
254 |       "\n",
255 |       "Browsers\n",
256 |       "\n",
257 |       "\n",
258 |       "\n",
259 |       "\n",
260 |       "\n",
261 |       "Internet Explorer\n",
262 |       "\n",
263 |       "\n",
264 |       "\n",
265 |       "\n",
266 |       "\n",
267 |       "Operating Systems\n",
268 |       "\n",
269 |       "\n",
270 |       "\n",
271 |       "\n",
272 |       "\n",
273 |       "Windows\n",
274 |       "\n",
275 |       "\n",
276 |       "\n",
277 |       "\n",
278 |       "\n",
279 |       "\n",
280 |       "\n",
281 |       "User Locations In The Last 30 Minutes\n",
282 |       "\n",
283 |       "\n",
284 |       "Cities\n",
285 |       "\n",
286 |       "\n",
287 |       "\n",
288 |       "\n",
289 |       "\n",
290 |       "\n",
291 |       "Countries\n",
292 |       "\n",
293 |       "\n",
294 |       "\n",
295 |       "\n",
296 |       "\n",
297 |       "United States & Territories\n",
298 |       "\n",
299 |       "\n",
300 |       "\n",
301 |       "\n",
302 |       "International\n",
303 |       "\n",
304 |       "\n",
305 |       "\n",
306 |       "\n",
307 |       "\n",
308 |       "\n",
309 |       "\n",
310 |       "\n",
311 |       "Top Web Pages and App Screens\n",
312 |       "\n",
313 |       "Now\n",
314 |       "7 Days\n",
315 |       "30 Days\n",
316 |       "\n",
317 |       "\n",
318 |       "\n",
319 |       "              People on a single, specific page or app screen in the last 30 minutes. We only count pages with at least 10 people on the page.\n",
320 |       "              Download the full dataset.\n",
321 |       "\n",
322 |       "\n",
323 |       "\n",
324 |       "\n",
325 |       "\n",
326 |       "Sessions over the last week to hostnames, including traffic to all web pages and app screens within that hostname.\n",
327 |       "\n",
328 |       "\n",
329 |       "\n",
330 |       "\n",
331 |       "\n",
332 |       "              Sessions over the last month to hostnames, including traffic to all web pages and app screens within that hostname. We only count pages and app screens with at least 1,000 sessions in the last month.\n",
333 |       "              Download the full dataset.\n",
334 |       "\n",
335 |       "\n",
336 |       "\n",
337 |       "\n",
338 |       "\n",
339 |       "\n",
340 |       "Top Downloads\n",
341 |       "Total file downloads yesterday on government hostnames.\n",
342 |       "\n",
343 |       "\n",
344 |       "\n",
345 |       "\n",
346 |       "\n",
347 |       "\n",
348 |       "\n",
349 |       "\n",
350 |       "About this Site\n",
351 |       "\n",
352 |       "            These data provide a window into how people are interacting with the government online.\n",
353 |       "             The data come from a unified Google Analytics account for U.S. federal government agencies known as the Digital Analytics Program.\n",
354 |       "              This program helps government agencies understand how people find, access, and use government services online. The program does not track individuals,\n",
355 |       "               and anonymizes the IP addresses of visitors.\n",
356 |       "          \n",
357 |       "\n",
358 |       "            Not every government website is represented in these data.\n",
359 |       "            Currently, the Digital Analytics Program collects web traffic from around 520 executive branch government second level domains,\n",
360 |       "             across about 6,800 total hostnames,\n",
361 |       "              including every cabinet department.\n",
362 |       "               We continue to pursue and add more sites frequently; to add your site, email the Digital Analytics Program.\n",
363 |       "          \n",
364 |       "\n",
365 |       "\n",
366 |       "Download the data\n",
367 |       "You can download the data here. Available in JSON and CSV format.\n",
368 |       " Additionally, you can access data via\n",
369 |       "          API (currently in Beta).\n",
370 |       "        \n",
371 |       "A note on sampling\n",
372 |       "\n",
373 |       "          Due to varying Google Analytics API sampling thresholds and the sheer volume of digital analytics program data,\n",
374 |       "          some non-realtime reports may be subject to sampling.\n",
375 |       "          The data are intended to represent high level trends and should only be used for general insights into online visitor behavior.\n",
376 |       "        \n",
377 |       "\n",
378 |       "\n",
379 |       "\n",
380 |       "\n",
381 |       "\n",
382 |       "\n",
383 |       "Have a question or problem?\n",
384 |       "              \n",
385 |       "              Get in touch.\n",
386 |       "              \n",
387 |       "\n",
388 |       "\n",
389 |       "                  Suggest a feature or report an issue\n",
390 |       "            \n",
391 |       "\n",
392 |       "\n",
393 |       "\n",
394 |       "\n",
395 |       "\n",
396 |       "              View application code on GitHub\n",
397 |       "\n",
398 |       "\n",
399 |       "\n",
400 |       "              View code for the data on GitHub\n",
401 |       "\n",
402 |       "\n",
403 |       "\n",
404 |       "\n",
405 |       "\n",
406 |       "\n",
407 |       "\n",
408 |       "\n",
409 |       "\n",
410 |       "\n",
411 |       "\n",
412 |       "Analytics.usa.gov is a project of GSA’s Digital Analytics Program.\n",
413 |       "This website is hosted on cloud.gov.\n",
414 |       "\n",
415 |       "\n",
416 |       "\n",
417 |       "\n",
418 |       "\n",
419 |       "\n",
420 |       "\n",
421 |       "\n",
422 |       "\n",
423 |       "\n",
424 |       "\n",
425 |       "\n",
426 |       "\n"
427 |      ]
428 |     }
429 |    ],
430 |    "source": [
431 |     "print(soup.get_text())"
432 |    ]
433 |   },
434 |   {
435 |    "cell_type": "code",
436 |    "execution_count": 6,
437 |    "metadata": {},
438 |    "outputs": [
439 |     {
440 |      "name": "stdout",
441 |      "output_type": "stream",
442 |      "text": [
443 |       "<!DOCTYPE html>\n",
444 |       "<html lang=\"en\">\n",
445 |       " <!-- Initalize title and data source variables -->\n",
446 |       " <head>\n",
447 |       "  <!--\n",
448 |       "\n",
449 |       "    Hi! Welcome to our source code.\n",
450 |       "\n",
451 |       "    This dashboard uses data from the Digital Analytics Program, a US\n",
452 |       "    government team inside the General Services Administration.\n",
453 |       "\n",
454 |       "\n",
455 |       "    For a detailed tech breakdown of how 18F and friends built this site:\n",
456 |       "\n",
457 |       "    https://18f.gsa.gov/2015/03/19/how-we-built-analytics-usa-gov/\n",
458 |       "\n",
459 |       "\n",
460 |       "    This is a fully open source project, and your contributions are welcome.\n",
461 |       "\n",
462 |       "    Frontend static site: https://github.com/18F/analytics.usa.gov\n",
463 |       "    Backend data reporting: https://github.com/18F/analytics-reporter\n",
464 |       "\n",
465 |       "    -->\n",
466 |       "  <meta charset=\"utf-8\"/>\n",
467 |       "  <meta content=\"IE=Edge\" http-equiv=\"X-UA-Compatible\"/>\n",
468 |       "  <meta content=\"NjbZn6hQe7OwV-nTsa6nLmtrOUcSGPRyFjxm5zkmCcg\" name=\"google-site-verification\">\n",
469 |       "   <link href=\"/css/vendor/css/uswds.v0.9.6.css\" rel=\"stylesheet\"/>\n",
470 |       "   <link href=\"/css/public_analytics.css\" rel=\"stylesheet\"/>\n",
471 |       "   <link href=\"/images/analytics-favicon.ico\" rel=\"\n"
472 |      ]
473 |     }
474 |    ],
475 |    "source": [
476 |     "print(soup.prettify()[0:1000])"
477 |    ]
478 |   },
479 |   {
480 |    "cell_type": "code",
481 |    "execution_count": 7,
482 |    "metadata": {},
483 |    "outputs": [
484 |     {
485 |      "name": "stdout",
486 |      "output_type": "stream",
487 |      "text": [
488 |       "<a href=\"https://open.gsa.gov/api/dap/\" rel=\"noopener\" target=\"_blank\">API</a>\n",
489 |       "<a href=\"https://analytics.usa.gov/data/live/all-pages-realtime.csv\">Download the full dataset.</a>\n",
490 |       "<a href=\"https://analytics.usa.gov/data/live/all-domains-30-days.csv\">Download the full dataset.</a>\n",
491 |       "<a class=\"external-link\" href=\"https://digital.gov/services/dap/\">Digital Analytics Program</a>\n",
492 |       "<a class=\"external-link\" href=\"https://digital.gov/services/dap/common-questions-about-dap-faq/#part-4\">does not track individuals</a>\n",
493 |       "<a class=\"external-link\" href=\"https://support.google.com/analytics/answer/2763052?hl=en\">anonymizes the IP addresses</a>\n",
494 |       "<a class=\"external-link\" href=\"https://analytics.usa.gov/data/live/second-level-domains.csv\">520 executive branch government second level domains</a>\n",
495 |       "<a class=\"external-link\" href=\"https://analytics.usa.gov/data/live/sites.csv\">about 6,800 total hostnames</a>\n",
496 |       "<a href=\"https://open.gsa.gov/api/dap/\" rel=\"noopener\" target=\"_blank\">API</a>\n",
497 |       "<a class=\"usa-button usa-button-secondary-inverse\" href=\"https://github.com/18F/analytics.usa.gov/issues\">\n",
498 |       "<img alt=\"Github Icon\" class=\"github-icon\" src=\"/images/github-logo-white.svg\"/>\n",
499 |       "                  Suggest a feature or report an issue\n",
500 |       "            </a>\n",
501 |       "<a href=\"https://github.com/18F/analytics.usa.gov\">\n",
502 |       "<img alt=\"Github Icon\" class=\"github-icon\" src=\"/images/github-logo.svg\"/>\n",
503 |       "              View application code on GitHub</a>\n",
504 |       "<a href=\"https://github.com/18F/analytics-reporter\">\n",
505 |       "<img alt=\"Github Icon\" class=\"github-icon\" src=\"/images/github-logo.svg\"/>\n",
506 |       "              View code for the data on GitHub</a>\n",
507 |       "<a href=\"https://www.gsa.gov/\">\n",
508 |       "<img alt=\"GSA\" src=\"/images/gsa-logo.svg\"/>\n",
509 |       "</a>\n",
510 |       "<a href=\"https://digital.gov/guides/dap/\">Digital Analytics Program</a>\n",
511 |       "<a href=\"https://cloud.gov/\">cloud.gov</a>\n"
512 |      ]
513 |     }
514 |    ],
515 |    "source": [
516 |     "for link in soup.find_all('a', attrs = {'href': re.compile('^http')}):\n",
517 |     "                          print(link)"
518 |    ]
519 |   },
520 |   {
521 |    "cell_type": "code",
522 |    "execution_count": 8,
523 |    "metadata": {},
524 |    "outputs": [
525 |     {
526 |      "data": {
527 |       "text/plain": [
528 |        "bs4.element.Tag"
529 |       ]
530 |      },
531 |      "execution_count": 8,
532 |      "metadata": {},
533 |      "output_type": "execute_result"
534 |     }
535 |    ],
536 |    "source": [
537 |     "type(link)"
538 |    ]
539 |   },
540 |   {
541 |    "cell_type": "code",
542 |    "execution_count": 9,
543 |    "metadata": {},
544 |    "outputs": [
545 |     {
546 |      "name": "stdout",
547 |      "output_type": "stream",
548 |      "text": [
549 |       "<a href=\"https://open.gsa.gov/api/dap/\" rel=\"noopener\" target=\"_blank\">API</a>\n",
550 |       "<a href=\"https://analytics.usa.gov/data/live/all-pages-realtime.csv\">Download the full dataset.</a>\n",
551 |       "<a href=\"https://analytics.usa.gov/data/live/all-domains-30-days.csv\">Download the full dataset.</a>\n",
552 |       "<a class=\"external-link\" href=\"https://digital.gov/services/dap/\">Digital Analytics Program</a>\n",
553 |       "<a class=\"external-link\" href=\"https://digital.gov/services/dap/common-questions-about-dap-faq/#part-4\">does not track individuals</a>\n",
554 |       "<a class=\"external-link\" href=\"https://support.google.com/analytics/answer/2763052?hl=en\">anonymizes the IP addresses</a>\n",
555 |       "<a class=\"external-link\" href=\"https://analytics.usa.gov/data/live/second-level-domains.csv\">520 executive branch government second level domains</a>\n",
556 |       "<a class=\"external-link\" href=\"https://analytics.usa.gov/data/live/sites.csv\">about 6,800 total hostnames</a>\n",
557 |       "<a href=\"https://open.gsa.gov/api/dap/\" rel=\"noopener\" target=\"_blank\">API</a>\n",
558 |       "<a class=\"usa-button usa-button-secondary-inverse\" href=\"https://github.com/18F/analytics.usa.gov/issues\">\n",
559 |       "<img alt=\"Github Icon\" class=\"github-icon\" src=\"/images/github-logo-white.svg\"/>\n",
560 |       "                  Suggest a feature or report an issue\n",
561 |       "            </a>\n",
562 |       "<a href=\"https://github.com/18F/analytics.usa.gov\">\n",
563 |       "<img alt=\"Github Icon\" class=\"github-icon\" src=\"/images/github-logo.svg\"/>\n",
564 |       "              View application code on GitHub</a>\n",
565 |       "<a href=\"https://github.com/18F/analytics-reporter\">\n",
566 |       "<img alt=\"Github Icon\" class=\"github-icon\" src=\"/images/github-logo.svg\"/>\n",
567 |       "              View code for the data on GitHub</a>\n",
568 |       "<a href=\"https://www.gsa.gov/\">\n",
569 |       "<img alt=\"GSA\" src=\"/images/gsa-logo.svg\"/>\n",
570 |       "</a>\n",
571 |       "<a href=\"https://digital.gov/guides/dap/\">Digital Analytics Program</a>\n",
572 |       "<a href=\"https://cloud.gov/\">cloud.gov</a>\n"
573 |      ]
574 |     }
575 |    ],
576 |    "source": [
577 |     "# Use a context manager so the file is flushed and closed even if an error occurs.\n",
578 |     "with open('parsed_data.txt', 'w') as file:\n",
579 |     "    for link in soup.find_all('a', attrs={'href': re.compile('^http')}):\n",
580 |     "        soup_link = str(link)\n",
581 |     "        print(soup_link)\n",
582 |     "        # One link per line keeps the entries separable in the saved file.\n",
583 |     "        file.write(soup_link + '\\n')"
584 |    ]
585 |   },
586 |   {
587 |    "cell_type": "code",
588 |    "execution_count": 10,
589 |    "metadata": {},
590 |    "outputs": [
591 |     {
592 |      "data": {
593 |       "text/plain": [
594 |        "'/workspaces/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/notebooks'"
595 |       ]
596 |      },
597 |      "execution_count": 10,
598 |      "metadata": {},
599 |      "output_type": "execute_result"
600 |     }
601 |    ],
602 |    "source": [
603 |     "%pwd"
604 |    ]
605 |   },
606 |   {
607 |    "cell_type": "code",
608 |    "execution_count": null,
609 |    "metadata": {},
610 |    "outputs": [],
611 |    "source": []
612 |   }
613 |  ],
614 |  "metadata": {
615 |   "kernelspec": {
616 |    "display_name": "Python 3",
617 |    "language": "python",
618 |    "name": "python3"
619 |   },
620 |   "language_info": {
621 |    "codemirror_mode": {
622 |     "name": "ipython",
623 |     "version": 3
624 |    },
625 |    "file_extension": ".py",
626 |    "mimetype": "text/x-python",
627 |    "name": "python",
628 |    "nbconvert_exporter": "python",
629 |    "pygments_lexer": "ipython3",
630 |    "version": "3.10.13"
631 |   }
632 |  },
633 |  "nbformat": 4,
634 |  "nbformat_minor": 1
635 | }
636 | 


--------------------------------------------------------------------------------
/notebooks/07_07b.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "id": "7252adbf-4476-49ed-b1bb-c51604a2e729",
 6 |    "metadata": {},
 7 |    "source": [
 8 |     "## Asynchronous scraping"
 9 |    ]
10 |   },
11 |   {
12 |    "cell_type": "code",
13 |    "execution_count": 7,
14 |    "id": "cb191576-cfe0-4823-aeb8-2ce0ed7a1cb3",
15 |    "metadata": {},
16 |    "outputs": [],
17 |    "source": [
18 |     "import aiohttp\n",
19 |     "import asyncio\n",
20 |     "from bs4 import BeautifulSoup\n",
21 |     "import csv\n",
22 |     "import re"
23 |    ]
24 |   }
25 |  ],
26 |  "metadata": {
27 |   "kernelspec": {
28 |    "display_name": "Python 3",
29 |    "language": "python",
30 |    "name": "python3"
31 |   },
32 |   "language_info": {
33 |    "codemirror_mode": {
34 |     "name": "ipython",
35 |     "version": 3
36 |    },
37 |    "file_extension": ".py",
38 |    "mimetype": "text/x-python",
39 |    "name": "python",
40 |    "nbconvert_exporter": "python",
41 |    "pygments_lexer": "ipython3",
42 |    "version": "3.8.8"
43 |   }
44 |  },
45 |  "nbformat": 4,
46 |  "nbformat_minor": 5
47 | }
48 | 


--------------------------------------------------------------------------------
/notebooks/07_07e.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "7252adbf-4476-49ed-b1bb-c51604a2e729",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "## Asynchronous scraping"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "code",
 13 |    "execution_count": 11,
 14 |    "id": "74ea2bda",
 15 |    "metadata": {},
 16 |    "outputs": [
 17 |     {
 18 |      "name": "stdout",
 19 |      "output_type": "stream",
 20 |      "text": [
 21 |       "Requirement already satisfied: aiohttp in /usr/local/python/3.10.13/lib/python3.10/site-packages (3.9.1)\n",
 22 |       "Requirement already satisfied: attrs>=17.3.0 in /home/codespace/.local/lib/python3.10/site-packages (from aiohttp) (23.1.0)\n",
 23 |       "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/python/3.10.13/lib/python3.10/site-packages (from aiohttp) (6.0.4)\n",
 24 |       "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/python/3.10.13/lib/python3.10/site-packages (from aiohttp) (1.9.4)\n",
 25 |       "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/python/3.10.13/lib/python3.10/site-packages (from aiohttp) (1.4.0)\n",
 26 |       "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/python/3.10.13/lib/python3.10/site-packages (from aiohttp) (1.3.1)\n",
 27 |       "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/python/3.10.13/lib/python3.10/site-packages (from aiohttp) (4.0.3)\n",
 28 |       "Requirement already satisfied: idna>=2.0 in /home/codespace/.local/lib/python3.10/site-packages (from yarl<2.0,>=1.0->aiohttp) (3.4)\n",
 29 |       "Note: you may need to restart the kernel to use updated packages.\n"
 30 |      ]
 31 |     }
 32 |    ],
 33 |    "source": [
 34 |     "pip install aiohttp"
 35 |    ]
 36 |   },
 37 |   {
 38 |    "cell_type": "code",
 39 |    "execution_count": 12,
 40 |    "id": "c5ec3398",
 41 |    "metadata": {},
 42 |    "outputs": [
 43 |     {
 44 |      "name": "stdout",
 45 |      "output_type": "stream",
 46 |      "text": [
 47 |       "Collecting asyncio\n",
 48 |       "  Downloading asyncio-3.4.3-py3-none-any.whl (101 kB)\n",
 49 |       "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m101.8/101.8 kB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
 50 |       "\u001b[?25hInstalling collected packages: asyncio\n",
 51 |       "Successfully installed asyncio-3.4.3\n",
 52 |       "Note: you may need to restart the kernel to use updated packages.\n"
 53 |      ]
 54 |     }
 55 |    ],
 56 |    "source": [
 57 |     "pip install asyncio"
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "code",
 62 |    "execution_count": 13,
 63 |    "id": "cb191576-cfe0-4823-aeb8-2ce0ed7a1cb3",
 64 |    "metadata": {},
 65 |    "outputs": [],
 66 |    "source": [
 67 |     "import aiohttp\n",
 68 |     "import asyncio\n",
 69 |     "from bs4 import BeautifulSoup\n",
 70 |     "import csv\n",
 71 |     "import re"
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "code",
 76 |    "execution_count": 14,
 77 |    "id": "eea5fbf1",
 78 |    "metadata": {},
 79 |    "outputs": [
 80 |     {
 81 |      "name": "stdout",
 82 |      "output_type": "stream",
 83 |      "text": [
 84 |       "Requirement already satisfied: nest-asyncio in /home/codespace/.local/lib/python3.10/site-packages (1.5.8)\n"
 85 |      ]
 86 |     }
 87 |    ],
 88 |    "source": [
 89 |     "!pip install nest-asyncio\n",
 90 |     "import nest_asyncio\n",
 91 |     "nest_asyncio.apply()"
 92 |    ]
 93 |   },
 94 |   {
 95 |    "cell_type": "code",
 96 |    "execution_count": 46,
 97 |    "id": "fa025c7d",
 98 |    "metadata": {},
 99 |    "outputs": [],
100 |    "source": [
101 |     "async def scrap_and_save_links(text):\n",
102 |     "  # Parse the HTML in `text` and append each link whose href starts with 'http' to 'csv_file', one per row.\n",
103 |     "  soup = BeautifulSoup(text, 'html.parser')\n",
104 |     "  # The context manager closes the file even if a write fails.\n",
105 |     "  with open('csv_file', 'a', newline='') as file:\n",
106 |     "    writer = csv.writer(file, delimiter=',')\n",
107 |     "    for link in soup.find_all('a', attrs={'href': re.compile('^http')}):\n",
108 |     "      writer.writerow([link.get('href')])"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "code",
113 |    "execution_count": 47,
114 |    "id": "c2176161",
115 |    "metadata": {},
116 |    "outputs": [],
117 |    "source": [
118 |     "async def fetch(session, url):\n",
119 |     "   # Download `url` with the shared session and pass its HTML to scrap_and_save_links.\n",
120 |     "   try:\n",
121 |     "      async with session.get(url) as response:\n",
122 |     "         text = await response.text()\n",
123 |     "         await scrap_and_save_links(text)\n",
124 |     "   except Exception as e:\n",
125 |     "      print(f'{url}: {e}')  # best-effort: report which URL failed and keep going"
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "code",
130 |    "execution_count": 48,
131 |    "id": "26ead7cb",
132 |    "metadata": {},
133 |    "outputs": [],
134 |    "source": [
135 |     "async def scrap(urls):\n",
136 |     "  # Open one HTTP session, share it across all downloads, and run them concurrently.\n",
137 |     "  async with aiohttp.ClientSession() as session:\n",
138 |     "    # One fetch coroutine per URL; gather awaits them all together.\n",
139 |     "    pending = [fetch(session, url) for url in urls]\n",
140 |     "    await asyncio.gather(*pending)"
141 |    ]
142 |   },
143 |   {
144 |    "cell_type": "code",
145 |    "execution_count": 49,
146 |    "id": "039cea48",
147 |    "metadata": {},
148 |    "outputs": [],
149 |    "source": [
150 |     "urls = ['https://analytics.usa.gov/', 'https://www.python.org/', 'https://www.linkedin.com/']\n",
151 |     "asyncio.run(scrap(urls=urls))"
152 |    ]
153 |   },
154 |   {
155 |    "cell_type": "code",
156 |    "execution_count": null,
157 |    "id": "e2ba7d90",
158 |    "metadata": {},
159 |    "outputs": [],
160 |    "source": []
161 |   }
162 |  ],
163 |  "metadata": {
164 |   "kernelspec": {
165 |    "display_name": "Python 3",
166 |    "language": "python",
167 |    "name": "python3"
168 |   },
169 |   "language_info": {
170 |    "codemirror_mode": {
171 |     "name": "ipython",
172 |     "version": 3
173 |    },
174 |    "file_extension": ".py",
175 |    "mimetype": "text/x-python",
176 |    "name": "python",
177 |    "nbconvert_exporter": "python",
178 |    "pygments_lexer": "ipython3",
179 |    "version": "3.10.13"
180 |   }
181 |  },
182 |  "nbformat": 4,
183 |  "nbformat_minor": 5
184 | }
185 | 


--------------------------------------------------------------------------------
/notebooks/parsed_data.txt:
--------------------------------------------------------------------------------
 1 | <a href="https://open.gsa.gov/api/dap/" rel="noopener" target="_blank">API</a><a href="https://analytics.usa.gov/data/live/all-pages-realtime.csv">Download the full dataset.</a><a href="https://analytics.usa.gov/data/live/all-domains-30-days.csv">Download the full dataset.</a><a class="external-link" href="https://digital.gov/services/dap/">Digital Analytics Program</a><a class="external-link" href="https://digital.gov/services/dap/common-questions-about-dap-faq/#part-4">does not track individuals</a><a class="external-link" href="https://support.google.com/analytics/answer/2763052?hl=en">anonymizes the IP addresses</a><a class="external-link" href="https://analytics.usa.gov/data/live/second-level-domains.csv">520 executive branch government second level domains</a><a class="external-link" href="https://analytics.usa.gov/data/live/sites.csv">about 6,800 total hostnames</a><a href="https://open.gsa.gov/api/dap/" rel="noopener" target="_blank">API</a><a class="usa-button usa-button-secondary-inverse" href="https://github.com/18F/analytics.usa.gov/issues">
 2 | <img alt="Github Icon" class="github-icon" src="/images/github-logo-white.svg"/>
 3 |                   Suggest a feature or report an issue
 4 |             </a><a href="https://github.com/18F/analytics.usa.gov">
 5 | <img alt="Github Icon" class="github-icon" src="/images/github-logo.svg"/>
 6 |               View application code on GitHub</a><a href="https://github.com/18F/analytics-reporter">
 7 | <img alt="Github Icon" class="github-icon" src="/images/github-logo.svg"/>
 8 |               View code for the data on GitHub</a><a href="https://www.gsa.gov/">
 9 | <img alt="GSA" src="/images/gsa-logo.svg"/>
10 | </a><a href="https://digital.gov/guides/dap/">Digital Analytics Program</a><a href="https://cloud.gov/">cloud.gov</a>


--------------------------------------------------------------------------------
/notebooks/pie_chart.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LinkedInLearning/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/ba4a06149dcd9e5592eae5c662a97787aab43880/notebooks/pie_chart.png


--------------------------------------------------------------------------------
/notebooks/test:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # Specify Python package requirements for your project here (e.g., Mako==1.1.1). If your project doesn't require these, you can leave this file unchanged or delete it.
2 | 


--------------------------------------------------------------------------------
/streamlit/08_02b.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LinkedInLearning/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/ba4a06149dcd9e5592eae5c662a97787aab43880/streamlit/08_02b.py


--------------------------------------------------------------------------------
/streamlit/08_02e.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | 
3 | st.write('Hello World!')  # render the greeting text in the app body


--------------------------------------------------------------------------------
/streamlit/08_03b.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LinkedInLearning/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/ba4a06149dcd9e5592eae5c662a97787aab43880/streamlit/08_03b.py


--------------------------------------------------------------------------------
/streamlit/08_03e.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | import streamlit as st
 4 | import matplotlib.pyplot as plt
 5 | 
 6 | col_names = ["column1","column2","column3"]
 7 | 
 8 | data = pd.DataFrame(np.random.randint(30, size=(30, 3)),columns=col_names)
 9 | 
10 | 'line graph:'
11 | st.line_chart(data)
12 | 
13 | 'bar graph:'
14 | st.bar_chart(data)
15 | 
16 | animals = ['cat', 'cow', 'dog']
17 | heights  = [30, 150, 80]
18 | 
19 | 'pie chart:'
20 | fig, ax = plt.subplots()
21 | ax.pie(heights,labels=animals)
22 | 
23 | st.pyplot(fig)
24 | 


--------------------------------------------------------------------------------
/streamlit/08_04b.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LinkedInLearning/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/ba4a06149dcd9e5592eae5c662a97787aab43880/streamlit/08_04b.py


--------------------------------------------------------------------------------
/streamlit/08_04e.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | import numpy as np 
 3 | import pandas as pd 
 4 | import streamlit as st 
 5 | import matplotlib.pyplot as plt 
 6 | 
 7 | rows = np.random.randn(1,1)
 8 | 
 9 | 'Growing Line Chart:'
10 | chart = st.line_chart(rows)
11 | 
12 | for i in range(1, 100):
13 |   new_rows = rows[0] + np.random.randn(1,1)
14 |   chart.add_rows(new_rows)
15 |   rows= new_rows
16 |   time.sleep(0.05)
17 | 
18 | 
19 | values = np.random.rand(10)
20 | 'matplotlibs Line Chart:'
21 | fig, ax = plt.subplots()
22 | ax.plot(values)
23 | st.pyplot(fig)
24 | 
25 | 
26 | 
27 | 
28 | 
29 | 
30 | 
31 | 


--------------------------------------------------------------------------------
/streamlit/08_05b.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/streamlit/08_05e.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd 
 2 | import numpy as np 
 3 | import streamlit as st 
 4 | import matplotlib.pyplot as plt 
 5 | 
 6 | animals = ['cat', 'cow', 'dog', 'goat']
 7 | heights = [30, 150, 80, 60]
 8 | weights = [5, 400, 40, 50]
 9 | 
10 | fig, ax = plt.subplots()
11 | 
12 | x = np.arange(len(heights))
13 | width = 0.40
14 | 
15 | ax.bar(x-0.2, heights, width, color='red')
16 | ax.bar(x+0.2, weights, width, color='orange')
17 | 
18 | ax.legend(['height', 'weight'])
19 | ax.set_xticks(x)
20 | ax.set_xticklabels(animals)
21 | 
22 | st.pyplot(fig)
23 | 
24 | explode = [0.2, 0.1, 0.1, 0.1]
25 | plot_pie, ax = plt.subplots()
26 | ax.pie(heights, explode = explode, labels=animals, autopct='%1.1f%%', shadow = True)
27 | ax.axis('equal')
28 | st.pyplot(plot_pie)


--------------------------------------------------------------------------------
/streamlit/08_06b.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LinkedInLearning/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/ba4a06149dcd9e5592eae5c662a97787aab43880/streamlit/08_06b.py


--------------------------------------------------------------------------------
/streamlit/08_06e.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st 
 2 | import seaborn as sns 
 3 | import matplotlib.pyplot as plt 
 4 | import pandas as pd 
 5 | from sklearn.datasets import load_iris
 6 | 
 7 | iris_data = load_iris()
 8 | 
 9 | data = pd.DataFrame(iris_data.data, columns = iris_data.feature_names)
10 | 
11 | fig = plt.figure()
12 | sns.histplot(data=data, bins=20)
13 | st.pyplot(fig)
14 | 
15 | fig = plt.figure()
16 | sns.boxplot(data=data)
17 | st.pyplot(fig)
18 | 
19 | fig =plt.figure()
20 | sns.scatterplot(data=data)
21 | st.pyplot(fig)


--------------------------------------------------------------------------------
/test:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------