├── .devcontainer └── devcontainer.json ├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ └── main.yml ├── .gitignore ├── .vscode └── settings.json ├── CONTRIBUTING.md ├── LICENSE ├── NOTICE ├── README.md ├── data ├── Superstore-Sales.csv ├── iris.csv ├── iris.data.csv ├── mtcars.csv └── test ├── notebooks ├── 02_03b.ipynb ├── 02_03e.ipynb ├── 02_04b.ipynb ├── 02_04e.ipynb ├── 02_05b.ipynb ├── 02_05e.ipynb ├── 02_06b.ipynb ├── 02_06e.ipynb ├── 02_07b.ipynb ├── 02_07e.ipynb ├── 04_01b.ipynb ├── 04_01e.ipynb ├── 04_02b.ipynb ├── 04_02e.ipynb ├── 04_03b.ipynb ├── 04_03e.ipynb ├── 04_04b.ipynb ├── 04_04e.ipynb ├── 04_05b.ipynb ├── 04_05e.ipynb ├── 04_06b.ipynb ├── 04_06e.ipynb ├── 04_07b.ipynb ├── 04_07e.ipynb ├── 05_01b.ipynb ├── 05_01e.ipynb ├── 05_02b.ipynb ├── 05_02e.ipynb ├── 05_03b.ipynb ├── 05_03e.ipynb ├── 05_04b.ipynb ├── 05_04e.ipynb ├── 05_05b.ipynb ├── 05_05e.ipynb ├── 05_06b.ipynb ├── 05_06e.ipynb ├── 05_07b.ipynb ├── 05_07e.ipynb ├── 06_01b.ipynb ├── 06_01e.ipynb ├── 06_02b.ipynb ├── 06_02e.ipynb ├── 06_03b.ipynb ├── 06_03e.ipynb ├── 07_02b.ipynb ├── 07_02e.ipynb ├── 07_03b.ipynb ├── 07_03e.ipynb ├── 07_04b.ipynb ├── 07_04e.ipynb ├── 07_05b.ipynb ├── 07_05e.ipynb ├── 07_06b.ipynb ├── 07_06e.ipynb ├── 07_07b.ipynb ├── 07_07e.ipynb ├── csv_file ├── parsed_data.txt ├── pie_chart.png └── test ├── requirements.txt ├── streamlit ├── 08_02b.py ├── 08_02e.py ├── 08_03b.py ├── 08_03e.py ├── 08_04b.py ├── 08_04e.py ├── 08_05b.py ├── 08_05e.py ├── 08_06b.py └── 08_06e.py └── test /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "extensions": [ 3 | "GitHub.github-vscode-theme", 4 | "ms-toolsai.jupyter", 5 | "ms-python.python" 6 | // Additional Extensions Here 7 | ], 8 | "onCreateCommand" : "[ -f requirements.txt ] && pip install -r requirements.txt; echo PS1='\"$ \"' >> ~/.bashrc", //Set Terminal Prompt to $ 9 | } 10 | 11 | // DevContainer 
Reference: https://code.visualstudio.com/docs/remote/devcontainerjson-reference 12 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # Codeowners for these exercise files: 2 | # * (asterisk) denotes "all files and folders" 3 | # Example: * @producer @instructor 4 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 7 | 8 | ## Issue Overview 9 | 10 | 11 | ## Describe your environment 12 | 13 | 14 | ## Steps to Reproduce 15 | 16 | 1. 17 | 2. 18 | 3. 19 | 4. 20 | 21 | ## Expected Behavior 22 | 23 | 24 | ## Current Behavior 25 | 26 | 27 | ## Possible Solution 28 | 29 | 30 | ## Screenshots / Video 31 | 32 | 33 | ## Related Issues 34 | 35 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: Copy To Branches 2 | on: 3 | workflow_dispatch: 4 | jobs: 5 | copy-to-branches: 6 | runs-on: ubuntu-latest 7 | steps: 8 | - uses: actions/checkout@v2 9 | with: 10 | fetch-depth: 0 11 | - name: Copy To Branches Action 12 | uses: planetoftheweb/copy-to-branches@v1.2 13 | env: 14 | key: main 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | node_modules 3 | .tmp 4 | npm-debug.log 5 | -------------------------------------------------------------------------------- /.vscode/settings.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "editor.bracketPairColorization.enabled": true, 3 | "editor.cursorBlinking": "solid", 4 | "editor.fontFamily": "ui-monospace, Menlo, Monaco, 'Cascadia Mono', 'Segoe UI Mono', 'Roboto Mono', 'Oxygen Mono', 'Ubuntu Monospace', 'Source Code Pro', 'Fira Mono', 'Droid Sans Mono', 'Courier New', monospace", 5 | "editor.fontLigatures": false, 6 | "editor.fontSize": 22, 7 | "editor.formatOnPaste": true, 8 | "editor.formatOnSave": true, 9 | "editor.lineNumbers": "on", 10 | "editor.matchBrackets": "always", 11 | "editor.minimap.enabled": false, 12 | "editor.smoothScrolling": true, 13 | "editor.tabSize": 2, 14 | "editor.useTabStops": true, 15 | "emmet.triggerExpansionOnTab": true, 16 | "explorer.openEditors.visible": 0, 17 | "files.autoSave": "afterDelay", 18 | "screencastMode.onlyKeyboardShortcuts": true, 19 | "terminal.integrated.fontSize": 18, 20 | "workbench.colorTheme": "Visual Studio Dark", 21 | "workbench.fontAliasing": "antialiased", 22 | "workbench.statusBar.visible": true 23 | } 24 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 2 | Contribution Agreement 3 | ====================== 4 | 5 | This repository does not accept pull requests (PRs). All pull requests will be closed. 6 | 7 | However, if any contributions (through pull requests, issues, feedback or otherwise) are provided, as a contributor, you represent that the code you submit is your original work or that of your employer (in which case you represent you have the right to bind your employer). By submitting code (or otherwise providing feedback), you (and, if applicable, your employer) are licensing the submitted code (and/or feedback) to LinkedIn and the open source community subject to the BSD 2-Clause license. 
8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | LinkedIn Learning Exercise Files License Agreement 2 | ================================================== 3 | 4 | This License Agreement (the "Agreement") is a binding legal agreement 5 | between you (as an individual or entity, as applicable) and LinkedIn 6 | Corporation (“LinkedIn”). By downloading or using the LinkedIn Learning 7 | exercise files in this repository (“Licensed Materials”), you agree to 8 | be bound by the terms of this Agreement. If you do not agree to these 9 | terms, do not download or use the Licensed Materials. 10 | 11 | 1. License. 12 | - a. Subject to the terms of this Agreement, LinkedIn hereby grants LinkedIn 13 | members during their LinkedIn Learning subscription a non-exclusive, 14 | non-transferable copyright license, for internal use only, to 1) make a 15 | reasonable number of copies of the Licensed Materials, and 2) make 16 | derivative works of the Licensed Materials for the sole purpose of 17 | practicing skills taught in LinkedIn Learning courses. 18 | - b. Distribution. Unless otherwise noted in the Licensed Materials, subject 19 | to the terms of this Agreement, LinkedIn hereby grants LinkedIn members 20 | with a LinkedIn Learning subscription a non-exclusive, non-transferable 21 | copyright license to distribute the Licensed Materials, except the 22 | Licensed Materials may not be included in any product or service (or 23 | otherwise used) to instruct or educate others. 24 | 25 | 2. Restrictions and Intellectual Property. 26 | - a. You may not use, modify, copy, make derivative works of, publish, 27 | distribute, rent, lease, sell, sublicense, assign or otherwise transfer the 28 | Licensed Materials, except as expressly set forth above in Section 1. 29 | - b. 
Linkedin (and its licensors) retains its intellectual property rights 30 | in the Licensed Materials. Except as expressly set forth in Section 1, 31 | LinkedIn grants no licenses. 32 | - c. You indemnify LinkedIn and its licensors and affiliates for i) any 33 | alleged infringement or misappropriation of any intellectual property rights 34 | of any third party based on modifications you make to the Licensed Materials, 35 | ii) any claims arising from your use or distribution of all or part of the 36 | Licensed Materials and iii) a breach of this Agreement. You will defend, hold 37 | harmless, and indemnify LinkedIn and its affiliates (and our and their 38 | respective employees, shareholders, and directors) from any claim or action 39 | brought by a third party, including all damages, liabilities, costs and 40 | expenses, including reasonable attorneys’ fees, to the extent resulting from, 41 | alleged to have resulted from, or in connection with: (a) your breach of your 42 | obligations herein; or (b) your use or distribution of any Licensed Materials. 43 | 44 | 3. Open source. This code may include open source software, which may be 45 | subject to other license terms as provided in the files. 46 | 47 | 4. Warranty Disclaimer. LINKEDIN PROVIDES THE LICENSED MATERIALS ON AN “AS IS” 48 | AND “AS AVAILABLE” BASIS. LINKEDIN MAKES NO REPRESENTATION OR WARRANTY, 49 | WHETHER EXPRESS OR IMPLIED, ABOUT THE LICENSED MATERIALS, INCLUDING ANY 50 | REPRESENTATION THAT THE LICENSED MATERIALS WILL BE FREE OF ERRORS, BUGS OR 51 | INTERRUPTIONS, OR THAT THE LICENSED MATERIALS ARE ACCURATE, COMPLETE OR 52 | OTHERWISE VALID. TO THE FULLEST EXTENT PERMITTED BY LAW, LINKEDIN AND ITS 53 | AFFILIATES DISCLAIM ANY IMPLIED OR STATUTORY WARRANTY OR CONDITION, INCLUDING 54 | ANY IMPLIED WARRANTY OR CONDITION OF MERCHANTABILITY OR FITNESS FOR A 55 | PARTICULAR PURPOSE, AVAILABILITY, SECURITY, TITLE AND/OR NON-INFRINGEMENT. 
56 | YOUR USE OF THE LICENSED MATERIALS IS AT YOUR OWN DISCRETION AND RISK, AND 57 | YOU WILL BE SOLELY RESPONSIBLE FOR ANY DAMAGE THAT RESULTS FROM USE OF THE 58 | LICENSED MATERIALS TO YOUR COMPUTER SYSTEM OR LOSS OF DATA. NO ADVICE OR 59 | INFORMATION, WHETHER ORAL OR WRITTEN, OBTAINED BY YOU FROM US OR THROUGH OR 60 | FROM THE LICENSED MATERIALS WILL CREATE ANY WARRANTY OR CONDITION NOT 61 | EXPRESSLY STATED IN THESE TERMS. 62 | 63 | 5. Limitation of Liability. LINKEDIN SHALL NOT BE LIABLE FOR ANY INDIRECT, 64 | INCIDENTAL, SPECIAL, PUNITIVE, CONSEQUENTIAL OR EXEMPLARY DAMAGES, INCLUDING 65 | BUT NOT LIMITED TO, DAMAGES FOR LOSS OF PROFITS, GOODWILL, USE, DATA OR OTHER 66 | INTANGIBLE LOSSES . IN NO EVENT WILL LINKEDIN'S AGGREGATE LIABILITY TO YOU 67 | EXCEED $100. THIS LIMITATION OF LIABILITY SHALL: 68 | - i. APPLY REGARDLESS OF WHETHER (A) YOU BASE YOUR CLAIM ON CONTRACT, TORT, 69 | STATUTE, OR ANY OTHER LEGAL THEORY, (B) WE KNEW OR SHOULD HAVE KNOWN ABOUT 70 | THE POSSIBILITY OF SUCH DAMAGES, OR (C) THE LIMITED REMEDIES PROVIDED IN THIS 71 | SECTION FAIL OF THEIR ESSENTIAL PURPOSE; AND 72 | - ii. NOT APPLY TO ANY DAMAGE THAT LINKEDIN MAY CAUSE YOU INTENTIONALLY OR 73 | KNOWINGLY IN VIOLATION OF THESE TERMS OR APPLICABLE LAW, OR AS OTHERWISE 74 | MANDATED BY APPLICABLE LAW THAT CANNOT BE DISCLAIMED IN THESE TERMS. 75 | 76 | 6. Termination. This Agreement automatically terminates upon your breach of 77 | this Agreement or termination of your LinkedIn Learning subscription. On 78 | termination, all licenses granted under this Agreement will terminate 79 | immediately and you will delete the Licensed Materials. Sections 2-7 of this 80 | Agreement survive any termination of this Agreement. LinkedIn may discontinue 81 | the availability of some or all of the Licensed Materials at any time for any 82 | reason. 83 | 84 | 7. Miscellaneous. 
This Agreement will be governed by and construed in 85 | accordance with the laws of the State of California without regard to conflict 86 | of laws principles. The exclusive forum for any disputes arising out of or 87 | relating to this Agreement shall be an appropriate federal or state court 88 | sitting in the County of Santa Clara, State of California. If LinkedIn does 89 | not act to enforce a breach of this Agreement, that does not mean that 90 | LinkedIn has waived its right to enforce this Agreement. The Agreement does 91 | not create a partnership, agency relationship, or joint venture between the 92 | parties. Neither party has the power or authority to bind the other or to 93 | create any obligation or responsibility on behalf of the other. You may not, 94 | without LinkedIn’s prior written consent, assign or delegate any rights or 95 | obligations under these terms, including in connection with a change of 96 | control. Any purported assignment and delegation shall be ineffective. The 97 | Agreement shall bind and inure to the benefit of the parties, their respective 98 | successors and permitted assigns. If any provision of the Agreement is 99 | unenforceable, that provision will be modified to render it enforceable to the 100 | extent possible to give effect to the parties’ intentions and the remaining 101 | provisions will not be affected. This Agreement is the only agreement between 102 | you and LinkedIn regarding the Licensed Materials, and supersedes all prior 103 | agreements relating to the Licensed Materials. 104 | 105 | Last Updated: March 2019 106 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Copyright 2024 LinkedIn Corporation 2 | All Rights Reserved. 3 | 4 | Licensed under the LinkedIn Learning Exercise File License (the "License"). 5 | See LICENSE in the project root for license information. 
6 | 7 | Please note, this project may automatically load third party code from external 8 | repositories (for example, NPM modules, Composer packages, or other dependencies). 9 | If so, such third party code may be subject to other license terms than as set 10 | forth above. In addition, such third party code may also depend on and load 11 | multiple tiers of dependencies. Please review the applicable licenses of the 12 | additional dependencies. 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python for Data Science and Machine Learning Essential Training 2 | This is the repository for the LinkedIn Learning course Python for Data Science and Machine Learning Essential Training. The full course is available from [LinkedIn Learning][lil-course-url]. 3 | 4 | ![lil-thumbnail-url] 5 | 6 | Python for Data Science and Machine Learning Essential Training is one of the most popular data science courses at LinkedIn Learning. It has now been updated and expanded to two parts—giving you even more hands-on, real-world Python experience. In part one, instructor Lillian Pierson takes you step by step through a data science and machine learning project: a web scraper that downloads and analyzes data from the web. Along the way, she introduces techniques to clean, reformat, transform, and describe raw data; generate visualizations; remove outliers; perform simple data analysis; and generate web-based graphs using Streamlit. By the end of this course, you'll have acquired basic coding experience that you can take to your organization and quickly apply to your own custom data science and machine learning projects. 7 | 8 | This course is integrated with GitHub Codespaces, an instant cloud developer environment that offers all the functionality of your favorite IDE without the need for any local machine setup. 
With GitHub Codespaces, you can get hands-on practice from any machine, at any time—all while using a tool that you'll likely encounter in the workplace. Check out the "Using GitHub Codespaces with this course" video to learn how to get started. 9 | 10 | ### Instructor 11 | 12 | Lillian Pierson, P.E. 13 | 14 | Engineer, CEO, and Head of Product at Data-Mania 15 | 16 | 17 | 18 | Check out my other courses on [LinkedIn Learning](https://www.linkedin.com/learning/instructors/lillian-pierson-p-e?u=104). 19 | 20 | [0]: # (Replace these placeholder URLs with actual course URLs) 21 | 22 | [lil-course-url]: https://www.linkedin.com/learning/python-for-data-science-and-machine-learning-essential-training-part-1 23 | [lil-thumbnail-url]: https://media.licdn.com/dms/image/D560DAQHjOZJ6XUrh3Q/learning-public-crop_675_1200/0/1709944557924?e=2147483647&v=beta&t=M7Tpw3XXS2hMAb2QXpJ73m7Bp6awqE82A8jReKHbkPk 24 | 25 | -------------------------------------------------------------------------------- /data/Superstore-Sales.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LinkedInLearning/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/ba4a06149dcd9e5592eae5c662a97787aab43880/data/Superstore-Sales.csv -------------------------------------------------------------------------------- /data/iris.csv: -------------------------------------------------------------------------------- 1 | "","Sepal.Length","Sepal.Width","Petal.Length","Petal.Width","Species" 2 | "1",5.1,3.5,1.4,0.2,"setosa" 3 | "2",4.9,3,1.4,0.2,"setosa" 4 | "3",4.7,3.2,1.3,0.2,"setosa" 5 | "4",4.6,3.1,1.5,0.2,"setosa" 6 | "5",5,3.6,1.4,0.2,"setosa" 7 | "6",5.4,3.9,1.7,0.4,"setosa" 8 | "7",4.6,3.4,1.4,0.3,"setosa" 9 | "8",5,3.4,1.5,0.2,"setosa" 10 | "9",4.4,2.9,1.4,0.2,"setosa" 11 | "10",4.9,3.1,1.5,0.1,"setosa" 12 | "11",5.4,3.7,1.5,0.2,"setosa" 13 | "12",4.8,3.4,1.6,0.2,"setosa" 14 | "13",4.8,3,1.4,0.1,"setosa" 15 | 
"14",4.3,3,1.1,0.1,"setosa" 16 | "15",5.8,4,1.2,0.2,"setosa" 17 | "16",5.7,4.4,1.5,0.4,"setosa" 18 | "17",5.4,3.9,1.3,0.4,"setosa" 19 | "18",5.1,3.5,1.4,0.3,"setosa" 20 | "19",5.7,3.8,1.7,0.3,"setosa" 21 | "20",5.1,3.8,1.5,0.3,"setosa" 22 | "21",5.4,3.4,1.7,0.2,"setosa" 23 | "22",5.1,3.7,1.5,0.4,"setosa" 24 | "23",4.6,3.6,1,0.2,"setosa" 25 | "24",5.1,3.3,1.7,0.5,"setosa" 26 | "25",4.8,3.4,1.9,0.2,"setosa" 27 | "26",5,3,1.6,0.2,"setosa" 28 | "27",5,3.4,1.6,0.4,"setosa" 29 | "28",5.2,3.5,1.5,0.2,"setosa" 30 | "29",5.2,3.4,1.4,0.2,"setosa" 31 | "30",4.7,3.2,1.6,0.2,"setosa" 32 | "31",4.8,3.1,1.6,0.2,"setosa" 33 | "32",5.4,3.4,1.5,0.4,"setosa" 34 | "33",5.2,4.1,1.5,0.1,"setosa" 35 | "34",5.5,4.2,1.4,0.2,"setosa" 36 | "35",4.9,3.1,1.5,0.2,"setosa" 37 | "36",5,3.2,1.2,0.2,"setosa" 38 | "37",5.5,3.5,1.3,0.2,"setosa" 39 | "38",4.9,3.6,1.4,0.1,"setosa" 40 | "39",4.4,3,1.3,0.2,"setosa" 41 | "40",5.1,3.4,1.5,0.2,"setosa" 42 | "41",5,3.5,1.3,0.3,"setosa" 43 | "42",4.5,2.3,1.3,0.3,"setosa" 44 | "43",4.4,3.2,1.3,0.2,"setosa" 45 | "44",5,3.5,1.6,0.6,"setosa" 46 | "45",5.1,3.8,1.9,0.4,"setosa" 47 | "46",4.8,3,1.4,0.3,"setosa" 48 | "47",5.1,3.8,1.6,0.2,"setosa" 49 | "48",4.6,3.2,1.4,0.2,"setosa" 50 | "49",5.3,3.7,1.5,0.2,"setosa" 51 | "50",5,3.3,1.4,0.2,"setosa" 52 | "51",7,3.2,4.7,1.4,"versicolor" 53 | "52",6.4,3.2,4.5,1.5,"versicolor" 54 | "53",6.9,3.1,4.9,1.5,"versicolor" 55 | "54",5.5,2.3,4,1.3,"versicolor" 56 | "55",6.5,2.8,4.6,1.5,"versicolor" 57 | "56",5.7,2.8,4.5,1.3,"versicolor" 58 | "57",6.3,3.3,4.7,1.6,"versicolor" 59 | "58",4.9,2.4,3.3,1,"versicolor" 60 | "59",6.6,2.9,4.6,1.3,"versicolor" 61 | "60",5.2,2.7,3.9,1.4,"versicolor" 62 | "61",5,2,3.5,1,"versicolor" 63 | "62",5.9,3,4.2,1.5,"versicolor" 64 | "63",6,2.2,4,1,"versicolor" 65 | "64",6.1,2.9,4.7,1.4,"versicolor" 66 | "65",5.6,2.9,3.6,1.3,"versicolor" 67 | "66",6.7,3.1,4.4,1.4,"versicolor" 68 | "67",5.6,3,4.5,1.5,"versicolor" 69 | "68",5.8,2.7,4.1,1,"versicolor" 70 | "69",6.2,2.2,4.5,1.5,"versicolor" 71 | 
"70",5.6,2.5,3.9,1.1,"versicolor" 72 | "71",5.9,3.2,4.8,1.8,"versicolor" 73 | "72",6.1,2.8,4,1.3,"versicolor" 74 | "73",6.3,2.5,4.9,1.5,"versicolor" 75 | "74",6.1,2.8,4.7,1.2,"versicolor" 76 | "75",6.4,2.9,4.3,1.3,"versicolor" 77 | "76",6.6,3,4.4,1.4,"versicolor" 78 | "77",6.8,2.8,4.8,1.4,"versicolor" 79 | "78",6.7,3,5,1.7,"versicolor" 80 | "79",6,2.9,4.5,1.5,"versicolor" 81 | "80",5.7,2.6,3.5,1,"versicolor" 82 | "81",5.5,2.4,3.8,1.1,"versicolor" 83 | "82",5.5,2.4,3.7,1,"versicolor" 84 | "83",5.8,2.7,3.9,1.2,"versicolor" 85 | "84",6,2.7,5.1,1.6,"versicolor" 86 | "85",5.4,3,4.5,1.5,"versicolor" 87 | "86",6,3.4,4.5,1.6,"versicolor" 88 | "87",6.7,3.1,4.7,1.5,"versicolor" 89 | "88",6.3,2.3,4.4,1.3,"versicolor" 90 | "89",5.6,3,4.1,1.3,"versicolor" 91 | "90",5.5,2.5,4,1.3,"versicolor" 92 | "91",5.5,2.6,4.4,1.2,"versicolor" 93 | "92",6.1,3,4.6,1.4,"versicolor" 94 | "93",5.8,2.6,4,1.2,"versicolor" 95 | "94",5,2.3,3.3,1,"versicolor" 96 | "95",5.6,2.7,4.2,1.3,"versicolor" 97 | "96",5.7,3,4.2,1.2,"versicolor" 98 | "97",5.7,2.9,4.2,1.3,"versicolor" 99 | "98",6.2,2.9,4.3,1.3,"versicolor" 100 | "99",5.1,2.5,3,1.1,"versicolor" 101 | "100",5.7,2.8,4.1,1.3,"versicolor" 102 | "101",6.3,3.3,6,2.5,"virginica" 103 | "102",5.8,2.7,5.1,1.9,"virginica" 104 | "103",7.1,3,5.9,2.1,"virginica" 105 | "104",6.3,2.9,5.6,1.8,"virginica" 106 | "105",6.5,3,5.8,2.2,"virginica" 107 | "106",7.6,3,6.6,2.1,"virginica" 108 | "107",4.9,2.5,4.5,1.7,"virginica" 109 | "108",7.3,2.9,6.3,1.8,"virginica" 110 | "109",6.7,2.5,5.8,1.8,"virginica" 111 | "110",7.2,3.6,6.1,2.5,"virginica" 112 | "111",6.5,3.2,5.1,2,"virginica" 113 | "112",6.4,2.7,5.3,1.9,"virginica" 114 | "113",6.8,3,5.5,2.1,"virginica" 115 | "114",5.7,2.5,5,2,"virginica" 116 | "115",5.8,2.8,5.1,2.4,"virginica" 117 | "116",6.4,3.2,5.3,2.3,"virginica" 118 | "117",6.5,3,5.5,1.8,"virginica" 119 | "118",7.7,3.8,6.7,2.2,"virginica" 120 | "119",7.7,2.6,6.9,2.3,"virginica" 121 | "120",6,2.2,5,1.5,"virginica" 122 | "121",6.9,3.2,5.7,2.3,"virginica" 123 | 
"122",5.6,2.8,4.9,2,"virginica" 124 | "123",7.7,2.8,6.7,2,"virginica" 125 | "124",6.3,2.7,4.9,1.8,"virginica" 126 | "125",6.7,3.3,5.7,2.1,"virginica" 127 | "126",7.2,3.2,6,1.8,"virginica" 128 | "127",6.2,2.8,4.8,1.8,"virginica" 129 | "128",6.1,3,4.9,1.8,"virginica" 130 | "129",6.4,2.8,5.6,2.1,"virginica" 131 | "130",7.2,3,5.8,1.6,"virginica" 132 | "131",7.4,2.8,6.1,1.9,"virginica" 133 | "132",7.9,3.8,6.4,2,"virginica" 134 | "133",6.4,2.8,5.6,2.2,"virginica" 135 | "134",6.3,2.8,5.1,1.5,"virginica" 136 | "135",6.1,2.6,5.6,1.4,"virginica" 137 | "136",7.7,3,6.1,2.3,"virginica" 138 | "137",6.3,3.4,5.6,2.4,"virginica" 139 | "138",6.4,3.1,5.5,1.8,"virginica" 140 | "139",6,3,4.8,1.8,"virginica" 141 | "140",6.9,3.1,5.4,2.1,"virginica" 142 | "141",6.7,3.1,5.6,2.4,"virginica" 143 | "142",6.9,3.1,5.1,2.3,"virginica" 144 | "143",5.8,2.7,5.1,1.9,"virginica" 145 | "144",6.8,3.2,5.9,2.3,"virginica" 146 | "145",6.7,3.3,5.7,2.5,"virginica" 147 | "146",6.7,3,5.2,2.3,"virginica" 148 | "147",6.3,2.5,5,1.9,"virginica" 149 | "148",6.5,3,5.2,2,"virginica" 150 | "149",6.2,3.4,5.4,2.3,"virginica" 151 | "150",5.9,3,5.1,1.8,"virginica" 152 | -------------------------------------------------------------------------------- /data/iris.data.csv: -------------------------------------------------------------------------------- 1 | 5.1,3.5,1.4,0.2,setosa 2 | 4.9,3.0,1.4,0.2,setosa 3 | 4.7,3.2,1.3,0.2,setosa 4 | 4.6,3.1,1.5,0.2,setosa 5 | 5.0,3.6,1.4,0.2,setosa 6 | 5.4,3.9,1.7,0.4,setosa 7 | 4.6,3.4,1.4,0.3,setosa 8 | 5.0,3.4,1.5,0.2,setosa 9 | 4.4,2.9,1.4,0.2,setosa 10 | 4.9,3.1,1.5,0.1,setosa 11 | 5.4,3.7,1.5,0.2,setosa 12 | 4.8,3.4,1.6,0.2,setosa 13 | 4.8,3.0,1.4,0.1,setosa 14 | 4.3,3.0,1.1,0.1,setosa 15 | 5.8,4.0,1.2,0.2,setosa 16 | 5.7,4.4,1.5,0.4,setosa 17 | 5.4,3.9,1.3,0.4,setosa 18 | 5.1,3.5,1.4,0.3,setosa 19 | 5.7,3.8,1.7,0.3,setosa 20 | 5.1,3.8,1.5,0.3,setosa 21 | 5.4,3.4,1.7,0.2,setosa 22 | 5.1,3.7,1.5,0.4,setosa 23 | 4.6,3.6,1.0,0.2,setosa 24 | 5.1,3.3,1.7,0.5,setosa 25 | 
4.8,3.4,1.9,0.2,setosa 26 | 5.0,3.0,1.6,0.2,setosa 27 | 5.0,3.4,1.6,0.4,setosa 28 | 5.2,3.5,1.5,0.2,setosa 29 | 5.2,3.4,1.4,0.2,setosa 30 | 4.7,3.2,1.6,0.2,setosa 31 | 4.8,3.1,1.6,0.2,setosa 32 | 5.4,3.4,1.5,0.4,setosa 33 | 5.2,4.1,1.5,0.1,setosa 34 | 5.5,4.2,1.4,0.2,setosa 35 | 4.9,3.1,1.5,0.2,setosa 36 | 5.0,3.2,1.2,0.2,setosa 37 | 5.5,3.5,1.3,0.2,setosa 38 | 4.9,3.6,1.4,0.1,setosa 39 | 4.4,3.0,1.3,0.2,setosa 40 | 5.1,3.4,1.5,0.2,setosa 41 | 5.0,3.5,1.3,0.3,setosa 42 | 4.5,2.3,1.3,0.3,setosa 43 | 4.4,3.2,1.3,0.2,setosa 44 | 5.0,3.5,1.6,0.6,setosa 45 | 5.1,3.8,1.9,0.4,setosa 46 | 4.8,3.0,1.4,0.3,setosa 47 | 5.1,3.8,1.6,0.2,setosa 48 | 4.6,3.2,1.4,0.2,setosa 49 | 5.3,3.7,1.5,0.2,setosa 50 | 5.0,3.3,1.4,0.2,setosa 51 | 7.0,3.2,4.7,1.4,versicolor 52 | 6.4,3.2,4.5,1.5,versicolor 53 | 6.9,3.1,4.9,1.5,versicolor 54 | 5.5,2.3,4.0,1.3,versicolor 55 | 6.5,2.8,4.6,1.5,versicolor 56 | 5.7,2.8,4.5,1.3,versicolor 57 | 6.3,3.3,4.7,1.6,versicolor 58 | 4.9,2.4,3.3,1.0,versicolor 59 | 6.6,2.9,4.6,1.3,versicolor 60 | 5.2,2.7,3.9,1.4,versicolor 61 | 5.0,2.0,3.5,1.0,versicolor 62 | 5.9,3.0,4.2,1.5,versicolor 63 | 6.0,2.2,4.0,1.0,versicolor 64 | 6.1,2.9,4.7,1.4,versicolor 65 | 5.6,2.9,3.6,1.3,versicolor 66 | 6.7,3.1,4.4,1.4,versicolor 67 | 5.6,3.0,4.5,1.5,versicolor 68 | 5.8,2.7,4.1,1.0,versicolor 69 | 6.2,2.2,4.5,1.5,versicolor 70 | 5.6,2.5,3.9,1.1,versicolor 71 | 5.9,3.2,4.8,1.8,versicolor 72 | 6.1,2.8,4.0,1.3,versicolor 73 | 6.3,2.5,4.9,1.5,versicolor 74 | 6.1,2.8,4.7,1.2,versicolor 75 | 6.4,2.9,4.3,1.3,versicolor 76 | 6.6,3.0,4.4,1.4,versicolor 77 | 6.8,2.8,4.8,1.4,versicolor 78 | 6.7,3.0,5.0,1.7,versicolor 79 | 6.0,2.9,4.5,1.5,versicolor 80 | 5.7,2.6,3.5,1.0,versicolor 81 | 5.5,2.4,3.8,1.1,versicolor 82 | 5.5,2.4,3.7,1.0,versicolor 83 | 5.8,2.7,3.9,1.2,versicolor 84 | 6.0,2.7,5.1,1.6,versicolor 85 | 5.4,3.0,4.5,1.5,versicolor 86 | 6.0,3.4,4.5,1.6,versicolor 87 | 6.7,3.1,4.7,1.5,versicolor 88 | 6.3,2.3,4.4,1.3,versicolor 89 | 5.6,3.0,4.1,1.3,versicolor 90 | 
5.5,2.5,4.0,1.3,versicolor 91 | 5.5,2.6,4.4,1.2,versicolor 92 | 6.1,3.0,4.6,1.4,versicolor 93 | 5.8,2.6,4.0,1.2,versicolor 94 | 5.0,2.3,3.3,1.0,versicolor 95 | 5.6,2.7,4.2,1.3,versicolor 96 | 5.7,3.0,4.2,1.2,versicolor 97 | 5.7,2.9,4.2,1.3,versicolor 98 | 6.2,2.9,4.3,1.3,versicolor 99 | 5.1,2.5,3.0,1.1,versicolor 100 | 5.7,2.8,4.1,1.3,versicolor 101 | 6.3,3.3,6.0,2.5,virginica 102 | 5.8,2.7,5.1,1.9,virginica 103 | 7.1,3.0,5.9,2.1,virginica 104 | 6.3,2.9,5.6,1.8,virginica 105 | 6.5,3.0,5.8,2.2,virginica 106 | 7.6,3.0,6.6,2.1,virginica 107 | 4.9,2.5,4.5,1.7,virginica 108 | 7.3,2.9,6.3,1.8,virginica 109 | 6.7,2.5,5.8,1.8,virginica 110 | 7.2,3.6,6.1,2.5,virginica 111 | 6.5,3.2,5.1,2.0,virginica 112 | 6.4,2.7,5.3,1.9,virginica 113 | 6.8,3.0,5.5,2.1,virginica 114 | 5.7,2.5,5.0,2.0,virginica 115 | 5.8,2.8,5.1,2.4,virginica 116 | 6.4,3.2,5.3,2.3,virginica 117 | 6.5,3.0,5.5,1.8,virginica 118 | 7.7,3.8,6.7,2.2,virginica 119 | 7.7,2.6,6.9,2.3,virginica 120 | 6.0,2.2,5.0,1.5,virginica 121 | 6.9,3.2,5.7,2.3,virginica 122 | 5.6,2.8,4.9,2.0,virginica 123 | 7.7,2.8,6.7,2.0,virginica 124 | 6.3,2.7,4.9,1.8,virginica 125 | 6.7,3.3,5.7,2.1,virginica 126 | 7.2,3.2,6.0,1.8,virginica 127 | 6.2,2.8,4.8,1.8,virginica 128 | 6.1,3.0,4.9,1.8,virginica 129 | 6.4,2.8,5.6,2.1,virginica 130 | 7.2,3.0,5.8,1.6,virginica 131 | 7.4,2.8,6.1,1.9,virginica 132 | 7.9,3.8,6.4,2.0,virginica 133 | 6.4,2.8,5.6,2.2,virginica 134 | 6.3,2.8,5.1,1.5,virginica 135 | 6.1,2.6,5.6,1.4,virginica 136 | 7.7,3.0,6.1,2.3,virginica 137 | 6.3,3.4,5.6,2.4,virginica 138 | 6.4,3.1,5.5,1.8,virginica 139 | 6.0,3.0,4.8,1.8,virginica 140 | 6.9,3.1,5.4,2.1,virginica 141 | 6.7,3.1,5.6,2.4,virginica 142 | 6.9,3.1,5.1,2.3,virginica 143 | 5.8,2.7,5.1,1.9,virginica 144 | 6.8,3.2,5.9,2.3,virginica 145 | 6.7,3.3,5.7,2.5,virginica 146 | 6.7,3.0,5.2,2.3,virginica 147 | 6.3,2.5,5.0,1.9,virginica 148 | 6.5,3.0,5.2,2.0,virginica 149 | 6.2,3.4,5.4,2.3,virginica 150 | 5.9,3.0,5.1,1.8,virginica 151 | 
-------------------------------------------------------------------------------- /data/mtcars.csv: -------------------------------------------------------------------------------- 1 | "","mpg","cyl","disp","hp","drat","wt","qsec","vs","am","gear","carb" 2 | "Mazda RX4",21,6,160,110,3.9,2.62,16.46,0,1,4,4 3 | "Mazda RX4 Wag",21,6,160,110,3.9,2.875,17.02,0,1,4,4 4 | "Datsun 710",22.8,4,108,93,3.85,2.32,18.61,1,1,4,1 5 | "Hornet 4 Drive",21.4,6,258,110,3.08,3.215,19.44,1,0,3,1 6 | "Hornet Sportabout",18.7,8,360,175,3.15,3.44,17.02,0,0,3,2 7 | "Valiant",18.1,6,225,105,2.76,3.46,20.22,1,0,3,1 8 | "Duster 360",14.3,8,360,245,3.21,3.57,15.84,0,0,3,4 9 | "Merc 240D",24.4,4,146.7,62,3.69,3.19,20,1,0,4,2 10 | "Merc 230",22.8,4,140.8,95,3.92,3.15,22.9,1,0,4,2 11 | "Merc 280",19.2,6,167.6,123,3.92,3.44,18.3,1,0,4,4 12 | "Merc 280C",17.8,6,167.6,123,3.92,3.44,18.9,1,0,4,4 13 | "Merc 450SE",16.4,8,275.8,180,3.07,4.07,17.4,0,0,3,3 14 | "Merc 450SL",17.3,8,275.8,180,3.07,3.73,17.6,0,0,3,3 15 | "Merc 450SLC",15.2,8,275.8,180,3.07,3.78,18,0,0,3,3 16 | "Cadillac Fleetwood",10.4,8,472,205,2.93,5.25,17.98,0,0,3,4 17 | "Lincoln Continental",10.4,8,460,215,3,5.424,17.82,0,0,3,4 18 | "Chrysler Imperial",14.7,8,440,230,3.23,5.345,17.42,0,0,3,4 19 | "Fiat 128",32.4,4,78.7,66,4.08,2.2,19.47,1,1,4,1 20 | "Honda Civic",30.4,4,75.7,52,4.93,1.615,18.52,1,1,4,2 21 | "Toyota Corolla",33.9,4,71.1,65,4.22,1.835,19.9,1,1,4,1 22 | "Toyota Corona",21.5,4,120.1,97,3.7,2.465,20.01,1,0,3,1 23 | "Dodge Challenger",15.5,8,318,150,2.76,3.52,16.87,0,0,3,2 24 | "AMC Javelin",15.2,8,304,150,3.15,3.435,17.3,0,0,3,2 25 | "Camaro Z28",13.3,8,350,245,3.73,3.84,15.41,0,0,3,4 26 | "Pontiac Firebird",19.2,8,400,175,3.08,3.845,17.05,0,0,3,2 27 | "Fiat X1-9",27.3,4,79,66,4.08,1.935,18.9,1,1,4,1 28 | "Porsche 914-2",26,4,120.3,91,4.43,2.14,16.7,0,1,5,2 29 | "Lotus Europa",30.4,4,95.1,113,3.77,1.513,16.9,1,1,5,2 30 | "Ford Pantera L",15.8,8,351,264,4.22,3.17,14.5,0,1,5,4 31 | "Ferrari 
Dino",19.7,6,145,175,3.62,2.77,15.5,0,1,5,6 32 | "Maserati Bora",15,8,301,335,3.54,3.57,14.6,0,1,5,8 33 | "Volvo 142E",21.4,4,121,109,4.11,2.78,18.6,1,1,4,2 34 | -------------------------------------------------------------------------------- /data/test: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /notebooks/02_03b.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Data Filtering and Selection" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 2, 13 | "id": "acd063a4", 14 | "metadata": {}, 15 | "outputs": [ 16 | { 17 | "name": "stdout", 18 | "output_type": "stream", 19 | "text": [ 20 | "Requirement already satisfied: numpy in /home/codespace/.local/lib/python3.10/site-packages (1.26.1)\n", 21 | "\n", 22 | "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.1\u001b[0m\n", 23 | "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython -m pip install --upgrade pip\u001b[0m\n", 24 | "Requirement already satisfied: pandas in /home/codespace/.local/lib/python3.10/site-packages (2.1.1)\n", 25 | "Requirement already satisfied: numpy>=1.22.4 in /home/codespace/.local/lib/python3.10/site-packages (from pandas) (1.26.1)\n", 26 | "Requirement already satisfied: python-dateutil>=2.8.2 in /home/codespace/.local/lib/python3.10/site-packages (from pandas) (2.8.2)\n", 27 | "Requirement already satisfied: pytz>=2020.1 in /home/codespace/.local/lib/python3.10/site-packages (from pandas) (2023.3.post1)\n", 28 | "Requirement already satisfied: tzdata>=2022.1 in 
/home/codespace/.local/lib/python3.10/site-packages (from pandas) (2023.3)\n", 29 | "Requirement already satisfied: six>=1.5 in /home/codespace/.local/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n", 30 | "\n", 31 | "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.1\u001b[0m\n", 32 | "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython -m pip install --upgrade pip\u001b[0m\n" 33 | ] 34 | } 35 | ], 36 | "source": [ 37 | "!pip install numpy\n", 38 | "!pip install pandas" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 3, 44 | "id": "0b48db56-f3a4-47b5-998e-37d64588ae49", 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "import numpy as np\n", 49 | "import pandas as pd\n", 50 | "\n", 51 | "from pandas import DataFrame" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 13, 57 | "id": "bbba446d", 58 | "metadata": {}, 59 | "outputs": [ 60 | { 61 | "data": { 62 | "text/html": [ 63 | "
\n", 64 | "\n", 77 | "\n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | "
column 1column 2column 3
row 1036
row 291215
row 3182124
row 4273033
row 5363942
row 6454851
row 7545760
row 8636669
row 9727578
row 10818487
\n", 149 | "
" 150 | ], 151 | "text/plain": [ 152 | " column 1 column 2 column 3\n", 153 | "row 1 0 3 6\n", 154 | "row 2 9 12 15\n", 155 | "row 3 18 21 24\n", 156 | "row 4 27 30 33\n", 157 | "row 5 36 39 42\n", 158 | "row 6 45 48 51\n", 159 | "row 7 54 57 60\n", 160 | "row 8 63 66 69\n", 161 | "row 9 72 75 78\n", 162 | "row 10 81 84 87" 163 | ] 164 | }, 165 | "execution_count": 13, 166 | "metadata": {}, 167 | "output_type": "execute_result" 168 | } 169 | ], 170 | "source": [ 171 | "numbers_df = DataFrame(np.arange(0,90,3).reshape(10,3), index = ['row 1','row 2','row 3','row 4','row 5','row 6','row 7','row 8','row 9','row 10'],columns=['column 1','column 2','column 3'])\n", 172 | "numbers_df" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "id": "68b700fd-5320-47e2-852c-a13aab49a486", 178 | "metadata": {}, 179 | "source": [ 180 | "#### Comparison operators (> < >= <= == !=) and Masking." 181 | ] 182 | } 183 | ], 184 | "metadata": { 185 | "kernelspec": { 186 | "display_name": "Python 3", 187 | "language": "python", 188 | "name": "python3" 189 | }, 190 | "language_info": { 191 | "codemirror_mode": { 192 | "name": "ipython", 193 | "version": 3 194 | }, 195 | "file_extension": ".py", 196 | "mimetype": "text/x-python", 197 | "name": "python", 198 | "nbconvert_exporter": "python", 199 | "pygments_lexer": "ipython3", 200 | "version": "3.10.8" 201 | } 202 | }, 203 | "nbformat": 4, 204 | "nbformat_minor": 5 205 | } 206 | -------------------------------------------------------------------------------- /notebooks/02_04b.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "c9327e33-5afa-4ffb-aca9-6bdc963ef9ff", 6 | "metadata": {}, 7 | "source": [ 8 | "## Working with Missing Data in Pandas" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "5be0cfbf-e779-42b3-8bd6-f3dd46888ebb", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 |
"import numpy as np\n", 19 | "import pandas as pd\n", 20 | "\n", 21 | "from pandas import DataFrame" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "id": "61bbc7e0-5198-4609-a66a-d4b437606dac", 27 | "metadata": {}, 28 | "source": [ 29 | "### Filling missing values using fillna(), replace() and interpolate()" 30 | ] 31 | } 32 | ], 33 | "metadata": { 34 | "kernelspec": { 35 | "display_name": "Python 3", 36 | "language": "python", 37 | "name": "python3" 38 | }, 39 | "language_info": { 40 | "codemirror_mode": { 41 | "name": "ipython", 42 | "version": 3 43 | }, 44 | "file_extension": ".py", 45 | "mimetype": "text/x-python", 46 | "name": "python", 47 | "nbconvert_exporter": "python", 48 | "pygments_lexer": "ipython3", 49 | "version": "3.10.8" 50 | } 51 | }, 52 | "nbformat": 4, 53 | "nbformat_minor": 5 54 | } 55 | -------------------------------------------------------------------------------- /notebooks/02_05b.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | "\n", 12 | "from pandas import Series, DataFrame" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "### Removing duplicates" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [] 28 | } 29 | ], 30 | "metadata": { 31 | "anaconda-cloud": {}, 32 | "kernelspec": { 33 | "display_name": "Python 3", 34 | "language": "python", 35 | "name": "python3" 36 | }, 37 | "language_info": { 38 | "codemirror_mode": { 39 | "name": "ipython", 40 | "version": 3 41 | }, 42 | "file_extension": ".py", 43 | "mimetype": "text/x-python", 44 | "name": "python", 45 | "nbconvert_exporter": "python", 46 | "pygments_lexer": "ipython3", 47 | "version": "3.8.8" 48 | } 49 | }, 50 | 
"nbformat": 4, 51 | "nbformat_minor": 1 52 | } 53 | -------------------------------------------------------------------------------- /notebooks/02_05e.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | "\n", 12 | "from pandas import Series, DataFrame" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "### Removing duplicates" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/html": [ 30 | "
\n", 31 | "\n", 44 | "\n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | "
column 1column 2column 3
01aA
11aA
22bB
32bB
43cC
53cC
63cC
\n", 98 | "
" 99 | ], 100 | "text/plain": [ 101 | " column 1 column 2 column 3\n", 102 | "0 1 a A\n", 103 | "1 1 a A\n", 104 | "2 2 b B\n", 105 | "3 2 b B\n", 106 | "4 3 c C\n", 107 | "5 3 c C\n", 108 | "6 3 c C" 109 | ] 110 | }, 111 | "execution_count": 2, 112 | "metadata": {}, 113 | "output_type": "execute_result" 114 | } 115 | ], 116 | "source": [ 117 | "DF_obj = DataFrame({'column 1': [1,1,2,2,3,3,3],\n", 118 | " 'column 2':['a', 'a', 'b', 'b', 'c', 'c', 'c'],\n", 119 | " 'column 3': ['A', 'A', 'B', 'B', 'C', 'C', 'C']})\n", 120 | "DF_obj" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 3, 126 | "metadata": {}, 127 | "outputs": [ 128 | { 129 | "data": { 130 | "text/plain": [ 131 | "0 False\n", 132 | "1 True\n", 133 | "2 False\n", 134 | "3 True\n", 135 | "4 False\n", 136 | "5 True\n", 137 | "6 True\n", 138 | "dtype: bool" 139 | ] 140 | }, 141 | "execution_count": 3, 142 | "metadata": {}, 143 | "output_type": "execute_result" 144 | } 145 | ], 146 | "source": [ 147 | "DF_obj.duplicated()" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 4, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "data": { 157 | "text/html": [ 158 | "
\n", 159 | "\n", 172 | "\n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | "
column 1column 2column 3
01aA
22bB
43cC
\n", 202 | "
" 203 | ], 204 | "text/plain": [ 205 | " column 1 column 2 column 3\n", 206 | "0 1 a A\n", 207 | "2 2 b B\n", 208 | "4 3 c C" 209 | ] 210 | }, 211 | "execution_count": 4, 212 | "metadata": {}, 213 | "output_type": "execute_result" 214 | } 215 | ], 216 | "source": [ 217 | "DF_obj.drop_duplicates()" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": 5, 223 | "metadata": {}, 224 | "outputs": [], 225 | "source": [ 226 | "DF_obj = DataFrame({'column 1': [1,1,2,2,3,3,3],\n", 227 | " 'column 2':['a', 'a', 'b', 'b', 'c', 'c', 'c'],\n", 228 | " 'column 3': ['A', 'A', 'B', 'B', 'C', 'D', 'C']})" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": 6, 234 | "metadata": {}, 235 | "outputs": [ 236 | { 237 | "data": { 238 | "text/html": [ 239 | "
\n", 240 | "\n", 253 | "\n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | "
column 1column 2column 3
01aA
11aA
22bB
32bB
43cC
53cD
63cC
\n", 307 | "
" 308 | ], 309 | "text/plain": [ 310 | " column 1 column 2 column 3\n", 311 | "0 1 a A\n", 312 | "1 1 a A\n", 313 | "2 2 b B\n", 314 | "3 2 b B\n", 315 | "4 3 c C\n", 316 | "5 3 c D\n", 317 | "6 3 c C" 318 | ] 319 | }, 320 | "execution_count": 6, 321 | "metadata": {}, 322 | "output_type": "execute_result" 323 | } 324 | ], 325 | "source": [ 326 | "DF_obj" 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": 7, 332 | "metadata": {}, 333 | "outputs": [ 334 | { 335 | "data": { 336 | "text/html": [ 337 | "
\n", 338 | "\n", 351 | "\n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | "
column 1column 2column 3
01aA
22bB
43cC
53cD
\n", 387 | "
" 388 | ], 389 | "text/plain": [ 390 | " column 1 column 2 column 3\n", 391 | "0 1 a A\n", 392 | "2 2 b B\n", 393 | "4 3 c C\n", 394 | "5 3 c D" 395 | ] 396 | }, 397 | "execution_count": 7, 398 | "metadata": {}, 399 | "output_type": "execute_result" 400 | } 401 | ], 402 | "source": [ 403 | "DF_obj.drop_duplicates(['column 3'])" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": null, 409 | "metadata": {}, 410 | "outputs": [], 411 | "source": [] 412 | } 413 | ], 414 | "metadata": { 415 | "anaconda-cloud": {}, 416 | "kernelspec": { 417 | "display_name": "Python 3", 418 | "language": "python", 419 | "name": "python3" 420 | }, 421 | "language_info": { 422 | "codemirror_mode": { 423 | "name": "ipython", 424 | "version": 3 425 | }, 426 | "file_extension": ".py", 427 | "mimetype": "text/x-python", 428 | "name": "python", 429 | "nbconvert_exporter": "python", 430 | "pygments_lexer": "ipython3", 431 | "version": "3.10.13" 432 | } 433 | }, 434 | "nbformat": 4, 435 | "nbformat_minor": 1 436 | } 437 | -------------------------------------------------------------------------------- /notebooks/02_06b.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | "\n", 12 | "from pandas import Series, DataFrame" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "### Concatenating data" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "### Transforming data\n", 27 | "#### Dropping data" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "#### Adding data" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "#### Sorting data" 42 | ] 43 | }, 44 | { 45 | 
"cell_type": "code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [] 50 | } 51 | ], 52 | "metadata": { 53 | "kernelspec": { 54 | "display_name": "Python 3", 55 | "language": "python", 56 | "name": "python3" 57 | }, 58 | "language_info": { 59 | "codemirror_mode": { 60 | "name": "ipython", 61 | "version": 3 62 | }, 63 | "file_extension": ".py", 64 | "mimetype": "text/x-python", 65 | "name": "python", 66 | "nbconvert_exporter": "python", 67 | "pygments_lexer": "ipython3", 68 | "version": "3.8.8" 69 | } 70 | }, 71 | "nbformat": 4, 72 | "nbformat_minor": 1 73 | } 74 | -------------------------------------------------------------------------------- /notebooks/02_07b.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | "from pandas import Series, DataFrame" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "### Grouping data by column index" 19 | ] 20 | } 21 | ], 22 | "metadata": { 23 | "kernelspec": { 24 | "display_name": "Python 3", 25 | "language": "python", 26 | "name": "python3" 27 | }, 28 | "language_info": { 29 | "codemirror_mode": { 30 | "name": "ipython", 31 | "version": 3 32 | }, 33 | "file_extension": ".py", 34 | "mimetype": "text/x-python", 35 | "name": "python", 36 | "nbconvert_exporter": "python", 37 | "pygments_lexer": "ipython3", 38 | "version": "3.7.1" 39 | } 40 | }, 41 | "nbformat": 4, 42 | "nbformat_minor": 1 43 | } 44 | -------------------------------------------------------------------------------- /notebooks/02_07e.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 10, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | 
"import numpy as np\n", 10 | "import pandas as pd\n", 11 | "from pandas import Series, DataFrame" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "### Grouping data by column index" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 11, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "data": { 28 | "text/html": [ 29 | "
\n", 30 | "\n", 43 | "\n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | "
car_namesmpgcyldisphpdratwtqsecvsamgearcarb
0Mazda RX421.06160.01103.902.62016.460144
1Mazda RX4 Wag21.06160.01103.902.87517.020144
2Datsun 71022.84108.0933.852.32018.611141
3Hornet 4 Drive21.46258.01103.083.21519.441031
4Hornet Sportabout18.78360.01753.153.44017.020032
\n", 139 | "
" 140 | ], 141 | "text/plain": [ 142 | " car_names mpg cyl disp hp drat wt qsec vs am gear \\\n", 143 | "0 Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 \n", 144 | "1 Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 \n", 145 | "2 Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 \n", 146 | "3 Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 \n", 147 | "4 Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 \n", 148 | "\n", 149 | " carb \n", 150 | "0 4 \n", 151 | "1 4 \n", 152 | "2 1 \n", 153 | "3 1 \n", 154 | "4 2 " 155 | ] 156 | }, 157 | "execution_count": 11, 158 | "metadata": {}, 159 | "output_type": "execute_result" 160 | } 161 | ], 162 | "source": [ 163 | "address = '/workspaces/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/data/mtcars.csv'\n", 164 | "\n", 165 | "cars = pd.read_csv(address)\n", 166 | "\n", 167 | "cars.columns = ['car_names','mpg','cyl','disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb']\n", 168 | "\n", 169 | "cars.head()" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 13, 175 | "metadata": {}, 176 | "outputs": [ 177 | { 178 | "data": { 179 | "text/html": [ 180 | "
\n", 181 | "\n", 194 | "\n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | "
mpgdisphpdratwtqsecvsamgearcarb
cyl
426.663636105.13636482.6363644.0709092.28572719.1372730.9090910.7272734.0909091.545455
619.742857183.314286122.2857143.5857143.11714317.9771430.5714290.4285713.8571433.428571
815.100000353.100000209.2142863.2292863.99921416.7721430.0000000.1428573.2857143.500000
\n", 265 | "
" 266 | ], 267 | "text/plain": [ 268 | " mpg disp hp drat wt qsec \\\n", 269 | "cyl \n", 270 | "4 26.663636 105.136364 82.636364 4.070909 2.285727 19.137273 \n", 271 | "6 19.742857 183.314286 122.285714 3.585714 3.117143 17.977143 \n", 272 | "8 15.100000 353.100000 209.214286 3.229286 3.999214 16.772143 \n", 273 | "\n", 274 | " vs am gear carb \n", 275 | "cyl \n", 276 | "4 0.909091 0.727273 4.090909 1.545455 \n", 277 | "6 0.571429 0.428571 3.857143 3.428571 \n", 278 | "8 0.000000 0.142857 3.285714 3.500000 " 279 | ] 280 | }, 281 | "execution_count": 13, 282 | "metadata": {}, 283 | "output_type": "execute_result" 284 | } 285 | ], 286 | "source": [ 287 | "cars_groups = cars.groupby(cars['cyl'])\n", 288 | "cars_groups.mean(numeric_only=True)" 289 | ] 290 | }, 291 | { 292 | "cell_type": "markdown", 293 | "metadata": {}, 294 | "source": [] 295 | } 296 | ], 297 | "metadata": { 298 | "kernelspec": { 299 | "display_name": "Python 3", 300 | "language": "python", 301 | "name": "python3" 302 | }, 303 | "language_info": { 304 | "codemirror_mode": { 305 | "name": "ipython", 306 | "version": 3 307 | }, 308 | "file_extension": ".py", 309 | "mimetype": "text/x-python", 310 | "name": "python", 311 | "nbconvert_exporter": "python", 312 | "pygments_lexer": "ipython3", 313 | "version": "3.10.13" 314 | } 315 | }, 316 | "nbformat": 4, 317 | "nbformat_minor": 1 318 | } 319 | -------------------------------------------------------------------------------- /notebooks/04_01b.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "51445501-72bb-48f0-bfb8-715edb50087e", 6 | "metadata": {}, 7 | "source": [] 8 | }, 9 | { 10 | "cell_type": "code", 11 | "execution_count": null, 12 | "metadata": {}, 13 | "outputs": [], 14 | "source": [] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [] 22 | }, 23 | { 24 | "cell_type": 
"markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "### Matplotlib's Bar Chart" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "id": "7359f9c0-0051-48c5-91fc-5185eae9bfd0", 33 | "metadata": {}, 34 | "source": [ 35 | "### Line Plot Matplotlib" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "id": "f1c8460d-7689-406f-a6ac-2720d8cb867d", 41 | "metadata": {}, 42 | "source": [ 43 | "### Line Plot Seaborn" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "id": "16d65d0b-34a9-4a98-8550-4cdc0dc66c25", 49 | "metadata": {}, 50 | "source": [ 51 | "### Pie Chart Matplotlib" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "id": "4be3dfcc-1d4a-4b7e-bc57-427280da980e", 57 | "metadata": {}, 58 | "source": [ 59 | "### Pie Chart Seaborn" 60 | ] 61 | } 62 | ], 63 | "metadata": { 64 | "kernelspec": { 65 | "display_name": "Python 3", 66 | "language": "python", 67 | "name": "python3" 68 | }, 69 | "language_info": { 70 | "codemirror_mode": { 71 | "name": "ipython", 72 | "version": 3 73 | }, 74 | "file_extension": ".py", 75 | "mimetype": "text/x-python", 76 | "name": "python", 77 | "nbconvert_exporter": "python", 78 | "pygments_lexer": "ipython3", 79 | "version": "3.8.8" 80 | } 81 | }, 82 | "nbformat": 4, 83 | "nbformat_minor": 5 84 | } 85 | -------------------------------------------------------------------------------- /notebooks/04_02b.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Creating a line chart from a list object" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "#### Plotting a line chart in matplotlib" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "#### Plotting a line chart 
from a Pandas object" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "address = '/workspaces/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/data/mtcars.csv'\n", 38 | "\n", 39 | "cars = pd.read_csv(address)\n", 40 | "cars.columns = ['car_names','mpg','cyl','disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb']\n", 41 | "mpg = cars['mpg']" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "### Creating bar charts\n", 49 | "#### Creating a bar chart from a list" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "#### Creating bar charts from Pandas objects" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "### Creating a pie chart" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "### Saving a plot" 71 | ] 72 | } 73 | ], 74 | "metadata": { 75 | "anaconda-cloud": {}, 76 | "kernelspec": { 77 | "display_name": "Python 3", 78 | "language": "python", 79 | "name": "python3" 80 | }, 81 | "language_info": { 82 | "codemirror_mode": { 83 | "name": "ipython", 84 | "version": 3 85 | }, 86 | "file_extension": ".py", 87 | "mimetype": "text/x-python", 88 | "name": "python", 89 | "nbconvert_exporter": "python", 90 | "pygments_lexer": "ipython3", 91 | "version": "3.8.8" 92 | } 93 | }, 94 | "nbformat": 4, 95 | "nbformat_minor": 1 96 | } 97 | -------------------------------------------------------------------------------- /notebooks/04_03b.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "from numpy.random import randn\n", 11 | "import pandas as pd\n", 12 | "from pandas 
import Series, DataFrame\n", 13 | "\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "from matplotlib import rcParams" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "### Defining axes, ticks, and grids" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "### Generating multiple plots in one figure with subplots " 30 | ] 31 | } 32 | ], 33 | "metadata": { 34 | "anaconda-cloud": {}, 35 | "kernelspec": { 36 | "display_name": "Python 3", 37 | "language": "python", 38 | "name": "python3" 39 | }, 40 | "language_info": { 41 | "codemirror_mode": { 42 | "name": "ipython", 43 | "version": 3 44 | }, 45 | "file_extension": ".py", 46 | "mimetype": "text/x-python", 47 | "name": "python", 48 | "nbconvert_exporter": "python", 49 | "pygments_lexer": "ipython3", 50 | "version": "3.8.8" 51 | } 52 | }, 53 | "nbformat": 4, 54 | "nbformat_minor": 1 55 | } 56 | -------------------------------------------------------------------------------- /notebooks/04_04b.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 6, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | "from pandas import Series, DataFrame\n", 12 | "\n", 13 | "import matplotlib.pyplot as plt\n", 14 | "from pylab import rcParams\n", 15 | "\n", 16 | "import seaborn as sb" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "%matplotlib inline\n", 26 | "rcParams['figure.figsize'] = 5, 4\n", 27 | "sb.set_style('whitegrid')" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "### Defining plot color" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [] 
43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "address = '/workspaces/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/data/mtcars.csv'\n", 58 | "\n", 59 | "cars = pd.read_csv(address)\n", 60 | "cars.columns = ['car_names','mpg','cyl','disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb']\n" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "### Customizing line styles" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "### Setting plot markers" 75 | ] 76 | } 77 | ], 78 | "metadata": { 79 | "anaconda-cloud": {}, 80 | "kernelspec": { 81 | "display_name": "Python 3", 82 | "language": "python", 83 | "name": "python3" 84 | }, 85 | "language_info": { 86 | "codemirror_mode": { 87 | "name": "ipython", 88 | "version": 3 89 | }, 90 | "file_extension": ".py", 91 | "mimetype": "text/x-python", 92 | "name": "python", 93 | "nbconvert_exporter": "python", 94 | "pygments_lexer": "ipython3", 95 | "version": "3.8.8" 96 | } 97 | }, 98 | "nbformat": 4, 99 | "nbformat_minor": 1 100 | } 101 | -------------------------------------------------------------------------------- /notebooks/04_05b.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Creating labels and annotations" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 6, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "import pandas as pd\n", 18 | "from pandas import Series, DataFrame\n", 19 | "\n", 20 | "import matplotlib.pyplot as plt\n", 21 | "from pylab import rcParams\n", 22 | "import 
seaborn as sb" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 7, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "%matplotlib inline\n", 32 | "rcParams['figure.figsize'] = 8,4\n", 33 | "sb.set_style('whitegrid')" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": { 39 | "collapsed": true 40 | }, 41 | "source": [ 42 | "### Labeling plot features\n", 43 | "#### The functional method" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "#### The object-oriented method" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "### Adding a legend to your plot\n", 58 | "#### The functional method" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "#### The object-oriented method" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": { 71 | "collapsed": true 72 | }, 73 | "source": [ 74 | "### Annotating your plot" 75 | ] 76 | } 77 | ], 78 | "metadata": { 79 | "anaconda-cloud": {}, 80 | "kernelspec": { 81 | "display_name": "Python 3", 82 | "language": "python", 83 | "name": "python3" 84 | }, 85 | "language_info": { 86 | "codemirror_mode": { 87 | "name": "ipython", 88 | "version": 3 89 | }, 90 | "file_extension": ".py", 91 | "mimetype": "text/x-python", 92 | "name": "python", 93 | "nbconvert_exporter": "python", 94 | "pygments_lexer": "ipython3", 95 | "version": "3.8.8" 96 | } 97 | }, 98 | "nbformat": 4, 99 | "nbformat_minor": 1 100 | } 101 | -------------------------------------------------------------------------------- /notebooks/04_06b.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Visualizing time series" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 
16 | "import numpy as np\n", 17 | "from numpy.random import randn\n", 18 | "import pandas as pd\n", 19 | "from pandas import Series, DataFrame\n", 20 | "\n", 21 | "import matplotlib.pyplot as plt\n", 22 | "from pylab import rcParams\n", 23 | "import seaborn as sb" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 2, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "%matplotlib inline\n", 33 | "rcParams['figure.figsize'] = 5, 4\n", 34 | "sb.set_style('whitegrid')" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "### The simplest time series plot" 42 | ] 43 | } 44 | ], 45 | "metadata": { 46 | "anaconda-cloud": {}, 47 | "kernelspec": { 48 | "display_name": "Python 3", 49 | "language": "python", 50 | "name": "python3" 51 | }, 52 | "language_info": { 53 | "codemirror_mode": { 54 | "name": "ipython", 55 | "version": 3 56 | }, 57 | "file_extension": ".py", 58 | "mimetype": "text/x-python", 59 | "name": "python", 60 | "nbconvert_exporter": "python", 61 | "pygments_lexer": "ipython3", 62 | "version": "3.8.8" 63 | } 64 | }, 65 | "nbformat": 4, 66 | "nbformat_minor": 1 67 | } 68 | -------------------------------------------------------------------------------- /notebooks/04_07b.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 4, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | "from pandas import Series, DataFrame\n", 12 | "\n", 13 | "from pandas.plotting import scatter_matrix\n", 14 | "\n", 15 | "import matplotlib.pyplot as plt\n", 16 | "from pylab import rcParams\n", 17 | "import seaborn as sns" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 5, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "%matplotlib inline\n", 27 | "rcParams['figure.figsize'] = 5, 4\n", 28 | 
"sns.set_style('whitegrid')" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "### Eyeballing dataset distributions with histograms" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 6, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "address = '/workspaces/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/data/mtcars.csv'\n", 45 | "\n", 46 | "cars = pd.read_csv(address)\n", 47 | "\n", 48 | "cars.columns = ['car_names','mpg','cyl','disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb']\n", 49 | "cars.index = cars.car_names\n" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "### Seeing scatterplots in action" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "### Generating a scatter plot matrix\n" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "### Building boxplots\n" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [] 121 | }, 122 | { 123 
| "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "metadata": {}, 133 | "outputs": [], 134 | "source": [] 135 | } 136 | ], 137 | "metadata": { 138 | "anaconda-cloud": {}, 139 | "kernelspec": { 140 | "display_name": "Python 3", 141 | "language": "python", 142 | "name": "python3" 143 | }, 144 | "language_info": { 145 | "codemirror_mode": { 146 | "name": "ipython", 147 | "version": 3 148 | }, 149 | "file_extension": ".py", 150 | "mimetype": "text/x-python", 151 | "name": "python", 152 | "nbconvert_exporter": "python", 153 | "pygments_lexer": "ipython3", 154 | "version": "3.10.13" 155 | } 156 | }, 157 | "nbformat": 4, 158 | "nbformat_minor": 1 159 | } 160 | -------------------------------------------------------------------------------- /notebooks/05_01b.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Using NumPy to perform arithmetic operations on data" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "from numpy.random import randn" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "## Creating arrays\n", 25 | "### Creating arrays using a list" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "### Creating arrays via assignment" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": { 38 | "collapsed": true 39 | }, 40 | "source": [ 41 | "### Multiplying matrices and basic linear algebra" 42 | ] 43 | } 44 | ], 45 | "metadata": { 46 | "kernelspec": { 47 | "display_name": "Python 3", 48 | "language": "python", 49 | "name": "python3" 50 | }, 51 | "language_info": { 52 | 
"codemirror_mode": { 53 | "name": "ipython", 54 | "version": 3 55 | }, 56 | "file_extension": ".py", 57 | "mimetype": "text/x-python", 58 | "name": "python", 59 | "nbconvert_exporter": "python", 60 | "pygments_lexer": "ipython3", 61 | "version": "3.8.8" 62 | } 63 | }, 64 | "nbformat": 4, 65 | "nbformat_minor": 1 66 | } 67 | -------------------------------------------------------------------------------- /notebooks/05_01e.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Using NumPy to perform arithmetic operations on data" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "from numpy.random import randn" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "np.set_printoptions(precision=2)" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "## Creating arrays\n", 34 | "### Creating arrays using a list" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 3, 40 | "metadata": {}, 41 | "outputs": [ 42 | { 43 | "data": { 44 | "text/plain": [ 45 | "array([1, 2, 3, 4, 5, 6])" 46 | ] 47 | }, 48 | "execution_count": 3, 49 | "metadata": {}, 50 | "output_type": "execute_result" 51 | } 52 | ], 53 | "source": [ 54 | "a = np.array([1,2,3,4,5,6])\n", 55 | "a" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 4, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "data": { 65 | "text/plain": [ 66 | "array([[10, 20, 30],\n", 67 | " [40, 50, 60]])" 68 | ] 69 | }, 70 | "execution_count": 4, 71 | "metadata": {}, 72 | "output_type": "execute_result" 73 | } 74 | ], 75 | "source": [ 76 | "b = np.array([[10,20,30],[40,50,60]])\n", 77 | "b" 78 | ] 79 | }, 80 | { 81 | 
"cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "### Creating arrays via assignment" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 5, 90 | "metadata": {}, 91 | "outputs": [ 92 | { 93 | "data": { 94 | "text/plain": [ 95 | "array([ 8.22, 36.97, -30.23, -21.28, -34.45, -8. ])" 96 | ] 97 | }, 98 | "execution_count": 5, 99 | "metadata": {}, 100 | "output_type": "execute_result" 101 | } 102 | ], 103 | "source": [ 104 | "np.random.seed(25)\n", 105 | "c = 36*np.random.randn(6)\n", 106 | "c" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 6, 112 | "metadata": {}, 113 | "outputs": [ 114 | { 115 | "data": { 116 | "text/plain": [ 117 | "array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,\n", 118 | " 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34])" 119 | ] 120 | }, 121 | "execution_count": 6, 122 | "metadata": {}, 123 | "output_type": "execute_result" 124 | } 125 | ], 126 | "source": [ 127 | "d= np.arange(1,35)\n", 128 | "d" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 7, 134 | "metadata": {}, 135 | "outputs": [ 136 | { 137 | "data": { 138 | "text/plain": [ 139 | "array([10, 20, 30, 40, 50, 60])" 140 | ] 141 | }, 142 | "execution_count": 7, 143 | "metadata": {}, 144 | "output_type": "execute_result" 145 | } 146 | ], 147 | "source": [ 148 | "a*10" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 8, 154 | "metadata": {}, 155 | "outputs": [ 156 | { 157 | "data": { 158 | "text/plain": [ 159 | "array([ 9.22, 38.97, -27.23, -17.28, -29.45, -2. ])" 160 | ] 161 | }, 162 | "execution_count": 8, 163 | "metadata": {}, 164 | "output_type": "execute_result" 165 | } 166 | ], 167 | "source": [ 168 | "c+a" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 9, 174 | "metadata": {}, 175 | "outputs": [ 176 | { 177 | "data": { 178 | "text/plain": [ 179 | "array([ 7.22, 34.97, -33.23, -25.28, -39.45, -14. 
])" 180 | ] 181 | }, 182 | "execution_count": 9, 183 | "metadata": {}, 184 | "output_type": "execute_result" 185 | } 186 | ], 187 | "source": [ 188 | "c-a" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 11, 194 | "metadata": {}, 195 | "outputs": [ 196 | { 197 | "data": { 198 | "text/plain": [ 199 | "array([ 8.22, 73.94, -90.68, -85.13, -172.24, -48.02])" 200 | ] 201 | }, 202 | "execution_count": 11, 203 | "metadata": {}, 204 | "output_type": "execute_result" 205 | } 206 | ], 207 | "source": [ 208 | "c*a" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 12, 214 | "metadata": {}, 215 | "outputs": [ 216 | { 217 | "data": { 218 | "text/plain": [ 219 | "array([ 8.22, 18.48, -10.08, -5.32, -6.89, -1.33])" 220 | ] 221 | }, 222 | "execution_count": 12, 223 | "metadata": {}, 224 | "output_type": "execute_result" 225 | } 226 | ], 227 | "source": [ 228 | "c/a" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": { 234 | "collapsed": true 235 | }, 236 | "source": [ 237 | "### Multiplying matrices and basic linear algebra" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "metadata": {}, 244 | "outputs": [], 245 | "source": [] 246 | } 247 | ], 248 | "metadata": { 249 | "kernelspec": { 250 | "display_name": "Python 3", 251 | "language": "python", 252 | "name": "python3" 253 | }, 254 | "language_info": { 255 | "codemirror_mode": { 256 | "name": "ipython", 257 | "version": 3 258 | }, 259 | "file_extension": ".py", 260 | "mimetype": "text/x-python", 261 | "name": "python", 262 | "nbconvert_exporter": "python", 263 | "pygments_lexer": "ipython3", 264 | "version": "3.10.13" 265 | } 266 | }, 267 | "nbformat": 4, 268 | "nbformat_minor": 1 269 | } 270 | -------------------------------------------------------------------------------- /notebooks/05_02b.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | 
"cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Generating summary statistics using pandas and scipy" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 3, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "import pandas as pd\n", 18 | "from pandas import Series, DataFrame\n", 19 | "\n", 20 | "import scipy\n", 21 | "from scipy import stats" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "address = '/workspaces/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/data/mtcars.csv'\n", 31 | "\n", 32 | "cars = pd.read_csv(address)\n", 33 | "cars.columns = ['car_names','mpg','cyl','disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb']\n" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "### Looking at summary statistics that describe a variable's numeric values" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "### Looking at summary statistics that describe variable distribution" 48 | ] 49 | } 50 | ], 51 | "metadata": { 52 | "anaconda-cloud": {}, 53 | "kernelspec": { 54 | "display_name": "Python 3", 55 | "language": "python", 56 | "name": "python3" 57 | }, 58 | "language_info": { 59 | "codemirror_mode": { 60 | "name": "ipython", 61 | "version": 3 62 | }, 63 | "file_extension": ".py", 64 | "mimetype": "text/x-python", 65 | "name": "python", 66 | "nbconvert_exporter": "python", 67 | "pygments_lexer": "ipython3", 68 | "version": "3.8.8" 69 | } 70 | }, 71 | "nbformat": 4, 72 | "nbformat_minor": 1 73 | } 74 | -------------------------------------------------------------------------------- /notebooks/05_02e.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | 
"source": [ 7 | "## Generating summary statistics using pandas and scipy" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "import pandas as pd\n", 18 | "from pandas import Series, DataFrame\n", 19 | "\n", 20 | "import scipy\n", 21 | "from scipy import stats" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 3, 27 | "metadata": {}, 28 | "outputs": [ 29 | { 30 | "data": { 31 | "text/html": [ 32 | "
\n", 33 | "\n", 46 | "\n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | "
car_namesmpgcyldisphpdratwtqsecvsamgearcarb
0Mazda RX421.06160.01103.902.62016.460144
1Mazda RX4 Wag21.06160.01103.902.87517.020144
2Datsun 71022.84108.0933.852.32018.611141
3Hornet 4 Drive21.46258.01103.083.21519.441031
4Hornet Sportabout18.78360.01753.153.44017.020032
\n", 142 | "
" 143 | ], 144 | "text/plain": [ 145 | " car_names mpg cyl disp hp drat wt qsec vs am gear \\\n", 146 | "0 Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 \n", 147 | "1 Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 \n", 148 | "2 Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 \n", 149 | "3 Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 \n", 150 | "4 Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 \n", 151 | "\n", 152 | " carb \n", 153 | "0 4 \n", 154 | "1 4 \n", 155 | "2 1 \n", 156 | "3 1 \n", 157 | "4 2 " 158 | ] 159 | }, 160 | "execution_count": 3, 161 | "metadata": {}, 162 | "output_type": "execute_result" 163 | } 164 | ], 165 | "source": [ 166 | "address = '/workspaces/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/data/mtcars.csv'\n", 167 | "\n", 168 | "cars = pd.read_csv(address)\n", 169 | "cars.columns = ['car_names','mpg','cyl','disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb']\n", 170 | "cars.head()\n" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "metadata": {}, 176 | "source": [ 177 | "### Looking at summary statistics that describe a variable's numeric values" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": 4, 183 | "metadata": {}, 184 | "outputs": [ 185 | { 186 | "data": { 187 | "text/plain": [ 188 | "car_names Mazda RX4Mazda RX4 WagDatsun 710Hornet 4 Drive...\n", 189 | "mpg 642.9\n", 190 | "cyl 198\n", 191 | "disp 7383.1\n", 192 | "hp 4694\n", 193 | "drat 115.09\n", 194 | "wt 102.952\n", 195 | "qsec 571.16\n", 196 | "vs 14\n", 197 | "am 13\n", 198 | "gear 118\n", 199 | "carb 90\n", 200 | "dtype: object" 201 | ] 202 | }, 203 | "execution_count": 4, 204 | "metadata": {}, 205 | "output_type": "execute_result" 206 | } 207 | ], 208 | "source": [ 209 | "cars.sum()" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 5, 215 | "metadata": {}, 216 | "outputs": [ 217 | { 218 | "data": { 219 | "text/plain": [ 220 | "0 
328.980\n", 221 | "1 329.795\n", 222 | "2 259.580\n", 223 | "3 426.135\n", 224 | "4 590.310\n", 225 | "5 385.540\n", 226 | "6 656.920\n", 227 | "7 270.980\n", 228 | "8 299.570\n", 229 | "9 350.460\n", 230 | "10 349.660\n", 231 | "11 510.740\n", 232 | "12 511.500\n", 233 | "13 509.850\n", 234 | "14 728.560\n", 235 | "15 726.644\n", 236 | "16 725.695\n", 237 | "17 213.850\n", 238 | "18 195.165\n", 239 | "19 206.955\n", 240 | "20 273.775\n", 241 | "21 519.650\n", 242 | "22 506.085\n", 243 | "23 646.280\n", 244 | "24 631.175\n", 245 | "25 208.215\n", 246 | "26 272.570\n", 247 | "27 273.683\n", 248 | "28 670.690\n", 249 | "29 379.590\n", 250 | "30 694.710\n", 251 | "31 288.890\n", 252 | "dtype: float64" 253 | ] 254 | }, 255 | "execution_count": 5, 256 | "metadata": {}, 257 | "output_type": "execute_result" 258 | } 259 | ], 260 | "source": [ 261 | "cars.sum(axis=1, numeric_only=True)" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": 6, 267 | "metadata": {}, 268 | "outputs": [ 269 | { 270 | "data": { 271 | "text/plain": [ 272 | "mpg 19.200\n", 273 | "cyl 6.000\n", 274 | "disp 196.300\n", 275 | "hp 123.000\n", 276 | "drat 3.695\n", 277 | "wt 3.325\n", 278 | "qsec 17.710\n", 279 | "vs 0.000\n", 280 | "am 0.000\n", 281 | "gear 4.000\n", 282 | "carb 2.000\n", 283 | "dtype: float64" 284 | ] 285 | }, 286 | "execution_count": 6, 287 | "metadata": {}, 288 | "output_type": "execute_result" 289 | } 290 | ], 291 | "source": [ 292 | "cars.median(numeric_only='True')" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": 7, 298 | "metadata": {}, 299 | "outputs": [ 300 | { 301 | "data": { 302 | "text/plain": [ 303 | "mpg 20.090625\n", 304 | "cyl 6.187500\n", 305 | "disp 230.721875\n", 306 | "hp 146.687500\n", 307 | "drat 3.596563\n", 308 | "wt 3.217250\n", 309 | "qsec 17.848750\n", 310 | "vs 0.437500\n", 311 | "am 0.406250\n", 312 | "gear 3.687500\n", 313 | "carb 2.812500\n", 314 | "dtype: float64" 315 | ] 316 | }, 317 | 
"execution_count": 7, 318 | "metadata": {}, 319 | "output_type": "execute_result" 320 | } 321 | ], 322 | "source": [ 323 | "cars.mean(numeric_only='True')" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": 8, 329 | "metadata": {}, 330 | "outputs": [ 331 | { 332 | "data": { 333 | "text/plain": [ 334 | "car_names Volvo 142E\n", 335 | "mpg 33.9\n", 336 | "cyl 8\n", 337 | "disp 472.0\n", 338 | "hp 335\n", 339 | "drat 4.93\n", 340 | "wt 5.424\n", 341 | "qsec 22.9\n", 342 | "vs 1\n", 343 | "am 1\n", 344 | "gear 5\n", 345 | "carb 8\n", 346 | "dtype: object" 347 | ] 348 | }, 349 | "execution_count": 8, 350 | "metadata": {}, 351 | "output_type": "execute_result" 352 | } 353 | ], 354 | "source": [ 355 | "cars.max()" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": 9, 361 | "metadata": {}, 362 | "outputs": [ 363 | { 364 | "data": { 365 | "text/plain": [ 366 | "19" 367 | ] 368 | }, 369 | "execution_count": 9, 370 | "metadata": {}, 371 | "output_type": "execute_result" 372 | } 373 | ], 374 | "source": [ 375 | "mpg = cars.mpg\n", 376 | "mpg.idxmax()" 377 | ] 378 | }, 379 | { 380 | "cell_type": "markdown", 381 | "metadata": {}, 382 | "source": [ 383 | "### Looking at summary statistics that describe variable distribution" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": 10, 389 | "metadata": {}, 390 | "outputs": [ 391 | { 392 | "data": { 393 | "text/plain": [ 394 | "mpg 6.026948\n", 395 | "cyl 1.785922\n", 396 | "disp 123.938694\n", 397 | "hp 68.562868\n", 398 | "drat 0.534679\n", 399 | "wt 0.978457\n", 400 | "qsec 1.786943\n", 401 | "vs 0.504016\n", 402 | "am 0.498991\n", 403 | "gear 0.737804\n", 404 | "carb 1.615200\n", 405 | "dtype: float64" 406 | ] 407 | }, 408 | "execution_count": 10, 409 | "metadata": {}, 410 | "output_type": "execute_result" 411 | } 412 | ], 413 | "source": [ 414 | "cars.std(numeric_only='True')" 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": 
11, 420 | "metadata": {}, 421 | "outputs": [ 422 | { 423 | "data": { 424 | "text/plain": [ 425 | "mpg 36.324103\n", 426 | "cyl 3.189516\n", 427 | "disp 15360.799829\n", 428 | "hp 4700.866935\n", 429 | "drat 0.285881\n", 430 | "wt 0.957379\n", 431 | "qsec 3.193166\n", 432 | "vs 0.254032\n", 433 | "am 0.248992\n", 434 | "gear 0.544355\n", 435 | "carb 2.608871\n", 436 | "dtype: float64" 437 | ] 438 | }, 439 | "execution_count": 11, 440 | "metadata": {}, 441 | "output_type": "execute_result" 442 | } 443 | ], 444 | "source": [ 445 | "cars.var(numeric_only='True')" 446 | ] 447 | }, 448 | { 449 | "cell_type": "code", 450 | "execution_count": 12, 451 | "metadata": {}, 452 | "outputs": [ 453 | { 454 | "data": { 455 | "text/plain": [ 456 | "gear\n", 457 | "3 15\n", 458 | "4 12\n", 459 | "5 5\n", 460 | "Name: count, dtype: int64" 461 | ] 462 | }, 463 | "execution_count": 12, 464 | "metadata": {}, 465 | "output_type": "execute_result" 466 | } 467 | ], 468 | "source": [ 469 | "gear = cars.gear\n", 470 | "gear.value_counts()" 471 | ] 472 | }, 473 | { 474 | "cell_type": "code", 475 | "execution_count": 13, 476 | "metadata": {}, 477 | "outputs": [ 478 | { 479 | "data": { 480 | "text/html": [ 481 | "
\n", 482 | "\n", 495 | "\n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | "
mpgcyldisphpdratwtqsecvsamgearcarb
count32.00000032.00000032.00000032.00000032.00000032.00000032.00000032.00000032.00000032.00000032.0000
mean20.0906256.187500230.721875146.6875003.5965633.21725017.8487500.4375000.4062503.6875002.8125
std6.0269481.785922123.93869468.5628680.5346790.9784571.7869430.5040160.4989910.7378041.6152
min10.4000004.00000071.10000052.0000002.7600001.51300014.5000000.0000000.0000003.0000001.0000
25%15.4250004.000000120.82500096.5000003.0800002.58125016.8925000.0000000.0000003.0000002.0000
50%19.2000006.000000196.300000123.0000003.6950003.32500017.7100000.0000000.0000004.0000002.0000
75%22.8000008.000000326.000000180.0000003.9200003.61000018.9000001.0000001.0000004.0000004.0000
max33.9000008.000000472.000000335.0000004.9300005.42400022.9000001.0000001.0000005.0000008.0000
\n", 627 | "
" 628 | ], 629 | "text/plain": [ 630 | " mpg cyl disp hp drat wt \\\n", 631 | "count 32.000000 32.000000 32.000000 32.000000 32.000000 32.000000 \n", 632 | "mean 20.090625 6.187500 230.721875 146.687500 3.596563 3.217250 \n", 633 | "std 6.026948 1.785922 123.938694 68.562868 0.534679 0.978457 \n", 634 | "min 10.400000 4.000000 71.100000 52.000000 2.760000 1.513000 \n", 635 | "25% 15.425000 4.000000 120.825000 96.500000 3.080000 2.581250 \n", 636 | "50% 19.200000 6.000000 196.300000 123.000000 3.695000 3.325000 \n", 637 | "75% 22.800000 8.000000 326.000000 180.000000 3.920000 3.610000 \n", 638 | "max 33.900000 8.000000 472.000000 335.000000 4.930000 5.424000 \n", 639 | "\n", 640 | " qsec vs am gear carb \n", 641 | "count 32.000000 32.000000 32.000000 32.000000 32.0000 \n", 642 | "mean 17.848750 0.437500 0.406250 3.687500 2.8125 \n", 643 | "std 1.786943 0.504016 0.498991 0.737804 1.6152 \n", 644 | "min 14.500000 0.000000 0.000000 3.000000 1.0000 \n", 645 | "25% 16.892500 0.000000 0.000000 3.000000 2.0000 \n", 646 | "50% 17.710000 0.000000 0.000000 4.000000 2.0000 \n", 647 | "75% 18.900000 1.000000 1.000000 4.000000 4.0000 \n", 648 | "max 22.900000 1.000000 1.000000 5.000000 8.0000 " 649 | ] 650 | }, 651 | "execution_count": 13, 652 | "metadata": {}, 653 | "output_type": "execute_result" 654 | } 655 | ], 656 | "source": [ 657 | "cars.describe()" 658 | ] 659 | }, 660 | { 661 | "cell_type": "code", 662 | "execution_count": null, 663 | "metadata": {}, 664 | "outputs": [], 665 | "source": [] 666 | } 667 | ], 668 | "metadata": { 669 | "anaconda-cloud": {}, 670 | "kernelspec": { 671 | "display_name": "Python 3", 672 | "language": "python", 673 | "name": "python3" 674 | }, 675 | "language_info": { 676 | "codemirror_mode": { 677 | "name": "ipython", 678 | "version": 3 679 | }, 680 | "file_extension": ".py", 681 | "mimetype": "text/x-python", 682 | "name": "python", 683 | "nbconvert_exporter": "python", 684 | "pygments_lexer": "ipython3", 685 | "version": "3.10.13" 686 | } 687 
| }, 688 | "nbformat": 4, 689 | "nbformat_minor": 1 690 | } 691 | -------------------------------------------------------------------------------- /notebooks/05_04b.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Starting with parametric methods in pandas and scipy" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd\n", 17 | "import numpy as np\n", 18 | "\n", 19 | "import matplotlib.pyplot as plt\n", 20 | "import seaborn as sns\n", 21 | "from pylab import rcParams\n", 22 | "\n", 23 | "import scipy\n", 24 | "from scipy.stats import pearsonr" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 2, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "%matplotlib inline\n", 34 | "rcParams['figure.figsize'] = 8, 4\n", 35 | "sns.set_style(\"whitegrid\")" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "### The Pearson Correlation" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "address = '/workspaces/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/data/mtcars.csv'\n", 52 | "\n", 53 | "cars = pd.read_csv(address)\n", 54 | "cars.columns = ['car_names','mpg','cyl','disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb']\n" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "### Using scipy to calculate the Pearson correlation coefficient" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "### Using Seaborn to visualize the Pearson correlation coefficient" 69 | ] 70 | } 71 | ], 72 | "metadata": { 73 | "anaconda-cloud": {}, 74 | "kernelspec": 
{ 75 | "display_name": "Python 3", 76 | "language": "python", 77 | "name": "python3" 78 | }, 79 | "language_info": { 80 | "codemirror_mode": { 81 | "name": "ipython", 82 | "version": 3 83 | }, 84 | "file_extension": ".py", 85 | "mimetype": "text/x-python", 86 | "name": "python", 87 | "nbconvert_exporter": "python", 88 | "pygments_lexer": "ipython3", 89 | "version": "3.8.8" 90 | } 91 | }, 92 | "nbformat": 4, 93 | "nbformat_minor": 1 94 | } 95 | -------------------------------------------------------------------------------- /notebooks/05_05b.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Delving into non-parametric methods using pandas and scipy" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "import pandas as pd\n", 18 | "\n", 19 | "import matplotlib.pyplot as plt\n", 20 | "import seaborn as sns\n", 21 | "from pylab import rcParams\n", 22 | "\n", 23 | "import scipy\n", 24 | "from scipy.stats import spearmanr" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 2, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "%matplotlib inline\n", 34 | "rcParams['figure.figsize'] = 14, 7\n", 35 | "sns.set_style(\"whitegrid\")" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "### The Spearman Rank Correlation" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "address = '/workspaces/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/data/mtcars.csv'\n", 52 | "\n", 53 | "cars = pd.read_csv(address)\n", 54 | "cars.columns = ['car_names','mpg','cyl','disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb']\n", 55 | 
"cars.head()" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "### Chi-square test for independence" 63 | ] 64 | } 65 | ], 66 | "metadata": { 67 | "kernelspec": { 68 | "display_name": "Python 3", 69 | "language": "python", 70 | "name": "python3" 71 | }, 72 | "language_info": { 73 | "codemirror_mode": { 74 | "name": "ipython", 75 | "version": 3 76 | }, 77 | "file_extension": ".py", 78 | "mimetype": "text/x-python", 79 | "name": "python", 80 | "nbconvert_exporter": "python", 81 | "pygments_lexer": "ipython3", 82 | "version": "3.8.8" 83 | } 84 | }, 85 | "nbformat": 4, 86 | "nbformat_minor": 1 87 | } 88 | -------------------------------------------------------------------------------- /notebooks/05_06b.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Extreme value analysis using univariate methods" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "import pandas as pd\n", 18 | "\n", 19 | "import matplotlib.pyplot as plt\n", 20 | "from pylab import rcParams" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 2, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "%matplotlib inline\n", 30 | "rcParams['figure.figsize'] = 5,4" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "### Identifying outliers from Tukey boxplots" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "### Applying Tukey outlier labeling" 45 | ] 46 | } 47 | ], 48 | "metadata": { 49 | "kernelspec": { 50 | "display_name": "Python 3", 51 | "language": "python", 52 | "name": "python3" 53 | }, 54 | "language_info": { 55 | "codemirror_mode": { 56 | "name": "ipython", 57 | "version": 3 58 | }, 
59 | "file_extension": ".py", 60 | "mimetype": "text/x-python", 61 | "name": "python", 62 | "nbconvert_exporter": "python", 63 | "pygments_lexer": "ipython3", 64 | "version": "3.8.8" 65 | } 66 | }, 67 | "nbformat": 4, 68 | "nbformat_minor": 1 69 | } 70 | -------------------------------------------------------------------------------- /notebooks/05_07b.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Multivariate analysis for outlier detection" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd\n", 17 | "\n", 18 | "import matplotlib.pyplot as plt\n", 19 | "from pylab import rcParams\n", 20 | "import seaborn as sns" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 2, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "%matplotlib inline\n", 30 | "rcParams['figure.figsize'] = 5, 4\n", 31 | "sns.set_style('whitegrid')" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "### Visually inspecting boxplots" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "address = '/workspaces/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/data/iris.data.csv'\n", 48 | "df= pd.read_csv(filepath_or_buffer=address, header=None, sep=',')\n", 49 | "\n", 50 | "df.columns = ['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width', 'Species']\n", 51 | "x = df.iloc[:,0:4].values\n", 52 | "y = df.iloc[:,4].values\n", 53 | "df[:5]" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": { 66 | "collapsed": true 67 
| }, 68 | "source": [ 69 | "### Looking at the scatterplot matrix" 70 | ] 71 | } 72 | ], 73 | "metadata": { 74 | "kernelspec": { 75 | "display_name": "Python 3", 76 | "language": "python", 77 | "name": "python3" 78 | }, 79 | "language_info": { 80 | "codemirror_mode": { 81 | "name": "ipython", 82 | "version": 3 83 | }, 84 | "file_extension": ".py", 85 | "mimetype": "text/x-python", 86 | "name": "python", 87 | "nbconvert_exporter": "python", 88 | "pygments_lexer": "ipython3", 89 | "version": "3.8.8" 90 | } 91 | }, 92 | "nbformat": 4, 93 | "nbformat_minor": 1 94 | } 95 | -------------------------------------------------------------------------------- /notebooks/06_01b.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "8ab9487e-00e3-47e2-9368-9a5a6f112a65", 6 | "metadata": {}, 7 | "source": [ 8 | "## Cleaning and treating categorical variables" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "6ddc4c2e-8b01-449d-b823-9394f1f96b1f", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy as np\n", 19 | "from pandas import DataFrame\n", 20 | "\n", 21 | "from sklearn.preprocessing import LabelEncoder, OneHotEncoder" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "id": "1bef47b2", 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "data = {'names':['steve', 'john', 'richard', 'sarah', 'randy', 'micheal', 'julie'],\n", 32 | "'age':[20, 22, 20, 21, 24, 23, 22],\n", 33 | "'gender':['Male', 'Male', np.nan, 'Female', np.nan, 'Male', np.nan],\n", 34 | "'rank':[2, 1, 4, 5, 3, 7, 6]}" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "id": "385b7880", 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "id": "1321636f-e6e6-4c6e-9756-e0b47dcc56a9", 48 | "metadata": {}, 49 | "source": [ 50 
| "### Label Encoding" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "id": "46c8b54e-bc23-4ef7-95a3-8b3dac69b324", 56 | "metadata": {}, 57 | "source": [ 58 | "### One Hot Encoder" 59 | ] 60 | } 61 | ], 62 | "metadata": { 63 | "kernelspec": { 64 | "display_name": "Python 3", 65 | "language": "python", 66 | "name": "python3" 67 | }, 68 | "language_info": { 69 | "codemirror_mode": { 70 | "name": "ipython", 71 | "version": 3 72 | }, 73 | "file_extension": ".py", 74 | "mimetype": "text/x-python", 75 | "name": "python", 76 | "nbconvert_exporter": "python", 77 | "pygments_lexer": "ipython3", 78 | "version": "3.8.8" 79 | } 80 | }, 81 | "nbformat": 4, 82 | "nbformat_minor": 5 83 | } 84 | -------------------------------------------------------------------------------- /notebooks/06_02b.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "5d519b84-6889-4827-83d8-e99dbb2ec5df", 6 | "metadata": {}, 7 | "source": [ 8 | "## Transforming Dataset Distributions" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 3, 14 | "id": "868ec360-501a-4032-a6b7-c7fac40a78df", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy as np\n", 19 | "import pandas as pd\n", 20 | "\n", 21 | "import matplotlib.pyplot as plt\n", 22 | "from sklearn.preprocessing import MinMaxScaler, scale" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "id": "a67f7690", 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "address = '/workspaces/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/data/mtcars.csv'\n", 33 | "\n", 34 | "dataset = pd.read_csv(address)\n", 35 | "dataset.head()" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "id": "4c753f3f-ea35-4502-a099-168880862c3f", 41 | "metadata": {}, 42 | "source": [ 43 | "### Normalization" 44 | ] 45 | }, 46 | { 47 | "cell_type": 
"markdown", 48 | "id": "496a3803-34bb-4ddc-9dc5-5fca9b9b0fd1", 49 | "metadata": {}, 50 | "source": [ 51 | "### Standardization" 52 | ] 53 | } 54 | ], 55 | "metadata": { 56 | "kernelspec": { 57 | "display_name": "Python 3", 58 | "language": "python", 59 | "name": "python3" 60 | }, 61 | "language_info": { 62 | "codemirror_mode": { 63 | "name": "ipython", 64 | "version": 3 65 | }, 66 | "file_extension": ".py", 67 | "mimetype": "text/x-python", 68 | "name": "python", 69 | "nbconvert_exporter": "python", 70 | "pygments_lexer": "ipython3", 71 | "version": "3.8.8" 72 | } 73 | }, 74 | "nbformat": 4, 75 | "nbformat_minor": 5 76 | } 77 | -------------------------------------------------------------------------------- /notebooks/06_03b.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Applied Machine Learning: Starter Problem" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 108, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd\n", 17 | "from sklearn.model_selection import train_test_split\n", 18 | "from sklearn.tree import DecisionTreeClassifier\n", 19 | "from sklearn import metrics" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "id": "6b20b117", 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "address = '/workspaces/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/data/iris.csv'\n", 30 | "\n", 31 | "dataset = pd.read_csv(address)\n", 32 | "dataset.head()" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "### Separating features and labels" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "### Train Test Split" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "### 
Training Decision Tree Classifier" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "### Evaluation metric" 61 | ] 62 | } 63 | ], 64 | "metadata": { 65 | "kernelspec": { 66 | "display_name": "Python 3", 67 | "language": "python", 68 | "name": "python3" 69 | }, 70 | "language_info": { 71 | "codemirror_mode": { 72 | "name": "ipython", 73 | "version": 3 74 | }, 75 | "file_extension": ".py", 76 | "mimetype": "text/x-python", 77 | "name": "python", 78 | "nbconvert_exporter": "python", 79 | "pygments_lexer": "ipython3", 80 | "version": "3.7.1" 81 | } 82 | }, 83 | "nbformat": 4, 84 | "nbformat_minor": 5 85 | } 86 | -------------------------------------------------------------------------------- /notebooks/06_03e.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Applied Machine Learning: Starter Problem" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd\n", 17 | "from sklearn.model_selection import train_test_split\n", 18 | "from sklearn.tree import DecisionTreeClassifier\n", 19 | "from sklearn import metrics" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "id": "4e1cf7e5", 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "data": { 30 | "text/html": [ 31 | "
\n", 32 | "\n", 45 | "\n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | "
Unnamed: 0Sepal.LengthSepal.WidthPetal.LengthPetal.WidthSpecies
015.13.51.40.2setosa
124.93.01.40.2setosa
234.73.21.30.2setosa
344.63.11.50.2setosa
455.03.61.40.2setosa
\n", 105 | "
" 106 | ], 107 | "text/plain": [ 108 | " Unnamed: 0 Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n", 109 | "0 1 5.1 3.5 1.4 0.2 setosa\n", 110 | "1 2 4.9 3.0 1.4 0.2 setosa\n", 111 | "2 3 4.7 3.2 1.3 0.2 setosa\n", 112 | "3 4 4.6 3.1 1.5 0.2 setosa\n", 113 | "4 5 5.0 3.6 1.4 0.2 setosa" 114 | ] 115 | }, 116 | "execution_count": 2, 117 | "metadata": {}, 118 | "output_type": "execute_result" 119 | } 120 | ], 121 | "source": [ 122 | "address = '/workspaces/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/data/iris.csv'\n", 123 | "\n", 124 | "dataset = pd.read_csv(address)\n", 125 | "dataset.head()" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 3, 131 | "id": "a10a8e24", 132 | "metadata": {}, 133 | "outputs": [ 134 | { 135 | "data": { 136 | "text/plain": [ 137 | "array(['setosa', 'versicolor', 'virginica'], dtype=object)" 138 | ] 139 | }, 140 | "execution_count": 3, 141 | "metadata": {}, 142 | "output_type": "execute_result" 143 | } 144 | ], 145 | "source": [ 146 | "dataset.Species.unique()" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "### Separating features and labels" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 4, 159 | "id": "0ba71df9", 160 | "metadata": {}, 161 | "outputs": [ 162 | { 163 | "data": { 164 | "text/html": [ 165 | "
\n", 166 | "\n", 179 | "\n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | "
Sepal.LengthSepal.WidthPetal.LengthPetal.Width
05.13.51.40.2
14.93.01.40.2
24.73.21.30.2
34.63.11.50.2
45.03.61.40.2
...............
1456.73.05.22.3
1466.32.55.01.9
1476.53.05.22.0
1486.23.45.42.3
1495.93.05.11.8
\n", 269 | "

150 rows × 4 columns

\n", 270 | "
" 271 | ], 272 | "text/plain": [ 273 | " Sepal.Length Sepal.Width Petal.Length Petal.Width\n", 274 | "0 5.1 3.5 1.4 0.2\n", 275 | "1 4.9 3.0 1.4 0.2\n", 276 | "2 4.7 3.2 1.3 0.2\n", 277 | "3 4.6 3.1 1.5 0.2\n", 278 | "4 5.0 3.6 1.4 0.2\n", 279 | ".. ... ... ... ...\n", 280 | "145 6.7 3.0 5.2 2.3\n", 281 | "146 6.3 2.5 5.0 1.9\n", 282 | "147 6.5 3.0 5.2 2.0\n", 283 | "148 6.2 3.4 5.4 2.3\n", 284 | "149 5.9 3.0 5.1 1.8\n", 285 | "\n", 286 | "[150 rows x 4 columns]" 287 | ] 288 | }, 289 | "execution_count": 4, 290 | "metadata": {}, 291 | "output_type": "execute_result" 292 | } 293 | ], 294 | "source": [ 295 | "X = dataset.iloc[:,1:5]\n", 296 | "X" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 5, 302 | "id": "b7fdb1a8", 303 | "metadata": {}, 304 | "outputs": [ 305 | { 306 | "data": { 307 | "text/plain": [ 308 | "0 setosa\n", 309 | "1 setosa\n", 310 | "2 setosa\n", 311 | "3 setosa\n", 312 | "4 setosa\n", 313 | " ... \n", 314 | "145 virginica\n", 315 | "146 virginica\n", 316 | "147 virginica\n", 317 | "148 virginica\n", 318 | "149 virginica\n", 319 | "Name: Species, Length: 150, dtype: object" 320 | ] 321 | }, 322 | "execution_count": 5, 323 | "metadata": {}, 324 | "output_type": "execute_result" 325 | } 326 | ], 327 | "source": [ 328 | "y = dataset.iloc[:,5]\n", 329 | "y" 330 | ] 331 | }, 332 | { 333 | "cell_type": "markdown", 334 | "metadata": {}, 335 | "source": [ 336 | "### Train Test Split" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": 6, 342 | "id": "61c24f24", 343 | "metadata": {}, 344 | "outputs": [], 345 | "source": [ 346 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)" 347 | ] 348 | }, 349 | { 350 | "cell_type": "markdown", 351 | "metadata": {}, 352 | "source": [ 353 | "### Training Decision Tree Classifier" 354 | ] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": 7, 359 | "id": "994db922", 360 | "metadata": {}, 361 | "outputs": [ 362 | 
{ 363 | "data": { 364 | "text/html": [ 365 | "
DecisionTreeClassifier()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" 366 | ], 367 | "text/plain": [ 368 | "DecisionTreeClassifier()" 369 | ] 370 | }, 371 | "execution_count": 7, 372 | "metadata": {}, 373 | "output_type": "execute_result" 374 | } 375 | ], 376 | "source": [ 377 | "clf = DecisionTreeClassifier()\n", 378 | "clf.fit(X_train, y_train)" 379 | ] 380 | }, 381 | { 382 | "cell_type": "code", 383 | "execution_count": 8, 384 | "id": "5d06c2ac", 385 | "metadata": {}, 386 | "outputs": [ 387 | { 388 | "data": { 389 | "text/plain": [ 390 | "array(['virginica', 'versicolor', 'setosa', 'virginica', 'setosa',\n", 391 | " 'virginica', 'setosa', 'versicolor', 'versicolor', 'versicolor',\n", 392 | " 'virginica', 'versicolor', 'versicolor', 'versicolor',\n", 393 | " 'versicolor', 'setosa', 'versicolor', 'versicolor', 'setosa',\n", 394 | " 'setosa', 'virginica', 'versicolor', 'setosa', 'setosa',\n", 395 | " 'virginica', 'setosa', 'setosa', 'versicolor', 'versicolor',\n", 396 | " 'setosa', 'virginica', 'versicolor', 'setosa', 'virginica',\n", 397 | " 'virginica', 'versicolor', 'setosa', 'virginica', 'versicolor',\n", 398 | " 'versicolor', 'virginica', 'setosa', 'virginica', 'setosa',\n", 399 | " 'setosa'], dtype=object)" 400 | ] 401 | }, 402 | "execution_count": 8, 403 | "metadata": {}, 404 | "output_type": "execute_result" 405 | } 406 | ], 407 | "source": [ 408 | "y_predict = clf.predict(X_test)\n", 409 | "y_predict" 410 | ] 411 | }, 412 | { 413 | "cell_type": "markdown", 414 | "metadata": {}, 415 | "source": [ 416 | "### Evaluation metric" 417 | ] 418 | }, 419 | { 420 | "cell_type": "code", 421 | "execution_count": 9, 422 | "id": "8415063f", 423 | "metadata": {}, 424 | "outputs": [ 425 | { 426 | "name": "stdout", 427 | "output_type": "stream", 428 | "text": [ 429 | "Accuracy: 0.9777777777777777\n" 430 | ] 431 | } 432 | ], 433 | "source": [ 434 | "accuracy = metrics.accuracy_score(y_test, y_predict)\n", 435 | "print(\"Accuracy:\", accuracy)" 436 | ] 437 | }, 438 | { 439 | "cell_type": "code", 440 | "execution_count": null, 441 | "id": 
"ecc7f83e", 442 | "metadata": {}, 443 | "outputs": [], 444 | "source": [] 445 | } 446 | ], 447 | "metadata": { 448 | "kernelspec": { 449 | "display_name": "Python 3", 450 | "language": "python", 451 | "name": "python3" 452 | }, 453 | "language_info": { 454 | "codemirror_mode": { 455 | "name": "ipython", 456 | "version": 3 457 | }, 458 | "file_extension": ".py", 459 | "mimetype": "text/x-python", 460 | "name": "python", 461 | "nbconvert_exporter": "python", 462 | "pygments_lexer": "ipython3", 463 | "version": "3.10.13" 464 | } 465 | }, 466 | "nbformat": 4, 467 | "nbformat_minor": 5 468 | } 469 | -------------------------------------------------------------------------------- /notebooks/07_02b.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "d73346ef-024b-4e66-b8bb-8521e36d8aa0", 6 | "metadata": {}, 7 | "source": [ 8 | "## Python requests for automating data collection." 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "3731af81-600b-4560-8e71-92693df00f7f", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "id": "29d169cb-fe50-41c3-a145-3af10473fcc4", 22 | "metadata": {}, 23 | "source": [ 24 | "### Headers" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "id": "799bc8a5-5b9f-4c93-ad74-2d56a4de8bc5", 30 | "metadata": {}, 31 | "source": [ 32 | "### Content Types" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "id": "cd63c2df-ecca-48e6-a84d-1275d5df3d55", 38 | "metadata": {}, 39 | "source": [ 40 | "### Body/Content" 41 | ] 42 | } 43 | ], 44 | "metadata": { 45 | "kernelspec": { 46 | "display_name": "Python 3", 47 | "language": "python", 48 | "name": "python3" 49 | }, 50 | "language_info": { 51 | "codemirror_mode": { 52 | "name": "ipython", 53 | "version": 3 54 | }, 55 | "file_extension": ".py", 56 | "mimetype": "text/x-python", 57 | "name": "python", 58 
| "nbconvert_exporter": "python", 59 | "pygments_lexer": "ipython3", 60 | "version": "3.8.8" 61 | } 62 | }, 63 | "nbformat": 4, 64 | "nbformat_minor": 5 65 | } 66 | -------------------------------------------------------------------------------- /notebooks/07_03b.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Part 1 - Objects in BeautifulSoup" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": { 13 | "collapsed": true 14 | }, 15 | "source": [ 16 | "### BeautifulSoup objects" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "our_html_document = '''\n", 26 | "IoT Articles\n", 27 | "\n", 28 | "

2018 Trends: Best New IoT Device Ideas for Data Scientists and Engineers

\n", 29 | "\n", 30 | "

It’s almost 2018 and IoT is on the cusp of an explosive expansion. In this article, I offer you a listing of new IoT device ideas that you can use...\n", 31 | "
\n", 32 | "
\n", 33 | "It’s almost 2018 and IoT is on the cusp of an explosive expansion. In this article, I offer you a listing of new IoT device ideas that you can use to get practice in designing your first IoT applications.\n", 34 | "

Looking Back at My Coolest IoT Find in 2017

\n", 35 | "Before going into detail about best new IoT device ideas, here’s the backstory. Last month Ericsson Digital invited me to tour the Ericsson Studio in Kista, Sweden. Up until that visit, IoT had been largely theoretical to me. Of course, I know the usual mumbo-jumbo about wearables and IoT-connected fitness trackers. That stuff is all well and good, but it’s somewhat old hat – plus I am not sure we are really benefiting so much from those, so I’m not that impressed.\n", 36 | "\n", 37 | "It wasn’t until I got to the Ericsson Studio that I became extremely impressed by how far IoT has really come. Relying on the promise of the 5g network expansion, IoT-powered smart devices are on the cusp of an explosive growth in adoption. It was Ericsson’s Smart Car that sent me reeling:\"Get\n", 38 | "\n", 39 | "This car is connected to Ericsson’s Connected Vehicle Cloud, an IoT platform that manages services for the Smart Cars to which it’s connected. The Volvo pictured above acts as a drop-off location for groceries that have been ordered by its owner.\n", 40 | "\n", 41 | "To understand how it works, imagine you’re pulling your normal 9-to-5 and you know you need to grab some groceries on your way home. Well, since you’re smart you’ve used Ericsson IoT platform to connect your car to the local grocery delivery service (Mat.se), so all you need to do is open the Mat.se app and make your usual order. Mat.se automatically handles the payment, grocery selection, delivery, and delivery scheduling. Since your car is IoT-enabled, Mat.se issues its trusted delivery agent a 1-time token to use for opening your car in order to place your groceries in your car for you at 4:40 pm (just before you get off from work).\n", 42 | "\n", 43 | "To watch some of the amazing IoT device demos I witnessed at Ericsson Studio, make sure to go watch the videos on this page.\n", 44 | "

Future Trends for IoT in 2018

\n", 45 | "New IoT device ideas won’t do you much good unless you at least know the basic technology trends that are set to impact IoT over the next year(s). These include:\n", 46 | "
    \n", 47 | " \t
  1. Big Data & Data Engineering: Sensors that are embedded within IoT devices spin off machine-generated data like it’s going out of style. For IoT to function, the platform must be solidly engineered to handle big data. Be assured, that requires some serious data engineering.
  2. \n", 48 | " \t
  3. Machine Learning Data Science: While a lot of IoT devices are still operated according to rules-based decision criteria, the age of artificial intelligence is upon us. IoT will increasingly depend on machine learning algorithms to control device operations so that devices are able to autonomously respond to a complex set of overlapping stimuli.
  4. \n", 49 | " \t
  5. Blockchain-Enabled Security: Above all else, IoT networks must be secure. Blockchain technology is primed to meet the security demands that come along with building and expanding the IoT.
  6. \n", 50 | "
\n", 51 | "

Best New IoT Device Ideas

\n", 52 | "This listing of new IoT device ideas has been sub-divided according to the main technology upon which the IoT devices are built. Below I’m providing a list of new IoT device ideas, but for detailed instructions on how to build these IoT applications, I recommend the IoT courses on Udemy (ß Please note: if you purchase a Udemy course through this link, I may receive a small commission), or courses that are available at SkyFi and Coursera.\n", 53 | "

Raspberry Pi IoT Ideas

\n", 54 | "Using Raspberry Pi as open-source hardware, you can build IoT applications that offer any one of the following benefits:\n", 55 | "
    \n", 56 | " \t
  1. Enable built-in sensing to build a weather station that measures ambient temperature and humidity
  2. \n", 57 | " \t
  3. Build a system that detects discrepancies in electrical readings to identify electricity theft
  4. \n", 58 | " \t
  5. Use IoT to build a Servo that is controlled by motion detection readings
  6. \n", 59 | " \t
  7. Build a smart control switch that operates devices based on external stimuli. Use this for home automation.
  8. \n", 60 | " \t
  9. Build a music playing application that enables music for each room in your house
  10. \n", 61 | " \t
  11. Implement biometrics on IoT-connected devices
  12. \n", 62 | "
\n", 63 | "

Arduino IoT Ideas

\n", 64 | "There are a number of new IoT device ideas that deploy Arduino as a microcontroller. These include:\n", 65 | "
    \n", 66 | " \t
  1. Integrate Arduino with Android to build a remote-control RGB LED device.
  2. \n", 67 | " \t
  3. Connect PIR sensors across the IoT to implement a smart building.
  4. \n", 68 | " \t
  5. Build a temperature and sunlight sensor system to remotely monitor and control the conditions of your garden.
  6. \n", 69 | " \t
  7. Deploy Arduino and IoT to automate your neighborhood streetlights.
  8. \n", 70 | " \t
  9. Build a smart irrigation system based on IoT-connected temperature and moisture sensors built-in to your agricultural plants.
  10. \n", 71 | "
\n", 72 | "[caption id=\"attachment_3807\" align=\"aligncenter\" width=\"300\"]\"\" An IoT Chatbot Tree at the Ericsson Studio[/caption]\n", 73 | "

Wireless (GSM) IoT Ideas

\n", 74 | "Several new IoT device ideas are developed around the GSM wireless network. Those are:\n", 75 | "
    \n", 76 | " \t
  1. Monitor soil moisture to automate agricultural irrigation cycles.
  2. \n", 77 | " \t
  3. Automate and control the conditions of a greenhouse.
  4. \n", 78 | " \t
  5. Enable bio-metrics to build a smart security system for your home or office building
  6. \n", 79 | " \t
  7. Build an autonomously operating fitness application that automatically makes recommendations based on motion detection and heart rate sensors that are embedded on wearable fitness trackers.
  8. \n", 80 | " \t
  9. Build a healthcare monitoring system that tracks, informs, and automatically alerts healthcare providers based on sensor readings that describe a patients vital statistics (like temperature, pulse, blood pressure, etc).
  10. \n", 81 | "
\n", 82 | "

IoT Automation Ideas

\n", 83 | "Almost all new IoT device ideas offer automation benefits, but to outline a few more ideas:\n", 84 | "
    \n", 85 | " \t
  1. Build an IoT device that automatically locates and reports the closest nearby parking spot.
  2. \n", 86 | " \t
  3. Build a motion detection system that automatically issues emails or sms messages to alert home owners of a likely home invasion.
  4. \n", 87 | " \t
  5. Use temperature sensors connected across the IoT to automatically alert you if your home windows or doors have been left open.
  6. \n", 88 | " \t
  7. Use bio-metric sensors to build a smart system that automate security for your home or office building
  8. \n", 89 | "
\n", 90 | "To learn more about IoT and what’s happening on the leading edge, be sure to pop over to Ericsson’s Studio Tour recap and watch these videos.\n", 91 | "\n", 92 | "(I captured some of this content on behalf of DevMode Strategies during an invite-only tour of the Ericsson Studio in Kista. Rest assure, the text and opinions are my own)\n", 93 | "

...

\n", 94 | "'''" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": { 100 | "collapsed": true 101 | }, 102 | "source": [ 103 | "### Tag objects" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": { 109 | "collapsed": true 110 | }, 111 | "source": [ 112 | "#### Tag names" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": { 118 | "collapsed": true 119 | }, 120 | "source": [ 121 | "#### Tag attributes" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "#### Navigating a parse tree using tags" 129 | ] 130 | } 131 | ], 132 | "metadata": { 133 | "kernelspec": { 134 | "display_name": "Python 3", 135 | "language": "python", 136 | "name": "python3" 137 | }, 138 | "language_info": { 139 | "codemirror_mode": { 140 | "name": "ipython", 141 | "version": 3 142 | }, 143 | "file_extension": ".py", 144 | "mimetype": "text/x-python", 145 | "name": "python", 146 | "nbconvert_exporter": "python", 147 | "pygments_lexer": "ipython3", 148 | "version": "3.8.8" 149 | } 150 | }, 151 | "nbformat": 4, 152 | "nbformat_minor": 1 153 | } 154 | -------------------------------------------------------------------------------- /notebooks/07_04b.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## NavigableString Objects" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### NavigableString objects" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": { 20 | "collapsed": true 21 | }, 22 | "source": [ 23 | "#### Utilizing NavigableString objects" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "our_html_document = '''\n", 33 | "IoT Articles\n", 34 | "\n", 35 | "

2018 Trends: Best New IoT Device Ideas for Data Scientists and Engineers

\n", 36 | "\n", 37 | "

It’s almost 2018 and IoT is on the cusp of an explosive expansion. In this article, I offer you a listing of new IoT device ideas that you can use...\n", 38 | "
\n", 39 | "
\n", 40 | "It’s almost 2018 and IoT is on the cusp of an explosive expansion. In this article, I offer you a listing of new IoT device ideas that you can use to get practice in designing your first IoT applications.\n", 41 | "

Looking Back at My Coolest IoT Find in 2017

\n", 42 | "Before going into detail about best new IoT device ideas, here’s the backstory. Last month Ericsson Digital invited me to tour the Ericsson Studio in Kista, Sweden. Up until that visit, IoT had been largely theoretical to me. Of course, I know the usual mumbo-jumbo about wearables and IoT-connected fitness trackers. That stuff is all well and good, but it’s somewhat old hat – plus I am not sure we are really benefiting so much from those, so I’m not that impressed.\n", 43 | "\n", 44 | "It wasn’t until I got to the Ericsson Studio that I became extremely impressed by how far IoT has really come. Relying on the promise of the 5g network expansion, IoT-powered smart devices are on the cusp of an explosive growth in adoption. It was Ericsson’s Smart Car that sent me reeling:\"Get\n", 45 | "\n", 46 | "This car is connected to Ericsson’s Connected Vehicle Cloud, an IoT platform that manages services for the Smart Cars to which it’s connected. The Volvo pictured above acts as a drop-off location for groceries that have been ordered by its owner.\n", 47 | "\n", 48 | "To understand how it works, imagine you’re pulling your normal 9-to-5 and you know you need to grab some groceries on your way home. Well, since you’re smart you’ve used Ericsson IoT platform to connect your car to the local grocery delivery service (Mat.se), so all you need to do is open the Mat.se app and make your usual order. Mat.se automatically handles the payment, grocery selection, delivery, and delivery scheduling. Since your car is IoT-enabled, Mat.se issues its trusted delivery agent a 1-time token to use for opening your car in order to place your groceries in your car for you at 4:40 pm (just before you get off from work).\n", 49 | "\n", 50 | "To watch some of the amazing IoT device demos I witnessed at Ericsson Studio, make sure to go watch the videos on this page.\n", 51 | "

Future Trends for IoT in 2018

\n", 52 | "New IoT device ideas won’t do you much good unless you at least know the basic technology trends that are set to impact IoT over the next year(s). These include:\n", 53 | "
    \n", 54 | " \t
  1. Big Data & Data Engineering: Sensors that are embedded within IoT devices spin off machine-generated data like it’s going out of style. For IoT to function, the platform must be solidly engineered to handle big data. Be assured, that requires some serious data engineering.
  2. \n", 55 | " \t
  3. Machine Learning Data Science: While a lot of IoT devices are still operated according to rules-based decision criteria, the age of artificial intelligence is upon us. IoT will increasingly depend on machine learning algorithms to control device operations so that devices are able to autonomously respond to a complex set of overlapping stimuli.
  4. \n", 56 | " \t
  5. Blockchain-Enabled Security: Above all else, IoT networks must be secure. Blockchain technology is primed to meet the security demands that come along with building and expanding the IoT.
  6. \n", 57 | "
\n", 58 | "

Best New IoT Device Ideas

\n", 59 | "This listing of new IoT device ideas has been sub-divided according to the main technology upon which the IoT devices are built. Below I’m providing a list of new IoT device ideas, but for detailed instructions on how to build these IoT applications, I recommend the IoT courses on Udemy (ß Please note: if you purchase a Udemy course through this link, I may receive a small commission), or courses that are available at SkyFi and Coursera.\n", 60 | "

Raspberry Pi IoT Ideas

\n", 61 | "Using Raspberry Pi as open-source hardware, you can build IoT applications that offer any one of the following benefits:\n", 62 | "
    \n", 63 | " \t
  1. Enable built-in sensing to build a weather station that measures ambient temperature and humidity
  2. \n", 64 | " \t
  3. Build a system that detects discrepancies in electrical readings to identify electricity theft
  4. \n", 65 | " \t
  5. Use IoT to build a Servo that is controlled by motion detection readings
  6. \n", 66 | " \t
  7. Build a smart control switch that operates devices based on external stimuli. Use this for home automation.
  8. \n", 67 | " \t
  9. Build a music playing application that enables music for each room in your house
  10. \n", 68 | " \t
  11. Implement biometrics on IoT-connected devices
  12. \n", 69 | "
\n", 70 | "

Arduino IoT Ideas

\n", 71 | "There are a number of new IoT device ideas that deploy Arduino as a microcontroller. These include:\n", 72 | "
    \n", 73 | " \t
  1. Integrate Arduino with Android to build a remote-control RGB LED device.
  2. \n", 74 | " \t
  3. Connect PIR sensors across the IoT to implement a smart building.
  4. \n", 75 | " \t
  5. Build a temperature and sunlight sensor system to remotely monitor and control the conditions of your garden.
  6. \n", 76 | " \t
  7. Deploy Arduino and IoT to automate your neighborhood streetlights.
  8. \n", 77 | " \t
  9. Build a smart irrigation system based on IoT-connected temperature and moisture sensors built-in to your agricultural plants.
  10. \n", 78 | "
\n", 79 | "[caption id=\"attachment_3807\" align=\"aligncenter\" width=\"300\"]\"\" An IoT Chatbot Tree at the Ericsson Studio[/caption]\n", 80 | "

Wireless (GSM) IoT Ideas

\n", 81 | "Several new IoT device ideas are developed around the GSM wireless network. Those are:\n", 82 | "
    \n", 83 | " \t
  1. Monitor soil moisture to automate agricultural irrigation cycles.
  2. \n", 84 | " \t
  3. Automate and control the conditions of a greenhouse.
  4. \n", 85 | " \t
  5. Enable bio-metrics to build a smart security system for your home or office building
  6. \n", 86 | " \t
  7. Build an autonomously operating fitness application that automatically makes recommendations based on motion detection and heart rate sensors that are embedded on wearable fitness trackers.
  8. \n", 87 | " \t
  9. Build a healthcare monitoring system that tracks, informs, and automatically alerts healthcare providers based on sensor readings that describe a patients vital statistics (like temperature, pulse, blood pressure, etc).
  10. \n", 88 | "
\n", 89 | "

IoT Automation Ideas

\n", 90 | "Almost all new IoT device ideas offer automation benefits, but to outline a few more ideas:\n", 91 | "
    \n", 92 | " \t
  1. Build an IoT device that automatically locates and reports the closest nearby parking spot.
  2. \n", 93 | " \t
  3. Build a motion detection system that automatically issues emails or sms messages to alert home owners of a likely home invasion.
  4. \n", 94 | " \t
  5. Use temperature sensors connected across the IoT to automatically alert you if your home windows or doors have been left open.
  6. \n", 95 | " \t
  7. Use bio-metric sensors to build a smart system that automate security for your home or office building
  8. \n", 96 | "
\n", 97 | "To learn more about IoT and what’s happening on the leading edge, be sure to pop over to Ericsson’s Studio Tour recap and watch these videos.\n", 98 | "\n", 99 | "(I captured some of this content on behalf of DevMode Strategies during an invite-only tour of the Ericsson Studio in Kista. Rest assure, the text and opinions are my own)\n", 100 | "

...

\n", 101 | "'''" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [] 110 | } 111 | ], 112 | "metadata": { 113 | "kernelspec": { 114 | "display_name": "Python 3", 115 | "language": "python", 116 | "name": "python3" 117 | }, 118 | "language_info": { 119 | "codemirror_mode": { 120 | "name": "ipython", 121 | "version": 3 122 | }, 123 | "file_extension": ".py", 124 | "mimetype": "text/x-python", 125 | "name": "python", 126 | "nbconvert_exporter": "python", 127 | "pygments_lexer": "ipython3", 128 | "version": "3.8.8" 129 | } 130 | }, 131 | "nbformat": 4, 132 | "nbformat_minor": 1 133 | } 134 | -------------------------------------------------------------------------------- /notebooks/07_05b.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Data parsing" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "from bs4 import BeautifulSoup\n", 17 | "\n", 18 | "import urllib\n", 19 | "import urllib.request\n", 20 | "import re" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "### Parsing your data" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "### Getting data from a parse tree" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "### Searching and retrieving data from a parse tree\n", 42 | "\n", 43 | "#### Introducing the 'find_all()' method\n", 44 | "The find_all() method searches a tag and its descendants to retrieve tags or strings that match your filters. \n", 45 | "\n", 46 | "#### Searching and filtering a parse tree\n", 47 | "There are many different ways to access tags and strings within a parse tree. 
In this segment I am going to show you the following methods:\n", 48 | "- Name argument - Search for tags by filtering based on tag name\n", 49 | "- Keyword argument - Search for tags by filtering based on tag attribute\n", 50 | "- String argument - Search for tags by filtering based on an exact string\n", 51 | "- Lists - Search for tags by filtering based on lists\n", 52 | "- Boolean values - Search for tags by filtering based on a Boolean value\n", 53 | "- Strings - Search for weblinks by filtering based on string objects\n", 54 | "- Regular expressions - Search for tags and strings by filtering based on regular expression\n", 55 | "\n", 56 | "You can pass any of these arguments into the find_all() method to use as filters and return either strings or tags.\n", 57 | "\n", 58 | "##### Retrieving tags by filtering with name arguments\n", 59 | "- Name argument - Search for tags by filtering based on tag name" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "##### Retrieving tags by filtering with keyword arguments\n", 67 | "- Keyword argument - Search for tags by filtering based on tag attribute" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "##### Retrieving tags by filtering with string arguments\n", 75 | "- String argument - Search for tags by filtering based on an exact string" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "##### Retrieving tags by filtering with list objects\n", 83 | "- Lists - Search for tags by filtering based on lists" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "##### Retrieving tags by filtering with regular expressions\n", 91 | "- Regular expressions - Search for tags and strings by filtering based on regular expression" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "##### Retrieving tags by 
filtering with a Boolean value\n", 99 | "- Boolean values - Search for tags by filtering based on a Boolean value" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "##### Retrieving weblinks by filtering with string objects\n", 107 | "- Strings - Search for weblinks by filtering based on string objects" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "##### Retrieving strings by filtering with regular expressions\n", 115 | "- Regular expressions - Search for tags and strings by filtering based on regular expression" 116 | ] 117 | } 118 | ], 119 | "metadata": { 120 | "kernelspec": { 121 | "display_name": "Python 3", 122 | "language": "python", 123 | "name": "python3" 124 | }, 125 | "language_info": { 126 | "codemirror_mode": { 127 | "name": "ipython", 128 | "version": 3 129 | }, 130 | "file_extension": ".py", 131 | "mimetype": "text/x-python", 132 | "name": "python", 133 | "nbconvert_exporter": "python", 134 | "pygments_lexer": "ipython3", 135 | "version": "3.8.8" 136 | } 137 | }, 138 | "nbformat": 4, 139 | "nbformat_minor": 1 140 | } 141 | -------------------------------------------------------------------------------- /notebooks/07_06b.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Web scraping" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "from bs4 import BeautifulSoup\n", 17 | "import urllib.request\n", 18 | "from IPython.display import HTML\n", 19 | "import re" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "### Scraping a webpage and saving your results\n" 27 | ] 28 | } 29 | ], 30 | "metadata": { 31 | "kernelspec": { 32 | "display_name": "Python 3", 33 | "language": "python", 34 | 
"name": "python3" 35 | }, 36 | "language_info": { 37 | "codemirror_mode": { 38 | "name": "ipython", 39 | "version": 3 40 | }, 41 | "file_extension": ".py", 42 | "mimetype": "text/x-python", 43 | "name": "python", 44 | "nbconvert_exporter": "python", 45 | "pygments_lexer": "ipython3", 46 | "version": "3.8.8" 47 | } 48 | }, 49 | "nbformat": 4, 50 | "nbformat_minor": 1 51 | } 52 | -------------------------------------------------------------------------------- /notebooks/07_06e.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Web scraping" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "from bs4 import BeautifulSoup\n", 17 | "import urllib.request\n", 18 | "from IPython.display import HTML\n", 19 | "import re" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/plain": [ 30 | "bs4.BeautifulSoup" 31 | ] 32 | }, 33 | "execution_count": 2, 34 | "metadata": {}, 35 | "output_type": "execute_result" 36 | } 37 | ], 38 | "source": [ 39 | "r = urllib.request.urlopen('https://analytics.usa.gov').read()\n", 40 | "soup = BeautifulSoup(r, 'html.parser')\n", 41 | "type(soup)" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "### Scraping a webpage and saving your results\n" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 3, 54 | "metadata": {}, 55 | "outputs": [ 56 | { 57 | "name": "stdout", 58 | "output_type": "stream", 59 | "text": [ 60 | "\n", 61 | "\n", 62 | " \n", 63 | " \n", 64 | " \n", 446 | " \n", 447 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " API\n", 489 | "Download the full dataset.\n", 490 | "Download the full dataset.\n", 491 | "Digital Analytics 
Program\n", 492 | "does not track individuals\n", 493 | "anonymizes the IP addresses\n", 494 | "520 executive branch government second level domains\n", 495 | "about 6,800 total hostnames\n", 496 | "API\n", 497 | "\n", 498 | "\"Github\n", 499 | " Suggest a feature or report an issue\n", 500 | " \n", 501 | "\n", 502 | "\"Github\n", 503 | " View application code on GitHub\n", 504 | "\n", 505 | "\"Github\n", 506 | " View code for the data on GitHub\n", 507 | "\n", 508 | "\"GSA\"\n", 509 | "\n", 510 | "Digital Analytics Program\n", 511 | "cloud.gov\n" 512 | ] 513 | } 514 | ], 515 | "source": [ 516 | "for link in soup.find_all('a', attrs = {'href': re.compile('^http')}):\n", 517 | " print(link)" 518 | ] 519 | }, 520 | { 521 | "cell_type": "code", 522 | "execution_count": 8, 523 | "metadata": {}, 524 | "outputs": [ 525 | { 526 | "data": { 527 | "text/plain": [ 528 | "bs4.element.Tag" 529 | ] 530 | }, 531 | "execution_count": 8, 532 | "metadata": {}, 533 | "output_type": "execute_result" 534 | } 535 | ], 536 | "source": [ 537 | "type(link)" 538 | ] 539 | }, 540 | { 541 | "cell_type": "code", 542 | "execution_count": 9, 543 | "metadata": {}, 544 | "outputs": [ 545 | { 546 | "name": "stdout", 547 | "output_type": "stream", 548 | "text": [ 549 | "API\n", 550 | "Download the full dataset.\n", 551 | "Download the full dataset.\n", 552 | "Digital Analytics Program\n", 553 | "does not track individuals\n", 554 | "anonymizes the IP addresses\n", 555 | "520 executive branch government second level domains\n", 556 | "about 6,800 total hostnames\n", 557 | "API\n", 558 | "\n", 559 | "\"Github\n", 560 | " Suggest a feature or report an issue\n", 561 | " \n", 562 | "\n", 563 | "\"Github\n", 564 | " View application code on GitHub\n", 565 | "\n", 566 | "\"Github\n", 567 | " View code for the data on GitHub\n", 568 | "\n", 569 | "\"GSA\"\n", 570 | "\n", 571 | "Digital Analytics Program\n", 572 | "cloud.gov\n" 573 | ] 574 | } 575 | ], 576 | "source": [ 577 | "file = 
open('parsed_data.txt', 'w')\n", 578 | "for link in soup.find_all('a', attrs = {'href': re.compile('^http')}):\n", 579 | " soup_link = str(link)\n", 580 | " print(soup_link)\n", 581 | " file.write(soup_link)\n", 582 | "file.flush()\n", 583 | "file.close()" 584 | ] 585 | }, 586 | { 587 | "cell_type": "code", 588 | "execution_count": 10, 589 | "metadata": {}, 590 | "outputs": [ 591 | { 592 | "data": { 593 | "text/plain": [ 594 | "'/workspaces/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/notebooks'" 595 | ] 596 | }, 597 | "execution_count": 10, 598 | "metadata": {}, 599 | "output_type": "execute_result" 600 | } 601 | ], 602 | "source": [ 603 | "%pwd" 604 | ] 605 | }, 606 | { 607 | "cell_type": "code", 608 | "execution_count": null, 609 | "metadata": {}, 610 | "outputs": [], 611 | "source": [] 612 | } 613 | ], 614 | "metadata": { 615 | "kernelspec": { 616 | "display_name": "Python 3", 617 | "language": "python", 618 | "name": "python3" 619 | }, 620 | "language_info": { 621 | "codemirror_mode": { 622 | "name": "ipython", 623 | "version": 3 624 | }, 625 | "file_extension": ".py", 626 | "mimetype": "text/x-python", 627 | "name": "python", 628 | "nbconvert_exporter": "python", 629 | "pygments_lexer": "ipython3", 630 | "version": "3.10.13" 631 | } 632 | }, 633 | "nbformat": 4, 634 | "nbformat_minor": 1 635 | } 636 | -------------------------------------------------------------------------------- /notebooks/07_07b.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "7252adbf-4476-49ed-b1bb-c51604a2e729", 6 | "metadata": {}, 7 | "source": [ 8 | "## Asynchronous scraping" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 7, 14 | "id": "cb191576-cfe0-4823-aeb8-2ce0ed7a1cb3", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import aiohttp\n", 19 | "import asyncio\n", 20 | "from bs4 import BeautifulSoup\n", 21 
| "import csv\n", 22 | "import re" 23 | ] 24 | } 25 | ], 26 | "metadata": { 27 | "kernelspec": { 28 | "display_name": "Python 3", 29 | "language": "python", 30 | "name": "python3" 31 | }, 32 | "language_info": { 33 | "codemirror_mode": { 34 | "name": "ipython", 35 | "version": 3 36 | }, 37 | "file_extension": ".py", 38 | "mimetype": "text/x-python", 39 | "name": "python", 40 | "nbconvert_exporter": "python", 41 | "pygments_lexer": "ipython3", 42 | "version": "3.8.8" 43 | } 44 | }, 45 | "nbformat": 4, 46 | "nbformat_minor": 5 47 | } 48 | -------------------------------------------------------------------------------- /notebooks/07_07e.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "7252adbf-4476-49ed-b1bb-c51604a2e729", 6 | "metadata": {}, 7 | "source": [ 8 | "## Asynchronous scraping" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 11, 14 | "id": "74ea2bda", 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | "Requirement already satisfied: aiohttp in /usr/local/python/3.10.13/lib/python3.10/site-packages (3.9.1)\n", 22 | "Requirement already satisfied: attrs>=17.3.0 in /home/codespace/.local/lib/python3.10/site-packages (from aiohttp) (23.1.0)\n", 23 | "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/python/3.10.13/lib/python3.10/site-packages (from aiohttp) (6.0.4)\n", 24 | "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/python/3.10.13/lib/python3.10/site-packages (from aiohttp) (1.9.4)\n", 25 | "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/python/3.10.13/lib/python3.10/site-packages (from aiohttp) (1.4.0)\n", 26 | "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/python/3.10.13/lib/python3.10/site-packages (from aiohttp) (1.3.1)\n", 27 | "Requirement already satisfied: async-timeout<5.0,>=4.0 in 
/usr/local/python/3.10.13/lib/python3.10/site-packages (from aiohttp) (4.0.3)\n", 28 | "Requirement already satisfied: idna>=2.0 in /home/codespace/.local/lib/python3.10/site-packages (from yarl<2.0,>=1.0->aiohttp) (3.4)\n", 29 | "Note: you may need to restart the kernel to use updated packages.\n" 30 | ] 31 | } 32 | ], 33 | "source": [ 34 | "pip install aiohttp" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 12, 40 | "id": "c5ec3398", 41 | "metadata": {}, 42 | "outputs": [ 43 | { 44 | "name": "stdout", 45 | "output_type": "stream", 46 | "text": [ 47 | "Collecting asyncio\n", 48 | " Downloading asyncio-3.4.3-py3-none-any.whl (101 kB)\n", 49 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m101.8/101.8 kB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", 50 | "\u001b[?25hInstalling collected packages: asyncio\n", 51 | "Successfully installed asyncio-3.4.3\n", 52 | "Note: you may need to restart the kernel to use updated packages.\n" 53 | ] 54 | } 55 | ], 56 | "source": [ 57 | "pip install asyncio" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 13, 63 | "id": "cb191576-cfe0-4823-aeb8-2ce0ed7a1cb3", 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "import aiohttp\n", 68 | "import asyncio\n", 69 | "from bs4 import BeautifulSoup\n", 70 | "import csv\n", 71 | "import re" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 14, 77 | "id": "eea5fbf1", 78 | "metadata": {}, 79 | "outputs": [ 80 | { 81 | "name": "stdout", 82 | "output_type": "stream", 83 | "text": [ 84 | "Requirement already satisfied: nest-asyncio in /home/codespace/.local/lib/python3.10/site-packages (1.5.8)\n" 85 | ] 86 | } 87 | ], 88 | "source": [ 89 | "!pip install nest-asyncio\n", 90 | "import nest_asyncio\n", 91 | "nest_asyncio.apply()" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 46, 97 | "id": "fa025c7d", 98 | 
"metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "async def scrap_and_save_links(text):\n", 102 | " soup = BeautifulSoup(text, 'html.parser')\n", 103 | " file = open('csv_file', 'a', newline='')\n", 104 | " writer= csv.writer(file, delimiter=',')\n", 105 | " for link in soup.findAll('a', attrs={'href': re.compile('^http')}):\n", 106 | " link = link.get('href')\n", 107 | " writer.writerow([link])\n", 108 | " file.close()" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 47, 114 | "id": "c2176161", 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "async def fetch(session, url):\n", 119 | " try:\n", 120 | " async with session.get(url) as response:\n", 121 | " text= await response.text()\n", 122 | " task = asyncio.create_task(scrap_and_save_links(text))\n", 123 | " await task\n", 124 | " except Exception as e:\n", 125 | " print(str(e))" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 48, 131 | "id": "26ead7cb", 132 | "metadata": {}, 133 | "outputs": [], 134 | "source": [ 135 | "async def scrap(urls):\n", 136 | " tasks = []\n", 137 | " async with aiohttp.ClientSession() as session:\n", 138 | " for url in urls:\n", 139 | " tasks.append(fetch(session,url))\n", 140 | " await asyncio.gather(*tasks)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 49, 146 | "id": "039cea48", 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "urls = ['https://analytics.usa.gov/', 'https://www.python.org/', 'https://www.linkedin.com/']\n", 151 | "asyncio.run(scrap(urls=urls))" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "id": "e2ba7d90", 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [] 161 | } 162 | ], 163 | "metadata": { 164 | "kernelspec": { 165 | "display_name": "Python 3", 166 | "language": "python", 167 | "name": "python3" 168 | }, 169 | "language_info": { 170 | "codemirror_mode": { 171 | "name": 
"ipython", 172 | "version": 3 173 | }, 174 | "file_extension": ".py", 175 | "mimetype": "text/x-python", 176 | "name": "python", 177 | "nbconvert_exporter": "python", 178 | "pygments_lexer": "ipython3", 179 | "version": "3.10.13" 180 | } 181 | }, 182 | "nbformat": 4, 183 | "nbformat_minor": 5 184 | } 185 | -------------------------------------------------------------------------------- /notebooks/parsed_data.txt: -------------------------------------------------------------------------------- 1 | APIDownload the full dataset.Download the full dataset.Digital Analytics Programdoes not track individualsanonymizes the IP addresses520 executive branch government second level domainsabout 6,800 total hostnamesAPI 2 | Github Icon 3 | Suggest a feature or report an issue 4 | 5 | Github Icon 6 | View application code on GitHub 7 | Github Icon 8 | View code for the data on GitHub 9 | GSA 10 | Digital Analytics Programcloud.gov -------------------------------------------------------------------------------- /notebooks/pie_chart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LinkedInLearning/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/ba4a06149dcd9e5592eae5c662a97787aab43880/notebooks/pie_chart.png -------------------------------------------------------------------------------- /notebooks/test: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Specify Python package requirements for your project here (e.g., Mako==1.1.1). If your project doesn't require these, you can leave this file unchanged or delete it. 
2 | -------------------------------------------------------------------------------- /streamlit/08_02b.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LinkedInLearning/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/ba4a06149dcd9e5592eae5c662a97787aab43880/streamlit/08_02b.py -------------------------------------------------------------------------------- /streamlit/08_02e.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | 3 | st.write('Hello World!') -------------------------------------------------------------------------------- /streamlit/08_03b.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LinkedInLearning/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/ba4a06149dcd9e5592eae5c662a97787aab43880/streamlit/08_03b.py -------------------------------------------------------------------------------- /streamlit/08_03e.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import streamlit as st 4 | import matplotlib.pyplot as plt 5 | 6 | col_names = ["column1","column2","column3"] 7 | 8 | data = pd.DataFrame(np.random.randint(30, size=(30, 3)),columns=col_names) 9 | 10 | 'line graph:' 11 | st.line_chart(data) 12 | 13 | 'bar graph:' 14 | st.bar_chart(data) 15 | 16 | animals = ['cat', 'cow', 'dog'] 17 | heights = [30, 150, 80] 18 | 19 | 'pie chart:' 20 | fig, ax = plt.subplots() 21 | ax.pie(heights,labels=animals) 22 | 23 | st.pyplot(fig) 24 | -------------------------------------------------------------------------------- /streamlit/08_04b.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/LinkedInLearning/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/ba4a06149dcd9e5592eae5c662a97787aab43880/streamlit/08_04b.py -------------------------------------------------------------------------------- /streamlit/08_04e.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | import pandas as pd 4 | import streamlit as st 5 | import matplotlib.pyplot as plt 6 | 7 | rows = np.random.randn(1,1) 8 | 9 | 'Growing Line Chart:' 10 | chart = st.line_chart(rows) 11 | 12 | for i in range(1, 100): 13 | new_rows = rows[0] + np.random.randn(1,1) 14 | chart.add_rows(new_rows) 15 | rows= new_rows 16 | time.sleep(0.05) 17 | 18 | 19 | values = np.random.rand(10) 20 | 'matplotlibs Line Chart:' 21 | fig, ax = plt.subplots() 22 | ax.plot(values) 23 | st.pyplot(fig) 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /streamlit/08_05b.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /streamlit/08_05e.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import streamlit as st 4 | import matplotlib.pyplot as plt 5 | 6 | animals = ['cat', 'cow', 'dog', 'goat'] 7 | heights = [30, 150, 80, 60] 8 | weights = [5, 400, 40, 50] 9 | 10 | fig, ax = plt.subplots() 11 | 12 | x = np.arange(len(heights)) 13 | width = 0.40 14 | 15 | ax.bar(x-0.2, heights, width, color='red') 16 | ax.bar(x+0.2, weights, width, color='orange') 17 | 18 | ax.legend(['height', 'weight']) 19 | ax.set_xticks(x) 20 | ax.set_xticklabels(animals) 21 | 22 | st.pyplot(fig) 23 | 24 | explode = [0.2, 0.1, 0.1, 0.1] 25 | plot_pie, ax = plt.subplots() 26 | ax.pie(heights, explode = explode, labels=animals, 
autopct='%1.1f%%', shadow = True) 27 | ax.axis('equal') 28 | st.pyplot(plot_pie) -------------------------------------------------------------------------------- /streamlit/08_06b.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LinkedInLearning/python-for-data-science-and-machine-learning-essential-training-part-1-3006708/ba4a06149dcd9e5592eae5c662a97787aab43880/streamlit/08_06b.py -------------------------------------------------------------------------------- /streamlit/08_06e.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import seaborn as sns 3 | import matplotlib.pyplot as plt 4 | import pandas as pd 5 | from sklearn.datasets import load_iris 6 | 7 | iris_data = load_iris() 8 | 9 | data = pd.DataFrame(iris_data.data, columns = iris_data.feature_names) 10 | 11 | fig = plt.figure() 12 | sns.histplot(data=data, bins=20) 13 | st.pyplot(fig) 14 | 15 | fig = plt.figure() 16 | sns.boxplot(data=data) 17 | st.pyplot(fig) 18 | 19 | fig =plt.figure() 20 | sns.scatterplot(data=data) 21 | st.pyplot(fig) -------------------------------------------------------------------------------- /test: -------------------------------------------------------------------------------- 1 | 2 | --------------------------------------------------------------------------------