├── .github ├── dependabot.yml └── workflows │ ├── autopep8.yml │ ├── bandit.yml │ ├── codeql.yml │ └── scorecard.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── MLOps_Professional ├── lab1 │ ├── env │ │ └── conda.yml │ └── sample │ │ ├── data_model.py │ │ ├── dockerfile │ │ ├── maintenance.py │ │ ├── requirements.txt │ │ └── serve.py ├── lab2 │ └── README.md ├── lab3 │ ├── env │ │ └── conda.yml │ └── sample │ │ ├── data_model.py │ │ ├── generate_data.py │ │ ├── serve.py │ │ └── train.py ├── lab4 │ ├── env │ │ └── conda.yml │ └── sample │ │ ├── data_model.py │ │ ├── generate_data.py │ │ ├── inference.py │ │ ├── serve.py │ │ └── train.py ├── lab5 │ ├── env │ │ └── README.md │ └── sample │ │ └── IntelPyTorch_Optimizations.py ├── lab6 │ ├── env │ │ └── conda.yml │ └── sample │ │ └── Falcon_HF_Pipelines.py ├── lab7 │ ├── env │ │ └── conda.yml │ └── sample │ │ ├── data_model.py │ │ ├── generate_data.py │ │ ├── inference.py │ │ ├── serve.py │ │ └── train.py ├── lab8 │ ├── README.md │ └── sample │ │ ├── Dockerfile │ │ ├── PickerBot.py │ │ ├── __init__.py │ │ ├── model.py │ │ ├── models │ │ └── pickerbot │ │ │ └── README.md │ │ ├── requirements.txt │ │ └── serve.py ├── lab9 │ ├── README.md │ └── sample │ │ ├── Part I - Leveraging Intel Optimizations with Hugging Face for Enhanced Model Performance.ipynb │ │ └── Part II - Uploading and Sharing Models on Hugging Face Hub with Intel Optimizations.ipynb ├── mlops_capstone │ ├── README.md │ ├── app_frontend │ │ ├── Dockerfile │ │ ├── Home.py │ │ ├── README.md │ │ ├── assets │ │ │ ├── robot_arm.png │ │ │ └── stats.png │ │ ├── pages │ │ │ ├── Monitoring.py │ │ │ └── RoboMaintenance.py │ │ └── requirements.txt │ ├── robot_maintenance │ │ ├── Dockerfile │ │ ├── README.md │ │ └── src │ │ │ ├── __init__.py │ │ │ ├── data_model.py │ │ │ ├── inference.py │ │ │ ├── requirements.txt │ │ │ ├── serve.py │ │ │ ├── train.py │ │ │ └── utils │ │ │ ├── __init__.py │ │ │ └── generate_data.py │ └── setup │ │ ├── Makefile │ │ └── README.md └── workshops │ ├── 01_model_development_basics │ └── model_development_basics.ipynb │ └── 02_llm_pipelines │ └── llm_pipelines.ipynb ├── README.md └── SECURITY.md /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 
3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "pip" # See documentation for possible values 9 | directory: "MLOps_Professional/lab1/sample/" # Location of package manifests 10 | schedule: 11 | interval: "monthly" 12 | - package-ecosystem: "pip" # See documentation for possible values 13 | directory: "MLOps_Professional/lab8/sample/" # Location of package manifests 14 | schedule: 15 | interval: "monthly" 16 | - package-ecosystem: "pip" # See documentation for possible values 17 | directory: "MLOps_Professional/mlops_capstone/robot_maintenance/src/" # Location of package manifests 18 | schedule: 19 | interval: "monthly" 20 | - package-ecosystem: "pip" # See documentation for possible values 21 | directory: "MLOps_Professional/mlops_capstone/app_frontend/" # Location of package manifests 22 | schedule: 23 | interval: "monthly" 24 | -------------------------------------------------------------------------------- /.github/workflows/autopep8.yml: -------------------------------------------------------------------------------- 1 | name: Check code formatting with autopep8 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: [ "main" ] 7 | pull_request: 8 | # The branches below must be a subset of the branches above 9 | branches: [ "main" ] 10 | 11 | 12 | permissions: 13 | contents: read 14 | 15 | jobs: 16 | python-code-format: 17 | runs-on: ubuntu-latest 18 | steps: 19 | - name: Harden Runner 20 | uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 21 | with: 22 | egress-policy: audit 23 | 24 | - name: Checkout code 25 | uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 26 | 27 | - name: Setup python 28 | uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 29 | with: 30 | python-version: "3.10" 31 | architecture: "x64" 32 | 33 | - name: Display Python Version 34 | run: python --version 35 | 36 | - name: Install packages 37 | run: pip install autopep8 38 | 39 | - name: Check code formatting style 40 | run: | 41 | autopep8 --exit-code --recursive --in-place --aggressive --aggressive . 
--verbose --max-line-length 200 42 | -------------------------------------------------------------------------------- /.github/workflows/bandit.yml: -------------------------------------------------------------------------------- 1 | name: Bandit Scan 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | # The branches below must be a subset of the branches above 8 | branches: [ "main" ] 9 | schedule: 10 | - cron: '40 5 * * 5' 11 | workflow_dispatch: 12 | 13 | 14 | permissions: 15 | contents: read 16 | 17 | jobs: 18 | bandit_scan: 19 | runs-on: ubuntu-latest 20 | permissions: 21 | security-events: write 22 | steps: 23 | - name: Harden Runner 24 | uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 25 | with: 26 | egress-policy: audit 27 | 28 | - name: Perform Bandit Analysis 29 | uses: PyCQA/bandit-action@8a1b30610f61f3f792fe7556e888c9d7dffa52de # v1.0.0 30 | with: 31 | severity: "all" 32 | confidence: "all" 33 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 11 | # 12 | name: "CodeQL Advanced" 13 | 14 | on: 15 | workflow_dispatch: 16 | push: 17 | branches: [ "main" ] 18 | pull_request: 19 | branches: [ "main" ] 20 | schedule: 21 | - cron: '25 0 * * 2' 22 | 23 | permissions: 24 | contents: read 25 | 26 | jobs: 27 | analyze: 28 | name: Analyze (${{ matrix.language }}) 29 | # Runner size impacts CodeQL analysis time. To learn more, please see: 30 | # - https://gh.io/recommended-hardware-resources-for-running-codeql 31 | # - https://gh.io/supported-runners-and-hardware-resources 32 | # - https://gh.io/using-larger-runners (GitHub.com only) 33 | # Consider using larger runners or machines with greater resources for possible analysis time improvements. 34 | runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} 35 | permissions: 36 | # required for all workflows 37 | security-events: write 38 | 39 | # required to fetch internal or private CodeQL packs 40 | packages: read 41 | 42 | # only required for workflows in private repositories 43 | actions: read 44 | contents: read 45 | 46 | strategy: 47 | fail-fast: false 48 | matrix: 49 | include: 50 | - language: python 51 | build-mode: none 52 | # CodeQL supports the following values keywords for 'language': 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' 53 | # Use `c-cpp` to analyze code written in C, C++ or both 54 | # Use 'java-kotlin' to analyze code written in Java, Kotlin or both 55 | # Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both 56 | # To learn more about changing the languages that are analyzed or customizing the build mode for your analysis, 57 | # see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning. 
58 | # If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how 59 | # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages 60 | steps: 61 | - name: Harden Runner 62 | uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 63 | with: 64 | egress-policy: audit 65 | 66 | - name: Checkout repository 67 | uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 68 | 69 | # Initializes the CodeQL tools for scanning. 70 | - name: Initialize CodeQL 71 | uses: github/codeql-action/init@294a9d92911152fe08befb9ec03e240add280cb3 # v3.26.8 72 | with: 73 | languages: ${{ matrix.language }} 74 | build-mode: ${{ matrix.build-mode }} 75 | # If you wish to specify custom queries, you can do so here or in a config file. 76 | # By default, queries listed here will override any specified in a config file. 77 | # Prefix the list here with "+" to use these queries and those in the config file. 78 | 79 | # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs 80 | # queries: security-extended,security-and-quality 81 | 82 | # If the analyze step fails for one of the languages you are analyzing with 83 | # "We were unable to automatically build your code", modify the matrix above 84 | # to set the build mode to "manual" for that language. Then modify this step 85 | # to build your code. 86 | # ℹ️ Command-line programs to run using the OS shell. 87 | # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun 88 | - if: matrix.build-mode == 'manual' 89 | shell: bash 90 | run: | 91 | echo 'If you are using a "manual" build mode for one or more of the' \ 92 | 'languages you are analyzing, replace this with the commands to build' \ 93 | 'your code, for example:' 94 | echo ' make bootstrap' 95 | echo ' make release' 96 | exit 1 97 | 98 | - name: Perform CodeQL Analysis 99 | uses: github/codeql-action/analyze@294a9d92911152fe08befb9ec03e240add280cb3 # v3.26.8 100 | with: 101 | category: "/language:${{matrix.language}}" 102 | - name: Generate Security Report 103 | uses: rsdmike/github-security-report-action@a149b24539044c92786ec39af8ba38c93496495d # v3.0.4 104 | with: 105 | template: report 106 | token: ${{ secrets.CODEQL_GENERATE_SECURITY_REPORT_TOKEN_2 }} 107 | - name: GitHub Upload Release Artifacts 108 | uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 109 | with: 110 | name: report 111 | path: | 112 | ./report.pdf 113 | -------------------------------------------------------------------------------- /.github/workflows/scorecard.yml: -------------------------------------------------------------------------------- 1 | # Scorecard analysis, looking for vulnerabilities and bad practices in the repo. 
2 | name: Scorecard supply-chain security 3 | on: 4 | branch_protection_rule: 5 | workflow_dispatch: 6 | schedule: 7 | - cron: '45 22 * * 5' 8 | push: 9 | branches: [ "main" ] 10 | 11 | permissions: read-all 12 | 13 | jobs: 14 | analysis: 15 | name: Scorecard analysis 16 | runs-on: ubuntu-latest 17 | permissions: 18 | security-events: write 19 | id-token: write 20 | 21 | steps: 22 | - name: Harden Runner 23 | uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 24 | with: 25 | egress-policy: audit 26 | 27 | - name: "Checkout code" 28 | uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 29 | with: 30 | persist-credentials: false 31 | 32 | - name: "Run analysis" 33 | uses: ossf/scorecard-action@62b2cac7ed8198b15735ed49ab1e5cf35480ba46 # v2.4.0 34 | with: 35 | results_file: scorecard_results.sarif 36 | results_format: sarif 37 | publish_results: true 38 | 39 | - name: "Upload artifact" 40 | uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # 4.3.6 41 | with: 42 | name: Scorecard results 43 | path: scorecard_results.sarif 44 | retention-days: 5 45 | 46 | # Upload the results to GitHub's code scanning dashboard. 47 | - name: "Upload to code-scanning" 48 | uses: github/codeql-action/upload-sarif@05963f47d870e2cb19a537396c1f668a348c7d8f # v3.24.8 49 | with: 50 | sarif_file: scorecard_results.sarif 51 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | .DS* 3 | # Python 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # Conda 9 | .env/ 10 | .conda/ 11 | conda_env/ 12 | .ipynb_checkpoints/ -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, caste, color, religion, or sexual 10 | identity and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the overall 26 | community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or advances of 31 | any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email address, 35 | without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | CommunityCodeOfConduct AT intel DOT com. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series of 86 | actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. 
This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or permanent 93 | ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within the 113 | community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.1, available at 119 | [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. 120 | 121 | Community Impact Guidelines were inspired by 122 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC]. 123 | 124 | For answers to common questions about this code of conduct, see the FAQ at 125 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at 126 | [https://www.contributor-covenant.org/translations][translations]. 127 | 128 | [homepage]: https://www.contributor-covenant.org 129 | [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html 130 | [Mozilla CoC]: https://github.com/mozilla/diversity 131 | [FAQ]: https://www.contributor-covenant.org/faq 132 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | ### License 4 | 5 | is licensed under the terms in [LICENSE]. By contributing to the project, you agree to the license and copyright terms therein and release your contribution under these terms. 6 | 7 | ### Sign your work 8 | 9 | Please use the sign-off line at the end of the patch. Your signature certifies that you wrote the patch or otherwise have the right to pass it on as an open-source patch. The rules are pretty simple: if you can certify 10 | the below (from [developercertificate.org](http://developercertificate.org/)): 11 | 12 | ``` 13 | Developer Certificate of Origin 14 | Version 1.1 15 | 16 | Copyright (C) 2004, 2006 The Linux Foundation and its contributors. 17 | 660 York Street, Suite 102, 18 | San Francisco, CA 94110 USA 19 | 20 | Everyone is permitted to copy and distribute verbatim copies of this 21 | license document, but changing it is not allowed. 
22 | 23 | Developer's Certificate of Origin 1.1 24 | 25 | By making a contribution to this project, I certify that: 26 | 27 | (a) The contribution was created in whole or in part by me and I 28 | have the right to submit it under the open source license 29 | indicated in the file; or 30 | 31 | (b) The contribution is based upon previous work that, to the best 32 | of my knowledge, is covered under an appropriate open source 33 | license and I have the right under that license to submit that 34 | work with modifications, whether created in whole or in part 35 | by me, under the same open source license (unless I am 36 | permitted to submit under a different license), as indicated 37 | in the file; or 38 | 39 | (c) The contribution was provided directly to me by some other 40 | person who certified (a), (b) or (c) and I have not modified 41 | it. 42 | 43 | (d) I understand and agree that this project and the contribution 44 | are public and that a record of the contribution (including all 45 | personal information I submit with it, including my sign-off) is 46 | maintained indefinitely and may be redistributed consistent with 47 | this project or the open source license(s) involved. 48 | ``` 49 | 50 | Then you just add a line to every git commit message: 51 | 52 | Signed-off-by: Joe Smith 53 | 54 | Use your real name (sorry, no pseudonyms or anonymous contributions.) 55 | 56 | If you set your `user.name` and `user.email` git configs, you can sign your 57 | commit automatically with `git commit -s`. 58 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2022, Intel Corporation 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are met: 5 | 6 | * Redistributions of source code must retain the above copyright notice, 7 | this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | * Neither the name of Intel Corporation nor the names of its contributors 12 | may be used to endorse or promote products derived from this software 13 | without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 19 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | -------------------------------------------------------------------------------- /MLOps_Professional/lab1/env/conda.yml: -------------------------------------------------------------------------------- 1 | name: lab1 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.9.13 6 | - pip: 7 | - fastapi==0.103.0 8 | - uvicorn==0.23.2 9 | - pydantic==2.3.0 10 | -------------------------------------------------------------------------------- /MLOps_Professional/lab1/sample/data_model.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | 4 | class MaintenancePayload(BaseModel): 5 | """ 6 | Model for representing maintenance data. 7 | 8 | Attributes: 9 | temperature (int): The temperature value. 10 | """ 11 | 12 | temperature: int 13 | -------------------------------------------------------------------------------- /MLOps_Professional/lab1/sample/dockerfile: -------------------------------------------------------------------------------- 1 | FROM public.ecr.aws/docker/library/python:3.8@sha256:d411270700143fa2683cc8264d9fa5d3279fd3b6afff62ae81ea2f9d070e390c 2 | 3 | # Create a non-root user 4 | RUN useradd -m nonrootuser 5 | 6 | # Switch to the non-root user 7 | USER nonrootuser 8 | 9 | # copy assets over to image 10 | COPY . /sample 11 | 12 | # set the working directory 13 | WORKDIR /sample 14 | 15 | # install dependancies 16 | RUN pip3 install --user --no-cache-dir -r requirements.txt 17 | 18 | # set PATH 19 | ENV PATH=.local/bin:$PATH 20 | 21 | # exposing endpoint port 22 | EXPOSE 5000 23 | 24 | ENTRYPOINT ["python", "serve.py"] -------------------------------------------------------------------------------- /MLOps_Professional/lab1/sample/maintenance.py: -------------------------------------------------------------------------------- 1 | # maintenance test business logic 2 | 3 | 4 | def test_maintenance(temperature: int) -> str: 5 | """Tests the maintenance status based on temperature sensor readings. 6 | 7 | Args: 8 | temperature (int): Test parameter for temperature sensor readings. 9 | 10 | Returns: 11 | str: 'Needs Maintenance' if temperature is greater than 50, otherwise 'No Maintenance Required'. 
12 | """ 13 | maintenance_status = ( 14 | "Needs Maintenance" if temperature > 50 else "No Maintenance Required" 15 | ) 16 | 17 | return maintenance_status 18 | -------------------------------------------------------------------------------- /MLOps_Professional/lab1/sample/requirements.txt: -------------------------------------------------------------------------------- 1 | pydantic==2.11.1 2 | fastapi==0.115.12 3 | uvicorn==0.34.0 -------------------------------------------------------------------------------- /MLOps_Professional/lab1/sample/serve.py: -------------------------------------------------------------------------------- 1 | import uvicorn 2 | import logging 3 | import warnings 4 | 5 | from fastapi import FastAPI, HTTPException 6 | from data_model import MaintenancePayload 7 | from maintenance import test_maintenance 8 | 9 | 10 | app = FastAPI() 11 | 12 | logging.basicConfig(level=logging.DEBUG) 13 | logger = logging.getLogger(__name__) 14 | warnings.filterwarnings("ignore") 15 | 16 | 17 | @app.get("/ping") 18 | async def ping() -> dict: 19 | """Ping server to determine status 20 | 21 | Returns: 22 | dict: API response 23 | response from server on health status 24 | """ 25 | return {"message": "Server is Running"} 26 | 27 | 28 | @app.post("/maintenance") 29 | async def predict(payload: MaintenancePayload) -> dict: 30 | """ 31 | Predicts the maintenance status based on the given payload. 32 | 33 | Args: 34 | payload (MaintenancePayload): The payload containing the temperature data. 35 | 36 | Returns: 37 | dict: A dictionary containing the message and maintenance status. 38 | """ 39 | try: 40 | # Validate temperature 41 | if not isinstance(payload.temperature, (int, float)): 42 | raise ValueError("Invalid temperature. It should be a number.") 43 | 44 | maintenance_result = test_maintenance(payload.temperature) 45 | return {"msg": "Completed Analysis", "Maintenance Status": maintenance_result} 46 | except ValueError as e: 47 | logger.error(f"Validation error: {e}") 48 | raise HTTPException(status_code=400, detail=str(e)) 49 | except Exception as e: 50 | logger.error(f"Unexpected error: {e}") 51 | raise HTTPException(status_code=500, detail="Internal Server Error") 52 | 53 | 54 | if __name__ == "__main__": 55 | """Main entry point for the server. 56 | 57 | This block runs the FastAPI application using Uvicorn. 58 | """ 59 | try: 60 | uvicorn.run("serve:app", host="127.0.0.1", port=5000, log_level="info") 61 | except Exception as e: 62 | logger.error(f"Failed to start server: {e}") 63 | -------------------------------------------------------------------------------- /MLOps_Professional/lab2/README.md: -------------------------------------------------------------------------------- 1 | # Lab 2: Creating Architecture Diagrams from Application Specs 2 | Please reference lab instructions in the learning management system to complete this lab. 
-------------------------------------------------------------------------------- /MLOps_Professional/lab3/env/conda.yml: -------------------------------------------------------------------------------- 1 | name: lab3 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.9.13 6 | - pip 7 | - pip: 8 | - fastapi==0.103.0 9 | - uvicorn==0.23.2 10 | - pydantic==2.3.0 11 | - pandas==2.1.0 12 | - numpy==1.25.2 13 | - scikit-learn==1.3.0 14 | - mlflow==2.6.0 15 | - xgboost==1.7.6 16 | -------------------------------------------------------------------------------- /MLOps_Professional/lab3/sample/data_model.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | 4 | class TrainPayload(BaseModel): 5 | """ 6 | Data model for holding the parameters required to train a machine learning model. 7 | 8 | Attributes: 9 | file (str): The path to the training data file. 10 | model_name (str): The name of the machine learning model. 11 | model_path (str): The path where the trained model will be saved. 12 | test_size (int, optional): The size of the test dataset as a percentage. Default is 25. 13 | ncpu (int, optional): The number of CPU cores to use for training. Default is 4. 14 | mlflow_tracking_uri (str): The URI for the MLflow tracking server. 15 | mlflow_new_experiment (str, optional): The name of the new MLflow experiment. Default is None. 16 | mlflow_experiment (str, optional): The name of the existing MLflow experiment. Default is None. 17 | """ 18 | 19 | file: str 20 | model_name: str 21 | model_path: str 22 | test_size: int = 25 23 | ncpu: int = 4 24 | mlflow_tracking_uri: str 25 | mlflow_new_experiment: str = None 26 | mlflow_experiment: str = None 27 | -------------------------------------------------------------------------------- /MLOps_Professional/lab3/sample/generate_data.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module to generate dataset for Predictive Asset Maintenance 3 | """ 4 | 5 | import os 6 | import warnings 7 | import argparse 8 | import logging 9 | import time 10 | import pandas as pd 11 | import numpy as np 12 | 13 | logging.basicConfig(level=logging.DEBUG) 14 | logger = logging.getLogger(__name__) 15 | warnings.filterwarnings("ignore") 16 | 17 | SAFE_BASE_DIR = os.path.join(os.path.expanduser("~"), "mlops", "lab3") 18 | 19 | 20 | def validate_inputs(size: int, save_path: str) -> None: 21 | """Validates the command line inputs. 22 | 23 | Args: 24 | size (int): The size of the dataset to generate. 25 | save_path (str): The path to save the generated dataset. 26 | 27 | Raises: 28 | ValueError: If any of the inputs are invalid. 29 | """ 30 | if not isinstance(size, int) or size <= 0: 31 | raise ValueError("Invalid size. It should be a positive integer.") 32 | if not isinstance(save_path, str) or not save_path: 33 | raise ValueError("Invalid save path. 
It should be a non-empty string.") 34 | 35 | 36 | parser = argparse.ArgumentParser() 37 | parser.add_argument( 38 | "-s", "--size", type=int, required=False, default=25000, help="data size" 39 | ) 40 | parser.add_argument( 41 | "-p", 42 | "--save_path", 43 | type=str, 44 | required=True, 45 | help="path to the output Parquet file within the safe directory", 46 | ) 47 | FLAGS = parser.parse_args() 48 | 49 | # Validate inputs 50 | try: 51 | validate_inputs(FLAGS.size, FLAGS.save_path) 52 | except ValueError as e: 53 | logger.error(f"Validation error: {e}") 54 | raise 55 | 56 | dsize = FLAGS.size 57 | train_path = FLAGS.save_path 58 | train_path = os.path.abspath(os.path.normpath(os.path.join(SAFE_BASE_DIR, train_path))) 59 | 60 | # Ensure train_path is still inside SAFE_BASE_DIR 61 | if not train_path.startswith(os.path.abspath(SAFE_BASE_DIR) + os.sep): 62 | raise ValueError(f"Path is not within the allowed directory {SAFE_BASE_DIR}") 63 | 64 | # Ensure the directory exists before saving 65 | os.makedirs(os.path.dirname(train_path), exist_ok=True) 66 | 67 | # Generating our data 68 | start = time.time() 69 | logger.info("Generating data with the size %d", dsize) 70 | np.random.seed(1) 71 | manufacturer_list = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J"] 72 | generation_list = ["Gen1", "Gen2", "Gen3", "Gen4"] 73 | lubrication_type_list = ["LTA", "LTB", "LTC"] 74 | product_assignment_list = ["Gala", "Golden_Delicious", "Granny_Smith"] 75 | data = pd.DataFrame( 76 | { 77 | "Age": np.random.choice(range(0, 25), dsize, replace=True), 78 | "Temperature": np.random.randint(low=50, high=300, size=dsize), 79 | "Last_Maintenance": np.random.normal(0, 60, size=dsize), 80 | "Motor_Current": np.random.randint(low=0.00, high=10.00, size=dsize), 81 | "Manufacturer": np.random.choice(manufacturer_list, dsize, replace=True), 82 | "Generation": np.random.choice(generation_list, dsize, replace=True), 83 | "Number_Repairs": np.random.choice(range(0, 50), dsize, replace=True), 84 | "Lubrication": np.random.choice(lubrication_type_list, dsize, replace=True), 85 | "Product_Assignment": np.random.choice( 86 | product_assignment_list, dsize, replace=True 87 | ), 88 | } 89 | ) 90 | 91 | # Generating our target variable Asset_Label 92 | logger.info("Generating our target variable Asset_Label") 93 | data["Asset_Label"] = np.random.choice(range(0, 2), dsize, replace=True, p=[0.99, 0.01]) 94 | 95 | # When age is 0-5 and over 20 change Asset_Label to 1 96 | logger.info("Creating correlation between our variables and our target variable") 97 | logger.info("When age is 0-5 and over 20 change Asset_Label to 1") 98 | data["Asset_Label"] = np.where( 99 | ((data.Age > 0) & (data.Age <= 5)) | (data.Age > 20), 1, data.Asset_Label 100 | ) 101 | 102 | # When Temperature is between 150-300 change Asset_Label to 1 103 | logger.info("When Temperature is between 500-1500 change Asset_Label to 1") 104 | data["Asset_Label"] = np.where( 105 | (data.Temperature >= 150) & (data.Temperature <= 300), 1, data.Asset_Label 106 | ) 107 | 108 | # When Manufacturer is A, E, or H change Asset_Label to have 80% 1's 109 | logger.info("When Manufacturer is A, E, or H change Asset_Label to 1") 110 | data["Temp_Var"] = np.random.choice(range(0, 2), dsize, replace=True, p=[0.2, 0.8]) 111 | data["Asset_Label"] = np.where( 112 | (data.Manufacturer == "A") 113 | | (data.Manufacturer == "E") 114 | | (data.Manufacturer == "H"), 115 | data.Temp_Var, 116 | data.Asset_Label, 117 | ) 118 | 119 | # When Generation is Gen1 or Gen3 change Asset_Label to have 
50% to 1's 120 | logger.info("When Generation is Gen1 or Gen3 change Asset_Label to have 50% to 0's") 121 | data["Temp_Var"] = np.random.choice(range(0, 2), dsize, replace=True, p=[0.5, 0.5]) 122 | data["Asset_Label"] = np.where( 123 | (data.Generation == "Gen1") | (data.Generation == "Gen3"), 124 | data.Temp_Var, 125 | data.Asset_Label, 126 | ) 127 | 128 | 129 | # When Product Assignment is Pill B change Asset_Label to have 70% to 1's 130 | logger.info("When District is Pill B change Asset_Label to have 70% to 1's") 131 | data["Temp_Var"] = np.random.choice(range(0, 2), dsize, replace=True, p=[0.3, 0.7]) 132 | data["Asset_Label"] = np.where( 133 | (data.Product_Assignment == "Gala"), data.Temp_Var, data.Asset_Label 134 | ) 135 | 136 | 137 | # When Lubrication is LTC change Asset_Label to have 75% to 1's 138 | logger.info("When Lubrication is LTC change Asset_Label to have 75% to 1's") 139 | data["Temp_Var"] = np.random.choice(range(0, 2), dsize, replace=True, p=[0.25, 0.75]) 140 | data["Asset_Label"] = np.where( 141 | (data.Lubrication == "LTC"), data.Temp_Var, data.Asset_Label 142 | ) 143 | 144 | data.drop("Temp_Var", axis=1, inplace=True) 145 | 146 | Categorical_Variables = pd.get_dummies( 147 | data[["Manufacturer", "Generation", "Lubrication", "Product_Assignment"]], 148 | drop_first=False, 149 | ) 150 | data = pd.concat([data, Categorical_Variables], axis=1) 151 | data.drop( 152 | ["Manufacturer", "Generation", "Lubrication", "Product_Assignment"], 153 | axis=1, 154 | inplace=True, 155 | ) 156 | 157 | data = data.astype({"Motor_Current": "float64", "Number_Repairs": "float64"}) 158 | 159 | etime = time.time() - start 160 | datasize = data.shape 161 | logger.info( 162 | "=====> Time taken %f secs for data generation for the size of %s", etime, datasize 163 | ) 164 | 165 | # save data to parquet file 166 | try: 167 | logger.info("Saving the data to %s ...", train_path) 168 | data.to_parquet(train_path) 169 | logger.info("DONE") 170 | except Exception as e: 171 | logger.error(f"Failed to save data: {e}") 172 | raise 173 | -------------------------------------------------------------------------------- /MLOps_Professional/lab3/sample/serve.py: -------------------------------------------------------------------------------- 1 | import uvicorn 2 | import logging 3 | import warnings 4 | 5 | from fastapi import FastAPI, HTTPException 6 | from data_model import TrainPayload 7 | from train import HarvesterMaintenance 8 | 9 | app = FastAPI() 10 | 11 | logging.basicConfig(level=logging.DEBUG) 12 | logger = logging.getLogger(__name__) 13 | warnings.filterwarnings("ignore") 14 | 15 | 16 | @app.get("/ping") 17 | async def ping() -> dict: 18 | """Ping server to determine status 19 | 20 | Returns: 21 | dict: API response 22 | response from server on health status 23 | """ 24 | return {"message": "Server is Running"} 25 | 26 | 27 | @app.post("/train") 28 | async def train(payload: TrainPayload) -> dict: 29 | """Training Endpoint 30 | This endpoint process raw data and trains an XGBoost Classifier 31 | 32 | Args: 33 | payload (TrainPayload): Training endpoint payload model 34 | 35 | Returns: 36 | dict: Accuracy metrics and other logger feedback on training progress. 37 | """ 38 | try: 39 | # Validate inputs 40 | if not isinstance(payload.model_name, str) or not payload.model_name: 41 | raise ValueError("Invalid model name. It should be a non-empty string.") 42 | if not isinstance(payload.file, str) or not payload.file.endswith(".parquet"): 43 | raise ValueError( 44 | "Invalid file name. 
It should be a string ending with '.parquet'" 45 | ) 46 | if not isinstance(payload.test_size, float) or not (0 < payload.test_size < 1): 47 | raise ValueError("Invalid test size. It should be a float between 0 and 1") 48 | if not isinstance(payload.ncpu, int) or payload.ncpu <= 0: 49 | raise ValueError("Invalid ncpu. It should be a positive integer.") 50 | if not isinstance(payload.model_path, str) or not payload.model_path: 51 | raise ValueError("Invalid model path. It should be a non-empty string.") 52 | 53 | model = HarvesterMaintenance(payload.model_name) 54 | model.mlflow_tracking( 55 | tracking_uri=payload.mlflow_tracking_uri, 56 | new_experiment=payload.mlflow_new_experiment, 57 | experiment=payload.mlflow_experiment, 58 | ) 59 | logger.info("Configured Experiment and Tracking URI for MLFlow") 60 | model.process_data(payload.file, payload.test_size) 61 | logger.info("Data has been successfully processed") 62 | model.train(payload.ncpu) 63 | logger.info("Maintenance Apple Harvester Model Successfully Trained") 64 | model.save(payload.model_path) 65 | logger.info("Saved Harvester Maintenance Model") 66 | accuracy_score = model.validate() 67 | return { 68 | "msg": "Model trained successfully", 69 | "validation scores": accuracy_score, 70 | } 71 | except ValueError as e: 72 | logger.error(f"Validation error: {e}") 73 | raise HTTPException(status_code=400, detail=str(e)) 74 | except Exception as e: 75 | logger.error(f"Unexpected error: {e}") 76 | raise HTTPException(status_code=500, detail="Internal Server Error") 77 | 78 | 79 | if __name__ == "__main__": 80 | """Main entry point for the server. 81 | 82 | This block runs the FastAPI application using Uvicorn. 83 | """ 84 | try: 85 | uvicorn.run("serve:app", host="127.0.0.1", port=5000, log_level="info") 86 | except Exception as e: 87 | logger.error(f"Failed to start server: {e}") 88 | -------------------------------------------------------------------------------- /MLOps_Professional/lab3/sample/train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # pylint: disable=import-error 4 | 5 | """ 6 | Module to train and prediction using XGBoost Classifier 7 | """ 8 | 9 | import os 10 | import sys 11 | import logging 12 | import warnings 13 | import joblib 14 | import mlflow 15 | from werkzeug.utils import secure_filename 16 | import numpy as np 17 | import xgboost as xgb 18 | import pandas as pd 19 | 20 | from sklearn.model_selection import train_test_split 21 | from sklearn.preprocessing import RobustScaler 22 | 23 | 24 | SAFE_BASE_DIR = os.path.join(os.path.expanduser("~"), "mlops", "lab3") 25 | 26 | logging.basicConfig(level=logging.DEBUG) 27 | logger = logging.getLogger(__name__) 28 | warnings.filterwarnings("ignore") 29 | 30 | 31 | class HarvesterMaintenance: 32 | 33 | def __init__(self, model_name: str): 34 | """ 35 | Initializes the model with the given model name and sets up various attributes. 36 | 37 | Args: 38 | model_name (str): The name of the model to be initialized. 39 | 40 | Attributes: 41 | model_name (str): The name of the model. 42 | file (str): Placeholder for file path or file name. 43 | y_train (str): Placeholder for training labels. 44 | y_test (str): Placeholder for test labels. 45 | X_train_scaled_transformed (str): Placeholder for scaled and transformed training features. 46 | X_test_scaled_transformed (str): Placeholder for scaled and transformed test features. 47 | accuracy_scr (str): Placeholder for accuracy score. 
48 | model_path (str): Placeholder for the model path. 49 | parameters (str): Placeholder for model parameters. 50 | robust_scaler (str): Placeholder for robust scaler object. 51 | run_id (str): Placeholder for run ID. 52 | active_experiment (str): Placeholder for active experiment. 53 | xgb_model (str): Placeholder for XGBoost model. 54 | """ 55 | self.model_name = model_name 56 | self.file = "" 57 | self.y_train = "" 58 | self.y_test = "" 59 | self.X_train_scaled_transformed = "" 60 | self.X_test_scaled_transformed = "" 61 | self.accuracy_scr = "" 62 | self.model_path = "" 63 | self.parameters = "" 64 | self.robust_scaler = "" 65 | self.run_id = "" 66 | self.active_experiment = "" 67 | self.xgb_model = "" 68 | 69 | def mlflow_tracking( 70 | self, 71 | tracking_uri: str = "./mlruns", 72 | experiment: str = None, 73 | new_experiment: str = None, 74 | ) -> None: 75 | """ 76 | Sets up MLflow tracking for experiments. 77 | 78 | Args: 79 | tracking_uri (str): The URI where the MLflow tracking server is hosted. Defaults to "./mlruns". 80 | experiment (str): The name of the existing experiment to use. If None, a new experiment will be created. 81 | new_experiment (str): The name of the new experiment to create if no existing experiment is specified. 82 | """ 83 | # sets tracking URI 84 | mlflow.set_tracking_uri(tracking_uri) 85 | 86 | # creates new experiment if no experiment is specified 87 | if experiment is None: 88 | mlflow.create_experiment(new_experiment) 89 | self.active_experiment = new_experiment 90 | mlflow.set_experiment(new_experiment) 91 | else: 92 | mlflow.set_experiment(experiment) 93 | self.active_experiment = experiment 94 | 95 | def process_data(self, file: str, test_size: float = 0.25) -> None: 96 | """Processes raw data for training. 97 | 98 | Args: 99 | file (str): Path to raw training data. 100 | test_size (float, optional): Percentage of data reserved for testing. Defaults to 0.25. 101 | """ 102 | 103 | # Validate file name 104 | if not isinstance(file, str) or not file.endswith(".parquet"): 105 | raise ValueError( 106 | "Invalid file name. It should be a string ending with '.parquet'" 107 | ) 108 | 109 | # Validate test size 110 | if not isinstance(test_size, float) or not (0 < test_size < 1): 111 | raise ValueError("Invalid test size. 
It should be a float between 0 and 1") 112 | 113 | # Generating our data 114 | logger.info("Reading the dataset from %s...", file) 115 | if not file.startswith(os.path.abspath(SAFE_BASE_DIR) + os.sep): 116 | raise ValueError( 117 | f"Path is not within the allowed directory {SAFE_BASE_DIR}" 118 | ) 119 | try: 120 | data = pd.read_parquet(file) 121 | if not isinstance(data, pd.DataFrame): 122 | sys.exit("Invalid data format") 123 | except Exception as e: 124 | sys.exit(f"Error reading dataset: {e}") 125 | 126 | X = data.drop("Asset_Label", axis=1) 127 | y = data.Asset_Label 128 | 129 | X_train, X_test, self.y_train, self.y_test = train_test_split( 130 | X, y, test_size=test_size 131 | ) 132 | 133 | df_num_train = X_train.select_dtypes(["float", "int", "int32"]) 134 | df_num_test = X_test.select_dtypes(["float", "int", "int32"]) 135 | self.robust_scaler = RobustScaler() 136 | X_train_scaled = self.robust_scaler.fit_transform(df_num_train) 137 | X_test_scaled = self.robust_scaler.transform(df_num_test) 138 | 139 | # Making them pandas dataframes 140 | X_train_scaled_transformed = pd.DataFrame( 141 | X_train_scaled, index=df_num_train.index, columns=df_num_train.columns 142 | ) 143 | X_test_scaled_transformed = pd.DataFrame( 144 | X_test_scaled, index=df_num_test.index, columns=df_num_test.columns 145 | ) 146 | 147 | del X_train_scaled_transformed["Number_Repairs"] 148 | del X_test_scaled_transformed["Number_Repairs"] 149 | 150 | # Dropping the unscaled numerical columns 151 | X_train = X_train.drop( 152 | ["Age", "Temperature", "Last_Maintenance", "Motor_Current"], axis=1 153 | ) 154 | X_test = X_test.drop( 155 | ["Age", "Temperature", "Last_Maintenance", "Motor_Current"], axis=1 156 | ) 157 | 158 | X_train = X_train.astype(int) 159 | X_test = X_test.astype(int) 160 | 161 | # Creating train and test data with scaled numerical columns 162 | X_train_scaled_transformed = pd.concat( 163 | [X_train_scaled_transformed, X_train], axis=1 164 | ) 165 | X_test_scaled_transformed = pd.concat( 166 | [X_test_scaled_transformed, X_test], axis=1 167 | ) 168 | 169 | self.X_train_scaled_transformed = X_train_scaled_transformed.astype( 170 | {"Motor_Current": "float64"} 171 | ) 172 | self.X_test_scaled_transformed = X_test_scaled_transformed.astype( 173 | {"Motor_Current": "float64"} 174 | ) 175 | 176 | def train(self, ncpu: int = 4) -> None: 177 | """Trains an XGBoost Classifier and tracks models with MLFlow. 178 | 179 | Args: 180 | ncpu (int, optional): Number of CPU threads used for training. Defaults to 4. 181 | """ 182 | # Validate ncpu 183 | if not isinstance(ncpu, int) or ncpu <= 0: 184 | raise ValueError("Invalid ncpu. It should be a positive integer.") 185 | 186 | # Set xgboost parameters 187 | self.parameters = { 188 | "max_bin": 256, 189 | "scale_pos_weight": 2, 190 | "lambda_l2": 1, 191 | "alpha": 0.9, 192 | "max_depth": 8, 193 | "num_leaves": 2**8, 194 | "verbosity": 0, 195 | "objective": "multi:softmax", 196 | "learning_rate": 0.3, 197 | "num_class": 3, 198 | "nthread": ncpu, 199 | } 200 | 201 | with mlflow.start_run() as run: 202 | mlflow.xgboost.autolog() 203 | xgb_train = xgb.DMatrix( 204 | self.X_train_scaled_transformed, label=np.array(self.y_train) 205 | ) 206 | 207 | self.xgb_model = xgb.train(self.parameters, xgb_train, num_boost_round=100) 208 | 209 | def validate(self) -> float: 210 | """Performs model validation with testing data. 211 | 212 | Returns: 213 | float: Accuracy metric. 
214 | """ 215 | dtest = xgb.DMatrix(self.X_test_scaled_transformed, self.y_test) 216 | xgb_prediction = self.xgb_model.predict(dtest) 217 | xgb_errors_count = np.count_nonzero(xgb_prediction - np.ravel(self.y_test)) 218 | self.accuracy_scr = 1 - xgb_errors_count / xgb_prediction.shape[0] 219 | 220 | xp = mlflow.get_experiment_by_name(self.active_experiment)._experiment_id 221 | self.run_id = mlflow.search_runs(xp, output_format="list")[0].info.run_id 222 | 223 | with mlflow.start_run(self.run_id): 224 | mlflow.log_metric("accuracy", self.accuracy_scr) 225 | 226 | return self.accuracy_scr 227 | 228 | def save(self, model_path: str) -> None: 229 | """Saves trained model and scaler to the specified path. 230 | 231 | Args: 232 | model_path (str): Path where trained model should be saved. 233 | """ 234 | # Validate model path 235 | if not isinstance(model_path, str) or not model_path: 236 | raise ValueError("Invalid model path. It should be a non-empty string.") 237 | 238 | sanitized_model_path = secure_filename(model_path) 239 | self.model_path = os.path.normpath( 240 | os.path.join( 241 | SAFE_BASE_DIR, sanitized_model_path, self.model_name + ".joblib" 242 | ) 243 | ) 244 | self.model_path = os.path.abspath(self.model_path) 245 | if not self.model_path.startswith(os.path.abspath(SAFE_BASE_DIR) + os.sep): 246 | raise ValueError("Path is not within the allowed model directory.") 247 | 248 | self.scaler_path = os.path.normpath( 249 | os.path.join( 250 | SAFE_BASE_DIR, sanitized_model_path, self.model_name + "_scaler.joblib" 251 | ) 252 | ) 253 | self.scaler_path = os.path.abspath(self.scaler_path) 254 | if not self.scaler_path.startswith(os.path.abspath(SAFE_BASE_DIR) + os.sep): 255 | raise ValueError("Path is not within the allowed model directory.") 256 | 257 | logger.info("Saving model") 258 | try: 259 | with open(self.model_path, "wb") as fh: 260 | joblib.dump(self.xgb_model, fh.name) 261 | except Exception as e: 262 | logger.error(f"Failed to save model: {e}") 263 | raise 264 | 265 | logger.info("Saving Scaler") 266 | try: 267 | with open(self.scaler_path, "wb") as fh: 268 | joblib.dump(self.robust_scaler, fh.name) 269 | except Exception as e: 270 | logger.error(f"Failed to save scaler: {e}") 271 | raise 272 | -------------------------------------------------------------------------------- /MLOps_Professional/lab4/env/conda.yml: -------------------------------------------------------------------------------- 1 | name: lab4 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.9.13 6 | - pip: 7 | - fastapi==0.101.1 8 | - uvicorn==0.23.2 9 | - pydantic==2.2.0 10 | - pandas==2.0.3 11 | - numpy==1.25.2 12 | - scikit-learn==1.3.0 13 | - mlflow==2.6.0 14 | - xgboost==1.7.6 15 | -------------------------------------------------------------------------------- /MLOps_Professional/lab4/sample/data_model.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | 4 | class TrainPayload(BaseModel): 5 | """ 6 | Data model for holding training configuration parameters. 7 | 8 | Attributes: 9 | file (str): The path to the training data file. 10 | model_name (str): The name of the model to be trained. 11 | model_path (str): The path where the trained model will be saved. 12 | test_size (int): The percentage of the data to be used for testing. Default is 25. 13 | ncpu (int): The number of CPU cores to be used for training. Default is 4. 14 | mlflow_tracking_uri (str): The URI for the MLflow tracking server. 
15 | mlflow_new_experiment (str, optional): The name of the new MLflow experiment. Default is None. 16 | mlflow_experiment (str, optional): The name of the existing MLflow experiment. Default is None. 17 | """ 18 | 19 | file: str 20 | model_name: str 21 | model_path: str 22 | test_size: int = 25 23 | ncpu: int = 4 24 | mlflow_tracking_uri: str 25 | mlflow_new_experiment: str = None 26 | mlflow_experiment: str = None 27 | 28 | 29 | class PredictionPayload(BaseModel): 30 | """ 31 | Data model for prediction payload. 32 | Attributes: 33 | model_name (str): The name of the model to be used for prediction. 34 | stage (str): The stage of the model (e.g., 'development', 'production'). 35 | sample (list): The input data sample for which prediction is to be made. 36 | model_run_id (str): The unique identifier for the model run. 37 | scaler_file_name (str): The name of the scaler file used for preprocessing. 38 | scaler_destination (str): The destination path where the scaler file is stored. Default is './'. 39 | """ 40 | 41 | model_name: str 42 | stage: str 43 | sample: list 44 | model_run_id: str 45 | scaler_file_name: str 46 | scaler_destination: str = "./" 47 | -------------------------------------------------------------------------------- /MLOps_Professional/lab4/sample/generate_data.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module to generate dataset for Predictive Asset Maintenance 3 | """ 4 | 5 | import os 6 | import warnings 7 | import argparse 8 | import logging 9 | import time 10 | import pandas as pd 11 | import numpy as np 12 | 13 | logging.basicConfig(level=logging.DEBUG) 14 | logger = logging.getLogger(__name__) 15 | warnings.filterwarnings("ignore") 16 | 17 | SAFE_BASE_DIR = os.path.join(os.path.expanduser("~"), "mlops", "lab4") 18 | 19 | 20 | def validate_inputs(size: int, save_path: str) -> None: 21 | """Validates the command line inputs. 22 | 23 | Args: 24 | size (int): The size of the dataset to generate. 25 | save_path (str): The path to save the generated dataset. 26 | 27 | Raises: 28 | ValueError: If any of the inputs are invalid. 29 | """ 30 | if not isinstance(size, int) or size <= 0: 31 | raise ValueError("Invalid size. It should be a positive integer.") 32 | if not isinstance(save_path, str) or not save_path: 33 | raise ValueError("Invalid save path. 
It should be a non-empty string.") 34 | 35 | 36 | parser = argparse.ArgumentParser() 37 | parser.add_argument( 38 | "-s", "--size", type=int, required=False, default=25000, help="data size" 39 | ) 40 | parser.add_argument( 41 | "-p", 42 | "--save_path", 43 | type=str, 44 | required=True, 45 | help="path to the output Parquet file within the safe directory", 46 | ) 47 | FLAGS = parser.parse_args() 48 | 49 | # Validate inputs 50 | try: 51 | validate_inputs(FLAGS.size, FLAGS.save_path) 52 | except ValueError as e: 53 | logger.error(f"Validation error: {e}") 54 | raise 55 | 56 | dsize = FLAGS.size 57 | train_path = FLAGS.save_path 58 | train_path = os.path.abspath(os.path.normpath(os.path.join(SAFE_BASE_DIR, train_path))) 59 | 60 | # Ensure train_path is still inside SAFE_BASE_DIR 61 | if not train_path.startswith(os.path.abspath(SAFE_BASE_DIR) + os.sep): 62 | raise ValueError(f"Path is not within the allowed directory {SAFE_BASE_DIR}") 63 | 64 | # Ensure the directory exists before saving 65 | os.makedirs(os.path.dirname(train_path), exist_ok=True) 66 | 67 | # Generating our data 68 | start = time.time() 69 | logger.info("Generating data with the size %d", dsize) 70 | np.random.seed(1) 71 | manufacturer_list = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J"] 72 | generation_list = ["Gen1", "Gen2", "Gen3", "Gen4"] 73 | lubrication_type_list = ["LTA", "LTB", "LTC"] 74 | product_assignment_list = ["Gala", "Golden_Delicious", "Granny_Smith"] 75 | data = pd.DataFrame( 76 | { 77 | "Age": np.random.choice(range(0, 25), dsize, replace=True), 78 | "Temperature": np.random.randint(low=50, high=300, size=dsize), 79 | "Last_Maintenance": np.random.normal(0, 60, size=dsize), 80 | "Motor_Current": np.random.randint(low=0.00, high=10.00, size=dsize), 81 | "Manufacturer": np.random.choice(manufacturer_list, dsize, replace=True), 82 | "Generation": np.random.choice(generation_list, dsize, replace=True), 83 | "Number_Repairs": np.random.choice(range(0, 50), dsize, replace=True), 84 | "Lubrication": np.random.choice(lubrication_type_list, dsize, replace=True), 85 | "Product_Assignment": np.random.choice( 86 | product_assignment_list, dsize, replace=True 87 | ), 88 | } 89 | ) 90 | 91 | # Generating our target variable Asset_Label 92 | logger.info("Generating our target variable Asset_Label") 93 | data["Asset_Label"] = np.random.choice(range(0, 2), dsize, replace=True, p=[0.99, 0.01]) 94 | 95 | # When age is 0-5 and over 20 change Asset_Label to 1 96 | logger.info("Creating correlation between our variables and our target variable") 97 | logger.info("When age is 0-5 and over 20 change Asset_Label to 1") 98 | data["Asset_Label"] = np.where( 99 | ((data.Age > 0) & (data.Age <= 5)) | (data.Age > 20), 1, data.Asset_Label 100 | ) 101 | 102 | # When Temperature is between 150-300 change Asset_Label to 1 103 | logger.info("When Temperature is between 150-300 change Asset_Label to 1") 104 | data["Asset_Label"] = np.where( 105 | (data.Temperature >= 150) & (data.Temperature <= 300), 1, data.Asset_Label 106 | ) 107 | 108 | # When Manufacturer is A, E, or H change Asset_Label to have 80% 1's 109 | logger.info("When Manufacturer is A, E, or H change Asset_Label to have 80% 1's") 110 | data["Temp_Var"] = np.random.choice(range(0, 2), dsize, replace=True, p=[0.2, 0.8]) 111 | data["Asset_Label"] = np.where( 112 | (data.Manufacturer == "A") 113 | | (data.Manufacturer == "E") 114 | | (data.Manufacturer == "H"), 115 | data.Temp_Var, 116 | data.Asset_Label, 117 | ) 118 | 119 | # When Generation is Gen1 or Gen3 change Asset_Label to have 50% to 1's 120 | logger.info("When Generation is Gen1 or Gen3 change Asset_Label to have 50% to 1's") 121 | data["Temp_Var"] = np.random.choice(range(0, 2), dsize, replace=True, p=[0.5, 0.5]) 122 | data["Asset_Label"] = np.where( 123 | (data.Generation == "Gen1") | (data.Generation == "Gen3"), 124 | data.Temp_Var, 125 | data.Asset_Label, 126 | ) 127 | 128 | 129 | # When Product Assignment is Gala change Asset_Label to have 70% to 1's 130 | logger.info("When Product Assignment is Gala change Asset_Label to have 70% to 1's") 131 | data["Temp_Var"] = np.random.choice(range(0, 2), dsize, replace=True, p=[0.3, 0.7]) 132 | data["Asset_Label"] = np.where( 133 | (data.Product_Assignment == "Gala"), data.Temp_Var, data.Asset_Label 134 | ) 135 | 136 | 137 | # When Lubrication is LTC change Asset_Label to have 75% to 1's 138 | logger.info("When Lubrication is LTC change Asset_Label to have 75% to 1's") 139 | data["Temp_Var"] = np.random.choice(range(0, 2), dsize, replace=True, p=[0.25, 0.75]) 140 | data["Asset_Label"] = np.where( 141 | (data.Lubrication == "LTC"), data.Temp_Var, data.Asset_Label 142 | ) 143 | 144 | data.drop("Temp_Var", axis=1, inplace=True) 145 | 146 | Categorical_Variables = pd.get_dummies( 147 | data[["Manufacturer", "Generation", "Lubrication", "Product_Assignment"]], 148 | drop_first=False, 149 | ) 150 | data = pd.concat([data, Categorical_Variables], axis=1) 151 | data.drop( 152 | ["Manufacturer", "Generation", "Lubrication", "Product_Assignment"], 153 | axis=1, 154 | inplace=True, 155 | ) 156 | 157 | data = data.astype({"Motor_Current": "float64", "Number_Repairs": "float64"}) 158 | 159 | etime = time.time() - start 160 | datasize = data.shape 161 | logger.info( 162 | "=====> Time taken %f secs for data generation for the size of %s", etime, datasize 163 | ) 164 | 165 | # save data to parquet file 166 | try: 167 | logger.info("Saving the data to %s ...", train_path) 168 | data.to_parquet(train_path) 169 | logger.info("DONE") 170 | except Exception as e: 171 | logger.error(f"Failed to save data: {e}") 172 | raise 173 | -------------------------------------------------------------------------------- /MLOps_Professional/lab4/sample/inference.py: -------------------------------------------------------------------------------- 1 | import os 2 | import joblib 3 | import mlflow 4 | import numpy as np 5 | import pandas as pd 6 | from string import Template 7 | 8 | SAFE_BASE_DIR = os.path.join(os.path.expanduser("~"), "mlops", "lab4") 9 | 10 | 11 | def validate_inputs( 12 | model_name: str, 13 | stage: str, 14 | model_run_id: int, 15 | scaler_file_name: str, 16 | scaler_destination: str, 17 | data: str, 18 | ) -> None: 19 | """Validates the inputs for inference. 20 | 21 | Args: 22 | model_name (str): The name of the model to be used for inference. 23 | stage (str): The stage of the model. 24 | model_run_id (int): The run ID of the model in MLflow. 25 | scaler_file_name (str): The name of the scaler file to be used for data scaling. 26 | scaler_destination (str): The destination path for the scaler. 27 | data (str): The input data for inference. 28 | 29 | Raises: 30 | ValueError: If any of the inputs are invalid. 31 | """ 32 | if not isinstance(model_name, str) or not model_name: 33 | raise ValueError("Invalid model name. It should be a non-empty string.") 34 | if not isinstance(stage, str) or not stage: 35 | raise ValueError("Invalid stage. It should be a non-empty string.") 36 | if not isinstance(model_run_id, int) or model_run_id <= 0: 37 | raise ValueError("Invalid model run ID. It should be a positive integer.") 38 | if not isinstance(scaler_file_name, str) or not scaler_file_name: 39 | raise ValueError("Invalid scaler file name. It should be a non-empty string.") 40 | if not isinstance(scaler_destination, str) or not scaler_destination: 41 | raise ValueError("Invalid scaler destination. It should be a non-empty string.") 42 | if not isinstance(data, str) or not data: 43 | raise ValueError("Invalid data. It should be a non-empty string.") 44 | 45 | 46 | def inference( 47 | model_name: str, 48 | stage: str, 49 | model_run_id: int, 50 | scaler_file_name: str, 51 | scaler_destination: str, 52 | data: str, 53 | ) -> str: 54 | """ 55 | Perform inference using a pre-trained model and a robust scaler on the provided data. 56 | 57 | Parameters: 58 | model_name (str): The name of the model to be used for inference. 59 | stage (str): The stage of the model. 60 | model_run_id (int): The run ID of the model in MLflow. 61 | scaler_file_name (str): The name of the scaler file to be used for data scaling. 62 | scaler_destination (str): The destination path for the scaler. 63 | data (str): The input data for inference. 64 | 65 | Returns: 66 | str: The maintenance status of the equipment based on the model's prediction. 67 | """ 68 | try: 69 | # Validate inputs 70 | validate_inputs( 71 | model_name, stage, model_run_id, scaler_file_name, scaler_destination, data 72 | ) 73 | 74 | scaler_destination = os.path.normpath( 75 | os.path.join(SAFE_BASE_DIR, scaler_destination) 76 | ) 77 | scaler_file_path = os.path.normpath( 78 | os.path.join(scaler_destination, scaler_file_name) 79 | ) 80 | if not scaler_destination.startswith( 81 | SAFE_BASE_DIR 82 | ) or not scaler_file_path.startswith(SAFE_BASE_DIR): 83 | raise ValueError( 84 | "Scaler file path is not within the allowed model directory." 
85 | ) 86 | 87 | # retrieve scaler 88 | try: 89 | mlflow.artifacts.download_artifacts( 90 | run_id=model_run_id, 91 | artifact_path=scaler_file_name, 92 | dst_path=scaler_destination, 93 | ) 94 | except Exception as e: 95 | raise RuntimeError(f"Failed to retrieve scaler: {e}") 96 | 97 | # load robust scaler 98 | try: 99 | with open(scaler_file_path, "rb") as fh: 100 | robust_scaler = joblib.load(fh.name) 101 | except Exception as e: 102 | raise RuntimeError(f"Failed to load robust scaler: {e}") 103 | 104 | # load model 105 | try: 106 | model_uri_template = Template("models:/$model_name/$stage") 107 | model_uri = model_uri_template.substitute( 108 | model_name=model_name, stage=stage 109 | ) 110 | model = mlflow.pyfunc.load_model(model_uri=model_uri) 111 | except Exception as e: 112 | raise RuntimeError(f"Failed to load model: {e}") 113 | 114 | # process data sample 115 | Categorical_Variables = pd.get_dummies( 116 | data[["Manufacturer", "Generation", "Lubrication", "Product_Assignment"]], 117 | drop_first=False, 118 | ) 119 | data = pd.concat([data, Categorical_Variables], axis=1) 120 | data.drop( 121 | ["Manufacturer", "Generation", "Lubrication", "Product_Assignment"], 122 | axis=1, 123 | inplace=True, 124 | ) 125 | 126 | data = data.astype({"Motor_Current": "float64", "Number_Repairs": "float64"}) 127 | 128 | number_samples = data.select_dtypes(["float", "int", "int32"]) 129 | scaled_samples = robust_scaler.transform(number_samples) 130 | scaled_samples_transformed = pd.DataFrame( 131 | scaled_samples, index=number_samples.index, columns=number_samples.columns 132 | ) 133 | del scaled_samples_transformed["Number_Repairs"] 134 | data = data.drop( 135 | ["Age", "Temperature", "Last_Maintenance", "Motor_Current"], axis=1 136 | ) 137 | data = data.astype(int) 138 | processed_sample = pd.concat([scaled_samples_transformed, data], axis=1) 139 | processed_sample = processed_sample.astype({"Motor_Current": "float64"}) 140 | 141 | column_names = [ 142 | "Age", 143 | "Temperature", 144 | "Last_Maintenance", 145 | "Motor_Current", 146 | "Number_Repairs", 147 | "Manufacturer_A", 148 | "Manufacturer_B", 149 | "Manufacturer_C", 150 | "Manufacturer_D", 151 | "Manufacturer_E", 152 | "Manufacturer_F", 153 | "Manufacturer_G", 154 | "Manufacturer_H", 155 | "Manufacturer_I", 156 | "Manufacturer_J", 157 | "Generation_Gen1", 158 | "Generation_Gen2", 159 | "Generation_Gen3", 160 | "Generation_Gen4", 161 | "Lubrication_LTA", 162 | "Lubrication_LTB", 163 | "Lubrication_LTC", 164 | "Product_Assignment_Gala", 165 | "Product_Assignment_Golden_Delicious", 166 | "Product_Assignment_Granny_Smith", 167 | ] 168 | 169 | zeroes_dataframe = pd.DataFrame(0, index=np.arange(1), columns=column_names) 170 | merged_df = pd.merge( 171 | zeroes_dataframe, 172 | processed_sample, 173 | on=processed_sample.columns.tolist(), 174 | how="right", 175 | ).fillna(0) 176 | 177 | columns_to_convert = [ 178 | "Manufacturer_A", 179 | "Manufacturer_B", 180 | "Manufacturer_C", 181 | "Manufacturer_D", 182 | "Manufacturer_E", 183 | "Manufacturer_F", 184 | "Manufacturer_G", 185 | "Manufacturer_H", 186 | "Manufacturer_I", 187 | "Manufacturer_J", 188 | "Generation_Gen1", 189 | "Generation_Gen2", 190 | "Generation_Gen3", 191 | "Generation_Gen4", 192 | "Lubrication_LTA", 193 | "Lubrication_LTB", 194 | "Lubrication_LTC", 195 | "Product_Assignment_Gala", 196 | "Product_Assignment_Golden_Delicious", 197 | "Product_Assignment_Granny_Smith", 198 | ] 199 | 200 | merged_df[columns_to_convert] = merged_df[columns_to_convert].astype(int) 201 | 202 | 
xgb_prediction = model.predict(merged_df) 203 | 204 | for prediction in xgb_prediction: 205 | if prediction == 0: 206 | status = "Equipment Does Not Require Scheduled Maintenance" 207 | return status 208 | elif prediction == 1: 209 | status = "Equipment Requires Scheduled Maintenance - Plan Accordingly" 210 | return status 211 | 212 | return status 213 | except ValueError as e: 214 | raise RuntimeError(f"Validation error: {e}") 215 | except Exception as e: 216 | raise RuntimeError(f"Unexpected error: {e}") 217 | -------------------------------------------------------------------------------- /MLOps_Professional/lab4/sample/serve.py: -------------------------------------------------------------------------------- 1 | import uvicorn 2 | import logging 3 | import warnings 4 | import pandas as pd 5 | 6 | from fastapi import FastAPI, HTTPException 7 | from data_model import TrainPayload, PredictionPayload 8 | from train import HarvesterMaintenance 9 | from inference import inference 10 | 11 | app = FastAPI() 12 | 13 | logging.basicConfig(level=logging.DEBUG) 14 | logger = logging.getLogger(__name__) 15 | warnings.filterwarnings("ignore") 16 | 17 | 18 | @app.get("/ping") 19 | async def ping() -> dict: 20 | """Ping server to determine status 21 | 22 | Returns: 23 | dict: API response 24 | response from server on health status 25 | """ 26 | return {"message": "Server is Running"} 27 | 28 | 29 | @app.post("/train") 30 | async def train(payload: TrainPayload) -> dict: 31 | """Training Endpoint 32 | This endpoint process raw data and trains an XGBoost Classifier 33 | 34 | Args: 35 | payload (TrainPayload): Training endpoint payload model 36 | 37 | Returns: 38 | dict: Accuracy metrics and other logger feedback on training progress. 39 | """ 40 | try: 41 | # Validate inputs 42 | if not isinstance(payload.model_name, str) or not payload.model_name: 43 | raise ValueError("Invalid model name. It should be a non-empty string.") 44 | if not isinstance(payload.file, str) or not payload.file.endswith(".parquet"): 45 | raise ValueError( 46 | "Invalid file name. It should be a string ending with '.parquet'" 47 | ) 48 | if not isinstance(payload.test_size, float) or not (0 < payload.test_size < 1): 49 | raise ValueError("Invalid test size. It should be a float between 0 and 1") 50 | if not isinstance(payload.ncpu, int) or payload.ncpu <= 0: 51 | raise ValueError("Invalid ncpu. It should be a positive integer.") 52 | if not isinstance(payload.model_path, str) or not payload.model_path: 53 | raise ValueError("Invalid model path. 
It should be a non-empty string.") 54 | 55 | model = HarvesterMaintenance(payload.model_name) 56 | model.mlflow_tracking( 57 | tracking_uri=payload.mlflow_tracking_uri, 58 | new_experiment=payload.mlflow_new_experiment, 59 | experiment=payload.mlflow_experiment, 60 | ) 61 | logger.info("Configured Experiment and Tracking URI for MLFlow") 62 | model.process_data(payload.file, payload.test_size) 63 | logger.info("Data has been successfully processed") 64 | model.train(payload.ncpu) 65 | logger.info("Maintenance Apple Harvester Model Successfully Trained") 66 | model.save(payload.model_path) 67 | logger.info("Saved Harvester Maintenance Model") 68 | accuracy_score = model.validate() 69 | return { 70 | "msg": "Model trained successfully", 71 | "validation scores": accuracy_score, 72 | } 73 | except ValueError as e: 74 | logger.error(f"Validation error: {e}") 75 | raise HTTPException(status_code=400, detail=str(e)) 76 | except Exception as e: 77 | logger.error(f"Unexpected error: {e}") 78 | raise HTTPException(status_code=500, detail="Internal Server Error") 79 | 80 | 81 | @app.post("/predict") 82 | async def predict(payload: PredictionPayload) -> dict: 83 | """ 84 | Asynchronously performs prediction based on the provided payload. 85 | 86 | Args: 87 | payload (PredictionPayload): The payload containing the necessary data for prediction, including: 88 | - sample (dict): The sample data to be used for prediction. 89 | - model_name (str): The name of the model to be used for inference. 90 | - stage (str): The stage of the model to be used. 91 | - model_run_id (str): The run ID of the model. 92 | - scaler_file_name (str): The name of the scaler file. 93 | - scaler_destination (str): The destination of the scaler file. 94 | 95 | Returns: 96 | dict: A dictionary containing the message and the maintenance recommendation results. 97 | """ 98 | try: 99 | # Validate inputs 100 | if not isinstance(payload.model_name, str) or not payload.model_name: 101 | raise ValueError("Invalid model name. It should be a non-empty string.") 102 | if not isinstance(payload.stage, str) or not payload.stage: 103 | raise ValueError("Invalid stage. It should be a non-empty string.") 104 | if not isinstance(payload.model_run_id, int) or payload.model_run_id <= 0: 105 | raise ValueError("Invalid model run ID. It should be a positive integer.") 106 | if ( 107 | not isinstance(payload.scaler_file_name, str) 108 | or not payload.scaler_file_name 109 | ): 110 | raise ValueError( 111 | "Invalid scaler file name. It should be a non-empty string." 112 | ) 113 | if ( 114 | not isinstance(payload.scaler_destination, str) 115 | or not payload.scaler_destination 116 | ): 117 | raise ValueError( 118 | "Invalid scaler destination. It should be a non-empty string." 119 | ) 120 | if not isinstance(payload.sample, dict) or not payload.sample: 121 | raise ValueError( 122 | "Invalid sample data. It should be a non-empty dictionary." 
123 | ) 124 | 125 | sample = pd.json_normalize(payload.sample) 126 | results = inference( 127 | model_name=payload.model_name, 128 | stage=payload.stage, 129 | model_run_id=payload.model_run_id, 130 | scaler_file_name=payload.scaler_file_name, 131 | scaler_destination=payload.scaler_destination, 132 | data=sample, 133 | ) 134 | return {"msg": "Completed Analysis", "Maintenance Recommendation": results} 135 | except ValueError as e: 136 | logger.error(f"Validation error: {e}") 137 | raise HTTPException(status_code=400, detail=str(e)) 138 | except Exception as e: 139 | logger.error(f"Unexpected error: {e}") 140 | raise HTTPException(status_code=500, detail="Internal Server Error") 141 | 142 | 143 | if __name__ == "__main__": 144 | """Main entry point for the server. 145 | 146 | This block runs the FastAPI application using Uvicorn. 147 | """ 148 | try: 149 | uvicorn.run("serve:app", host="127.0.0.1", port=5000, log_level="info") 150 | except Exception as e: 151 | logger.error(f"Failed to start server: {e}") 152 | -------------------------------------------------------------------------------- /MLOps_Professional/lab4/sample/train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # pylint: disable=import-error 4 | """ 5 | Module to train and prediction using XGBoost Classifier 6 | """ 7 | 8 | import os 9 | import sys 10 | import logging 11 | import warnings 12 | import joblib 13 | import mlflow 14 | from werkzeug.utils import secure_filename 15 | 16 | import numpy as np 17 | import xgboost as xgb 18 | import pandas as pd 19 | 20 | from sklearn.model_selection import train_test_split 21 | from sklearn.preprocessing import RobustScaler 22 | 23 | SAFE_BASE_DIR = os.path.join(os.path.expanduser("~"), "mlops", "lab4") 24 | 25 | logging.basicConfig(level=logging.DEBUG) 26 | logger = logging.getLogger(__name__) 27 | warnings.filterwarnings("ignore") 28 | 29 | 30 | class HarvesterMaintenance: 31 | 32 | def __init__(self, model_name: str): 33 | """Initializes the HarvesterMaintenance class with default values. 34 | 35 | Args: 36 | model_name (str): Name of the model. 37 | """ 38 | self.model_name = model_name 39 | self.file = "" 40 | self.y_train = "" 41 | self.y_test = "" 42 | self.X_train_scaled_transformed = "" 43 | self.X_test_scaled_transformed = "" 44 | self.accuracy_scr = "" 45 | self.model_path = "" 46 | self.parameters = "" 47 | self.robust_scaler = "" 48 | self.run_id = "" 49 | self.active_experiment = "" 50 | self.xgb_model = "" 51 | 52 | def mlflow_tracking( 53 | self, 54 | tracking_uri: str = "./mlflow_tracking", 55 | experiment: str = None, 56 | new_experiment: str = None, 57 | ) -> None: 58 | """Sets up MLFlow tracking. 59 | 60 | Args: 61 | tracking_uri (str, optional): URI for MLFlow tracking. Defaults to "./mlflow_tracking". 62 | experiment (str, optional): Name of the existing experiment. Defaults to None. 63 | new_experiment (str, optional): Name of the new experiment to create if no experiment is specified. Defaults to None. 
64 | """ 65 | # sets tracking URI 66 | mlflow.set_tracking_uri(tracking_uri) 67 | 68 | # creates new experiment if no experiment is specified 69 | if experiment is None: 70 | mlflow.create_experiment(new_experiment) 71 | self.active_experiment = new_experiment 72 | mlflow.set_experiment(new_experiment) 73 | else: 74 | mlflow.set_experiment(experiment) 75 | self.active_experiment = experiment 76 | 77 | def process_data(self, file: str, test_size: float = 0.25) -> None: 78 | """Processes raw data for training. 79 | 80 | Args: 81 | file (str): Path to raw training data. 82 | test_size (float, optional): Percentage of data reserved for testing. Defaults to 0.25. 83 | """ 84 | # Validate file name 85 | if not isinstance(file, str) or not file.endswith(".parquet"): 86 | raise ValueError( 87 | "Invalid file name. It should be a string ending with '.parquet'" 88 | ) 89 | 90 | # Validate test size 91 | if not isinstance(test_size, float) or not (0 < test_size < 1): 92 | raise ValueError("Invalid test size. It should be a float between 0 and 1") 93 | 94 | # Generating our data 95 | logger.info("Reading the dataset from %s...", file) 96 | if not file.startswith(os.path.abspath(SAFE_BASE_DIR) + os.sep): 97 | raise ValueError( 98 | f"Path is not within the allowed directory {SAFE_BASE_DIR}" 99 | ) 100 | try: 101 | data = pd.read_parquet(file) 102 | if not isinstance(data, pd.DataFrame): 103 | sys.exit("Invalid data format") 104 | except Exception as e: 105 | sys.exit(f"Error reading dataset: {e}") 106 | 107 | X = data.drop("Asset_Label", axis=1) 108 | y = data.Asset_Label 109 | 110 | X_train, X_test, self.y_train, self.y_test = train_test_split( 111 | X, y, test_size=test_size 112 | ) 113 | 114 | df_num_train = X_train.select_dtypes(["float", "int", "int32"]) 115 | df_num_test = X_test.select_dtypes(["float", "int", "int32"]) 116 | self.robust_scaler = RobustScaler() 117 | X_train_scaled = self.robust_scaler.fit_transform(df_num_train) 118 | X_test_scaled = self.robust_scaler.transform(df_num_test) 119 | 120 | # Making them pandas dataframes 121 | X_train_scaled_transformed = pd.DataFrame( 122 | X_train_scaled, index=df_num_train.index, columns=df_num_train.columns 123 | ) 124 | X_test_scaled_transformed = pd.DataFrame( 125 | X_test_scaled, index=df_num_test.index, columns=df_num_test.columns 126 | ) 127 | 128 | del X_train_scaled_transformed["Number_Repairs"] 129 | del X_test_scaled_transformed["Number_Repairs"] 130 | 131 | # Dropping the unscaled numerical columns 132 | X_train = X_train.drop( 133 | ["Age", "Temperature", "Last_Maintenance", "Motor_Current"], axis=1 134 | ) 135 | X_test = X_test.drop( 136 | ["Age", "Temperature", "Last_Maintenance", "Motor_Current"], axis=1 137 | ) 138 | 139 | X_train = X_train.astype(int) 140 | X_test = X_test.astype(int) 141 | 142 | # Creating train and test data with scaled numerical columns 143 | X_train_scaled_transformed = pd.concat( 144 | [X_train_scaled_transformed, X_train], axis=1 145 | ) 146 | X_test_scaled_transformed = pd.concat( 147 | [X_test_scaled_transformed, X_test], axis=1 148 | ) 149 | 150 | self.X_train_scaled_transformed = X_train_scaled_transformed.astype( 151 | {"Motor_Current": "float64"} 152 | ) 153 | self.X_test_scaled_transformed = X_test_scaled_transformed.astype( 154 | {"Motor_Current": "float64"} 155 | ) 156 | 157 | def train(self, ncpu: int = 4) -> None: 158 | """Trains an XGBoost Classifier and tracks models with MLFlow. 159 | 160 | Args: 161 | ncpu (int, optional): Number of CPU threads used for training. Defaults to 4. 
162 | """ 163 | # Validate ncpu 164 | if not isinstance(ncpu, int) or ncpu <= 0: 165 | raise ValueError("Invalid ncpu. It should be a positive integer.") 166 | 167 | # Set xgboost parameters 168 | self.parameters = { 169 | "max_bin": 256, 170 | "scale_pos_weight": 2, 171 | "lambda_l2": 1, 172 | "alpha": 0.9, 173 | "max_depth": 8, 174 | "num_leaves": 2**8, 175 | "verbosity": 0, 176 | "objective": "multi:softmax", 177 | "learning_rate": 0.3, 178 | "num_class": 3, 179 | "nthread": ncpu, 180 | } 181 | 182 | mlflow.xgboost.autolog() 183 | xgb_train = xgb.DMatrix( 184 | self.X_train_scaled_transformed, label=np.array(self.y_train) 185 | ) 186 | self.xgb_model = xgb.train(self.parameters, xgb_train, num_boost_round=100) 187 | 188 | # store run id for user in other methods 189 | xp = mlflow.get_experiment_by_name(self.active_experiment)._experiment_id 190 | self.run_id = mlflow.search_runs(xp, output_format="list")[0].info.run_id 191 | 192 | def validate(self) -> float: 193 | """Performs model validation with testing data. 194 | 195 | Returns: 196 | float: Accuracy metric. 197 | """ 198 | # calculate accuracy 199 | dtest = xgb.DMatrix(self.X_test_scaled_transformed, self.y_test) 200 | xgb_prediction = self.xgb_model.predict(dtest) 201 | xgb_errors_count = np.count_nonzero(xgb_prediction - np.ravel(self.y_test)) 202 | self.accuracy_scr = 1 - xgb_errors_count / xgb_prediction.shape[0] 203 | 204 | # log accuracy metric with mlflow 205 | with mlflow.start_run(self.run_id): 206 | mlflow.log_metric("accuracy", self.accuracy_scr) 207 | 208 | return self.accuracy_scr 209 | 210 | def save(self, model_path: str) -> None: 211 | """Logs scaler as MLFlow artifact. 212 | 213 | Args: 214 | model_path (str): Path where trained model should be saved. 215 | """ 216 | # Validate model path 217 | if not isinstance(model_path, str) or not model_path: 218 | raise ValueError("Invalid model path. It should be a non-empty string.") 219 | 220 | sanitized_model_path = secure_filename(model_path) 221 | self.scaler_path = os.path.normpath( 222 | os.path.join( 223 | SAFE_BASE_DIR, sanitized_model_path, self.model_name + "_scaler.joblib" 224 | ) 225 | ) 226 | self.scaler_path = os.path.abspath(self.scaler_path) 227 | if not self.scaler_path.startswith(os.path.abspath(SAFE_BASE_DIR) + os.sep): 228 | raise ValueError("Path is not within the allowed model directory.") 229 | 230 | logger.info("Saving Scaler") 231 | try: 232 | with open(self.scaler_path, "wb") as fh: 233 | joblib.dump(self.robust_scaler, fh.name) 234 | except Exception as e: 235 | logger.error(f"Failed to save scaler: {e}") 236 | raise 237 | 238 | logger.info("Saving Scaler as MLFLow Artifact") 239 | with mlflow.start_run(self.run_id): 240 | mlflow.log_artifact(self.scaler_path) 241 | -------------------------------------------------------------------------------- /MLOps_Professional/lab5/env/README.md: -------------------------------------------------------------------------------- 1 | # Lab 5 2 | 3 | Lab 5 instructions have been updated on the course website. You will no longer use the VMs for this particular lab. Instead you will use the Jupyter Lab functionality under the "Learning" section of the Intel® Tiber™ AI Cloud. Please select the "PyTorch" kernel for a preconfigured environment for this lab. 
4 | -------------------------------------------------------------------------------- /MLOps_Professional/lab5/sample/IntelPyTorch_Optimizations.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | """ 5 | ============================================================== 6 | Copyright © 2022 Intel Corporation 7 | 8 | SPDX-License-Identifier: MIT 9 | ============================================================== 10 | """ 11 | 12 | import os 13 | from time import time 14 | import torch 15 | import torchvision 16 | import intel_extension_for_pytorch as ipex 17 | import argparse 18 | from safetensors.torch import save_file 19 | 20 | # Hyperparameters and constants 21 | SAFE_BASE_DIR = os.path.join(os.path.expanduser("~"), "mlops", "lab5") 22 | LR = 0.001 23 | MOMENTUM = 0.9 24 | DOWNLOAD = True 25 | DATA = "datasets/cifar10/" 26 | 27 | """ 28 | Function to run a test case 29 | """ 30 | 31 | 32 | def trainModel( 33 | train_loader: torch.utils.data.DataLoader, 34 | modelName: str = "myModel", 35 | dtype: str = "fp32", 36 | ) -> float: 37 | """ 38 | Trains a ResNet50 model using the specified data type and data loader. 39 | 40 | Args: 41 | train_loader (torch.utils.data.DataLoader): DataLoader object containing the training data. 42 | modelName (str): The name of the model. 43 | dtype (str): The data type for model parameters, supported values - 'fp32', 'bf16'. 44 | 45 | Returns: 46 | float: The time in seconds it takes to train the model. 47 | """ 48 | 49 | # Initialize the model 50 | model = torchvision.models.resnet50() 51 | model = model.to(memory_format=torch.channels_last) 52 | criterion = torch.nn.CrossEntropyLoss() 53 | optimizer = torch.optim.SGD(model.parameters(), lr=LR, momentum=MOMENTUM) 54 | model.train() 55 | 56 | # Optimize with BF16 or FP32 (default) 57 | if "bf16" == dtype: 58 | model, optimizer = ipex.optimize( 59 | model, optimizer=optimizer, dtype=torch.bfloat16 60 | ) 61 | else: 62 | model, optimizer = ipex.optimize(model, optimizer=optimizer) 63 | 64 | # Train the model 65 | num_batches = len(train_loader) 66 | start_time = time() 67 | for batch_idx, (data, target) in enumerate(train_loader): 68 | optimizer.zero_grad() 69 | if "bf16" == dtype: 70 | with torch.cpu.amp.autocast(): # Auto Mixed Precision 71 | # Setting memory_format to torch.channels_last could improve performance with 4D input data. This is optional. 72 | data = data.to(memory_format=torch.channels_last) 73 | output = model(data) 74 | loss = criterion(output, target) 75 | loss.backward() 76 | else: 77 | # Setting memory_format to torch.channels_last could improve performance with 4D input data. This is optional. 
78 | data = data.to(memory_format=torch.channels_last) 79 | output = model(data) 80 | loss = criterion(output, target) 81 | loss.backward() 82 | optimizer.step() 83 | if 0 == (batch_idx + 1) % 50: 84 | print("Batch %d/%d complete" % (batch_idx + 1, num_batches)) 85 | end_time = time() 86 | training_time = end_time - start_time 87 | print("Training took %.3f seconds" % (training_time)) 88 | 89 | # Save a checkpoint of the trained model 90 | checkpoint = { 91 | "model_state_dict": model.state_dict(), 92 | "optimizer_state_dict": optimizer.state_dict(), 93 | } 94 | 95 | checkpoint_filename = f"checkpoint_{modelName}.safetensors" 96 | checkpoint_path = os.path.normpath(os.path.join(SAFE_BASE_DIR, checkpoint_filename)) 97 | if not checkpoint_path.startswith(SAFE_BASE_DIR): 98 | raise ValueError("Path is not within the allowed model directory.") 99 | try: 100 | save_file(checkpoint, checkpoint_path) 101 | except Exception as e: 102 | print(f"Failed to save checkpoint: {e}") 103 | raise 104 | 105 | return training_time 106 | 107 | 108 | """ 109 | Perform all types of training in main function 110 | """ 111 | 112 | 113 | def main(FLAGS: argparse.Namespace) -> None: 114 | """ 115 | Main function to perform all types of training. 116 | 117 | Args: 118 | FLAGS (argparse.Namespace): Parsed command-line arguments. 119 | """ 120 | # Check if hardware supports AMX 121 | import sys 122 | 123 | sys.path.append("../../") 124 | from cpuinfo import get_cpu_info 125 | 126 | info = get_cpu_info() 127 | flags = info["flags"] 128 | amx_supported = False 129 | for flag in flags: 130 | if "amx" in flag: 131 | amx_supported = True 132 | break 133 | if not amx_supported: 134 | print("AMX is not supported on current hardware. Code sample cannot be run.\n") 135 | return 136 | 137 | # Load dataset 138 | transform = torchvision.transforms.Compose( 139 | [ 140 | torchvision.transforms.Resize((224, 224)), 141 | torchvision.transforms.ToTensor(), 142 | torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), 143 | ] 144 | ) 145 | train_dataset = torchvision.datasets.CIFAR10( 146 | root=DATA, 147 | train=True, 148 | transform=transform, 149 | download=DOWNLOAD, 150 | ) 151 | train_loader = torch.utils.data.DataLoader( 152 | dataset=train_dataset, batch_size=FLAGS.batch_size 153 | ) 154 | 155 | # Train models and acquire training times 156 | print(f"Training model with {FLAGS.data_type}") 157 | try: 158 | training_time = trainModel( 159 | train_loader, modelName=f"{FLAGS.data_type}", dtype=f"{FLAGS.data_type}" 160 | ) 161 | print("Summary") 162 | print("training time: %.3f" % training_time) 163 | except ValueError as e: 164 | print(f"Validation error: {e}") 165 | except Exception as e: 166 | print(f"Unexpected error: {e}") 167 | 168 | 169 | if __name__ == "__main__": 170 | """ 171 | Main entry point for the script. 172 | 173 | This block parses command-line arguments and calls the main function. 174 | """ 175 | parser = argparse.ArgumentParser() 176 | parser.add_argument( 177 | "-dtype", 178 | "--data_type", 179 | type=str, 180 | default="fp32", 181 | help="pytorch data type options available are fp32 and bf16", 182 | ) 183 | parser.add_argument( 184 | "-batch", "--batch_size", type=int, default=128, help="set training batch size" 185 | ) 186 | FLAGS = parser.parse_args() 187 | 188 | # Validate inputs 189 | if FLAGS.data_type not in ["fp32", "bf16"]: 190 | raise ValueError("Invalid data type. 
Supported values are 'fp32' and 'bf16'.") 191 | if not isinstance(FLAGS.batch_size, int) or FLAGS.batch_size <= 0: 192 | raise ValueError("Invalid batch size. It should be a positive integer.") 193 | 194 | try: 195 | main(FLAGS) 196 | print("[CODE_SAMPLE_COMPLETED_SUCCESFULLY]") 197 | except ValueError as e: 198 | print(f"Validation error: {e}") 199 | except Exception as e: 200 | print(f"Unexpected error: {e}") 201 | -------------------------------------------------------------------------------- /MLOps_Professional/lab6/env/conda.yml: -------------------------------------------------------------------------------- 1 | name: lab6 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.10.12 6 | - pip: 7 | - transformers==4.43.3 8 | - torch==2.3.0 9 | - accelerate==0.33.0 10 | - einops==0.6.1 11 | -------------------------------------------------------------------------------- /MLOps_Professional/lab6/sample/Falcon_HF_Pipelines.py: -------------------------------------------------------------------------------- 1 | from transformers import AutoTokenizer, AutoModelForCausalLM 2 | import transformers 3 | import torch 4 | import argparse 5 | import time 6 | 7 | 8 | def main(FLAGS: argparse.Namespace) -> None: 9 | """ 10 | Main function to perform text generation using Falcon model. 11 | 12 | Args: 13 | FLAGS (argparse.Namespace): Parsed command-line arguments. 14 | """ 15 | try: 16 | # Validate falcon_version 17 | if FLAGS.falcon_version not in ["7b", "40b"]: 18 | raise ValueError( 19 | "Invalid falcon version. Supported values are '7b' and '40b'." 20 | ) 21 | 22 | # Validate max_length 23 | if not isinstance(FLAGS.max_length, int) or FLAGS.max_length <= 0: 24 | raise ValueError("Invalid max length. It should be a positive integer.") 25 | 26 | # Validate top_k 27 | if not isinstance(FLAGS.top_k, int) or FLAGS.top_k <= 0: 28 | raise ValueError("Invalid top_k. It should be a positive integer.") 29 | 30 | model = f"tiiuae/falcon-{FLAGS.falcon_version}" 31 | 32 | tokenizer = AutoTokenizer.from_pretrained(model) 33 | 34 | generator = transformers.pipeline( 35 | "text-generation", 36 | model=model, 37 | tokenizer=tokenizer, 38 | torch_dtype=torch.bfloat16, 39 | trust_remote_code=True, 40 | device_map="auto", 41 | ) 42 | 43 | user_input = "start" 44 | 45 | while user_input != "stop": 46 | 47 | user_input = input( 48 | f"Provide Input to {model} parameter Falcon (not tuned): " 49 | ) 50 | 51 | start = time.time() 52 | 53 | if user_input != "stop": 54 | sequences = generator( 55 | f""" {user_input}""", 56 | max_length=FLAGS.max_length, 57 | do_sample=True, 58 | top_k=FLAGS.top_k, 59 | num_return_sequences=1, 60 | eos_token_id=tokenizer.eos_token_id, 61 | ) 62 | else: 63 | break 64 | 65 | inference_time = time.time() - start 66 | 67 | for seq in sequences: 68 | print(f"Result: {seq['generated_text']}") 69 | 70 | print(f"Total Inference Time: {inference_time:.2f} seconds") 71 | except ValueError as e: 72 | print(f"Validation error: {e}") 73 | except Exception as e: 74 | print(f"Unexpected error: {e}") 75 | 76 | 77 | if __name__ == "__main__": 78 | """ 79 | Main entry point for the script. 80 | 81 | This block parses command-line arguments and calls the main function. 
82 | """ 83 | parser = argparse.ArgumentParser() 84 | 85 | parser.add_argument( 86 | "-fv", 87 | "--falcon_version", 88 | type=str, 89 | default="7b", 90 | help="select 7b or 40b version of falcon", 91 | ) 92 | parser.add_argument( 93 | "-ml", 94 | "--max_length", 95 | type=int, 96 | default=25, 97 | help="used to control the maximum length of the generated text in text generation tasks", 98 | ) 99 | parser.add_argument( 100 | "-tk", 101 | "--top_k", 102 | type=int, 103 | default=5, 104 | help="specifies the number of highest probability tokens to consider at each step", 105 | ) 106 | 107 | FLAGS = parser.parse_args() 108 | 109 | try: 110 | main(FLAGS) 111 | except ValueError as e: 112 | print(f"Validation error: {e}") 113 | except Exception as e: 114 | print(f"Unexpected error: {e}") 115 | -------------------------------------------------------------------------------- /MLOps_Professional/lab7/env/conda.yml: -------------------------------------------------------------------------------- 1 | name: lab7 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.9.13 6 | - pip: 7 | - fastapi==0.101.1 8 | - uvicorn==0.23.2 9 | - pydantic==2.2.0 10 | - pandas==2.0.3 11 | - numpy==1.25.2 12 | - scikit-learn==1.3.0 13 | - mlflow==2.6.0 14 | - xgboost==1.7.6 15 | - daal4py==2023.2.1 16 | -------------------------------------------------------------------------------- /MLOps_Professional/lab7/sample/data_model.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | 4 | class TrainPayload(BaseModel): 5 | """ 6 | Data model for training payload. 7 | 8 | Attributes: 9 | file (str): Path to the training data file. 10 | model_name (str): Name of the model to be trained. 11 | model_path (str): Path where the trained model will be saved. 12 | test_size (int): Percentage of data reserved for testing. Defaults to 25. 13 | ncpu (int): Number of CPU threads used for training. Defaults to 4. 14 | mlflow_tracking_uri (str): URI for MLFlow tracking. 15 | mlflow_new_experiment (str): Name of the new experiment to create if no experiment is specified. Defaults to None. 16 | mlflow_experiment (str): Name of the existing experiment. Defaults to None. 17 | """ 18 | 19 | file: str 20 | model_name: str 21 | model_path: str 22 | test_size: int = 25 23 | ncpu: int = 4 24 | mlflow_tracking_uri: str 25 | mlflow_new_experiment: str = None 26 | mlflow_experiment: str = None 27 | 28 | 29 | class PredictionPayload(BaseModel): 30 | """ 31 | Data model for prediction payload. 32 | 33 | Attributes: 34 | sample (list): List of samples for prediction. 35 | model_run_id (str): ID of the model run. 36 | scaler_file_name (str): Name of the scaler file. 37 | scaler_destination (str): Destination path for the scaler file. Defaults to './'. 38 | d4p_file_name (str): Name of the d4p file. 39 | d4p_destination (str): Destination path for the d4p file. 
40 | """ 41 | 42 | sample: list 43 | model_run_id: str 44 | scaler_file_name: str 45 | scaler_destination: str = "./" 46 | d4p_file_name: str 47 | d4p_destination: str 48 | -------------------------------------------------------------------------------- /MLOps_Professional/lab7/sample/generate_data.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module to generate dataset for Predictive Asset Maintenance 3 | """ 4 | 5 | import os 6 | import warnings 7 | import argparse 8 | import logging 9 | import time 10 | import pandas as pd 11 | import numpy as np 12 | 13 | logging.basicConfig(level=logging.DEBUG) 14 | logger = logging.getLogger(__name__) 15 | warnings.filterwarnings("ignore") 16 | 17 | SAFE_BASE_DIR = os.path.join(os.path.expanduser("~"), "mlops", "lab7") 18 | 19 | 20 | def validate_inputs(size: int, save_path: str) -> None: 21 | """Validates the command line inputs. 22 | 23 | Args: 24 | size (int): The size of the dataset to generate. 25 | save_path (str): The path to save the generated dataset. 26 | 27 | Raises: 28 | ValueError: If any of the inputs are invalid. 29 | """ 30 | if not isinstance(size, int) or size <= 0: 31 | raise ValueError("Invalid size. It should be a positive integer.") 32 | if not isinstance(save_path, str) or not save_path: 33 | raise ValueError("Invalid save path. It should be a non-empty string.") 34 | 35 | 36 | parser = argparse.ArgumentParser() 37 | parser.add_argument( 38 | "-s", "--size", type=int, required=False, default=25000, help="data size" 39 | ) 40 | parser.add_argument( 41 | "-p", 42 | "--save_path", 43 | type=str, 44 | required=True, 45 | help="path to the output Parquet file within the safe directory", 46 | ) 47 | FLAGS = parser.parse_args() 48 | 49 | # Validate inputs 50 | try: 51 | validate_inputs(FLAGS.size, FLAGS.save_path) 52 | except ValueError as e: 53 | logger.error(f"Validation error: {e}") 54 | raise 55 | 56 | dsize = FLAGS.size 57 | train_path = FLAGS.save_path 58 | train_path = os.path.abspath(os.path.normpath(os.path.join(SAFE_BASE_DIR, train_path))) 59 | 60 | # Ensure train_path is still inside SAFE_BASE_DIR 61 | if not train_path.startswith(os.path.abspath(SAFE_BASE_DIR) + os.sep): 62 | raise ValueError(f"Path is not within the allowed directory {SAFE_BASE_DIR}") 63 | 64 | # Ensure the directory exists before saving 65 | os.makedirs(os.path.dirname(train_path), exist_ok=True) 66 | 67 | # Generating our data 68 | start = time.time() 69 | logger.info("Generating data with the size %d", dsize) 70 | np.random.seed(1) 71 | manufacturer_list = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J"] 72 | generation_list = ["Gen1", "Gen2", "Gen3", "Gen4"] 73 | lubrication_type_list = ["LTA", "LTB", "LTC"] 74 | product_assignment_list = ["Gala", "Golden_Delicious", "Granny_Smith"] 75 | data = pd.DataFrame( 76 | { 77 | "Age": np.random.choice(range(0, 25), dsize, replace=True), 78 | "Temperature": np.random.randint(low=50, high=300, size=dsize), 79 | "Last_Maintenance": np.random.normal(0, 60, size=dsize), 80 | "Motor_Current": np.random.randint(low=0.00, high=10.00, size=dsize), 81 | "Manufacturer": np.random.choice(manufacturer_list, dsize, replace=True), 82 | "Generation": np.random.choice(generation_list, dsize, replace=True), 83 | "Number_Repairs": np.random.choice(range(0, 50), dsize, replace=True), 84 | "Lubrication": np.random.choice(lubrication_type_list, dsize, replace=True), 85 | "Product_Assignment": np.random.choice( 86 | product_assignment_list, dsize, replace=True 87 | ), 88 | } 89 | 
) 90 | 91 | # Generating our target variable Asset_Label 92 | logger.info("Generating our target variable Asset_Label") 93 | data["Asset_Label"] = np.random.choice(range(0, 2), dsize, replace=True, p=[0.99, 0.01]) 94 | 95 | # When age is 0-5 and over 20 change Asset_Label to 1 96 | logger.info("Creating correlation between our variables and our target variable") 97 | logger.info("When age is 0-5 and over 20 change Asset_Label to 1") 98 | data["Asset_Label"] = np.where( 99 | ((data.Age > 0) & (data.Age <= 5)) | (data.Age > 20), 1, data.Asset_Label 100 | ) 101 | 102 | # When Temperature is between 150-300 change Asset_Label to 1 103 | logger.info("When Temperature is between 500-1500 change Asset_Label to 1") 104 | data["Asset_Label"] = np.where( 105 | (data.Temperature >= 150) & (data.Temperature <= 300), 1, data.Asset_Label 106 | ) 107 | 108 | # When Manufacturer is A, E, or H change Asset_Label to have 80% 1's 109 | logger.info("When Manufacturer is A, E, or H change Asset_Label to 1") 110 | data["Temp_Var"] = np.random.choice(range(0, 2), dsize, replace=True, p=[0.2, 0.8]) 111 | data["Asset_Label"] = np.where( 112 | (data.Manufacturer == "A") 113 | | (data.Manufacturer == "E") 114 | | (data.Manufacturer == "H"), 115 | data.Temp_Var, 116 | data.Asset_Label, 117 | ) 118 | 119 | # When Generation is Gen1 or Gen3 change Asset_Label to have 50% to 1's 120 | logger.info("When Generation is Gen1 or Gen3 change Asset_Label to have 50% to 0's") 121 | data["Temp_Var"] = np.random.choice(range(0, 2), dsize, replace=True, p=[0.5, 0.5]) 122 | data["Asset_Label"] = np.where( 123 | (data.Generation == "Gen1") | (data.Generation == "Gen3"), 124 | data.Temp_Var, 125 | data.Asset_Label, 126 | ) 127 | 128 | 129 | # When Product Assignment is Pill B change Asset_Label to have 70% to 1's 130 | logger.info("When District is Pill B change Asset_Label to have 70% to 1's") 131 | data["Temp_Var"] = np.random.choice(range(0, 2), dsize, replace=True, p=[0.3, 0.7]) 132 | data["Asset_Label"] = np.where( 133 | (data.Product_Assignment == "Gala"), data.Temp_Var, data.Asset_Label 134 | ) 135 | 136 | 137 | # When Lubrication is LTC change Asset_Label to have 75% to 1's 138 | logger.info("When Lubrication is LTC change Asset_Label to have 75% to 1's") 139 | data["Temp_Var"] = np.random.choice(range(0, 2), dsize, replace=True, p=[0.25, 0.75]) 140 | data["Asset_Label"] = np.where( 141 | (data.Lubrication == "LTC"), data.Temp_Var, data.Asset_Label 142 | ) 143 | 144 | data.drop("Temp_Var", axis=1, inplace=True) 145 | 146 | Categorical_Variables = pd.get_dummies( 147 | data[["Manufacturer", "Generation", "Lubrication", "Product_Assignment"]], 148 | drop_first=False, 149 | ) 150 | data = pd.concat([data, Categorical_Variables], axis=1) 151 | data.drop( 152 | ["Manufacturer", "Generation", "Lubrication", "Product_Assignment"], 153 | axis=1, 154 | inplace=True, 155 | ) 156 | 157 | data = data.astype({"Motor_Current": "float64", "Number_Repairs": "float64"}) 158 | 159 | etime = time.time() - start 160 | datasize = data.shape 161 | logger.info( 162 | "=====> Time taken %f secs for data generation for the size of %s", etime, datasize 163 | ) 164 | 165 | # save data to parquet file 166 | try: 167 | logger.info("Saving the data to %s ...", train_path) 168 | data.to_parquet(train_path) 169 | logger.info("DONE") 170 | except Exception as e: 171 | logger.error(f"Failed to save data: {e}") 172 | raise 173 | -------------------------------------------------------------------------------- /MLOps_Professional/lab7/sample/inference.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import joblib 3 | import mlflow 4 | import numpy as np 5 | import pandas as pd 6 | import daal4py as d4p 7 | 8 | SAFE_BASE_DIR = os.path.join(os.path.expanduser("~"), "mlops", "lab7") 9 | 10 | 11 | def validate_inputs( 12 | model_run_id: int, 13 | scaler_file_name: str, 14 | scaler_destination: str, 15 | d4p_file_name: str, 16 | d4p_destination: str, 17 | data: str, 18 | ) -> None: 19 | """Validates the inputs for inference. 20 | 21 | Args: 22 | model_run_id (int): ID of the model run. 23 | scaler_file_name (str): Name of the scaler file. 24 | scaler_destination (str): Destination path for the scaler file. 25 | d4p_file_name (str): Name of the d4p file. 26 | d4p_destination (str): Destination path for the d4p file. 27 | data (str): Path to the data file. 28 | 29 | Raises: 30 | ValueError: If any of the inputs are invalid. 31 | """ 32 | if not isinstance(model_run_id, int) or model_run_id <= 0: 33 | raise ValueError("Invalid model run ID. It should be a positive integer.") 34 | if not isinstance(scaler_file_name, str) or not scaler_file_name: 35 | raise ValueError("Invalid scaler file name. It should be a non-empty string.") 36 | if not isinstance(scaler_destination, str) or not scaler_destination: 37 | raise ValueError("Invalid scaler destination. It should be a non-empty string.") 38 | if not isinstance(d4p_file_name, str) or not d4p_file_name: 39 | raise ValueError("Invalid d4p file name. It should be a non-empty string.") 40 | if not isinstance(d4p_destination, str) or not d4p_destination: 41 | raise ValueError("Invalid d4p destination. It should be a non-empty string.") 42 | if not isinstance(data, str) or not data: 43 | raise ValueError("Invalid data. It should be a non-empty string.") 44 | 45 | 46 | def inference( 47 | model_run_id: int, 48 | scaler_file_name: str, 49 | scaler_destination: str, 50 | d4p_file_name: str, 51 | d4p_destination: str, 52 | data: str, 53 | ) -> str: 54 | """ 55 | Perform inference using a pre-trained model and scaler. 56 | 57 | Args: 58 | model_run_id (int): ID of the model run. 59 | scaler_file_name (str): Name of the scaler file. 60 | scaler_destination (str): Destination path for the scaler file. 61 | d4p_file_name (str): Name of the d4p file. 62 | d4p_destination (str): Destination path for the d4p file. 63 | data (str): Path to the data file. 64 | 65 | Returns: 66 | str: Inference result indicating maintenance status. 67 | """ 68 | try: 69 | # Validate inputs 70 | validate_inputs( 71 | model_run_id, 72 | scaler_file_name, 73 | scaler_destination, 74 | d4p_file_name, 75 | d4p_destination, 76 | data, 77 | ) 78 | 79 | scaler_destination = os.path.normpath( 80 | os.path.join(SAFE_BASE_DIR, scaler_destination) 81 | ) 82 | scaler_file_path = os.path.normpath( 83 | os.path.join(scaler_destination, scaler_file_name) 84 | ) 85 | if not scaler_destination.startswith( 86 | SAFE_BASE_DIR 87 | ) or not scaler_file_path.startswith(SAFE_BASE_DIR): 88 | raise ValueError( 89 | "Scalar file path is not within the allowed model directory." 
90 | ) 91 | 92 | d4p_destination = os.path.normpath(os.path.join(SAFE_BASE_DIR, d4p_destination)) 93 | d4p_file_path = os.path.normpath(os.path.join(d4p_destination, d4p_file_name)) 94 | if not d4p_destination.startswith( 95 | SAFE_BASE_DIR 96 | ) or not d4p_file_path.startswith(SAFE_BASE_DIR): 97 | raise ValueError("d4p file path is not within the allowed model directory.") 98 | 99 | # retrieve scaler 100 | try: 101 | mlflow.artifacts.download_artifacts( 102 | run_id=model_run_id, 103 | artifact_path=scaler_file_name, 104 | dst_path=scaler_destination, 105 | ) 106 | except Exception as e: 107 | raise RuntimeError(f"Failed to retrieve scaler: {e}") 108 | 109 | # load robust scaler 110 | try: 111 | with open(scaler_file_path, "rb") as fh: 112 | robust_scaler = joblib.load(fh.name) 113 | except Exception as e: 114 | raise RuntimeError(f"Failed to load robust scaler: {e}") 115 | 116 | # retrieve d4p model 117 | try: 118 | mlflow.artifacts.download_artifacts( 119 | run_id=model_run_id, 120 | artifact_path=d4p_file_name, 121 | dst_path=d4p_destination, 122 | ) 123 | except Exception as e: 124 | raise RuntimeError(f"Failed to retrieve d4p model: {e}") 125 | 126 | # load d4p model 127 | try: 128 | with open(d4p_file_path, "rb") as fh: 129 | daal_model = joblib.load(fh.name) 130 | except Exception as e: 131 | raise RuntimeError(f"Failed to load d4p model: {e}") 132 | 133 | # process data sample 134 | try: 135 | data = pd.read_parquet(data) 136 | except Exception as e: 137 | raise RuntimeError(f"Failed to read data file: {e}") 138 | 139 | Categorical_Variables = pd.get_dummies( 140 | data[["Manufacturer", "Generation", "Lubrication", "Product_Assignment"]], 141 | drop_first=False, 142 | ) 143 | data = pd.concat([data, Categorical_Variables], axis=1) 144 | data.drop( 145 | ["Manufacturer", "Generation", "Lubrication", "Product_Assignment"], 146 | axis=1, 147 | inplace=True, 148 | ) 149 | 150 | data = data.astype({"Motor_Current": "float64", "Number_Repairs": "float64"}) 151 | 152 | number_samples = data.select_dtypes(["float", "int", "int32"]) 153 | scaled_samples = robust_scaler.transform(number_samples) 154 | scaled_samples_transformed = pd.DataFrame( 155 | scaled_samples, index=number_samples.index, columns=number_samples.columns 156 | ) 157 | del scaled_samples_transformed["Number_Repairs"] 158 | data = data.drop( 159 | ["Age", "Temperature", "Last_Maintenance", "Motor_Current"], axis=1 160 | ) 161 | data = data.astype(int) 162 | processed_sample = pd.concat([scaled_samples_transformed, data], axis=1) 163 | processed_sample = processed_sample.astype({"Motor_Current": "float64"}) 164 | 165 | column_names = [ 166 | "Age", 167 | "Temperature", 168 | "Last_Maintenance", 169 | "Motor_Current", 170 | "Number_Repairs", 171 | "Manufacturer_A", 172 | "Manufacturer_B", 173 | "Manufacturer_C", 174 | "Manufacturer_D", 175 | "Manufacturer_E", 176 | "Manufacturer_F", 177 | "Manufacturer_G", 178 | "Manufacturer_H", 179 | "Manufacturer_I", 180 | "Manufacturer_J", 181 | "Generation_Gen1", 182 | "Generation_Gen2", 183 | "Generation_Gen3", 184 | "Generation_Gen4", 185 | "Lubrication_LTA", 186 | "Lubrication_LTB", 187 | "Lubrication_LTC", 188 | "Product_Assignment_Gala", 189 | "Product_Assignment_Golden_Delicious", 190 | "Product_Assignment_Granny_Smith", 191 | ] 192 | 193 | zeroes_dataframe = pd.DataFrame(0, index=np.arange(1), columns=column_names) 194 | merged_df = pd.merge( 195 | zeroes_dataframe, 196 | processed_sample, 197 | on=processed_sample.columns.tolist(), 198 | how="right", 199 | ).fillna(0) 200 | 201 
| columns_to_convert = [ 202 | "Manufacturer_A", 203 | "Manufacturer_B", 204 | "Manufacturer_C", 205 | "Manufacturer_D", 206 | "Manufacturer_E", 207 | "Manufacturer_F", 208 | "Manufacturer_G", 209 | "Manufacturer_H", 210 | "Manufacturer_I", 211 | "Manufacturer_J", 212 | "Generation_Gen1", 213 | "Generation_Gen2", 214 | "Generation_Gen3", 215 | "Generation_Gen4", 216 | "Lubrication_LTA", 217 | "Lubrication_LTB", 218 | "Lubrication_LTC", 219 | "Product_Assignment_Gala", 220 | "Product_Assignment_Golden_Delicious", 221 | "Product_Assignment_Granny_Smith", 222 | ] 223 | 224 | merged_df[columns_to_convert] = merged_df[columns_to_convert].astype(int) 225 | 226 | # perform inference 227 | daal_predict_algo = d4p.gbt_classification_prediction( 228 | nClasses=3, resultsToEvaluate="computeClassLabels", fptype="float" 229 | ) 230 | 231 | daal_prediction = daal_predict_algo.compute(merged_df, daal_model) 232 | 233 | for prediction in daal_prediction.prediction[:, 0]: 234 | if prediction == 0: 235 | status = "Equipment Does Not Require Scheduled Maintenance" 236 | return status 237 | elif prediction == 1: 238 | status = "Equipment Requires Scheduled Maintenance - Plan Accordingly" 239 | return status 240 | 241 | return status 242 | except ValueError as e: 243 | raise RuntimeError(f"Validation error: {e}") 244 | except Exception as e: 245 | raise RuntimeError(f"Unexpected error: {e}") 246 | -------------------------------------------------------------------------------- /MLOps_Professional/lab7/sample/serve.py: -------------------------------------------------------------------------------- 1 | import uvicorn 2 | import logging 3 | import warnings 4 | import pandas as pd 5 | 6 | from fastapi import FastAPI, HTTPException 7 | from data_model import TrainPayload, PredictionPayload 8 | from train import HarvesterMaintenance 9 | from inference import inference 10 | 11 | app = FastAPI() 12 | 13 | logging.basicConfig(level=logging.DEBUG) 14 | logger = logging.getLogger(__name__) 15 | warnings.filterwarnings("ignore") 16 | 17 | 18 | @app.get("/ping") 19 | async def ping() -> dict: 20 | """ 21 | Ping server to determine status. 22 | 23 | Returns: 24 | dict: Response from server on health status. 25 | """ 26 | return {"message": "Server is Running"} 27 | 28 | 29 | @app.post("/train") 30 | async def train(payload: TrainPayload) -> dict: 31 | """ 32 | Training Endpoint 33 | This endpoint processes raw data and trains an XGBoost Classifier. 34 | 35 | Args: 36 | payload (TrainPayload): Training endpoint payload model. 37 | 38 | Returns: 39 | dict: Accuracy metrics and other logger feedback on training progress. 40 | """ 41 | try: 42 | # Validate inputs 43 | if not isinstance(payload.model_name, str) or not payload.model_name: 44 | raise ValueError("Invalid model name. It should be a non-empty string.") 45 | if not isinstance(payload.file, str) or not payload.file.endswith(".parquet"): 46 | raise ValueError( 47 | "Invalid file name. It should be a string ending with '.parquet'" 48 | ) 49 | if not isinstance(payload.test_size, int) or not (0 < payload.test_size < 100): 50 | raise ValueError( 51 | "Invalid test size. It should be an integer between 0 and 100" 52 | ) 53 | if not isinstance(payload.ncpu, int) or payload.ncpu <= 0: 54 | raise ValueError("Invalid ncpu. It should be a positive integer.") 55 | if not isinstance(payload.model_path, str) or not payload.model_path: 56 | raise ValueError("Invalid model path. 
It should be a non-empty string.") 57 | 58 | model = HarvesterMaintenance(payload.model_name) 59 | model.mlflow_tracking( 60 | tracking_uri=payload.mlflow_tracking_uri, 61 | new_experiment=payload.mlflow_new_experiment, 62 | experiment=payload.mlflow_experiment, 63 | ) 64 | logger.info("Configured Experiment and Tracking URI for MLFlow") 65 | model.process_data(payload.file, payload.test_size) 66 | logger.info("Data has been successfully processed") 67 | model.train(payload.ncpu) 68 | logger.info("Maintenance Apple Harvester Model Successfully Trained") 69 | model.save(payload.model_path) 70 | logger.info("Saved Harvester Maintenance Model") 71 | accuracy_score = model.validate() 72 | return { 73 | "msg": "Model trained successfully", 74 | "validation scores": accuracy_score, 75 | } 76 | except ValueError as e: 77 | logger.error(f"Validation error: {e}") 78 | raise HTTPException(status_code=400, detail=str(e)) 79 | except Exception as e: 80 | logger.error(f"Unexpected error: {e}") 81 | raise HTTPException(status_code=500, detail="Internal Server Error") 82 | 83 | 84 | @app.post("/predict") 85 | async def predict(payload: PredictionPayload) -> dict: 86 | """ 87 | Prediction Endpoint 88 | This endpoint performs inference using a pre-trained model and scaler. 89 | 90 | Args: 91 | payload (PredictionPayload): Prediction endpoint payload model. 92 | 93 | Returns: 94 | dict: Maintenance recommendation based on the inference result. 95 | """ 96 | try: 97 | # Validate inputs 98 | if not isinstance(payload.model_run_id, int) or payload.model_run_id <= 0: 99 | raise ValueError("Invalid model run ID. It should be a positive integer.") 100 | if ( 101 | not isinstance(payload.scaler_file_name, str) 102 | or not payload.scaler_file_name 103 | ): 104 | raise ValueError( 105 | "Invalid scaler file name. It should be a non-empty string." 106 | ) 107 | if ( 108 | not isinstance(payload.scaler_destination, str) 109 | or not payload.scaler_destination 110 | ): 111 | raise ValueError( 112 | "Invalid scaler destination. It should be a non-empty string." 113 | ) 114 | if not isinstance(payload.d4p_file_name, str) or not payload.d4p_file_name: 115 | raise ValueError("Invalid d4p file name. It should be a non-empty string.") 116 | if not isinstance(payload.d4p_destination, str) or not payload.d4p_destination: 117 | raise ValueError( 118 | "Invalid d4p destination. It should be a non-empty string." 119 | ) 120 | if not isinstance(payload.sample, list) or not payload.sample: 121 | raise ValueError("Invalid sample data. It should be a non-empty list.") 122 | 123 | sample = pd.json_normalize(payload.sample) 124 | results = inference( 125 | model_run_id=payload.model_run_id, 126 | scaler_file_name=payload.scaler_file_name, 127 | scaler_destination=payload.scaler_destination, 128 | d4p_file_name=payload.d4p_file_name, 129 | d4p_destination=payload.d4p_destination, 130 | data=sample, 131 | ) 132 | return {"msg": "Completed Analysis", "Maintenance Recommendation": results} 133 | except ValueError as e: 134 | logger.error(f"Validation error: {e}") 135 | raise HTTPException(status_code=400, detail=str(e)) 136 | except Exception as e: 137 | logger.error(f"Unexpected error: {e}") 138 | raise HTTPException(status_code=500, detail="Internal Server Error") 139 | 140 | 141 | if __name__ == "__main__": 142 | """ 143 | Main entry point for the server. 144 | 145 | This block runs the FastAPI application using Uvicorn. 
146 | """ 147 | try: 148 | uvicorn.run("serve:app", host="127.0.0.1", port=5000, log_level="info") 149 | except Exception as e: 150 | logger.error(f"Failed to start server: {e}") 151 | -------------------------------------------------------------------------------- /MLOps_Professional/lab7/sample/train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # pylint: disable=import-error 4 | 5 | """ 6 | Module to train and prediction using XGBoost Classifier 7 | """ 8 | 9 | import os 10 | import sys 11 | import logging 12 | import warnings 13 | import joblib 14 | import mlflow 15 | 16 | import numpy as np 17 | import xgboost as xgb 18 | import pandas as pd 19 | import daal4py as d4p 20 | 21 | from sklearn.model_selection import train_test_split 22 | from sklearn.preprocessing import RobustScaler 23 | from werkzeug.utils import secure_filename 24 | 25 | SAFE_BASE_DIR = os.path.join(os.path.expanduser("~"), "mlops", "lab7") 26 | 27 | logging.basicConfig(level=logging.DEBUG) 28 | logger = logging.getLogger(__name__) 29 | warnings.filterwarnings("ignore") 30 | 31 | 32 | class HarvesterMaintenance: 33 | def __init__(self, model_name: str): 34 | """ 35 | Initializes the HarvesterMaintenance class with default values. 36 | 37 | Args: 38 | model_name (str): Name of the model. 39 | """ 40 | self.model_name = model_name 41 | self.file = "" 42 | self.y_train = "" 43 | self.y_test = "" 44 | self.X_train_scaled_transformed = "" 45 | self.X_test_scaled_transformed = "" 46 | self.d4p_model = "" 47 | self.accuracy_scr = "" 48 | self.model_path = "" 49 | self.parameters = "" 50 | self.robust_scaler = "" 51 | self.run_id = "" 52 | self.active_experiment = "" 53 | 54 | def mlflow_tracking( 55 | self, 56 | tracking_uri: str = "./mlflow_tracking", 57 | experiment: str = None, 58 | new_experiment: str = None, 59 | ): 60 | """ 61 | Sets up MLFlow tracking. 62 | 63 | Args: 64 | tracking_uri (str, optional): URI for MLFlow tracking. Defaults to "./mlflow_tracking". 65 | experiment (str, optional): Name of the existing experiment. Defaults to None. 66 | new_experiment (str, optional): Name of the new experiment to create if no experiment is specified. Defaults to None. 67 | """ 68 | # sets tracking URI 69 | mlflow.set_tracking_uri(tracking_uri) 70 | 71 | # creates new experiment if no experiment is specified 72 | if experiment is None: 73 | mlflow.create_experiment(new_experiment) 74 | self.active_experiment = new_experiment 75 | mlflow.set_experiment(new_experiment) 76 | else: 77 | mlflow.set_experiment(experiment) 78 | self.active_experiment = experiment 79 | 80 | def process_data(self, file: str, test_size: float = 0.25): 81 | """Processes raw data for training. 82 | 83 | Args: 84 | file (str): Path to raw training data. 85 | test_size (float, optional): Percentage of data reserved for testing. Defaults to 0.25. 86 | """ 87 | # Validate file name 88 | if not isinstance(file, str) or not file.endswith(".parquet"): 89 | raise ValueError( 90 | "Invalid file name. It should be a string ending with '.parquet'" 91 | ) 92 | 93 | # Validate test size 94 | if not isinstance(test_size, float) or not (0 < test_size < 1): 95 | raise ValueError("Invalid test size. 
It should be a float between 0 and 1") 96 | 97 | # Generating our data 98 | logger.info("Reading the dataset from %s...", file) 99 | if not file.startswith(os.path.abspath(SAFE_BASE_DIR) + os.sep): 100 | raise ValueError( 101 | f"Path is not within the allowed directory {SAFE_BASE_DIR}" 102 | ) 103 | try: 104 | data = pd.read_parquet(file) 105 | if not isinstance(data, pd.DataFrame): 106 | sys.exit("Invalid data format") 107 | except Exception as e: 108 | sys.exit(f"Error reading dataset: {e}") 109 | 110 | X = data.drop("Asset_Label", axis=1) 111 | y = data.Asset_Label 112 | 113 | X_train, X_test, self.y_train, self.y_test = train_test_split( 114 | X, y, test_size=test_size 115 | ) 116 | 117 | df_num_train = X_train.select_dtypes(["float", "int", "int32"]) 118 | df_num_test = X_test.select_dtypes(["float", "int", "int32"]) 119 | self.robust_scaler = RobustScaler() 120 | X_train_scaled = self.robust_scaler.fit_transform(df_num_train) 121 | X_test_scaled = self.robust_scaler.transform(df_num_test) 122 | 123 | # Making them pandas dataframes 124 | X_train_scaled_transformed = pd.DataFrame( 125 | X_train_scaled, index=df_num_train.index, columns=df_num_train.columns 126 | ) 127 | X_test_scaled_transformed = pd.DataFrame( 128 | X_test_scaled, index=df_num_test.index, columns=df_num_test.columns 129 | ) 130 | 131 | del X_train_scaled_transformed["Number_Repairs"] 132 | del X_test_scaled_transformed["Number_Repairs"] 133 | 134 | # Dropping the unscaled numerical columns 135 | X_train = X_train.drop( 136 | ["Age", "Temperature", "Last_Maintenance", "Motor_Current"], axis=1 137 | ) 138 | X_test = X_test.drop( 139 | ["Age", "Temperature", "Last_Maintenance", "Motor_Current"], axis=1 140 | ) 141 | 142 | X_train = X_train.astype(int) 143 | X_test = X_test.astype(int) 144 | 145 | # Creating train and test data with scaled numerical columns 146 | X_train_scaled_transformed = pd.concat( 147 | [X_train_scaled_transformed, X_train], axis=1 148 | ) 149 | X_test_scaled_transformed = pd.concat( 150 | [X_test_scaled_transformed, X_test], axis=1 151 | ) 152 | 153 | self.X_train_scaled_transformed = X_train_scaled_transformed.astype( 154 | {"Motor_Current": "float64"} 155 | ) 156 | self.X_test_scaled_transformed = X_test_scaled_transformed.astype( 157 | {"Motor_Current": "float64"} 158 | ) 159 | 160 | def train(self, ncpu: int = 4): 161 | """Trains an XGBoost Classifier and tracks models with MLFlow. 162 | 163 | Args: 164 | ncpu (int, optional): Number of CPU threads used for training. Defaults to 4. 165 | """ 166 | # Validate ncpu 167 | if not isinstance(ncpu, int) or ncpu <= 0: 168 | raise ValueError("Invalid ncpu. 
It should be a positive integer.") 169 | 170 | # Set xgboost parameters 171 | self.parameters = { 172 | "max_bin": 256, 173 | "scale_pos_weight": 2, 174 | "lambda_l2": 1, 175 | "alpha": 0.9, 176 | "max_depth": 8, 177 | "num_leaves": 2**8, 178 | "verbosity": 0, 179 | "objective": "multi:softmax", 180 | "learning_rate": 0.3, 181 | "num_class": 3, 182 | "nthread": ncpu, 183 | } 184 | 185 | mlflow.xgboost.autolog() 186 | xgb_train = xgb.DMatrix( 187 | self.X_train_scaled_transformed, label=np.array(self.y_train) 188 | ) 189 | xgb_model = xgb.train(self.parameters, xgb_train, num_boost_round=100) 190 | self.d4p_model = d4p.get_gbt_model_from_xgboost(xgb_model) 191 | 192 | # store run id for user in other methods 193 | xp = mlflow.get_experiment_by_name(self.active_experiment)._experiment_id 194 | self.run_id = mlflow.search_runs(xp, output_format="list")[0].info.run_id 195 | 196 | def validate(self) -> float: 197 | """Performs model validation with testing data. 198 | 199 | Returns: 200 | float: Accuracy metric. 201 | """ 202 | daal_predict_algo = d4p.gbt_classification_prediction( 203 | nClasses=self.parameters["num_class"], 204 | resultsToEvaluate="computeClassLabels", 205 | fptype="float", 206 | ) 207 | 208 | daal_prediction = daal_predict_algo.compute( 209 | self.X_test_scaled_transformed, self.d4p_model 210 | ) 211 | 212 | daal_errors_count = np.count_nonzero( 213 | daal_prediction.prediction[:, 0] - np.ravel(self.y_test) 214 | ) 215 | self.d4p_acc = abs( 216 | (daal_errors_count / daal_prediction.prediction.shape[0]) - 1 217 | ) 218 | 219 | print("=====> XGBoost Daal accuracy score %f", self.d4p_acc) 220 | print("DONE") 221 | return self.d4p_acc 222 | 223 | def save(self, model_path): 224 | """Logs scaler and d4p models as MLFlow artifacts. 225 | 226 | Args: 227 | model_path (str): Path where trained model should be saved. 228 | """ 229 | # Validate model path 230 | if not isinstance(model_path, str) or not model_path: 231 | raise ValueError("Invalid model path. 
It should be a non-empty string.") 232 | 233 | sanitized_model_path = secure_filename(model_path) 234 | self.model_path = os.path.normpath( 235 | os.path.join( 236 | SAFE_BASE_DIR, sanitized_model_path, self.model_name + ".joblib" 237 | ) 238 | ) 239 | self.model_path = os.path.abspath(self.model_path) 240 | if not self.model_path.startswith(os.path.abspath(SAFE_BASE_DIR) + os.sep): 241 | raise ValueError("Path is not within the allowed model directory.") 242 | 243 | self.scaler_path = os.path.normpath( 244 | os.path.join( 245 | SAFE_BASE_DIR, sanitized_model_path, self.model_name + "_scaler.joblib" 246 | ) 247 | ) 248 | self.scaler_path = os.path.abspath(self.scaler_path) 249 | if not self.scaler_path.startswith(os.path.abspath(SAFE_BASE_DIR) + os.sep): 250 | raise ValueError("Path is not within the allowed model directory.") 251 | 252 | logger.info("Saving model") 253 | with open(self.model_path, "wb") as fh: 254 | joblib.dump(self.d4p_model, fh.name) 255 | 256 | logger.info("Saving Scaler") 257 | with open(self.scaler_path, "wb") as fh: 258 | joblib.dump(self.robust_scaler, fh.name) 259 | 260 | logger.info("Saving Scaler and d4p model as MLFLow Artifact") 261 | with mlflow.start_run(self.run_id): 262 | mlflow.log_artifact(self.scaler_path) 263 | mlflow.log_artifact(self.model_path) 264 | -------------------------------------------------------------------------------- /MLOps_Professional/lab8/README.md: -------------------------------------------------------------------------------- 1 | # Lab 8 2 | 3 | Please follow instructions provided through the MLOps Professional Course content. 4 | -------------------------------------------------------------------------------- /MLOps_Professional/lab8/sample/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM public.ecr.aws/docker/library/python:3.8@sha256:d411270700143fa2683cc8264d9fa5d3279fd3b6afff62ae81ea2f9d070e390c 2 | 3 | # Create a non-root user 4 | RUN useradd -m nonrootuser 5 | 6 | # Switch to the non-root user 7 | USER nonrootuser 8 | 9 | # copy assets over to image 10 | COPY . /pickerbot 11 | 12 | # set the working directory 13 | WORKDIR /pickerbot 14 | 15 | # install dependancies 16 | RUN apt-get update && apt-get install -y \ 17 | python3-dev \ 18 | build-essential 19 | 20 | # set PATH 21 | ENV PATH=.local/bin:$PATH 22 | 23 | # install python dependancies 24 | RUN pip3 install --user --no-cache-dir -r requirements.txt 25 | 26 | # exposing endpoint port 27 | EXPOSE 5000 28 | 29 | ENTRYPOINT ["python", "serve.py"] 30 | -------------------------------------------------------------------------------- /MLOps_Professional/lab8/sample/PickerBot.py: -------------------------------------------------------------------------------- 1 | from langchain.chains import LLMChain 2 | from langchain.prompts import PromptTemplate 3 | from langchain.document_loaders import TextLoader 4 | from langchain.text_splitter import RecursiveCharacterTextSplitter 5 | from langchain.indexes import VectorstoreIndexCreator 6 | from langchain.embeddings import HuggingFaceEmbeddings 7 | from datasets import load_dataset 8 | 9 | import pandas as pd 10 | import time 11 | import os 12 | from typing import Any, Optional 13 | 14 | SAFE_BASE_DIR = os.path.join(os.path.expanduser("~"), "mlops", "lab8") 15 | 16 | 17 | class PickerBot: 18 | def __init__(self, data: str, model: Any, safe_root: str = SAFE_BASE_DIR): 19 | """ 20 | Initializes the PickerBot class with data and model. 
21 | 22 | Args: 23 | data (str): Path to the data file. 24 | model (Any): The model to be used for inference. 25 | safe_root (str, optional): The root directory for safe file operations. Defaults to SAFE_BASE_DIR. 26 | """ 27 | self.safe_root = safe_root 28 | self.data = self._validate_path(data) 29 | self.model = model 30 | 31 | def _validate_path(self, path: str) -> str: 32 | """ 33 | Validates and normalizes the given path to ensure it is within the safe root directory. 34 | 35 | Args: 36 | path (str): The path to be validated. 37 | 38 | Returns: 39 | str: The normalized and validated path. 40 | 41 | Raises: 42 | ValueError: If the path is not within the safe root directory. 43 | """ 44 | # Normalize the path 45 | fullpath = os.path.normpath(os.path.join(self.safe_root, path)) 46 | # Ensure the path is within the safe root directory 47 | if not fullpath.startswith(self.safe_root): 48 | raise ValueError("Invalid path: Access denied") 49 | return fullpath 50 | 51 | def data_proc(self) -> None: 52 | """ 53 | Processes the data by downloading the customer service robot support dialogue from Hugging Face 54 | and saving it to a text file if it does not already exist. 55 | """ 56 | try: 57 | if not os.path.isfile(self.data): 58 | # Download the customer service robot support dialogue from hugging face 59 | dataset = load_dataset( 60 | "FunDialogues/customer-service-apple-picker-maintenance", 61 | cache_dir=None, 62 | ) 63 | 64 | # Convert the dataset to a pandas dataframe 65 | dialogues = dataset["train"] 66 | df = pd.DataFrame(dialogues, columns=["id", "description", "dialogue"]) 67 | 68 | # Print the first 5 rows of the dataframe 69 | df.head() 70 | 71 | # only keep the dialogue column 72 | dialog_df = df["dialogue"] 73 | 74 | # save the data to txt file 75 | dialog_df.to_csv(self.data, sep=" ", index=False) 76 | else: 77 | print("data already exists in path.") 78 | except Exception as e: 79 | print(f"Error processing data: {e}") 80 | raise 81 | 82 | def create_vectorstore(self, chunk_size: int = 500, overlap: int = 25) -> None: 83 | """ 84 | Creates a vector store for the data using text splitting and embeddings. 85 | 86 | Args: 87 | chunk_size (int, optional): The size of each text chunk. Defaults to 500. 88 | overlap (int, optional): The overlap between text chunks. Defaults to 25. 89 | """ 90 | try: 91 | loader = TextLoader(self.data) 92 | # Text Splitter 93 | text_splitter = RecursiveCharacterTextSplitter( 94 | chunk_size=chunk_size, chunk_overlap=overlap 95 | ) 96 | # Embed the document and store into chroma DB 97 | self.index = VectorstoreIndexCreator( 98 | embedding=HuggingFaceEmbeddings(), text_splitter=text_splitter 99 | ).from_loaders([loader]) 100 | except Exception as e: 101 | print(f"Error creating vector store: {e}") 102 | raise 103 | 104 | def inference( 105 | self, user_input: str, context_verbosity: bool = False, top_k: int = 2 106 | ) -> str: 107 | """ 108 | Performs inference using the provided user input and model. 109 | 110 | Args: 111 | user_input (str): The user input for the inference. 112 | context_verbosity (bool, optional): Whether to print the retrieved context information. Defaults to False. 113 | top_k (int, optional): The number of top similar documents to retrieve. Defaults to 2. 114 | 115 | Returns: 116 | str: The processed response from the model. 
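
        Example (a sketch; the data file name and the LLM object are assumptions):

            bot = PickerBot("dialogues.txt", model=llm)
            bot.data_proc()
            bot.create_vectorstore()
            answer = bot.inference("How do I clear a jammed picker arm?", context_verbosity=True)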
117 | """ 118 | try: 119 | # perform similarity search and retrieve the context from our documents 120 | results = self.index.vectorstore.similarity_search(user_input, k=top_k) 121 | # join all context information into one string 122 | context = "\n".join([document.page_content for document in results]) 123 | if context_verbosity: 124 | print(f"Retrieving information related to your question...") 125 | print( 126 | f"Found this content which is most similar to your question: {context}" 127 | ) 128 | 129 | template = """ 130 | Please use the following apple picker technical support related questions to answer questions. 131 | Context: {context} 132 | --- 133 | This is the user's question: {question} 134 | Answer: This is what our auto apple picker technical expert suggest.""" 135 | 136 | prompt = PromptTemplate( 137 | template=template, input_variables=["context", "question"] 138 | ).partial(context=context) 139 | 140 | llm_chain = LLMChain(prompt=prompt, llm=self.model) 141 | 142 | print("Processing the information with gpt4all...\n") 143 | start_time = time.time() 144 | response = llm_chain.run(user_input) 145 | elapsed_time_milliseconds = (time.time() - start_time) * 1000 146 | 147 | tokens = len(response.split()) 148 | time_per_token_milliseconds = ( 149 | elapsed_time_milliseconds / tokens if tokens != 0 else 0 150 | ) 151 | 152 | processed_reponse = ( 153 | response 154 | + f" --> {time_per_token_milliseconds:.4f} milliseconds/token AND Time taken for response: {elapsed_time_milliseconds:.2f} milliseconds" 155 | ) 156 | 157 | return processed_reponse 158 | except Exception as e: 159 | print(f"Error during inference: {e}") 160 | raise 161 | -------------------------------------------------------------------------------- /MLOps_Professional/lab8/sample/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/certified-developer/9b0c167ef393db17c62c11ba46d4b73607f81fee/MLOps_Professional/lab8/sample/__init__.py -------------------------------------------------------------------------------- /MLOps_Professional/lab8/sample/model.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | 4 | class GenPayload(BaseModel): 5 | """ 6 | Data model for generation payload. 7 | 8 | Attributes: 9 | data (str): The data to be used for generation. 10 | user_input (str): The user input for the generation process. 
11 | """ 12 | 13 | data: str 14 | user_input: str 15 | -------------------------------------------------------------------------------- /MLOps_Professional/lab8/sample/models/pickerbot/README.md: -------------------------------------------------------------------------------- 1 | ## directory for model storage -------------------------------------------------------------------------------- /MLOps_Professional/lab8/sample/requirements.txt: -------------------------------------------------------------------------------- 1 | langchain==0.3.25 2 | pygpt4all==1.1.0 3 | gpt4all==2.8.2 4 | transformers==4.51.3 5 | datasets==3.5.0 6 | tiktoken==0.9.0 7 | chromadb==1.0.8 8 | sentence_transformers==4.1.0 9 | -------------------------------------------------------------------------------- /MLOps_Professional/lab8/sample/serve.py: -------------------------------------------------------------------------------- 1 | import uvicorn 2 | import os 3 | import requests 4 | 5 | from tqdm import tqdm 6 | from langchain.llms import GPT4All 7 | from fastapi import FastAPI, HTTPException 8 | from model import GenPayload 9 | from PickerBot import PickerBot 10 | from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler 11 | 12 | SAFE_BASE_DIR = os.path.join(os.path.expanduser("~"), "mlops", "lab8") 13 | 14 | app = FastAPI() 15 | 16 | 17 | def load_gpt4allj( 18 | model_path: str, 19 | n_threads: int = 6, 20 | max_tokens: int = 50, 21 | repeat_penalty: float = 1.20, 22 | n_batch: int = 6, 23 | top_k: int = 1, 24 | timeout: int = 90, # Timeout in seconds 25 | ) -> GPT4All: 26 | """ 27 | Loads the GPT4All model, downloading it if necessary. 28 | 29 | Args: 30 | model_path (str): Path to the model file. 31 | n_threads (int, optional): Number of threads to use. Defaults to 6. 32 | max_tokens (int, optional): Maximum number of tokens to generate. Defaults to 50. 33 | repeat_penalty (float, optional): Penalty for repeated tokens. Defaults to 1.20. 34 | n_batch (int, optional): Batch size for processing. Defaults to 6. 35 | top_k (int, optional): Number of top tokens to consider. Defaults to 1. 36 | timeout (int, optional): Timeout for downloading the model in seconds. Defaults to 90. 37 | 38 | Returns: 39 | GPT4All: Loaded GPT4All model. 40 | """ 41 | if not os.path.isfile(model_path): 42 | # download model 43 | url = "https://huggingface.co/nomic-ai/gpt4all-falcon-ggml/resolve/main/ggml-model-gpt4all-falcon-q4_0.bin" 44 | # send a GET request to the URL to download the file. Stream since it's large 45 | try: 46 | response = requests.get(url, stream=True, timeout=timeout) 47 | response.raise_for_status() 48 | except requests.RequestException as e: 49 | raise RuntimeError(f"Failed to download model: {e}") 50 | 51 | # open the file in binary mode and write the contents of the response to it in chunks 52 | # This is a large file, so be prepared to wait. 
53 | try: 54 | with open(model_path, "wb") as f: 55 | for chunk in tqdm(response.iter_content(chunk_size=10000)): 56 | if chunk: 57 | f.write(chunk) 58 | except Exception as e: 59 | raise RuntimeError(f"Failed to save model: {e}") 60 | else: 61 | print("model already exists in path.") 62 | 63 | # Callbacks support token-wise streaming 64 | callbacks = [StreamingStdOutCallbackHandler()] 65 | # Verbose is required to pass to the callback manager 66 | llm = GPT4All( 67 | model=model_path, 68 | callbacks=callbacks, 69 | verbose=True, 70 | n_threads=n_threads, 71 | n_predict=max_tokens, 72 | repeat_penalty=repeat_penalty, 73 | n_batch=n_batch, 74 | top_k=top_k, 75 | ) 76 | 77 | return llm 78 | 79 | 80 | gptj = load_gpt4allj( 81 | model_path="./models/pickerbot/ggml-model-gpt4all-falcon-q4_0.bin", 82 | n_threads=15, 83 | max_tokens=100, 84 | repeat_penalty=1.20, 85 | n_batch=15, 86 | top_k=1, 87 | ) 88 | 89 | 90 | @app.get("/ping") 91 | async def ping() -> dict: 92 | """ 93 | Ping the server to check its status. 94 | 95 | Returns: 96 | dict: A response indicating the server's health status. 97 | """ 98 | return {"message": "Server is Running"} 99 | 100 | 101 | @app.post("/predict") 102 | async def predict(payload: GenPayload) -> dict: 103 | """ 104 | Prediction Endpoint 105 | 106 | Args: 107 | payload (GenPayload): Prediction endpoint payload model. 108 | 109 | Returns: 110 | dict: PickerBot response based on the inference result. 111 | """ 112 | try: 113 | # Validate inputs 114 | if not isinstance(payload.data, str) or not payload.data: 115 | raise ValueError("Invalid data path. It should be a non-empty string.") 116 | if not isinstance(payload.user_input, str) or not payload.user_input: 117 | raise ValueError("Invalid user input. It should be a non-empty string.") 118 | 119 | bot = PickerBot(payload.data, model=gptj, safe_root=SAFE_BASE_DIR) 120 | bot.data_proc() 121 | bot.create_vectorstore() 122 | response = bot.inference(user_input=payload.user_input) 123 | return { 124 | "msg": "Sim Search and Inference Complete", 125 | "PickerBot Response": response, 126 | } 127 | except ValueError as e: 128 | raise HTTPException(status_code=400, detail=str(e)) 129 | except Exception as e: 130 | raise HTTPException(status_code=500, detail="Internal Server Error") 131 | 132 | 133 | if __name__ == "__main__": 134 | """ 135 | Main entry point for the server. 136 | 137 | This block runs the FastAPI application using Uvicorn. 138 | """ 139 | try: 140 | uvicorn.run("serve:app", host="127.0.0.1", port=5000, log_level="info") 141 | except Exception as e: 142 | logger.error(f"Failed to start server: {e}") 143 | -------------------------------------------------------------------------------- /MLOps_Professional/lab9/README.md: -------------------------------------------------------------------------------- 1 | # Lab 9 2 | 3 | Please follow instructions provided through the MLOps Professional Course content. 
4 | -------------------------------------------------------------------------------- /MLOps_Professional/lab9/sample/Part I - Leveraging Intel Optimizations with Hugging Face for Enhanced Model Performance.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "id": "0541c12e-12f7-4d23-bc14-0590672c6979", 7 | "metadata": {}, 8 | "source": [ 9 | "# Leveraging Intel Optimizations with Hugging Face for Enhanced Model Performance\n", 10 | "\n", 11 | "\"Alt\n", 12 | "\n", 13 | "This notebook serves as an introduction to utilizing IPEX for fine-tuning a pre-trained model, specifically focusing on the `distilbert-base-uncased` model for multi-class emotion classification in text. In part two of this lab you will learn how to contribute the open source Hugging Face model hub. \n", 14 | "\n", 15 | "## Why This is Important\n", 16 | "\n", 17 | "Understanding how to leverage Intel optimizations is crucial for developers looking to maximize computational efficiency and performance. By integrating IPEX with Hugging Face's API, we can significantly enhance training speeds, especially when utilizing mixed precision training with FP32 and BF16. This notebook will demonstrate these capabilities practically, offering insights into:\n", 18 | "\n", 19 | "- How to enable IPEX within Hugging Face's `TrainingArguments` and training functions.\n", 20 | "- Comparing training speeds and efficiencies between IPEX-enabled and standard training processes.\n", 21 | "- Performing inference to assess the model's accuracy in classifying emotions.\n", 22 | "\n", 23 | "## Acquiring the Learnings\n", 24 | "\n", 25 | "Through step-by-step instructions, hands-on examples, and comparative analyses, this workshop will equip you with the skills to effectively integrate Intel's optimizations into your NLP projects using Hugging Face. Let's dive into the world where cutting-edge language processing meets optimized computational performance.\n" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "id": "3501a047-ca9b-41d9-a1ca-ec3e1613c573", 31 | "metadata": {}, 32 | "source": [ 33 | "#### Environment Setup and Dependencies Installation\n", 34 | "\n", 35 | "This cell prepares our working environment. It sources Intel oneAPI for optimal performance on Intel hardware (optional based on your setup) and installs specific versions of essential libraries: `transformers`, `torch`, and `intel_extension_for_pytorch`. These installations ensure we have the necessary tools to leverage Intel's optimizations." 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "id": "175e5202-27ad-47a8-9906-e831ad51db6e", 42 | "metadata": { 43 | "scrolled": true 44 | }, 45 | "outputs": [], 46 | "source": [ 47 | "!source /opt/intel/oneapi/setvars.sh #comment out if not running on Intel® Tiber™ AI Cloud Jupyter\n", 48 | "!pip install transformers==4.35.2\n", 49 | "!pip install torch==2.1.0\n", 50 | "!pip install intel_extension_for_pytorch==2.1.0\n", 51 | "!pip install datasets==2.16.1\n", 52 | "!pip install accelerate==0.26.0" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "id": "0a6d9300-aa72-4cb2-bfd9-ee5c2ca391af", 58 | "metadata": {}, 59 | "source": [ 60 | "#### Loading Libraries and Packages\n", 61 | "\n", 62 | "In this cell, we import the core libraries that will be used throughout the notebook. 
This setup is crucial as it prepares our Python environment with all the necessary tools for our tasks.\n", 63 | "\n", 64 | "- `from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments`: We import key components from the Hugging Face Transformers library. `AutoModelForSequenceClassification` and `AutoTokenizer` are used for loading the model and tokenizer, respectively. `Trainer` and `TrainingArguments` are essential for setting up and running our model training.\n", 65 | "- `from datasets import load_dataset`: This import from the `datasets` library allows us to easily load and preprocess datasets available in Hugging Face's datasets hub.\n", 66 | "- `import numpy as np`: Numpy is a fundamental package for scientific computing in Python. It provides support for arrays, mathematical operations, and various utility functions.\n", 67 | "- `from sklearn.metrics import accuracy_score`: We import the `accuracy_score` function from Scikit-Learn to calculate the accuracy of our model predictions during evaluation. This metric will help us quantify the performance of our fine-tuned model.\n", 68 | "\n", 69 | "Overall, this cell lays the foundation for our machine learning tasks by equipping us with the necessary libraries and modules.t." 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "id": "abc4acc2-5808-449f-a55b-bd3484ad3236", 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "# Import necessary libraries\n", 80 | "from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments\n", 81 | "from datasets import load_dataset\n", 82 | "import numpy as np\n", 83 | "from sklearn.metrics import accuracy_score\n", 84 | "import torch " 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "id": "eea22e9f-4e4e-4672-bdba-b264f5893d41", 90 | "metadata": {}, 91 | "source": [ 92 | "#### Dataset Loading\n", 93 | "\n", 94 | "Here, we load the `emotion` dataset from Hugging Face's datasets library. This dataset will be used for training and evaluating our DistilBERT model, providing a practical context for emotion classification in text.k..\n" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "id": "89638882-e384-46a7-93c4-de9b60d33e61", 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "# Load the dataset\n", 105 | "dataset = load_dataset(\"emotion\")" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "id": "5efc4f80-7806-4bc8-b251-dd3d8e1d5189", 111 | "metadata": {}, 112 | "source": [ 113 | "#### Model and Tokenizer Initialization\n", 114 | "\n", 115 | "In this cell, we initialize the `distilbert-base-uncased` model and its corresponding tokenizer for sequence classification. 
This setup is the first step in preparing our model for fine-tuning on the emotion classification task..\n" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "id": "882dcc84-5752-489f-9e0d-adcd1b8201a3", 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "# Load a pre-trained BERT model and tokenizer\n", 126 | "model_name = \"distilbert-base-uncased\"\n", 127 | "model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=6)\n", 128 | "tokenizer = AutoTokenizer.from_pretrained(model_name)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "id": "a6ad31f1-2669-4018-9629-ff6ba02af7d3", 134 | "metadata": {}, 135 | "source": [ 136 | "#### Data Preprocessing\n", 137 | "\n", 138 | "Data preprocessing is essential for model training. We define and apply a preprocessing function that tokenizes our text data, making it compatible with the DistilBERT model's input requirements.\n", 139 | ".\n" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "id": "114b861c-54c8-4a43-95dc-94975ac1ca72", 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "def preprocess_function(examples):\n", 150 | " \"\"\"\n", 151 | " Tokenizes the input text data.\n", 152 | "\n", 153 | " Args:\n", 154 | " examples (dict): A dictionary containing the text data to be tokenized.\n", 155 | "\n", 156 | " Returns:\n", 157 | " dict: A dictionary with tokenized text data, padded and truncated to the maximum length.\n", 158 | " \"\"\"\n", 159 | " return tokenizer(examples['text'], padding='max_length', truncation=True)\n", 160 | "\n", 161 | "# Apply preprocessing\n", 162 | "encoded_dataset = dataset.map(preprocess_function, batched=True)" 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "id": "9ecf58cd-55a0-4bec-88e5-930beecf5e78", 168 | "metadata": {}, 169 | "source": [ 170 | "#### Training with IPEX\n", 171 | "\n", 172 | "This cell is where the integration of Intel Extension for PyTorch (IPEX) comes into play. We define training arguments, including enabling BF16 and IPEX, and set up our Hugging Face trainer. 
The model is then trained on the emotion dataset, utilizing the enhanced capabilities provided by IPEX..\n" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "id": "cb0798d9-7d90-41c0-ac7b-4d1b0370873d", 179 | "metadata": { 180 | "scrolled": true 181 | }, 182 | "outputs": [], 183 | "source": [ 184 | "import intel_extension_for_pytorch as ipex\n", 185 | "\n", 186 | "# Define training arguments\n", 187 | "training_args = TrainingArguments(\n", 188 | " output_dir=\"./results\",\n", 189 | " learning_rate=2e-5,\n", 190 | " per_device_train_batch_size=16,\n", 191 | " per_device_eval_batch_size=16,\n", 192 | " num_train_epochs=1,\n", 193 | " weight_decay=0.01,\n", 194 | " evaluation_strategy=\"epoch\",\n", 195 | " bf16=True, \n", 196 | " use_ipex=True,\n", 197 | " no_cuda=True,\n", 198 | ")\n", 199 | "\n", 200 | "# Define the trainer\n", 201 | "def compute_metrics(pred):\n", 202 | " \"\"\"\n", 203 | " Computes accuracy metrics for the model predictions.\n", 204 | "\n", 205 | " Args:\n", 206 | " pred (transformers.EvalPrediction): A namedtuple with two fields:\n", 207 | " - predictions: The predicted labels by the model.\n", 208 | " - label_ids: The true labels.\n", 209 | "\n", 210 | " Returns:\n", 211 | " dict: A dictionary containing the accuracy of the predictions.\n", 212 | " \"\"\"\n", 213 | " labels = pred.label_ids\n", 214 | " preds = np.argmax(pred.predictions, axis=-1)\n", 215 | " return {'accuracy': accuracy_score(labels, preds)}\n", 216 | "\n", 217 | "trainer = Trainer(\n", 218 | " model=model,\n", 219 | " args=training_args,\n", 220 | " train_dataset=encoded_dataset[\"train\"],\n", 221 | " eval_dataset=encoded_dataset[\"validation\"],\n", 222 | " compute_metrics=compute_metrics,\n", 223 | ")\n", 224 | "\n", 225 | "# Train the model\n", 226 | "trainer.train()" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "id": "f1046c73-336d-4edb-9210-71a50571583e", 232 | "metadata": {}, 233 | "source": [ 234 | "#### Model Evaluation\n", 235 | "\n", 236 | "Post-training, we evaluate the model's performance on the validation dataset. This evaluation will give us insights into the effectiveness of our training and the accuracy of the model in emotion classification." 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": null, 242 | "id": "0d7678f5-385d-44fc-857b-c501712ae8bc", 243 | "metadata": {}, 244 | "outputs": [], 245 | "source": [ 246 | "trainer.evaluate()" 247 | ] 248 | }, 249 | { 250 | "cell_type": "markdown", 251 | "id": "a0a7c017-dcdf-4d79-8441-0eb3eb9ae50f", 252 | "metadata": {}, 253 | "source": [ 254 | "#### Inference and Testing\n", 255 | "\n", 256 | "Finally, we test the fine-tuned model's inference capabilities on new sentences. This step involves preprocessing the test sentences, performing predictions, and mapping these predictions to human-readable labels. It allows us to visually inspect the model's ability to classify emotions in various text inputs." 
257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": null, 262 | "id": "300ea670-26d0-439c-8b1f-e2156ba8f9eb", 263 | "metadata": {}, 264 | "outputs": [], 265 | "source": [ 266 | "# Define test sentences\n", 267 | "test_sentences = [\n", 268 | " \"I am feeling incredibly happy and joyful today!\",\n", 269 | " \"I am so sad and down.\",\n", 270 | " \"I have mixed feelings about this.\",\n", 271 | " \"This is absolutely terrifying!\",\n", 272 | "]\n", 273 | "\n", 274 | "# Preprocess the test sentences\n", 275 | "encoded_input = tokenizer(test_sentences, padding=True, truncation=True, max_length=128, return_tensors='pt')\n", 276 | "\n", 277 | "# Predict using the fine-tuned model\n", 278 | "with torch.no_grad():\n", 279 | " predictions = model(**encoded_input)\n", 280 | "\n", 281 | "# Convert predictions to human-readable labels\n", 282 | "predicted_labels = np.argmax(predictions.logits.numpy(), axis=1)\n", 283 | "\n", 284 | "# Mapping for the 'emotion' dataset labels\n", 285 | "label_map = {0: \"sadness\", 1: \"joy\", 2: \"love\", 3: \"anger\", 4: \"fear\", 5: \"surprise\"}\n", 286 | "\n", 287 | "# Print predictions\n", 288 | "for sentence, label_id in zip(test_sentences, predicted_labels):\n", 289 | " print(f\"Sentence: '{sentence}' - Emotion Prediction: {label_map[label_id]}\")\n" 290 | ] 291 | }, 292 | { 293 | "cell_type": "markdown", 294 | "id": "46160e25-50d6-48b6-8353-851320c95794", 295 | "metadata": {}, 296 | "source": [ 297 | "# Conclusion and Discussion\n", 298 | "\n", 299 | "### Conclusion\n", 300 | "\n", 301 | "Throughout this workshop, we have explored the integration of Intel optimizations with Hugging Face's powerful Transformers library. By fine-tuning the DistilBERT model with the support of Intel Extension for PyTorch, we observed enhanced training speeds and efficient utilization of computational resources, especially notable in mixed precision training scenarios.\n", 302 | "\n", 303 | "### Discussion\n", 304 | "\n", 305 | "The exercise showcased not only the technical prowess of combining Hugging Face with Intel optimizations but also highlighted the practical benefits such as reduced training times and resource efficiency. This understanding is pivotal for developers working on NLP tasks, seeking to optimize model performance on Intel hardware. As AI and NLP continue to evolve, harnessing these optimizations will be key in developing more efficient and powerful AI applications." 
306 | ] 307 | } 308 | ], 309 | "metadata": { 310 | "kernelspec": { 311 | "display_name": "base", 312 | "language": "python", 313 | "name": "base" 314 | }, 315 | "language_info": { 316 | "codemirror_mode": { 317 | "name": "ipython", 318 | "version": 3 319 | }, 320 | "file_extension": ".py", 321 | "mimetype": "text/x-python", 322 | "name": "python", 323 | "nbconvert_exporter": "python", 324 | "pygments_lexer": "ipython3", 325 | "version": "3.9.16" 326 | } 327 | }, 328 | "nbformat": 4, 329 | "nbformat_minor": 5 330 | } 331 | -------------------------------------------------------------------------------- /MLOps_Professional/lab9/sample/Part II - Uploading and Sharing Models on Hugging Face Hub with Intel Optimizations.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "915234aa-2e83-4430-bd4c-06edcdb19bc9", 6 | "metadata": {}, 7 | "source": [ 8 | "# Uploading and Sharing Models on Hugging Face Hub with Intel Optimizations\n", 9 | "\n", 10 | "\"Alt\n", 11 | "\n", 12 | "\n", 13 | "Here, we'll learn how to take a model trained using Hugging Face APIs with the Intel Extension for PyTorch and upload it to the Hugging Face Hub. \n", 14 | "\n", 15 | "## Why This is Important\n", 16 | "\n", 17 | "Sharing models on platforms like Hugging Face Hub not only contributes to the open-source community but also allows for wider testing, evaluation, and improvement of models by others. This process is critical for collaborative development and advancing the field of AI.\n", 18 | "\n", 19 | "### Key Learning Points\n", 20 | "\n", 21 | "- **Model Upload**: We will go through the steps of uploading our trained model to the Hugging Face Hub.\n", 22 | "- **Creating a Model Card**: A model card is crucial for documenting our model. It provides information about the model's purpose, architecture, and training data, guiding other users in understanding and using the model effectively.\n", 23 | "- **Open Sourcing the Model**: By open-sourcing our model, we contribute to the community and enable collective advancements in AI and NLP.\n", 24 | "- **Evaluation on Hugging Face Hub**: We'll also look at how our model can be evaluated directly on the Hugging Face Hub.\n", 25 | "\n", 26 | "### Prerequisites\n", 27 | "\n", 28 | "- A Hugging Face account and a token with write permissions are necessary to upload models to the Hub.\n", 29 | "\n", 30 | "Let's start by setting up our environment and preparing our model for upload to the Hugging Face Hub.\n" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "id": "fc69d348-ba24-4ec5-ac59-3cfe5a3726cd", 36 | "metadata": {}, 37 | "source": [ 38 | "### Logging in to Hugging Face\n", 39 | "\n", 40 | "Before uploading the model, you need to authenticate with Hugging Face. Ensure you have an account and are logged in. 
The `notebook_login` function provides an easy way to log in for notebook environments.\n" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "id": "3d4a3f02-32da-46b5-aebb-f2fb5e81e1de", 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "from huggingface_hub import notebook_login, Repository\n", 51 | "\n", 52 | "# Login to Hugging Face\n", 53 | "notebook_login()" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "id": "10831457-cded-48fb-a3a7-e6f4a842b900", 59 | "metadata": {}, 60 | "source": [ 61 | "#### Model and Tokenizer Loading\n", 62 | "\n", 63 | "In this cell, we load our trained model and tokenizer:\n", 64 | "- We use `AutoModelForSequenceClassification` and `AutoTokenizer` to load the model and tokenizer. The model is loaded from a saved checkpoint, while the tokenizer is loaded using the base DistilBERT tokenizer.\n", 65 | "- `checkpoint_path` should be set to the path where your model checkpoint is saved. Always target the last checkpoint if the model was trained succesfully." 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "id": "f7d7afd2-3e03-4e74-8674-b5e0426680b1", 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "from transformers import AutoModelForSequenceClassification, AutoTokenizer\n", 76 | "\n", 77 | "# Define the path to the checkpoint\n", 78 | "checkpoint_path = r\"./results/checkpoint-2000\" # Replace with your checkpoint folder\n", 79 | "\n", 80 | "# Load the model\n", 81 | "model = AutoModelForSequenceClassification.from_pretrained(checkpoint_path)\n", 82 | "\n", 83 | "# Load the tokenizer\n", 84 | "tokenizer = AutoTokenizer.from_pretrained(\"distilbert-base-uncased\")" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "id": "62dd0d4d-03e3-432e-9e65-ddcc6c8637ac", 90 | "metadata": {}, 91 | "source": [ 92 | "#### Saving and Uploading the Model and Tokenizer\n", 93 | "\n", 94 | "Here, we prepare and upload the model and tokenizer to the Hugging Face Hub:\n", 95 | "- The model and tokenizer are saved locally with the names specified in `model_name_on_hub`.\n", 96 | "- `push_to_hub` methods are used to upload both the model and tokenizer to the Hugging Face Hub under your username." 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "id": "f4c59a28-8158-42c6-97f1-2723919d82ac", 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "# Save the model and tokenizer\n", 107 | "model_name_on_hub = \"desired-model-name\"\n", 108 | "model.save_pretrained(model_name_on_hub)\n", 109 | "tokenizer.save_pretrained(model_name_on_hub)\n", 110 | "\n", 111 | "# Push to the hub\n", 112 | "model.push_to_hub(model_name_on_hub)\n", 113 | "tokenizer.push_to_hub(model_name_on_hub)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "id": "3b3b157a-2b48-44a5-8fd4-0a8de8f812b9", 119 | "metadata": {}, 120 | "source": [ 121 | "#### Model Uploaded to Hugging Face Model Hub\n", 122 | "\n", 123 | "Congratulations! Your fine-tuned model is now uploaded to the Hugging Face Model Hub. You can view and share your model using its URL: `https://huggingface.co/your-username/your-model-name`\n" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "id": "9cace9d1-1c22-4e8b-9210-08899c121ca3", 129 | "metadata": {}, 130 | "source": [ 131 | "## Creating and Uploading the Model Card\n", 132 | "\n", 133 | "A model card is a critical document that provides information about the model's purpose, creation, and usage. 
It enhances transparency and helps users understand and use the model appropriately. Here is a template below\n", 134 | "\n", 135 | "Check out this example model card: https://huggingface.co/eduardo-alvarez/distilbert-emotions-clf/blob/main/README.md" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "id": "7660c612-5fea-43b9-b34c-6126e95db32a", 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "model_card_content = \"\"\"\n", 146 | "# Model Card for My Fine-Tuned Model\n", 147 | "\n", 148 | "## Model Description\n", 149 | "- **Purpose**: [Describe the purpose of your model. What task does it perform?]\n", 150 | "- **Model architecture**: [Specify the architecture, e.g., BERT, GPT-2, etc.]\n", 151 | "- **Training data**: [Briefly describe the dataset used for training. Include any data cleaning or preprocessing steps.]\n", 152 | "\n", 153 | "## Intended Use\n", 154 | "- **Intended users**: [Who are the intended users of the model?]\n", 155 | "- **Use cases**: [Describe potential use cases for the model.]\n", 156 | "\n", 157 | "## Limitations\n", 158 | "- **Known limitations**: [Mention any known limitations of the model.]\n", 159 | "\n", 160 | "## Hardware \n", 161 | "- **Training Platform**: [Describe details about the systems and platform used to train the model.]\n", 162 | "\n", 163 | "## Software Optimizations\n", 164 | "- **Known Optimizations**: [Describe details about any optimizations used during training.]\n", 165 | "\n", 166 | "## Ethical Considerations\n", 167 | "- **Ethical concerns**: [Discuss any ethical concerns related to the use of your model.]\n", 168 | "\n", 169 | "## More Information\n", 170 | "- [Include any additional information, links, or references.]\n", 171 | "\n", 172 | "\"\"\"" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "id": "8e9fde36-7c64-47e4-9a9f-02a1879e17a1", 178 | "metadata": {}, 179 | "source": [ 180 | "If you have git-lfs installed, you can try uploading directly. Git-LFS is not available in the Intel® Tiber™ AI Cloud free notebook environments at the time of creating this tutorial (1/30/24)" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": null, 186 | "id": "6f65be83-978f-448f-b0de-3c0fcb1109e7", 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "# Write the model card content to a file\n", 191 | "model_card_filename = f\"{model_name_on_hub}/README.md\"\n", 192 | "with open(model_card_filename, \"w\") as file:\n", 193 | " file.write(model_card_content)\n", 194 | "\n", 195 | "# Push the model card to the hub\n", 196 | "repo = Repository(model_name_on_hub, clone_from=model_name_on_hub)\n", 197 | "repo.push_to_hub(commit_message=\"Add model card\")" 198 | ] 199 | }, 200 | { 201 | "cell_type": "markdown", 202 | "id": "65bf9282-e2f0-4331-a558-7637871c5109", 203 | "metadata": {}, 204 | "source": [ 205 | "# Conclusion and Discussion\n", 206 | "\n", 207 | "### Conclusion\n", 208 | "\n", 209 | "In this section of the workshop, we successfully uploaded a model to the Hugging Face Hub. This process included logging into Hugging Face, loading the model and tokenizer, saving them with appropriate names, and finally pushing them to the Hub. #\n", 210 | "\n", 211 | "## Discussion\n", 212 | "\n", 213 | "The ability to share models via platforms like Hugging Face Hub is invaluable in the field of AI and ML. 
It not only fosters collaboration and open-source contributions but also provides a platform for model evaluation and improvement by the community. Uploading models with detailed documentation and model cards ensures transparency and usability, paving the way for future advancements and applications.\n" 214 | ] 215 | } 216 | ], 217 | "metadata": { 218 | "kernelspec": { 219 | "display_name": "base", 220 | "language": "python", 221 | "name": "base" 222 | }, 223 | "language_info": { 224 | "codemirror_mode": { 225 | "name": "ipython", 226 | "version": 3 227 | }, 228 | "file_extension": ".py", 229 | "mimetype": "text/x-python", 230 | "name": "python", 231 | "nbconvert_exporter": "python", 232 | "pygments_lexer": "ipython3", 233 | "version": "3.9.16" 234 | } 235 | }, 236 | "nbformat": 4, 237 | "nbformat_minor": 5 238 | } 239 | -------------------------------------------------------------------------------- /MLOps_Professional/mlops_capstone/README.md: -------------------------------------------------------------------------------- 1 | ## Code Snippets for Completing Capstone 2 | 3 | ```python 4 | # Part 1 - Connecting Training FE/BE Target File: RoboMaintenance.py 5 | if st.button('Train Model', key='training'): 6 | # build request 7 | 8 | URL = 'http://localhost:5000/train' 9 | DATA = {'file':data_file, 'model_name':model_name, 'model_path':model_path, 10 | 'test_size': test_size, 'ncpu': 4, 'mlflow_tracking_uri':mlflow_tracking_uri, 11 | 'mlflow_new_experiment':mlflow_new_experiment, 'mlflow_experiment':mlflow_experiment} 12 | TRAINING_RESPONSE = requests.post(url = URL, json = DATA) 13 | 14 | if len(TRAINING_RESPONSE.text) < 40: 15 | st.error("Model Training Failed") 16 | st.info(TRAINING_RESPONSE.text) 17 | else: 18 | st.success('Training was Succesful') 19 | st.info('Model Validation Accuracy Score: ' + str(TRAINING_RESPONSE.json().get('validation scores'))) 20 | 21 | # Part 2 - Connecting Inference FE/BE Target File: RoboMaintenance.py 22 | if st.button('Run Maintenance Analysis', key='analysis'): 23 | URL = 'http://localhost:5000/predict' 24 | DATA = {'model_name':model_name, 'stage':stage, 'sample':sample, 25 | 'model_run_id':model_run_id, 'scaler_file_name':scaler_file_name, 'scaler_destination':scaler_destination} 26 | INFERENCE_RESPONSE = requests.post(url = URL, json = DATA) 27 | 28 | if len(INFERENCE_RESPONSE.text) < 40: 29 | st.error("Model Inference Failed") 30 | st.info(INFERENCE_RESPONSE.text) 31 | else: 32 | st.success(str(INFERENCE_RESPONSE.json().get('Maintenance Recommendation'))) 33 | 34 | # Part 3 - Add Monitoring Log File Logic Target File: inference.py 35 | current_datetime = datetime.datetime.now() 36 | current_datetime_str = current_datetime.strftime("%Y-%m-%d %H:%M:%S") 37 | data_dict = {'model':model_name, 'stage': stage, 38 | 'model_run_id': model_run_id, 'scaler_file_name': scaler_file_name, 39 | 'prediction': prediction, 'inference_time': elapsed_time_milliseconds, 'datetime': current_datetime_str} 40 | file_path = scaler_destination + '/monitoring.csv' 41 | 42 | 43 | if os.path.isfile(file_path): 44 | df = pd.read_csv(file_path) 45 | else: 46 | df = pd.DataFrame(columns=data_dict.keys()) 47 | 48 | df = pd.concat([df, pd.DataFrame(data_dict, index=[0])], ignore_index=True) 49 | df.to_csv(file_path, index=False) 50 | 51 | # Part 4 - Add Monitoring Logic Target File: Monitoring.py 52 | df = pd.read_csv(r'/home/ubuntu/certified-developer/MLOps_Professional/mlops_capstone/store/outputs/robot_maintenance/monitoring.csv') 53 | df 54 | 55 | st.line_chart(data=df, 
x='datetime', y='inference_time') 56 | 57 | st.scatter_chart(data=df, x='datetime', y='prediction') 58 | 59 | fig, ax = plt.subplots() 60 | ax.hist(df['prediction']) 61 | st.pyplot(fig) 62 | ``` 63 | -------------------------------------------------------------------------------- /MLOps_Professional/mlops_capstone/app_frontend/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM public.ecr.aws/docker/library/python:3.8@sha256:d411270700143fa2683cc8264d9fa5d3279fd3b6afff62ae81ea2f9d070e390c 2 | 3 | # Create a non-root user 4 | RUN useradd -m nonrootuser 5 | 6 | # Switch to the non-root user 7 | USER nonrootuser 8 | 9 | # copy assets over to image 10 | COPY . /app_frontend 11 | 12 | # set the working directory 13 | WORKDIR /app_frontend 14 | 15 | # install dependancies 16 | RUN pip3 install --user --no-cache-dir -r requirements.txt 17 | 18 | # set PATH 19 | ENV PATH="/root/.local/bin:${PATH}" 20 | 21 | # exposing endpoint port 22 | EXPOSE 5005 23 | 24 | ENTRYPOINT ["streamlit", "run", "Home.py", "--server.port", "5005", "--server.address", "0.0.0.0"] -------------------------------------------------------------------------------- /MLOps_Professional/mlops_capstone/app_frontend/Home.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from PIL import Image 3 | 4 | 5 | st.title('The Prototype') 6 | st.header('Pharmaceutical Manufacturing Business') 7 | st.markdown('Building a Prototype for the MLOps Certifcation Capstone Project.') 8 | 9 | st.divider() 10 | 11 | col1, col2 = st.columns(2) 12 | 13 | with col1: 14 | st.subheader("Robotics Maintenance") 15 | forecasting_image = Image.open('./assets/robot_arm.png') 16 | st.image(forecasting_image) 17 | st.caption( 18 | 'Computer vision quality inspection tool to flag and remove bad pills from production line') 19 | 20 | with col2: 21 | st.subheader('Monitoring Dashboard') 22 | forecasting_image = Image.open('./assets/stats.png') 23 | st.image(forecasting_image) 24 | st.caption( 25 | 'Customer support chatbot based on pre-trained gpt-j large language model') 26 | 27 | st.divider() 28 | 29 | st.markdown('##### Notices & Disclaimers') 30 | st.caption('Performance varies by use, configuration, and other factors. Learn more on the Performance \ 31 | Index site. Performance results are based on testing as of dates shown in configurations and may not\ 32 | reflect all publicly available updates. See backup for configuration details. No product or component\ 33 | can be absolutely secure. Your costs and results may vary. Intel technologies may require enabled\ 34 | hardware, software, or service activation. © Intel Corporation. Intel, the Intel logo, and other\ 35 | Intel marks are trademarks of Intel Corporation or its subsidiaries. 
Other names and brands may\ 36 | be claimed as the property of others.') 37 | -------------------------------------------------------------------------------- /MLOps_Professional/mlops_capstone/app_frontend/README.md: -------------------------------------------------------------------------------- 1 | ## Visit the **/setup** directory for instructions on launching the frontend 2 | -------------------------------------------------------------------------------- /MLOps_Professional/mlops_capstone/app_frontend/assets/robot_arm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/certified-developer/9b0c167ef393db17c62c11ba46d4b73607f81fee/MLOps_Professional/mlops_capstone/app_frontend/assets/robot_arm.png -------------------------------------------------------------------------------- /MLOps_Professional/mlops_capstone/app_frontend/assets/stats.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/certified-developer/9b0c167ef393db17c62c11ba46d4b73607f81fee/MLOps_Professional/mlops_capstone/app_frontend/assets/stats.png -------------------------------------------------------------------------------- /MLOps_Professional/mlops_capstone/app_frontend/pages/Monitoring.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import requests 3 | import os 4 | from PIL import Image 5 | import pandas as pd 6 | import matplotlib.pyplot as plt 7 | 8 | st.title('Monitoring Dashboard') 9 | image = Image.open('./assets/stats.png') 10 | st.image(image) 11 | st.markdown("###### A simple tool for monitoring the performance of our model. This simple monitoring dashboard will help us track the inference latency and evaluate trends in prediction results.") 12 | 13 | st.markdown("### Record of Inference Results") 14 | st.caption("A table containing metadata about each inference request made.") 15 | 16 | # Logic for inference metadata table 17 | 18 | st.divider() 19 | 20 | st.markdown("### Chart of Inference Time in Milliseconds (ms) vs Request DateTime Stamps") 21 | st.caption("A line graph depicting the change inference time over time. ") 22 | 23 | # Logic for inference latency line chart 24 | 25 | st.divider() 26 | 27 | st.markdown("### Chart of Predicted Labels vs Request DateTime Stamps") 28 | st.caption("A plot depicting the change predictions over time. ") 29 | 30 | # Logic for predictions over time 31 | 32 | st.divider() 33 | 34 | st.markdown("### Histogram of Results") 35 | st.caption("A histogram showing the frequency of each prediction label.") 36 | 37 | # Logic for predictions histogram 38 | -------------------------------------------------------------------------------- /MLOps_Professional/mlops_capstone/app_frontend/pages/RoboMaintenance.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import requests 3 | import os 4 | from PIL import Image 5 | 6 | st.title("Robotics Predictive Maintenance") 7 | 8 | app_tab, help_tab = st.tabs(["Application", "Help"]) 9 | 10 | with app_tab: 11 | 12 | col11, col22 = st.columns(2) 13 | 14 | with col11: 15 | image = Image.open("./assets/robot_arm.png") 16 | st.image(image) 17 | with col22: 18 | st.markdown( 19 | "##### The demand predictive asset maintenance component uses an XGBoost classifier to flag assets that need maintenance. 
It leverages the Intel® Extension for Scikit-Learn, XGBoost, and daal4py on Intel® 4th Generation Xeon® Scalable processors." 20 | ) 21 | 22 | st.divider() 23 | 24 | st.markdown("#### Predictive Maintenance Model Training") 25 | 26 | data_file = st.text_input( 27 | "Training Data File Path", 28 | key="data", 29 | value="/home/ubuntu/certified-developer/MLOps_Professional/mlops_capstone/store/datasets/robot_maintenance/train.parquet", 30 | ) 31 | model_name = st.text_input( 32 | "Model Name", 33 | key="model name", 34 | help="The name of the model without extensions", 35 | value="model", 36 | ) 37 | model_path = st.text_input( 38 | "Model Save Path", 39 | key="model path", 40 | help="Provide the path without file name", 41 | value="./", 42 | ) 43 | test_size = st.slider( 44 | "Percentage of data saved for Testing", 45 | min_value=5, 46 | max_value=50, 47 | value=25, 48 | step=5, 49 | ) 50 | ncpu = st.number_input("Threads", min_value=2, max_value=16, step=2) 51 | mlflow_tracking_uri = st.text_input( 52 | "Tracking URI", 53 | key="uri", 54 | value="/home/ubuntu/certified-developer/MLOps_Professional/mlops_capstone/store/models/robot_maintenance", 55 | ) 56 | mlflow_new_experiment = st.text_input("New Experiment Name", key="new exp") 57 | mlflow_experiment = st.text_input("Existing Experiment Name", key="existing exp") 58 | 59 | # logic for training API connections 60 | 61 | st.divider() 62 | 63 | st.markdown("#### Predictive Maintenance Analysis") 64 | 65 | model_name = st.text_input("Model Name", key="model name option", value="model") 66 | stage = manufacturer = st.selectbox( 67 | "Model Stage", options=["Staging", "Production"] 68 | ) 69 | model_run_id = st.text_input("Run ID", key="model id") 70 | scaler_file_name = st.text_input( 71 | "Scaler File Name", key="scalar file", value="model_scaler.joblib" 72 | ) 73 | scaler_destination = st.text_input( 74 | "Scaler Destination", 75 | key="scalerdest", 76 | value="/home/ubuntu/certified-developer/MLOps_Professional/mlops_capstone/store/outputs/robot_maintenance", 77 | ) 78 | 79 | col21, col22, col23 = st.columns(3) 80 | 81 | manufacturer_list = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J"] 82 | model_list = ["Gen1", "Gen2", "Gen3", "Gen4"] 83 | lubrication_type_list = ["LTA", "LTB", "LTC"] 84 | product_assignment_list = ["PillA", "PillB", "PillC"] 85 | 86 | with col21: 87 | manufacturer = st.selectbox("Manufacturer", manufacturer_list) 88 | generation = st.selectbox("Generation", model_list) 89 | age = st.number_input("Robot Age", min_value=0, max_value=25, step=1, value=0) 90 | 91 | with col22: 92 | temperature = st.number_input( 93 | "Temperature", min_value=50, max_value=300, step=1 94 | ) 95 | motor_current = st.number_input( 96 | "Motor Current", min_value=0.00, max_value=10.00, step=0.05, value=5.00 97 | ) 98 | lubrication_type = st.selectbox("Lubrication Type", lubrication_type_list) 99 | with col23: 100 | last_maintenance = st.number_input( 101 | "Last Maintenance", min_value=0, max_value=60, step=1 102 | ) 103 | num_repairs = st.number_input( 104 | "Repair Counts", min_value=0, max_value=50, step=1 105 | ) 106 | product_assignment = st.selectbox( 107 | "Pill Product Assignment", product_assignment_list 108 | ) 109 | 110 | sample = [ 111 | { 112 | "Age": age, 113 | "Temperature": temperature, 114 | "Last_Maintenance": last_maintenance, 115 | "Motor_Current": motor_current, 116 | "Number_Repairs": num_repairs, 117 | "Manufacturer": manufacturer, 118 | "Generation": generation, 119 | "Lubrication": lubrication_type, 120 | 
"Product_Assignment": product_assignment, 121 | } 122 | ] 123 | 124 | # logic for inference API connections 125 | 126 | # Help tab frontend below 127 | 128 | with help_tab: 129 | st.markdown("#### Input Descriptions:") 130 | st.markdown( 131 | "- Manufacturer: Provide the name of the manufacturer of the robotic arm" 132 | ) 133 | st.markdown("- Model: Specify the model or specific type of the robotic arm. ") 134 | st.markdown( 135 | "- Lubrication Type: Indicate the type of lubrication used in the robotic arm." 136 | ) 137 | st.markdown( 138 | "- Pill Type: Specify the type or category that the robotic arm is assigned to" 139 | ) 140 | st.markdown( 141 | "- Age of the Machine: Enter the age or duration of use of the robotic arm." 142 | ) 143 | st.markdown( 144 | "- Motor Current: Provide the current reading from the motor of the robotic arm. " 145 | ) 146 | st.markdown( 147 | "- Temperature of Sensors: Specify the temperature readings from the sensors installed on the robotic arm." 148 | ) 149 | st.markdown( 150 | "- Number of Historic Repairs: Enter the total number of repairs or maintenance activities performed on the robotic arm in the past. " 151 | ) 152 | st.markdown( 153 | "- Last Maintenance Date: Provide the date of the last maintenance activity performed on the robotic arm." 154 | ) 155 | st.markdown("#### Code Samples:") 156 | 157 | st.markdown("##### Conversion of XGBoost to Daal4py Model") 158 | daalxgboost_code = """xgb_model = xgb.train(self.parameters, xgb_train, num_boost_round=100) 159 | self.d4p_model = d4p.get_gbt_model_from_xgboost(xgb_model)""" 160 | st.code(daalxgboost_code, language="python") 161 | 162 | st.markdown("##### Inference with Daal4py Model") 163 | daalxgboost_code = """ 164 | daal_predict_algo = d4p.gbt_classification_prediction( 165 | nClasses=num_class, 166 | resultsToEvaluate="computeClassLabels", 167 | fptype='float') 168 | 169 | daal_prediction = daal_predict_algo.compute(data, daal_model) 170 | """ 171 | st.code(daalxgboost_code, language="python") 172 | 173 | st.markdown( 174 | "[Visit GitHub Repository for Source Code](https://github.com/intel/AI-Hackathon)" 175 | ) 176 | -------------------------------------------------------------------------------- /MLOps_Professional/mlops_capstone/app_frontend/requirements.txt: -------------------------------------------------------------------------------- 1 | streamlit==1.45.0 2 | matplotlib==3.10.1 -------------------------------------------------------------------------------- /MLOps_Professional/mlops_capstone/robot_maintenance/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM public.ecr.aws/docker/library/python:3.8@sha256:d411270700143fa2683cc8264d9fa5d3279fd3b6afff62ae81ea2f9d070e390c 2 | 3 | # Create a non-root user 4 | RUN useradd -m nonrootuser 5 | 6 | # Switch to the non-root user 7 | USER nonrootuser 8 | 9 | # copy assets over to image 10 | COPY /src /robot_maintenance 11 | 12 | # set the working directory 13 | WORKDIR /robot_maintenance 14 | 15 | # install dependancies 16 | RUN pip3 install --user --no-cache-dir -r requirements.txt 17 | 18 | # set PATH 19 | ENV PATH=.local/bin:$PATH 20 | 21 | # exposing endpoint port 22 | EXPOSE 5000 23 | 24 | ENTRYPOINT ["python", "serve.py"] -------------------------------------------------------------------------------- /MLOps_Professional/mlops_capstone/robot_maintenance/README.md: -------------------------------------------------------------------------------- 1 | ## Robotics Predictive Maintenance 
Lifecycle Solution Component 2 | The demand predictive asset maintenance component uses an XGBoost classifier to flag assets that need maintenance. It leverages the Intel® Extension for Scikit-Learn, XGBoost, and daal4py on Intel® 4th Generation Xeon® Scalable processors. 3 | 4 | ![image](https://github.com/intel-innersource/frameworks.ai.ai-hackathon/assets/57263404/f80ad7af-4248-45a6-9b9c-69f47b9a7db6) 5 | 6 | ## Technology Stack 7 | 8 | This component is a comprehensive solution that utilizes the power of Intel Xeon Scalable Processors on the Intel Developer Cloud. It includes a set of convenient scripts to facilitate the seamless deployment of the application on 4th Generation Xeon VMs. To enhance the efficiency of data processing and inference in the machine learning pipeline, we leverage the daal4py library and Intel scikit-learn extensions from the AI analytics toolkit. To kickstart the development process, we incorporate the "Predictive Asset Health Analytics" AI Reference Kit. For streamlined deployment, we employ FastAPI to build API endpoints and Docker to containerize our applications, ensuring ease of deployment and management within the DevOps framework. 9 | 10 | ## Additional Resources 11 | 12 | [Original AI Reference Kit for Predictive Asset Health Analytics](https://github.com/oneapi-src/predictive-asset-health-analytics) 13 | -------------------------------------------------------------------------------- /MLOps_Professional/mlops_capstone/robot_maintenance/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/certified-developer/9b0c167ef393db17c62c11ba46d4b73607f81fee/MLOps_Professional/mlops_capstone/robot_maintenance/src/__init__.py -------------------------------------------------------------------------------- /MLOps_Professional/mlops_capstone/robot_maintenance/src/data_model.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | 4 | class TrainPayload(BaseModel): 5 | """ 6 | Data model for training payload. 7 | 8 | Attributes: 9 | file (str): Path to the training data file. 10 | model_name (str): Name of the model to be trained. 11 | model_path (str): Path where the trained model will be saved. 12 | test_size (int, optional): Percentage of data reserved for testing. Defaults to 25. 13 | ncpu (int, optional): Number of CPU threads used for training. Defaults to 4. 14 | mlflow_tracking_uri (str): URI for MLFlow tracking. 15 | mlflow_new_experiment (str, optional): Name of the new experiment to create if no experiment is specified. Defaults to None. 16 | mlflow_experiment (str, optional): Name of the existing experiment. Defaults to None. 17 | """ 18 | 19 | file: str 20 | model_name: str 21 | model_path: str 22 | test_size: int = 25 23 | ncpu: int = 4 24 | mlflow_tracking_uri: str 25 | mlflow_new_experiment: str = None 26 | mlflow_experiment: str = None 27 | 28 | 29 | class PredictionPayload(BaseModel): 30 | """ 31 | Data model for prediction payload. 32 | 33 | Attributes: 34 | model_name (str): Name of the model to be used for prediction. 35 | stage (str): Stage of the model to be used for prediction. 36 | sample (list): List of samples for prediction. 37 | model_run_id (str): ID of the model run. 38 | scaler_file_name (str): Name of the scaler file. 39 | scaler_destination (str, optional): Destination path for the scaler file. Defaults to './'. 
40 | """ 41 | 42 | model_name: str 43 | stage: str 44 | sample: list 45 | model_run_id: str 46 | scaler_file_name: str 47 | scaler_destination: str = "./" 48 | -------------------------------------------------------------------------------- /MLOps_Professional/mlops_capstone/robot_maintenance/src/inference.py: -------------------------------------------------------------------------------- 1 | import joblib 2 | import mlflow 3 | import numpy as np 4 | import pandas as pd 5 | import time 6 | import os 7 | import datetime 8 | from string import Template 9 | 10 | SAFE_BASE_DIR = os.path.join(os.path.expanduser("~"), "mlops", "capstone") 11 | 12 | 13 | def inference( 14 | model_name: str, 15 | stage: str, 16 | model_run_id: int, 17 | scaler_file_name: str, 18 | scaler_destination: str, 19 | data: str, 20 | ) -> str: 21 | """ 22 | Perform inference using a pre-trained model and scaler. 23 | 24 | Args: 25 | model_name (str): Name of the model to be used for inference. 26 | stage (str): Stage of the model to be used for inference. 27 | model_run_id (int): ID of the model run. 28 | scaler_file_name (str): Name of the scaler file. 29 | scaler_destination (str): Destination path for the scaler file. 30 | data (str): Path to the data file. 31 | 32 | Returns: 33 | str: Inference result indicating maintenance status. 34 | """ 35 | scaler_destination = os.path.normpath( 36 | os.path.join(SAFE_BASE_DIR, scaler_destination) 37 | ) 38 | scaler_file_path = os.path.normpath( 39 | os.path.join(scaler_destination, scaler_file_name) 40 | ) 41 | if not scaler_destination.startswith( 42 | SAFE_BASE_DIR 43 | ) or not scaler_file_path.startswith(SAFE_BASE_DIR): 44 | raise ValueError("Scalar file path is not within the allowed model directory.") 45 | 46 | try: 47 | # retrieve scaler 48 | mlflow.artifacts.download_artifacts( 49 | run_id=model_run_id, 50 | artifact_path=scaler_file_name, 51 | dst_path=scaler_destination, 52 | ) 53 | except Exception as e: 54 | raise RuntimeError(f"Failed to retrieve scaler: {e}") 55 | 56 | try: 57 | # load robust scaler 58 | with open(scaler_file_path, "rb") as fh: 59 | robust_scaler = joblib.load(fh.name) 60 | except Exception as e: 61 | raise RuntimeError(f"Failed to load robust scaler: {e}") 62 | try: 63 | # load model 64 | model_uri_template = Template("models:/$model_name/$stage") 65 | model_uri = model_uri_template.substitute( 66 | model_name=model_name, stage=stage 67 | ) 68 | model = mlflow.pyfunc.load_model(model_uri=model_uri) 69 | except Exception as e: 70 | raise RuntimeError(f"Failed to load model: {e}") 71 | 72 | # process data sample 73 | Categorical_Variables = pd.get_dummies( 74 | data[["Manufacturer", "Generation", "Lubrication", "Product_Assignment"]], 75 | drop_first=False, 76 | ) 77 | data = pd.concat([data, Categorical_Variables], axis=1) 78 | data.drop( 79 | ["Manufacturer", "Generation", "Lubrication", "Product_Assignment"], 80 | axis=1, 81 | inplace=True, 82 | ) 83 | 84 | data = data.astype({"Motor_Current": "float64", "Number_Repairs": "float64"}) 85 | 86 | number_samples = data.select_dtypes(["float", "int", "int32"]) 87 | scaled_samples = robust_scaler.transform(number_samples) 88 | scaled_samples_transformed = pd.DataFrame( 89 | scaled_samples, index=number_samples.index, columns=number_samples.columns 90 | ) 91 | del scaled_samples_transformed["Number_Repairs"] 92 | data = data.drop( 93 | ["Age", "Temperature", "Last_Maintenance", "Motor_Current"], axis=1 94 | ) 95 | data = data.astype(int) 96 | processed_sample = pd.concat([scaled_samples_transformed, 
data], axis=1) 97 | processed_sample = processed_sample.astype({"Motor_Current": "float64"}) 98 | 99 | column_names = [ 100 | "Age", 101 | "Temperature", 102 | "Last_Maintenance", 103 | "Motor_Current", 104 | "Number_Repairs", 105 | "Manufacturer_A", 106 | "Manufacturer_B", 107 | "Manufacturer_C", 108 | "Manufacturer_D", 109 | "Manufacturer_E", 110 | "Manufacturer_F", 111 | "Manufacturer_G", 112 | "Manufacturer_H", 113 | "Manufacturer_I", 114 | "Manufacturer_J", 115 | "Generation_Gen1", 116 | "Generation_Gen2", 117 | "Generation_Gen3", 118 | "Generation_Gen4", 119 | "Lubrication_LTA", 120 | "Lubrication_LTB", 121 | "Lubrication_LTC", 122 | "Product_Assignment_PillA", 123 | "Product_Assignment_PillB", 124 | "Product_Assignment_PillC", 125 | ] 126 | 127 | zeroes_dataframe = pd.DataFrame(0, index=np.arange(1), columns=column_names) 128 | merged_df = pd.merge( 129 | zeroes_dataframe, 130 | processed_sample, 131 | on=processed_sample.columns.tolist(), 132 | how="right", 133 | ).fillna(0) 134 | 135 | columns_to_convert = [ 136 | "Manufacturer_A", 137 | "Manufacturer_B", 138 | "Manufacturer_C", 139 | "Manufacturer_D", 140 | "Manufacturer_E", 141 | "Manufacturer_F", 142 | "Manufacturer_G", 143 | "Manufacturer_H", 144 | "Manufacturer_I", 145 | "Manufacturer_J", 146 | "Generation_Gen1", 147 | "Generation_Gen2", 148 | "Generation_Gen3", 149 | "Generation_Gen4", 150 | "Lubrication_LTA", 151 | "Lubrication_LTB", 152 | "Lubrication_LTC", 153 | "Product_Assignment_PillA", 154 | "Product_Assignment_PillB", 155 | "Product_Assignment_PillC", 156 | ] 157 | 158 | merged_df[columns_to_convert] = merged_df[columns_to_convert].astype(int) 159 | 160 | start_time = time.time() 161 | xgb_prediction = model.predict(merged_df) 162 | elapsed_time_milliseconds = (time.time() - start_time) * 1000 163 | 164 | for prediction in xgb_prediction: 165 | if prediction == 0: 166 | status = "Equipment Does Not Require Scheduled Maintenance" 167 | elif prediction == 1: 168 | status = "Equipment Requires Scheduled Maintenance - Plan Accordingly" 169 | 170 | # logic for monitoring log file creation 171 | 172 | return status 173 | -------------------------------------------------------------------------------- /MLOps_Professional/mlops_capstone/robot_maintenance/src/requirements.txt: -------------------------------------------------------------------------------- 1 | daal4py==2024.7.0 2 | pydantic==2.11.5 3 | xgboost==2.1.4 4 | fastapi==0.115.12 5 | numpy==2.2.3 6 | pandas==2.2.3 7 | scikit-learn-intelex==2025.4.0 8 | uvicorn==0.34.2 9 | streamlit==1.45.1 10 | mlflow==2.21.2 11 | -------------------------------------------------------------------------------- /MLOps_Professional/mlops_capstone/robot_maintenance/src/serve.py: -------------------------------------------------------------------------------- 1 | import uvicorn 2 | import logging 3 | import warnings 4 | import pandas as pd 5 | 6 | from fastapi import FastAPI, HTTPException 7 | from data_model import TrainPayload, PredictionPayload 8 | from train import RoboMaintenance 9 | from inference import inference 10 | 11 | app = FastAPI() 12 | 13 | logging.basicConfig(level=logging.DEBUG) 14 | logger = logging.getLogger(__name__) 15 | warnings.filterwarnings("ignore") 16 | 17 | 18 | @app.get("/ping") 19 | async def ping() -> dict: 20 | """ 21 | Ping server to determine status. 22 | 23 | Returns: 24 | dict: Response from server on health status. 
25 | """ 26 | return {"message": "Server is Running"} 27 | 28 | 29 | @app.post("/train") 30 | async def train(payload: TrainPayload) -> dict: 31 | """ 32 | Training Endpoint 33 | This endpoint processes raw data and trains an XGBoost Classifier. 34 | 35 | Args: 36 | payload (TrainPayload): Training endpoint payload model. 37 | 38 | Returns: 39 | dict: Accuracy metrics and other logger feedback on training progress. 40 | """ 41 | try: 42 | # Validate inputs 43 | if not isinstance(payload.model_name, str) or not payload.model_name: 44 | raise ValueError("Invalid model name. It should be a non-empty string.") 45 | if not isinstance(payload.file, str) or not payload.file.endswith(".parquet"): 46 | raise ValueError( 47 | "Invalid file name. It should be a string ending with '.parquet'" 48 | ) 49 | if not isinstance(payload.test_size, int) or not (0 < payload.test_size < 100): 50 | raise ValueError( 51 | "Invalid test size. It should be an integer between 0 and 100" 52 | ) 53 | if not isinstance(payload.ncpu, int) or payload.ncpu <= 0: 54 | raise ValueError("Invalid ncpu. It should be a positive integer.") 55 | if not isinstance(payload.model_path, str) or not payload.model_path: 56 | raise ValueError("Invalid model path. It should be a non-empty string.") 57 | 58 | model = RoboMaintenance(payload.model_name) 59 | model.mlflow_tracking( 60 | tracking_uri=payload.mlflow_tracking_uri, 61 | new_experiment=payload.mlflow_new_experiment, 62 | experiment=payload.mlflow_experiment, 63 | ) 64 | logger.info("Configured Experiment and Tracking URI for MLFlow") 65 | model.process_data(payload.file, payload.test_size / 100)  # convert the integer percentage into the 0-1 fraction expected by process_data 66 | logger.info("Data has been successfully processed") 67 | model.train(payload.ncpu) 68 | logger.info("Maintenance Model Successfully Trained") 69 | model.save(payload.model_path) 70 | logger.info("Saved Maintenance Model") 71 | accuracy_score = model.validate() 72 | return { 73 | "msg": "Model trained successfully", 74 | "validation scores": accuracy_score, 75 | } 76 | except ValueError as e: 77 | logger.error(f"Validation error: {e}") 78 | raise HTTPException(status_code=400, detail=str(e)) 79 | except Exception as e: 80 | logger.error(f"Unexpected error: {e}") 81 | raise HTTPException(status_code=500, detail="Internal Server Error") 82 | 83 | 84 | @app.post("/predict") 85 | async def predict(payload: PredictionPayload) -> dict: 86 | """ 87 | Prediction Endpoint 88 | This endpoint performs inference using a pre-trained model and scaler. 89 | 90 | Args: 91 | payload (PredictionPayload): Prediction endpoint payload model. 92 | 93 | Returns: 94 | dict: Maintenance recommendation based on the inference result. 95 | """ 96 | try: 97 | # Validate inputs 98 | if not isinstance(payload.model_name, str) or not payload.model_name: 99 | raise ValueError("Invalid model name. It should be a non-empty string.") 100 | if not isinstance(payload.stage, str) or not payload.stage: 101 | raise ValueError("Invalid stage. It should be a non-empty string.") 102 | if not isinstance(payload.model_run_id, str) or not payload.model_run_id: 103 | raise ValueError("Invalid model run ID. It should be a non-empty string.") 104 | if ( 105 | not isinstance(payload.scaler_file_name, str) 106 | or not payload.scaler_file_name 107 | ): 108 | raise ValueError( 109 | "Invalid scaler file name. It should be a non-empty string." 110 | ) 111 | if ( 112 | not isinstance(payload.scaler_destination, str) 113 | or not payload.scaler_destination 114 | ): 115 | raise ValueError( 116 | "Invalid scaler destination.
It should be a non-empty string." 117 | ) 118 | if not isinstance(payload.sample, list) or not payload.sample: 119 | raise ValueError("Invalid sample data. It should be a non-empty list.") 120 | 121 | sample = pd.json_normalize(payload.sample) 122 | results = inference( 123 | model_name=payload.model_name, 124 | stage=payload.stage, 125 | model_run_id=payload.model_run_id, 126 | scaler_file_name=payload.scaler_file_name, 127 | scaler_destination=payload.scaler_destination, 128 | data=sample, 129 | ) 130 | return {"msg": "Completed Analysis", "Maintenance Recommendation": results} 131 | except ValueError as e: 132 | logger.error(f"Validation error: {e}") 133 | raise HTTPException(status_code=400, detail=str(e)) 134 | except Exception as e: 135 | logger.error(f"Unexpected error: {e}") 136 | raise HTTPException(status_code=500, detail="Internal Server Error") 137 | 138 | 139 | if __name__ == "__main__": 140 | """ 141 | Main entry point for the server. 142 | 143 | This block runs the FastAPI application using Uvicorn. 144 | """ 145 | try: 146 | uvicorn.run("serve:app", host="127.0.0.1", port=5000, log_level="info") 147 | except Exception as e: 148 | logger.error(f"Failed to start server: {e}") 149 | -------------------------------------------------------------------------------- /MLOps_Professional/mlops_capstone/robot_maintenance/src/train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # pylint: disable=import-error 4 | 5 | """ 6 | Module to train and prediction using XGBoost Classifier 7 | """ 8 | 9 | import os 10 | import sys 11 | import logging 12 | import warnings 13 | import joblib 14 | import mlflow 15 | 16 | import numpy as np 17 | import xgboost as xgb 18 | import pandas as pd 19 | 20 | from sklearn.model_selection import train_test_split 21 | from sklearn.preprocessing import RobustScaler 22 | from werkzeug.utils import secure_filename 23 | from typing import Optional 24 | 25 | SAFE_BASE_DIR = os.path.join(os.path.expanduser("~"), "mlops", "capstone") 26 | 27 | logging.basicConfig(level=logging.DEBUG) 28 | logger = logging.getLogger(__name__) 29 | warnings.filterwarnings("ignore") 30 | 31 | 32 | class RoboMaintenance: 33 | def __init__(self, model_name: str): 34 | """ 35 | Initializes the RoboMaintenance class with default values. 36 | 37 | Args: 38 | model_name (str): Name of the model. 39 | """ 40 | self.model_name = model_name 41 | self.file = "" 42 | self.y_train = "" 43 | self.y_test = "" 44 | self.X_train_scaled_transformed = "" 45 | self.X_test_scaled_transformed = "" 46 | self.accuracy_scr = "" 47 | self.model_path = "" 48 | self.parameters = "" 49 | self.robust_scaler = "" 50 | self.run_id = "" 51 | self.active_experiment = "" 52 | self.xgb_model = "" 53 | 54 | def mlflow_tracking( 55 | self, 56 | tracking_uri: str = "./mlflow_tracking", 57 | experiment: Optional[str] = None, 58 | new_experiment: Optional[str] = None, 59 | ) -> None: 60 | """ 61 | Sets up MLFlow tracking. 62 | 63 | Args: 64 | tracking_uri (str, optional): URI for MLFlow tracking. Defaults to "./mlflow_tracking". 65 | experiment (str, optional): Name of the existing experiment. Defaults to None. 66 | new_experiment (str, optional): Name of the new experiment to create if no experiment is specified. Defaults to None. 
67 | """ 68 | # sets tracking URI 69 | mlflow.set_tracking_uri(tracking_uri) 70 | 71 | # creates new experiment if no experiment is specified 72 | if experiment is None: 73 | mlflow.create_experiment(new_experiment) 74 | self.active_experiment = new_experiment 75 | mlflow.set_experiment(new_experiment) 76 | else: 77 | mlflow.set_experiment(experiment) 78 | self.active_experiment = experiment 79 | 80 | def process_data(self, file: str, test_size: float = 0.25) -> None: 81 | """ 82 | Processes raw data for training. 83 | 84 | Args: 85 | file (str): Path to raw training data. 86 | test_size (float, optional): Percentage of data reserved for testing. Defaults to 0.25. 87 | """ 88 | # Validate file name 89 | if not isinstance(file, str) or not file.endswith(".parquet"): 90 | raise ValueError( 91 | "Invalid file name. It should be a string ending with '.parquet'" 92 | ) 93 | 94 | # Validate test size 95 | if not isinstance(test_size, float) or not (0 < test_size < 1): 96 | raise ValueError("Invalid test size. It should be a float between 0 and 1") 97 | 98 | # Generating our data 99 | logger.info("Reading the dataset from %s...", file) 100 | if not file.startswith(os.path.abspath(SAFE_BASE_DIR) + os.sep): 101 | raise ValueError( 102 | f"Path is not within the allowed directory {SAFE_BASE_DIR}" 103 | ) 104 | try: 105 | data = pd.read_parquet(file) 106 | if not isinstance(data, pd.DataFrame): 107 | sys.exit("Invalid data format") 108 | except Exception as e: 109 | sys.exit(f"Error reading dataset: {e}") 110 | 111 | X = data.drop("Asset_Label", axis=1) 112 | y = data.Asset_Label 113 | 114 | X_train, X_test, self.y_train, self.y_test = train_test_split( 115 | X, y, test_size=test_size 116 | ) 117 | 118 | df_num_train = X_train.select_dtypes(["float", "int", "int32"]) 119 | df_num_test = X_test.select_dtypes(["float", "int", "int32"]) 120 | self.robust_scaler = RobustScaler() 121 | X_train_scaled = self.robust_scaler.fit_transform(df_num_train) 122 | X_test_scaled = self.robust_scaler.transform(df_num_test) 123 | 124 | # Making them pandas dataframes 125 | X_train_scaled_transformed = pd.DataFrame( 126 | X_train_scaled, index=df_num_train.index, columns=df_num_train.columns 127 | ) 128 | X_test_scaled_transformed = pd.DataFrame( 129 | X_test_scaled, index=df_num_test.index, columns=df_num_test.columns 130 | ) 131 | 132 | del X_train_scaled_transformed["Number_Repairs"] 133 | del X_test_scaled_transformed["Number_Repairs"] 134 | 135 | # Dropping the unscaled numerical columns 136 | X_train = X_train.drop( 137 | ["Age", "Temperature", "Last_Maintenance", "Motor_Current"], axis=1 138 | ) 139 | X_test = X_test.drop( 140 | ["Age", "Temperature", "Last_Maintenance", "Motor_Current"], axis=1 141 | ) 142 | 143 | X_train = X_train.astype(int) 144 | X_test = X_test.astype(int) 145 | 146 | # Creating train and test data with scaled numerical columns 147 | X_train_scaled_transformed = pd.concat( 148 | [X_train_scaled_transformed, X_train], axis=1 149 | ) 150 | X_test_scaled_transformed = pd.concat( 151 | [X_test_scaled_transformed, X_test], axis=1 152 | ) 153 | 154 | self.X_train_scaled_transformed = X_train_scaled_transformed.astype( 155 | {"Motor_Current": "float64"} 156 | ) 157 | self.X_test_scaled_transformed = X_test_scaled_transformed.astype( 158 | {"Motor_Current": "float64"} 159 | ) 160 | 161 | def train(self, ncpu: int = 4) -> None: 162 | """ 163 | Trains an XGBoost Classifier and tracks models with MLFlow. 164 | 165 | Args: 166 | ncpu (int, optional): Number of CPU threads used for training. 
Defaults to 4. 167 | """ 168 | # Validate ncpu 169 | if not isinstance(ncpu, int) or ncpu <= 0: 170 | raise ValueError("Invalid ncpu. It should be a positive integer.") 171 | 172 | # Set xgboost parameters 173 | self.parameters = { 174 | "max_bin": 256, 175 | "scale_pos_weight": 2, 176 | "lambda_l2": 1, 177 | "alpha": 0.9, 178 | "max_depth": 8, 179 | "num_leaves": 2**8, 180 | "verbosity": 0, 181 | "objective": "multi:softmax", 182 | "learning_rate": 0.3, 183 | "num_class": 3, 184 | "nthread": ncpu, 185 | } 186 | 187 | mlflow.xgboost.autolog() 188 | xgb_train = xgb.DMatrix( 189 | self.X_train_scaled_transformed, label=np.array(self.y_train) 190 | ) 191 | self.xgb_model = xgb.train(self.parameters, xgb_train, num_boost_round=100) 192 | 193 | # store run id for user in other methods 194 | xp = mlflow.get_experiment_by_name(self.active_experiment)._experiment_id 195 | self.run_id = mlflow.search_runs(xp, output_format="list")[0].info.run_id 196 | 197 | def validate(self) -> float: 198 | """ 199 | Performs model validation with testing data. 200 | 201 | Returns: 202 | float: Accuracy metric. 203 | """ 204 | # calculate accuracy 205 | dtest = xgb.DMatrix(self.X_test_scaled_transformed, self.y_test) 206 | xgb_prediction = self.xgb_model.predict(dtest) 207 | xgb_errors_count = np.count_nonzero(xgb_prediction - np.ravel(self.y_test)) 208 | self.accuracy_scr = 1 - xgb_errors_count / xgb_prediction.shape[0] 209 | 210 | # log accuracy metric with mlflow 211 | with mlflow.start_run(self.run_id): 212 | mlflow.log_metric("accuracy", self.accuracy_scr) 213 | 214 | return self.accuracy_scr 215 | 216 | def save(self, model_path: str) -> None: 217 | """ 218 | Logs scaler as MLFlow artifact. 219 | 220 | Args: 221 | model_path (str): Path where trained model should be saved. 222 | """ 223 | # Validate model path 224 | if not isinstance(model_path, str) or not model_path: 225 | raise ValueError("Invalid model path. 
It should be a non-empty string.") 226 | 227 | sanitized_model_path = secure_filename(model_path) 228 | self.scaler_path = os.path.normpath( 229 | os.path.join( 230 | SAFE_BASE_DIR, sanitized_model_path, self.model_name + "_scaler.joblib" 231 | ) 232 | ) 233 | self.scaler_path = os.path.abspath(self.scaler_path) 234 | if not self.scaler_path.startswith(os.path.abspath(SAFE_BASE_DIR) + os.sep): 235 | raise ValueError("Path is not within the allowed model directory.") 236 | 237 | logger.info("Saving Scaler") 238 | try: 239 | with open(self.scaler_path, "wb") as fh: 240 | joblib.dump(self.robust_scaler, fh.name) 241 | except Exception as e: 242 | logger.error(f"Failed to save scaler: {e}") 243 | raise 244 | 245 | logger.info("Saving Scaler as MLFLow Artifact") 246 | with mlflow.start_run(self.run_id): 247 | mlflow.log_artifact(self.scaler_path) 248 | -------------------------------------------------------------------------------- /MLOps_Professional/mlops_capstone/robot_maintenance/src/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/certified-developer/9b0c167ef393db17c62c11ba46d4b73607f81fee/MLOps_Professional/mlops_capstone/robot_maintenance/src/utils/__init__.py -------------------------------------------------------------------------------- /MLOps_Professional/mlops_capstone/robot_maintenance/src/utils/generate_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # pylint: disable=import-error 4 | 5 | # Copyright (C) 2022 Intel Corporation 6 | # SPDX-License-Identifier: BSD-3-Clause 7 | 8 | """ 9 | Module to generate dataset for Predictive Asset Maintenance 10 | """ 11 | 12 | import os 13 | import warnings 14 | import argparse 15 | import logging 16 | import time 17 | import pandas as pd 18 | import numpy as np 19 | 20 | logging.basicConfig(level=logging.DEBUG) 21 | logger = logging.getLogger(__name__) 22 | warnings.filterwarnings("ignore") 23 | 24 | SAFE_BASE_DIR = os.path.join(os.path.expanduser("~"), "mlops", "capstone") 25 | 26 | parser = argparse.ArgumentParser() 27 | parser.add_argument( 28 | "-s", "--size", type=int, required=False, default=25000, help="data size" 29 | ) 30 | parser.add_argument( 31 | "-p", 32 | "--save_path", 33 | type=str, 34 | required=True, 35 | help="path to the output Parquet file within the safe directory", 36 | ) 37 | FLAGS = parser.parse_args() 38 | dsize = FLAGS.size 39 | 40 | train_path = FLAGS.save_path 41 | train_path = os.path.abspath(os.path.normpath(os.path.join(SAFE_BASE_DIR, train_path))) 42 | 43 | # Ensure train_path is still inside SAFE_BASE_DIR 44 | if not train_path.startswith(os.path.abspath(SAFE_BASE_DIR) + os.sep): 45 | raise ValueError(f"Path is not within the allowed directory {SAFE_BASE_DIR}") 46 | 47 | # Ensure the directory exists before saving 48 | os.makedirs(os.path.dirname(train_path), exist_ok=True) 49 | 50 | # Generating our data 51 | start = time.time() 52 | logger.info("Generating data with the size %d", dsize) 53 | np.random.seed(1) 54 | manufacturer_list = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J"] 55 | generation_list = ["Gen1", "Gen2", "Gen3", "Gen4"] 56 | lubrication_type_list = ["LTA", "LTB", "LTC"] 57 | product_assignment_list = ["PillA", "PillB", "PillC"] 58 | data = pd.DataFrame( 59 | { 60 | "Age": np.random.choice(range(0, 25), dsize, replace=True), 61 | "Temperature": np.random.randint(low=50, high=300, size=dsize), 62 | 
"Last_Maintenance": np.random.normal(0, 60, size=dsize), 63 | "Motor_Current": np.random.randint(low=0.00, high=10.00, size=dsize), 64 | "Manufacturer": np.random.choice(manufacturer_list, dsize, replace=True), 65 | "Generation": np.random.choice(generation_list, dsize, replace=True), 66 | "Number_Repairs": np.random.choice(range(0, 50), dsize, replace=True), 67 | "Lubrication": np.random.choice(lubrication_type_list, dsize, replace=True), 68 | "Product_Assignment": np.random.choice( 69 | product_assignment_list, dsize, replace=True 70 | ), 71 | } 72 | ) 73 | 74 | # Generating our target variable Asset_Label 75 | logger.info("Generating our target variable Asset_Label") 76 | data["Asset_Label"] = np.random.choice(range(0, 2), dsize, replace=True, p=[0.99, 0.01]) 77 | 78 | # When age is 0-5 and over 20 change Asset_Label to 1 79 | logger.info("Creating correlation between our variables and our target variable") 80 | logger.info("When age is 0-5 and over 20 change Asset_Label to 1") 81 | data["Asset_Label"] = np.where( 82 | ((data.Age > 0) & (data.Age <= 5)) | (data.Age > 20), 1, data.Asset_Label 83 | ) 84 | 85 | # When Temperature is between 150-300 change Asset_Label to 1 86 | logger.info("When Temperature is between 500-1500 change Asset_Label to 1") 87 | data["Asset_Label"] = np.where( 88 | (data.Temperature >= 150) & (data.Temperature <= 300), 1, data.Asset_Label 89 | ) 90 | 91 | # When Manufacturer is A, E, or H change Asset_Label to have 80% 1's 92 | logger.info("When Manufacturer is A, E, or H change Asset_Label to 1") 93 | data["Temp_Var"] = np.random.choice(range(0, 2), dsize, replace=True, p=[0.2, 0.8]) 94 | data["Asset_Label"] = np.where( 95 | (data.Manufacturer == "A") 96 | | (data.Manufacturer == "E") 97 | | (data.Manufacturer == "H"), 98 | data.Temp_Var, 99 | data.Asset_Label, 100 | ) 101 | 102 | # When Generation is Gen1 or Gen3 change Asset_Label to have 50% to 1's 103 | logger.info("When Generation is Gen1 or Gen3 change Asset_Label to have 50% to 0's") 104 | data["Temp_Var"] = np.random.choice(range(0, 2), dsize, replace=True, p=[0.5, 0.5]) 105 | data["Asset_Label"] = np.where( 106 | (data.Generation == "Gen1") | (data.Generation == "Gen3"), 107 | data.Temp_Var, 108 | data.Asset_Label, 109 | ) 110 | 111 | 112 | # When Product Assignment is Pill B change Asset_Label to have 70% to 1's 113 | logger.info("When District is Pill B change Asset_Label to have 70% to 1's") 114 | data["Temp_Var"] = np.random.choice(range(0, 2), dsize, replace=True, p=[0.3, 0.7]) 115 | data["Asset_Label"] = np.where( 116 | (data.Product_Assignment == "PillB"), data.Temp_Var, data.Asset_Label 117 | ) 118 | 119 | 120 | # When Lubrication is LTC change Asset_Label to have 75% to 1's 121 | logger.info("When Lubrication is LTC change Asset_Label to have 75% to 1's") 122 | data["Temp_Var"] = np.random.choice(range(0, 2), dsize, replace=True, p=[0.25, 0.75]) 123 | data["Asset_Label"] = np.where( 124 | (data.Lubrication == "LTC"), data.Temp_Var, data.Asset_Label 125 | ) 126 | 127 | data.drop("Temp_Var", axis=1, inplace=True) 128 | 129 | Categorical_Variables = pd.get_dummies( 130 | data[["Manufacturer", "Generation", "Lubrication", "Product_Assignment"]], 131 | drop_first=False, 132 | ) 133 | data = pd.concat([data, Categorical_Variables], axis=1) 134 | data.drop( 135 | ["Manufacturer", "Generation", "Lubrication", "Product_Assignment"], 136 | axis=1, 137 | inplace=True, 138 | ) 139 | 140 | data = data.astype({"Motor_Current": "float64", "Number_Repairs": "float64"}) 141 | 142 | etime = time.time() - start 
143 | datasize = data.shape 144 | logger.info( 145 | "=====> Time taken %f secs for data generation for the size of %s", etime, datasize 146 | ) 147 | 148 | # save data to parquet file 149 | train_path = FLAGS.save_path 150 | logger.info("Saving the data to %s ...", train_path) 151 | data.to_parquet(train_path) 152 | logger.info("DONE") 153 | -------------------------------------------------------------------------------- /MLOps_Professional/mlops_capstone/setup/Makefile: -------------------------------------------------------------------------------- 1 | SHELL := /bin/bash # Use bash syntax 2 | 3 | setup-requirements: 4 | pip install -r ../robot_maintenance/src/requirements.txt 5 | 6 | setup-store: 7 | # create model stores 8 | mkdir -p ../store/models/robot_maintenance 9 | # create data stores 10 | mkdir -p ../store/datasets/{monitoring,robot_maintenance} 11 | # create output stores 12 | mkdir -p ../store/outputs/robot_maintenance 13 | 14 | create-data: 15 | python3 ../robot_maintenance/src/utils/generate_data.py --save_path '../store/datasets/robot_maintenance/' 16 | -------------------------------------------------------------------------------- /MLOps_Professional/mlops_capstone/setup/README.md: -------------------------------------------------------------------------------- 1 | # Configuration Utilities for Development and Deployment 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Lab Overview 2 | 3 | This repository contains hands-on labs that help you practice and build skills associated with Intel® Certified Developer – MLOps Professional Certification exam. 4 | 5 | Whether you are deploying an AI project into production or adding AI to an existing application, building a performant and scalable machine learning operations (MLOps) environment is crucial to maximizing your resources. This curriculum teaches you to incorporate compute awareness into the AI solution design process to maximize performance across the AI pipeline. 6 | 7 | ## Table of Contents 8 | 9 | Lab 1: Building REST API Endpoints with FastAPI 10 | 11 | Lab 2: Practice creating Architecture diagrams from Application Specs 12 | 13 | Lab 3: Implementing Model Development Components with MLflow 14 | 15 | Lab 4: Building an Inference Endpoint using FastAPI 16 | 17 | Lab 5: Intel Deep Learning Optimizations 18 | 19 | Lab 6: Hugging Face LLM Inference 20 | 21 | Lab 7: Optimizing the Full Stack with OneAPI 22 | 23 | Lab 8: Retrieval Augmented Generation with PyTorch 2.0 and LangChain 24 | 25 | Lab 9: Your First Open Source Contribution 26 | 27 | #### NOTE: This code is educational in nature and should not be used in production. 28 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | Intel is committed to rapidly addressing security vulnerabilities affecting our customers and providing clear guidance on the solution, impact, severity and mitigation. 3 | 4 | ## Reporting a Vulnerability 5 | Please report any security vulnerabilities in this project [utilizing the guidelines here](https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html). 6 | 7 | --------------------------------------------------------------------------------
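Usage note (not part of the repository sources above): the sketch below illustrates how a client might call the robot_maintenance FastAPI service defined in serve.py, assuming the service is running locally on port 5000 as configured in its uvicorn call. The payload fields mirror TrainPayload and PredictionPayload from data_model.py and the sample columns assembled in RoboMaintenance.py; all file paths, the experiment name, and the MLflow run ID are placeholders you would replace with values from your own environment.

```python
# Minimal client sketch for the robot_maintenance service (serve.py).
# Assumes the API is reachable at http://127.0.0.1:5000; paths, experiment
# name, and run ID below are placeholders, not values shipped with this repo.
import requests

BASE_URL = "http://127.0.0.1:5000"

# /train expects the TrainPayload fields from data_model.py
train_payload = {
    "file": "/path/to/store/datasets/robot_maintenance/train.parquet",  # placeholder path
    "model_name": "model",
    "model_path": "./",
    "test_size": 25,  # integer percentage of data held out for testing
    "ncpu": 4,
    "mlflow_tracking_uri": "/path/to/store/models/robot_maintenance",  # placeholder path
    "mlflow_new_experiment": "robot_maintenance_v1",  # placeholder experiment name
}
print(requests.post(f"{BASE_URL}/train", json=train_payload).json())

# /predict expects the PredictionPayload fields; sample keys match the
# columns built in app_frontend/pages/RoboMaintenance.py
predict_payload = {
    "model_name": "model",
    "stage": "Staging",
    "model_run_id": "<mlflow-run-id>",  # placeholder; copy from the MLflow UI
    "scaler_file_name": "model_scaler.joblib",
    "scaler_destination": "./",
    "sample": [{
        "Age": 5, "Temperature": 120, "Last_Maintenance": 10,
        "Motor_Current": 5.0, "Number_Repairs": 2,
        "Manufacturer": "A", "Generation": "Gen1",
        "Lubrication": "LTA", "Product_Assignment": "PillA",
    }],
}
print(requests.post(f"{BASE_URL}/predict", json=predict_payload).json())
```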