├── .github └── workflows │ ├── 02-manual-trigger-job.yml │ └── 04-code-checks.yml ├── .gitignore ├── _build.yml ├── _config.yml ├── documentation ├── 00-script.md ├── 01-aml-job.md ├── 02-github-actions.md ├── 03-trigger-workflow.md ├── 04-unit-test-linting.md ├── 05-environments.md ├── 06-deploy-model.md └── media │ └── 00-01-github-secret.png ├── experimentation ├── data │ └── diabetes-dev.csv └── train-classification-model.ipynb ├── index.md ├── production └── data │ └── diabetes-prod.csv ├── pytest.ini ├── requirements.txt ├── src ├── job.yml └── model │ └── train.py └── tests ├── .flake8 ├── __init__.py ├── datasets ├── first.csv ├── foo.py └── second.csv └── test_train.py /.github/workflows/02-manual-trigger-job.yml: -------------------------------------------------------------------------------- 1 | name: Manually trigger an Azure Machine Learning job 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | train: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Check out repo 11 | uses: actions/checkout@main 12 | - name: Install az ml extension 13 | run: az extension add -n ml -y 14 | - name: Azure login 15 | uses: azure/login@v1 16 | with: 17 | creds: ${{secrets.AZURE_CREDENTIALS}} 18 | 19 | 20 | -------------------------------------------------------------------------------- /.github/workflows/04-code-checks.yml: -------------------------------------------------------------------------------- 1 | name: Code checks 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | job1: 8 | name: linting 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Check out repo 12 | uses: actions/checkout@main 13 | - name: Use Python version 3.8 14 | uses: actions/setup-python@v3 15 | with: 16 | python-version: '3.8' 17 | - name: Install Flake8 18 | run: | 19 | python -m pip install flake8 20 | - name: Run linting tests 21 | run: | 22 | flake8 src/model/ 23 | 24 | 25 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /_build.yml: -------------------------------------------------------------------------------- 1 | name: '$(Date:yyyyMMdd)$(Rev:.rr)' 2 | jobs: 3 | - job: build_markdown_content 4 | displayName: 'Build Markdown Content' 5 | workspace: 6 | clean: all 7 | pool: 8 | vmImage: 'Ubuntu 16.04' 9 | container: 10 | image: 'microsoftlearning/markdown-build:latest' 11 | steps: 12 | - task: Bash@3 13 | displayName: 'Build Content' 14 | inputs: 15 | targetType: inline 16 | script: | 17 | cp /{attribution.md,template.docx,package.json,package.js} . 
18 | npm install 19 | node package.js --version $(Build.BuildNumber) 20 | - task: GitHubRelease@0 21 | displayName: 'Create GitHub Release' 22 | inputs: 23 | gitHubConnection: 'github-microsoftlearning-organization' 24 | repositoryName: '$(Build.Repository.Name)' 25 | tagSource: manual 26 | tag: 'v$(Build.BuildNumber)' 27 | title: 'Version $(Build.BuildNumber)' 28 | releaseNotesSource: input 29 | releaseNotes: '# Version $(Build.BuildNumber) Release' 30 | assets: '$(Build.SourcesDirectory)/out/*.zip' 31 | assetUploadMode: replace 32 | - task: PublishBuildArtifacts@1 33 | displayName: 'Publish Output Files' 34 | inputs: 35 | pathtoPublish: '$(Build.SourcesDirectory)/out/' 36 | artifactName: 'Lab Files' 37 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | remote_theme: MicrosoftLearning/Jekyll-Theme 2 | exclude: 3 | - readme.md 4 | - .github/ 5 | header_pages: 6 | - index.html 7 | author: Microsoft Learning 8 | twitter_username: mslearning 9 | github_username: MicrosoftLearning 10 | plugins: 11 | - jekyll-sitemap 12 | - jekyll-mentions 13 | - jemoji 14 | markdown: kramdown 15 | kramdown: 16 | syntax_highlighter_opts: 17 | disable : true 18 | -------------------------------------------------------------------------------- /documentation/00-script.md: -------------------------------------------------------------------------------- 1 | --- 2 | challenge: 3 | module: Convert a notebook to production code 4 | challenge: '0: Convert a notebook to production code' 5 | --- 6 | 7 | 16 | 17 | # Challenge 0: Convert a notebook to production code 18 | 19 | 20 | 21 | ## Challenge scenario 22 | 23 | The first step to automate machine learning workflows is to convert a Jupyter notebook to production-ready code. When you store your code as scripts, it's easier to automate the code execution. 
You can parameterize scripts to easily reuse the code for retraining. 24 | 25 | ## Prerequisites 26 | 27 | To complete this challenge, you'll need: 28 | 29 | - Access to an Azure subscription. 30 | - A GitHub account. 31 | 32 | ## Objectives 33 | 34 | By completing this challenge, you'll learn how to: 35 | 36 | - Clean nonessential code. 37 | - Convert your code to Python scripts. 38 | - Use functions in your scripts. 39 | - Use parameters in your scripts. 40 | 41 | > **Important!** 42 | > Each challenge is designed to allow you to explore how to implement DevOps principles when working with machine learning models. Some instructions may be intentionally vague, inviting you to think about your own preferred approach. If for example, the instructions ask you to create an Azure Machine Learning workspace, it's up to you to explore and decide how you want to create it. To make it the best learning experience for you, it's up to you to make it as simple or as challenging as you want. 43 | 44 | ## Challenge Duration 45 | 46 | - **Estimated Time**: 30 minutes 47 | 48 | ## Instructions 49 | 50 | To work through the challenges, you need **your own public repo** which includes the challenge files. Create a new public repo by navigating to [https://github.com/MicrosoftLearning/mslearn-mlops](https://github.com/MicrosoftLearning/mslearn-mlops) and selecting the **Use this template** button to create your own repo. 51 | 52 | In the **experimentation** folder, you'll find a Jupyter notebook that trains a classification model. The data used by the notebook is in the **experimentation/data** folder and contains a CSV file. 53 | 54 | In the **src/model** folder you'll find a `train.py` script which already includes code converted from part of the notebook. It's up to you to complete it. 55 | 56 | - Go through the notebook to understand what the code does. 57 | - Convert the code under the **Split data** header and include it in the `train.py` script as a `split_data` function. 
Remember to: 58 | - Remove nonessential code. 59 | - Include the necessary code as a function. 60 | - Include any necessary libraries at the top of the script. 61 | 62 |
63 | Hint 64 |
65 | The split_data function is already included in the main function. You only need to add the function itself with the required inputs and outputs underneath the comment TO DO: add function to split data. 66 |
67 | 68 | - Add logging so that every time you run the script, all parameters and metrics are tracked. Use the autologging feature of MLflow to also ensure the necessary model files are stored with the job run to easily deploy the model in the future. 69 | 70 |
71 | Hint 72 |
73 | MLflow is an open source library for tracking and managing machine learning models. You can use it to track custom metrics. However, since the current model is trained with the common Scikit-learn library, you can also use autologging. By enabling autologging with mlflow.autolog(), all parameters, metrics, and model files will automatically be stored with your job run. Enable autologging in the main function under TO DO: enable autologging. 74 |
75 | 76 | ## Success criteria 77 | 78 | To complete this challenge successfully, you should be able to show: 79 | 80 | - A training script which includes a function to split the data and autologging using MLflow. 81 | 82 | > **Note:** 83 | > If you've used a compute instance for experimentation, remember to stop the compute instance when you're done. 84 | 85 | ## Useful resources 86 | 87 | - [Tutorial: Convert ML experiments to production Python code](https://docs.microsoft.com/azure/machine-learning/tutorial-convert-ml-experiment-to-production) 88 | - [Logging MLflow models in Azure Machine Learning](https://docs.microsoft.com/azure/machine-learning/how-to-log-mlflow-models) 89 | - [MLflow documentation](https://www.mlflow.org/docs/latest/python_api/mlflow.html) 90 | 91 | 92 | -------------------------------------------------------------------------------- /documentation/01-aml-job.md: -------------------------------------------------------------------------------- 1 | --- 2 | challenge: 3 | module: Use an Azure Machine Learning job for automation 4 | challenge: '1: Create an Azure Machine Learning job' 5 | --- 6 | 7 | 16 | 17 | # Challenge 1: Create an Azure Machine Learning job 18 | 19 | 20 | 21 | ## Challenge scenario 22 | 23 | To automate machine learning workflows, you can define machine learning tasks in scripts. To execute any workflow consisting of Python scripts, use Azure Machine Learning jobs. Azure Machine Learning jobs store all metadata of a workflow, including input parameters and output metrics. By running scripts as jobs, it's easier to track and manage your machine learning models. 24 | 25 | ## Prerequisites 26 | 27 | If you haven't, complete the [previous challenge](00-script.md) before you continue. 28 | 29 | ## Objectives 30 | 31 | By completing this challenge, you'll learn how to: 32 | 33 | - Define an Azure Machine Learning job in YAML. 34 | - Run an Azure Machine Learning job with the CLI v2. 
35 | 36 | > **Important!** 37 | > Each challenge is designed to allow you to explore how to implement DevOps principles when working with machine learning models. Some instructions may be intentionally vague, inviting you to think about your own preferred approach. If for example, the instructions ask you to create an Azure Machine Learning workspace, it's up to you to explore and decide how you want to create it. To make it the best learning experience for you, it's up to you to make it as simple or as challenging as you want. 38 | 39 | ## Challenge Duration 40 | 41 | - **Estimated Time**: 30 minutes 42 | 43 | ## Instructions 44 | 45 | In the **src/model** folder, you'll find a Python script which reads CSV files from a folder and uses the data to train a classification model. In the **src** folder, you'll find a YAML file to define a job. There are values missing in the YAML file. It's up to you to complete it. 46 | 47 | - Create an Azure Machine Learning workspace and a compute instance. 48 | - Use the CLI (v2) to create a registered data asset with the following configuration: 49 | - **Name**: *diabetes-dev-folder* 50 | - **Path**: The **data** folder in the **experimentation** folder which contains the CSV file to train the model. The path should point to the folder, not to the specific file. 51 | 52 |
53 | Hint 54 |
55 | Using the CLI (v2) you can create a data asset by defining the configuration in a YAML file or by specifying the configuration in the CLI command. 56 |
57 | 58 | - Complete the `job.yml` file to define the Azure Machine Learning job to run the `train.py` script, with the registered data asset as input. 59 | - Use the CLI (v2) to run the job. 60 | 61 | > **Tip:** 62 | > Whether you're working from the Cloud Shell, compute instance or a local terminal, make sure to update the Azure Machine Learning extension for the CLI to the latest version. 63 | 64 | ## Success criteria 65 | 66 | To complete this challenge successfully, you should be able to show: 67 | 68 | - A successfully completed job in the Azure Machine Learning workspace. The job should contain all input parameters and output metrics for the model you trained. 69 | 70 | > **Note:** 71 | > If you've used a compute instance for experimentation, remember to stop the compute instance when you're done. 72 | 73 | ## Useful resources 74 | 75 | - [Learning path on how to use the CLI v2 with Azure Machine Learning.](https://docs.microsoft.com/learn/paths/train-models-azure-machine-learning-cli-v2/) 76 | - [CLI reference for managing Azure Machine Learning workspaces](https://docs.microsoft.com/cli/azure/ml/workspace?view=azure-cli-latest) 77 | - [CLI reference for managing Azure ML compute resources](https://docs.microsoft.com/cli/azure/ml/compute?view=azure-cli-latest) 78 | - [CLI reference for managing Azure ML data assets](https://docs.microsoft.com/cli/azure/ml/data?view=azure-cli-latest) 79 | - [CLI reference for jobs.](https://docs.microsoft.com/cli/azure/ml/job?view=azure-cli-latest) 80 | - [YAML reference for command jobs.](https://docs.microsoft.com/azure/machine-learning/reference-yaml-job-command) 81 | - [Example job YAML files.](https://github.com/Azure/azureml-examples/tree/main/cli/jobs/basics) 82 | 83 | 84 | -------------------------------------------------------------------------------- /documentation/02-github-actions.md: -------------------------------------------------------------------------------- 1 | --- 2 | challenge: 3 | module: 'Trigger Azure 
Machine Learning jobs with GitHub Actions' 4 | challenge: '2: Trigger the Azure Machine Learning job with GitHub Actions' 5 | --- 6 | 7 | 16 | 17 | # Challenge 2: Trigger the Azure Machine Learning job with GitHub Actions 18 | 19 | 20 | 21 | ## Challenge scenario 22 | 23 | The benefit of using the CLI (v2) to run an Azure Machine Learning job is that you can submit the job from anywhere. Using a platform like GitHub will allow you to automate Azure Machine Learning jobs. To trigger the job to run, you can use GitHub Actions. 24 | 25 | ## Prerequisites 26 | 27 | If you haven't, complete the [previous challenge](01-aml-job.md) before you continue. 28 | 29 | To complete the challenge, you need to have the authorization to create a service principal. 30 | 31 | ## Objectives 32 | 33 | By completing this challenge, you'll learn how to: 34 | 35 | - Create a service principal and use it to create a GitHub secret for authentication. 36 | - Run the Azure Machine Learning job with GitHub Actions. 37 | 38 | > **Important!** 39 | > Each challenge is designed to allow you to explore how to implement DevOps principles when working with machine learning models. Some instructions may be intentionally vague, inviting you to think about your own preferred approach. If for example, the instructions ask you to create an Azure Machine Learning workspace, it's up to you to explore and decide how you want to create it. To make it the best learning experience for you, it's up to you to make it as simple or as challenging as you want. 40 | 41 | ## Challenge Duration 42 | 43 | - **Estimated Time**: 45 minutes 44 | 45 | ## Instructions 46 | 47 | In the **.github/workflows** folder, you'll find the `02-manual-trigger-job.yml` file. The file defines a GitHub Action which can be manually triggered. The workflow checks out the repo onto the runner, installs the Azure Machine Learning extension for the CLI (v2), and logs in to Azure using the `AZURE_CREDENTIALS` secret.
48 | 49 | - Create a service principal, using the Cloud Shell in the Azure portal, which has contributor access to your resource group. 50 | 51 | **Save the output**, you'll *also* need it for later challenges. Update the `<service-principal-name>` (should be unique), `<subscription-id>`, and `<resource-group-name>` before using the following command: 52 | ```azurecli 53 | az ad sp create-for-rbac --name "<service-principal-name>" --role contributor \ 54 | --scopes /subscriptions/<subscription-id>/resourceGroups/<resource-group-name> \ 55 | --sdk-auth 56 | ``` 57 | - Create a GitHub secret in your repository. Name it `AZURE_CREDENTIALS` and copy and paste the output of the service principal to the **Value** field of the secret. 58 | 
60 | Hint 61 |
62 | The output of the service principal which you need to paste into the Value field of the secret should be a JSON with the following structure: 63 |
 64 | {
 65 | "clientId": "your-client-id",
 66 | "clientSecret": "your-client-secret",
 67 | "subscriptionId": "your-subscription-id",
 68 | "tenantId": "your-tenant-id",
 69 | "activeDirectoryEndpointUrl": "https://login.microsoftonline.com",
 70 | "resourceManagerEndpointUrl": "https://management.azure.com/",
 71 | "activeDirectoryGraphResourceId": "https://graph.windows.net/",
 72 | "sqlManagementEndpointUrl": "https://management.core.windows.net:8443/",
 73 | "galleryEndpointUrl": "https://gallery.azure.com/",
 74 | "managementEndpointUrl": "https://management.core.windows.net/"
 75 | }
 76 | 
77 |
78 | 79 | - Edit the `02-manual-trigger-job.yml` workflow to trigger the Azure Machine Learning job you defined in challenge 1. 80 | 81 | 
82 | Hint 83 |
84 | GitHub is authenticated to use your Azure Machine Learning workspace with a service principal. The service principal is only allowed to submit jobs that use a compute cluster, not a compute instance. 85 |
86 | 87 | ## Success criteria 88 | 89 | To complete this challenge successfully, you should be able to show: 90 | 91 | - A successfully completed Action in your GitHub repo, triggered manually in GitHub. 92 | - A step in the Action should have submitted a job to the Azure Machine Learning workspace. 93 | - A successfully completed Azure Machine Learning job, shown in the Azure Machine Learning workspace. 94 | 95 | ## Useful resources 96 | 97 | - The introduction to DevOps principles for machine learning module covers [how to integrate Azure Machine Learning with DevOps tools.](https://docs.microsoft.com/learn/paths/introduction-machine-learn-operations/) 98 | - [Use GitHub Actions with Azure Machine Learning.](https://docs.microsoft.com/azure/machine-learning/how-to-github-actions-machine-learning) 99 | - Learn more about [service principal objects in Azure Active Directory.](https://docs.microsoft.com/azure/active-directory/develop/app-objects-and-service-principals#service-principal-object) 100 | - Learn more about encrypted secrets in GitHub, like [how to name and how to create a secret in a GitHub repo.](https://docs.github.com/actions/security-guides/encrypted-secrets) 101 | - [Manually running a workflow in GitHub Actions.](https://docs.github.com/actions/managing-workflow-runs/manually-running-a-workflow) 102 | - [Re-running workflows and jobs in GitHub Actions.](https://docs.github.com/actions/managing-workflow-runs/re-running-workflows-and-jobs) 103 | - [General documentation for GitHub Actions.](https://docs.github.com/actions/guides) 104 | 105 | -------------------------------------------------------------------------------- /documentation/03-trigger-workflow.md: -------------------------------------------------------------------------------- 1 | --- 2 | challenge: 3 | module: 'Trigger GitHub Actions with feature-based development' 4 | challenge: '3: Trigger GitHub Actions with feature-based development' 5 | --- 6 | 7 | 16 | 17 | # Challenge 3: Trigger 
GitHub Actions with feature-based development 18 | 19 | 20 | 21 | ## Challenge scenario 22 | 23 | Triggering a workflow by pushing directly to the repo is **not** considered a best practice. Preferably, you'll want to review any changes before you build them with GitHub Actions. 24 | 25 | ## Prerequisites 26 | 27 | If you haven't, complete the [previous challenge](02-github-actions.md) before you continue. 28 | 29 | ## Objectives 30 | 31 | By completing this challenge, you'll learn how to: 32 | 33 | - Work with feature-based development. 34 | - Protect the main branch. 35 | - Trigger a GitHub Actions workflow by creating a pull request. 36 | 37 | > **Important!** 38 | > Each challenge is designed to allow you to explore how to implement DevOps principles when working with machine learning models. Some instructions may be intentionally vague, inviting you to think about your own preferred approach. If for example, the instructions ask you to create an Azure Machine Learning workspace, it's up to you to explore and decide how you want to create it. To make it the best learning experience for you, it's up to you to make it as simple or as challenging as you want. 39 | 40 | ## Challenge Duration 41 | 42 | - **Estimated Time**: 45 minutes 43 | 44 | ## Instructions 45 | 46 | Use feature-based development to better govern changes made to the repo and the triggering of GitHub Actions. 47 | 48 | - Create a GitHub Actions workflow which is triggered by the creation of a pull request. 49 | 50 | The workflow will be used for code verification in the next challenge. For now, you can include whatever step you want. For example, use the `echo` command: 51 | 52 | ```yml 53 | - name: Placeholder 54 | run: | 55 | echo "Will add code checks here in next challenge" 56 | ``` 57 | 58 | - Create a **branch protection rule** to block any direct pushes to the **main** branch. 59 | 60 | > **Note:** 61 | > By default, branch protection rules do not apply to administrators. 
If you're the administrator of the repo you're working with, you'll still be allowed to push directly to the repo. 62 | 63 | To trigger the workflow, do the following: 64 | 65 | - Create a branch in the repo. 66 | - Make a change and push it. For example, change the hyperparameter value. 67 | - Create a pull request to merge the new branch with the main branch. 68 | 69 | ## Success criteria 70 | 71 | To complete this challenge successfully, you should be able to show: 72 | 73 | - The branch protection rule for the main branch. 74 | - A successfully completed Action in your GitHub repo which is triggered by a new pull request. 75 | 76 | ## Useful resources 77 | 78 | - Learn more about source control for machine learning projects and [how to work with feature-based development and GitHub repos.](https://docs.microsoft.com/learn/modules/source-control-for-machine-learning-projects/) 79 | - [General documentation for GitHub Actions.](https://docs.github.com/actions/guides) 80 | - [Triggering a GitHub Actions workflow.](https://docs.github.com/actions/using-workflows/triggering-a-workflow) 81 | - [Events that trigger workflows.](https://docs.github.com/actions/using-workflows/events-that-trigger-workflows) 82 | - [Workflow syntax for GitHub Actions.](https://docs.github.com/actions/using-workflows/workflow-syntax-for-github-actions) 83 | 84 | -------------------------------------------------------------------------------- /documentation/04-unit-test-linting.md: -------------------------------------------------------------------------------- 1 | --- 2 | challenge: 3 | module: 'Work with linting and unit testing in GitHub Actions' 4 | challenge: '4: Work with linting and unit testing' 5 | --- 6 | 7 | 16 | 17 | # Challenge 4: Work with linting and unit testing 18 | 19 | 20 | 21 | ## Challenge scenario 22 | 23 | Code quality can be assessed in two ways: linting and unit testing. Use linting to check for any stylistic errors and unit testing to verify your functions. 
24 | 25 | ## Prerequisites 26 | 27 | If you haven't, complete the [previous challenge](03-trigger-workflow.md) before you continue. 28 | 29 | You'll complete the workflow created in the previous challenge. 30 | 31 | ## Objectives 32 | 33 | By completing this challenge, you'll learn how to: 34 | 35 | - Run linters and unit tests with GitHub Actions. 36 | - Troubleshoot errors to improve your code. 37 | 38 | > **Important!** 39 | > Each challenge is designed to allow you to explore how to implement DevOps principles when working with machine learning models. Some instructions may be intentionally vague, inviting you to think about your own preferred approach. If for example, the instructions ask you to create an Azure Machine Learning workspace, it's up to you to explore and decide how you want to create it. To make it the best learning experience for you, it's up to you to make it as simple or as challenging as you want. 40 | 41 | ## Challenge Duration 42 | 43 | - **Estimated Time**: 45 minutes 44 | 45 | ## Instructions 46 | 47 | In the **tests** folder, you'll find files that will perform linting and unit testing on your code. The `.flake8` file configures Flake8, which lints your code to check for stylistic errors. The `test_train.py` file performs unit tests on your code to check whether the functions behave as expected. 48 | 49 | - Go to the **Actions** tab in your GitHub repo and trigger the **Code checks** workflow manually. Inspect the output and fix your code where necessary. 50 | 51 | 
52 | Hint 53 |
54 | Whenever the linter finds an error, the GitHub Actions step will fail with exit code 1. Inspect the output of the workflow to see the specific error codes for the linter. Next to the error code, the output will also list the source file with the line number and column number to help you find the cause of the error. 55 |
56 | 57 | - Add linting and unit tests jobs to the workflow you created in the previous challenge. The workflow should be triggered by the creation of a new pull request. The workflow should run the Flake8 linter *and* run the Pytest unit tests. 58 | 59 |
60 | Hint 61 |
62 | To include unit testing in your workflow, install Pytest (using the requirements.txt), and run the tests with pytest tests/. By default, Pytest uses test files that are prefixed with test. 63 |
64 | 65 | - Create (or edit) a **branch protection rule** to require the two code checks to be successful before merging a pull request to the **main** branch. 66 | 67 |
68 | Hint 69 |
70 | To configure checks to be required to pass before merging, you can enable status checks in a branch protection rule. To find the checks, your jobs need to have a name. To ensure the checks run whenever a pull request is created, your checks should be part of a GitHub Actions workflow triggered by a pull_request event. 71 |
72 | 73 | To trigger the workflow, do the following: 74 | 75 | - Make a change and push it. For example, change the hyperparameter value. 76 | - Create a pull request, showing the integrated code checks. 77 | 78 | ## Success criteria 79 | 80 | To complete this challenge successfully, you should be able to show: 81 | 82 | - Both the **Linting** and **Unit tests** checks are completed successfully without any errors. The successful checks should be shown in a newly created pull request. 83 | 84 | ## Useful resources 85 | 86 | - [Flake8 documentation](https://flake8.pycqa.org/latest/user/index.html), including [error codes and their descriptions.](https://flake8.pycqa.org/en/latest/user/error-codes.html) 87 | - [A beginner's guide to Python testing.](https://miguelgfierro.com/blog/2018/a-beginners-guide-to-python-testing) 88 | - Learn more about [test infrastructure using Azure ML and how to create tests.](https://github.com/microsoft/recommenders/tree/main/tests) 89 | - Learn more about [testing with Pytest.](https://docs.microsoft.com/learn/modules/test-python-with-pytest/) 90 | 91 | In this challenge, all testing is executed with GitHub Actions. Optionally, you can learn how to [verify your code locally with Visual Studio Code](https://docs.microsoft.com/learn/modules/source-control-for-machine-learning-projects/5-verify-your-code-locally). Running linters and unit tests locally is not required for this challenge. 92 | 93 | -------------------------------------------------------------------------------- /documentation/05-environments.md: -------------------------------------------------------------------------------- 1 | --- 2 | challenge: 3 | module: 'Work with environments in GitHub Actions' 4 | challenge: '5: Work with environments' 5 | --- 6 | 7 | 16 | 17 | # Challenge 5: Work with environments 18 | 19 | 20 | 21 | ## Challenge scenario 22 | 23 | There are many advantages to using environments in machine learning projects. 
When you have separate environments for development, staging, and production, you can more easily control access to resources. 24 | 25 | Use environments to isolate workloads and control the deployment of the model. 26 | 27 | ## Prerequisites 28 | 29 | If you haven't, complete the [previous challenge](04-unit-test-linting.md) before you continue. 30 | 31 | **Your repo should be set to public**. If you're using a private repo without GitHub Enterprise Cloud, you'll not be able to create environments. [Change the visibility of your repo to public](https://docs.github.com/repositories/managing-your-repositorys-settings-and-features/managing-repository-settings/setting-repository-visibility) if your repo is set to private. 32 | 33 | You'll re-use the workflow you created for [challenge 2: trigger the Azure Machine Learning job with GitHub Actions](02-github-actions.md). 34 | 35 | ## Objectives 36 | 37 | By completing this challenge, you'll learn how to: 38 | 39 | - Set up a development and production environment. 40 | - Add a required reviewer. 41 | - Add environments to a GitHub Actions workflow. 42 | 43 | > **Important!** 44 | > Each challenge is designed to allow you to explore how to implement DevOps principles when working with machine learning models. Some instructions may be intentionally vague, inviting you to think about your own preferred approach. If for example, the instructions ask you to create an Azure Machine Learning workspace, it's up to you to explore and decide how you want to create it. To make it the best learning experience for you, it's up to you to make it as simple or as challenging as you want. 45 | 46 | ## Challenge Duration 47 | 48 | - **Estimated Time**: 60 minutes 49 | 50 | ## Instructions 51 | 52 | Initially, data scientists will train the model in an Azure Machine Learning workspace which is configured for experimentation. Ideally, we don't want to make the production data available in the experimentation or development environment. 
Instead, data scientists will only have access to a small dataset which should behave similarly to the production dataset. 53 | 54 | By reusing the training script created by the data scientists, you can train the model in the production environment using the production data, simply by changing the data input. 55 | 56 | > **Note:** 57 | > Though it's a best practice to associate a separate Azure Machine Learning workspace to each separate environment, you can use one workspace for both the development and production environment for this challenge (to avoid extra costs). 58 | 59 | - Within your GitHub repo, create a development and production environment. 60 | - Add an approval check for the production environment. 61 | - Remove the global repo **AZURE_CREDENTIALS** secret, so that each environment will only be able to use its own secret. 62 | - For each environment, add the **AZURE_CREDENTIALS** secret that contains the service principal output. 63 | 64 | > **Note:** 65 | > If you don't have the service principal output anymore from [challenge 2](02-github-actions.md), go back to the Azure portal and create it again. You can only get the necessary output at the time of creation. 66 | 67 | - Create a new data asset in the workspace with the following configuration: 68 | - **Name**: *diabetes-prod-folder* 69 | - **Path**: The **data** folder in the **production** folder which contains a larger CSV file to train the model. The path should point to the folder, not to the specific file. 70 | - Create one GitHub Actions workflow, triggered by changes being pushed to the main branch, with two jobs: 71 | - The **experiment** job that trains the model using the *diabetes-dev-folder* dataset in the **development environment**. 72 | - The **production** job that trains the model in the **production environment**, using the production data (the *diabetes-prod-folder* data asset as input). 
73 | - Add a condition that the **production** job is only allowed to run when the **experiment** job ran *successfully*. Success means that the Azure Machine Learning job ran successfully too. 74 | 75 |
76 | Hint 77 |
 78 | You'll need to do two things to ensure the production job only runs when the experiment job is successful: add `needs` to the workflow and add `--stream` to the CLI command to trigger the Azure Machine Learning job. 79 |
80 | 81 | ## Success criteria 82 | 83 | To complete this challenge successfully, you should be able to show: 84 | 85 | - Show the environment secrets in the settings. 86 | - A successfully completed Actions workflow that contains two jobs. The production job needs the experimentation job to be successful to run. 87 | - Show that the workflow required an approval before running the production workload. 88 | - Show two successful Azure Machine Learning jobs, one trained with the *diabetes-dev-folder* as input and the other with the *diabetes-prod-folder* as input. 89 | 90 | ## Useful resources 91 | 92 | - Learn more about [continuous deployment for machine learning.](https://docs.microsoft.com/learn/modules/continuous-deployment-for-machine-learning/) 93 | - [Workflow syntax for GitHub Actions.](https://docs.github.com/actions/using-workflows/workflow-syntax-for-github-actions) 94 | - [Using environments for deployment in GitHub.](https://docs.github.com/actions/deployment/targeting-different-environments/using-environments-for-deployment) 95 | - [How to create a secret in a GitHub repo.](https://docs.github.com/actions/security-guides/encrypted-secrets) 96 | - [CLI reference for jobs.](https://docs.microsoft.com/cli/azure/ml/job?view=azure-cli-latest) 97 | 98 | -------------------------------------------------------------------------------- /documentation/06-deploy-model.md: -------------------------------------------------------------------------------- 1 | --- 2 | challenge: 3 | module: 'Deploy a model with GitHub Actions' 4 | challenge: '6: Deploy and test the model' 5 | --- 6 | 7 | 16 | 17 | # Challenge 6: Deploy and test the model 18 | 19 | 20 | 21 | ## Challenge scenario 22 | 23 | To get value from a model, you'll want to deploy it. You can deploy a model to a managed online or batch endpoint. 24 | 25 | ## Prerequisites 26 | 27 | If you haven't, complete the [previous challenge](05-environments.md) before you continue. 
28 | 29 | ## Objectives 30 | 31 | By completing this challenge, you'll learn how to: 32 | 33 | - Register the model with GitHub Actions. 34 | - Deploy the model to an online endpoint with GitHub Actions. 35 | - Test the deployed model. 36 | 37 | > **Important!** 38 | > Each challenge is designed to allow you to explore how to implement DevOps principles when working with machine learning models. Some instructions may be intentionally vague, inviting you to think about your own preferred approach. If for example, the instructions ask you to create an Azure Machine Learning workspace, it's up to you to explore and decide how you want to create it. To make it the best learning experience for you, it's up to you to make it as simple or as challenging as you want. 39 | 40 | ## Challenge Duration 41 | 42 | - **Estimated Time**: 45 minutes 43 | 44 | ## Instructions 45 | 46 | When a model is trained and logged by using MLflow, you can easily register and deploy the model with Azure Machine Learning. After training the model, you want to deploy the model to a real-time endpoint so that it can be consumed by a web app. 47 | 48 | - Register the model from the production job output in the Azure Machine Learning Studio. 49 | - Create a GitHub Actions workflow which deploys the latest version of the registered model. 50 | - The workflow should create an endpoint and deploy your model to the endpoint using the CLI (v2). 51 | 52 |
53 | Hint 54 |
55 | The model's output was automatically generated by the MLflow auto log function in the training script. When you register the model as an MLflow type model, you don't need to provide a scoring script or environment to deploy the model. 56 |
57 | 58 | - Test whether the deployed model returns predictions as expected. 59 | 60 |
61 | Hint 62 |
63 | You can test the endpoint in the Studio, using the CLI, or by calling the endpoint from an app like Postman. 64 |
65 | 66 | Here's some sample data to test your endpoint with: 67 | ``` 68 | Pregnancies,PlasmaGlucose,DiastolicBloodPressure,TricepsThickness,SerumInsulin,BMI,DiabetesPedigree,Age 69 | 9,104,51,7,24,27.36983156,1.350472047,43 70 | 6,73,61,35,24,18.74367404,1.074147566,75 71 | 4,115,50,29,243,34.69215364,0.741159926,59 72 | ``` 73 | 74 | ## Success criteria 75 | 76 | To complete this challenge successfully, you should be able to show: 77 | 78 | - A model registered in the Azure Machine Learning workspace. 79 | - A successfully completed Action in your GitHub repo that deploys the model to a managed online endpoint. 80 | 81 | ## Useful resources 82 | 83 | - [Work with models in Azure Machine Learning.](https://docs.microsoft.com/azure/machine-learning/how-to-manage-models) 84 | - [Deploy an Azure Machine Learning model to a managed endpoint with CLI (v2).](https://docs.microsoft.com/learn/modules/deploy-azure-machine-learning-model-managed-endpoint-cli-v2/) 85 | - [Deploy MLflow models.](https://docs.microsoft.com/azure/machine-learning/how-to-deploy-mlflow-models) 86 | - [YAML reference to create an online endpoint.](https://docs.microsoft.com/azure/machine-learning/reference-yaml-endpoint-online) 87 | - [YAML reference to create a managed online deployment.](https://docs.microsoft.com/azure/machine-learning/reference-yaml-deployment-managed-online) 88 | - [CLI (v2) documentation for managing Azure ML online endpoints.](https://docs.microsoft.com/cli/azure/ml/online-endpoint?view=azure-cli-latest) 89 | - [CLI (v2) documentation for managing Azure ML online deployments.](https://docs.microsoft.com/cli/azure/ml/online-deployment?view=azure-cli-latest) 90 | - [GitHub Actions.](https://docs.github.com/actions/guides) -------------------------------------------------------------------------------- /documentation/media/00-01-github-secret.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MicrosoftLearning/mslearn-mlops/a103a1bcdc53849e30c8e1952cb1321db97b7248/documentation/media/00-01-github-secret.png -------------------------------------------------------------------------------- /experimentation/train-classification-model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Train diabetes classification model\n", 8 | "\n", 9 | "This notebook reads a CSV file and trains a model to predict diabetes in patients. The data is already preprocessed and requires no feature engineering.\n", 10 | "\n", 11 | "The evaluation methods were used during experimentation to decide whether the model was accurate enough. Moving forward, there's a preference to use the autolog feature of MLflow to more easily deploy the model later on." 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "## Read data from local file\n", 19 | "\n" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 50, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import pandas as pd" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 51, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "df = pd.read_csv('data/diabetes.csv')" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 52, 43 | "metadata": {}, 44 | "outputs": [ 45 | { 46 | "data": { 47 | "text/html": [ 48 | "
\n", 49 | "\n", 62 | "\n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " 
\n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | "
PatientIDPregnanciesPlasmaGlucoseDiastolicBloodPressureTricepsThicknessSerumInsulinBMIDiabetesPedigreeAgeDiabetic
01354778017180342343.5097261.213191210
1114743889293473621.2405760.158365230
21640031711547523541.5115230.079019230
318833509103782530429.5821921.282870431
4142411918559273542.6045360.549542220
.................................
99951469198695853726718.4975420.660240310
999614327360555175021.8653410.086589340
9997141096259959476730.7740182.301594431
99981958653014567302118.8118610.789572260
999913329381010054342738.8409430.175465230
\n", 224 | "

10000 rows × 10 columns

\n", 225 | "
" 226 | ], 227 | "text/plain": [ 228 | " PatientID Pregnancies PlasmaGlucose DiastolicBloodPressure \\\n", 229 | "0 1354778 0 171 80 \n", 230 | "1 1147438 8 92 93 \n", 231 | "2 1640031 7 115 47 \n", 232 | "3 1883350 9 103 78 \n", 233 | "4 1424119 1 85 59 \n", 234 | "... ... ... ... ... \n", 235 | "9995 1469198 6 95 85 \n", 236 | "9996 1432736 0 55 51 \n", 237 | "9997 1410962 5 99 59 \n", 238 | "9998 1958653 0 145 67 \n", 239 | "9999 1332938 10 100 54 \n", 240 | "\n", 241 | " TricepsThickness SerumInsulin BMI DiabetesPedigree Age \\\n", 242 | "0 34 23 43.509726 1.213191 21 \n", 243 | "1 47 36 21.240576 0.158365 23 \n", 244 | "2 52 35 41.511523 0.079019 23 \n", 245 | "3 25 304 29.582192 1.282870 43 \n", 246 | "4 27 35 42.604536 0.549542 22 \n", 247 | "... ... ... ... ... ... \n", 248 | "9995 37 267 18.497542 0.660240 31 \n", 249 | "9996 7 50 21.865341 0.086589 34 \n", 250 | "9997 47 67 30.774018 2.301594 43 \n", 251 | "9998 30 21 18.811861 0.789572 26 \n", 252 | "9999 34 27 38.840943 0.175465 23 \n", 253 | "\n", 254 | " Diabetic \n", 255 | "0 0 \n", 256 | "1 0 \n", 257 | "2 0 \n", 258 | "3 1 \n", 259 | "4 0 \n", 260 | "... ... 
\n", 261 | "9995 0 \n", 262 | "9996 0 \n", 263 | "9997 1 \n", 264 | "9998 0 \n", 265 | "9999 0 \n", 266 | "\n", 267 | "[10000 rows x 10 columns]" 268 | ] 269 | }, 270 | "execution_count": 52, 271 | "metadata": {}, 272 | "output_type": "execute_result" 273 | } 274 | ], 275 | "source": [ 276 | "df" 277 | ] 278 | }, 279 | { 280 | "cell_type": "markdown", 281 | "metadata": {}, 282 | "source": [ 283 | "## Split data" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": 53, 289 | "metadata": {}, 290 | "outputs": [], 291 | "source": [ 292 | "X, y = df[['Pregnancies','PlasmaGlucose','DiastolicBloodPressure','TricepsThickness','SerumInsulin','BMI','DiabetesPedigree','Age']].values, df['Diabetic'].values" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": 54, 298 | "metadata": {}, 299 | "outputs": [ 300 | { 301 | "data": { 302 | "text/plain": [ 303 | "10000" 304 | ] 305 | }, 306 | "execution_count": 54, 307 | "metadata": {}, 308 | "output_type": "execute_result" 309 | } 310 | ], 311 | "source": [ 312 | "len(X)" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": 55, 318 | "metadata": {}, 319 | "outputs": [], 320 | "source": [ 321 | "import numpy as np" 322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "execution_count": 56, 327 | "metadata": {}, 328 | "outputs": [ 329 | { 330 | "name": "stdout", 331 | "output_type": "stream", 332 | "text": [ 333 | "(array([0, 1], dtype=int64), array([6656, 3344], dtype=int64))\n" 334 | ] 335 | } 336 | ], 337 | "source": [ 338 | "print(np.unique(y, return_counts=True))" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": 57, 344 | "metadata": {}, 345 | "outputs": [], 346 | "source": [ 347 | "from sklearn.model_selection import train_test_split" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": 58, 353 | "metadata": {}, 354 | "outputs": [], 355 | "source": [ 356 | "X_train, X_test, y_train, y_test = 
train_test_split(X, y, test_size=0.30, random_state=0)" 357 | ] 358 | }, 359 | { 360 | "cell_type": "markdown", 361 | "metadata": {}, 362 | "source": [ 363 | "## Train model" 364 | ] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "execution_count": 59, 369 | "metadata": {}, 370 | "outputs": [], 371 | "source": [ 372 | "from sklearn.linear_model import LogisticRegression" 373 | ] 374 | }, 375 | { 376 | "cell_type": "code", 377 | "execution_count": 60, 378 | "metadata": {}, 379 | "outputs": [], 380 | "source": [ 381 | "model = LogisticRegression(C=1/0.1, solver=\"liblinear\").fit(X_train, y_train)" 382 | ] 383 | }, 384 | { 385 | "cell_type": "markdown", 386 | "metadata": {}, 387 | "source": [ 388 | "## Evaluate model" 389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "execution_count": 61, 394 | "metadata": {}, 395 | "outputs": [], 396 | "source": [ 397 | "import numpy as np" 398 | ] 399 | }, 400 | { 401 | "cell_type": "code", 402 | "execution_count": 62, 403 | "metadata": {}, 404 | "outputs": [], 405 | "source": [ 406 | "y_hat = model.predict(X_test)\n", 407 | "acc = np.average(y_hat == y_test)" 408 | ] 409 | }, 410 | { 411 | "cell_type": "code", 412 | "execution_count": 63, 413 | "metadata": {}, 414 | "outputs": [ 415 | { 416 | "data": { 417 | "text/plain": [ 418 | "0.7736666666666666" 419 | ] 420 | }, 421 | "execution_count": 63, 422 | "metadata": {}, 423 | "output_type": "execute_result" 424 | } 425 | ], 426 | "source": [ 427 | "acc" 428 | ] 429 | }, 430 | { 431 | "cell_type": "code", 432 | "execution_count": 64, 433 | "metadata": {}, 434 | "outputs": [], 435 | "source": [ 436 | "from sklearn.metrics import roc_auc_score" 437 | ] 438 | }, 439 | { 440 | "cell_type": "code", 441 | "execution_count": 65, 442 | "metadata": {}, 443 | "outputs": [], 444 | "source": [ 445 | "y_scores = model.predict_proba(X_test)\n", 446 | "auc = roc_auc_score(y_test,y_scores[:,1])" 447 | ] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "execution_count": 66, 452 | 
"metadata": {}, 453 | "outputs": [ 454 | { 455 | "data": { 456 | "text/plain": [ 457 | "0.848386486889895" 458 | ] 459 | }, 460 | "execution_count": 66, 461 | "metadata": {}, 462 | "output_type": "execute_result" 463 | } 464 | ], 465 | "source": [ 466 | "auc" 467 | ] 468 | }, 469 | { 470 | "cell_type": "code", 471 | "execution_count": 67, 472 | "metadata": {}, 473 | "outputs": [], 474 | "source": [ 475 | "\n", 476 | "from sklearn.metrics import roc_curve\n", 477 | "import matplotlib.pyplot as plt" 478 | ] 479 | }, 480 | { 481 | "cell_type": "code", 482 | "execution_count": 68, 483 | "metadata": {}, 484 | "outputs": [ 485 | { 486 | "data": { 487 | "text/plain": [ 488 | "Text(0.5, 1.0, 'ROC Curve')" 489 | ] 490 | }, 491 | "execution_count": 68, 492 | "metadata": {}, 493 | "output_type": "execute_result" 494 | }, 495 | { 496 | "data": { 497 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAvPElEQVR4nO3de5xN9f748de7mUoXpwu6yP0+Q5ImQkjkEpJKiZSaSNJNuirJkdyJ3CVSSimlk5PT6ZzSrwvJnVLTuEcuX5QumPH+/bHWOLtpLnvMrL323uv9fDz2Y9ba+7P3eq8x1nt/LuvzEVXFGGNMcJ3gdwDGGGP8ZYnAGGMCzhKBMcYEnCUCY4wJOEsExhgTcJYIjDEm4CwRGGNMwFkiMHFFRDaJyO8iclBEdorITBE5PVuZhiLyHxH5RUQOiMh7IpKcrczfRGSsiGxxP+sHd79kLscVEblPRNaKyK8isk1E3hSRC708X2OKgiUCE4/aq+rpQB3gYuDxrBdEpAHwL+BdoDRQEVgFfCYildwyJwEfATWB1sDfgAbAXqBeLsd8HrgfuA84G6gGvAO0LWjwIpJY0PcYUxhidxabeCIim4A7VfXf7v5woKaqtnX3PwXWqGrvbO/7J7BbVW8VkTuBZ4HKqnowjGNWBb4FGqjq0lzKfAy8oqrT3f3ubpyXu/sK9AEeABKBD4BfVbVfyGe8C3yiqqNFpDQwHmgCHATGqOq4/H9DxvyV1QhM3BKRMkAbIM3dPxVoCLyZQ/E3gKvc7RbAB+EkAVdzYFtuSaAArgXqA8nAa8BNIiIAInIW0BJ4XUROAN7Dqclc4B7/ARFpVcjjm4CyRGDi0Tsi8guwFdgFPO0+fzbO3/yOHN6zA8hq/y+RS5ncFLR8bp5T1f9T1d+BTwEFGruv3QB8oao/ApcCpVR1kKoeVtV0YBrQuQhiMAFkicDEo2tVtThwBVCD/13g9wFHgfNzeM/5wB53e28uZXJT0PK52Zq1oU6b7evAze5TXYBX3e3yQGkR2Z/1AJ4Azi2CGEwAWSIwcUtVPwFmAiPd/V+BL4BOORS/EaeDGODfQCsROS3MQ30ElBGRlDzK/AqcGrJ/Xk4hZ9t/DbhBRMrjNBm95T6/FdioqmeGPI
qr6tVhxmvMn1giMPFuLHCViFzk7j8G3OYO9SwuImeJyGCcUUHPuGVm41xs3xKRGiJygoiUEJEnROQvF1tV/R6YCLwmIleIyEkiUkxEOovIY26xlcB1InKqiFQBUvMLXFVX4NRSpgOLVHW/+9JS4BcReVREThGRBBGpJSKXFvi3YwyWCEycU9XdwMvAAHf//wGtgOtw2vU34wwxvdy9oKOqh3A6jL8FPgR+xrn4lgSW5HKo+4AXgAnAfuAHoCNOpy7AGOAw8BMwi/818+RnjhvLnJBzygTa4QyP3cj/ksUZYX6mMX9iw0eNMSbgrEZgjDEBZ4nAGGMCzhKBMcYEnCUCY4wJuJib3KpkyZJaoUIFv8MwxpiY8vXXX+9R1VI5vRZziaBChQosW7bM7zCMMSamiMjm3F6zpiFjjAk4SwTGGBNwlgiMMSbgLBEYY0zAWSIwxpiA8ywRiMgMEdklImtzeV1EZJyIpInIahGp61UsxhhjcudljWAmzsLfuWkDVHUfPYFJHsZijDEmF57dR6Cqi0WkQh5FOgAvuysxfSkiZ4rI+apaFEv+GWNMxMxZsoV3V2737POPHs3k8OEj1K10Dk+3r1nkn+/nDWUXELI0H7DNfe4viUBEeuLUGihXrlxEgjPGRA+vL7SFtWTj/wFQv+LZRf7Z+/fvZ8OGDSQmJnJxxRxvDC60mLizWFWnAlMBUlJSbAEFY2JQYS7mXl5oi0L9imfToc4FdKlfdF9U9+/fz8MPP8wb06dTpUoVpk+fTtOmtYrs80P5mQi2A2VD9su4zxljfOLlN+/CXMy9uNBGs8zMTBo2bMiGDRt45JFHGDhwIKeccopnx/MzESwA+ojI6zgLcx+w/gFjil5BLu5efvMO2sX8eOzdu5ezzz6bhIQEnn32WcqWLUtKSornx/UsEYjIa8AVQEkR2QY8DZwIoKqTgYXA1UAa8Btwu1exGBPLCvstvSAXd7tY+0NVefXVV7n//vsZOnQoPXr0oGPHjhE7vpejhm7O53UF7vHq+MbEquwX/sJ+S7eLe3TbunUrvXr1YuHChVx22WU0atQo4jHERGexMfEinG/32S/8diGPX6+99hp33XUXmZmZjB07lj59+pCQkBDxOCwRGOOh4/l2bxf+4DjrrLOoX78+U6dOpWLFir7FIU4LTexISUlRW5jGRKOcvu3ndOG3i3xwZWRkMGbMGA4fPkz//v0Bp39ARDw/toh8rao59jxbjcCYAsiraSeni759uzdZVq1aRWpqKl9//TU33njjsQQQiSSQH0sExuQi3G/4Weyib3Jy6NAhBg8ezNChQzn77LN58803uf7666MiAWSxRGCMK5z2fLvYm4L6/vvvGTZsGF26dGH06NGUKFHC75D+whKBCbTQi7+N1jFF5eDBg7z77rt07dqVWrVq8e2331KpUiW/w8qVJQITWHOWbOGJ+WsA56JvF35TFD788EN69uzJ5s2bqVu3LklJSVGdBMASgQmo0CQwpOOFdvE3hbZv3z769evHjBkzqFatGp988glJSUl+hxUWSwQmULKagrKagSwJmKKQmZlJo0aN+O6773j88ccZMGAAxYoV8zussFkiMHEpt2Geof0A1gxkCmvPnj3HJokbMmQI5cqVo27d2Ft11xKBiSvZv/FnH+ZpCcAUBVVl9uzZPPDAAwwdOpSePXty7bXX+h3WcbNEYOJCTgnALvjGC5s3b+auu+5i0aJFNGzYkCZNmvgdUqFZIjAxL/voH0sAxiuvvPIKd999N6rK+PHj6d27NyeccILfYRWaJQITs6zj10RaqVKlaNSoEVOmTKF8+fJ+h1NkLBGYmJHXnb9WCzBeOHLkCKNGjeLIkSM89dRTtGrVipYtW0bV9BBFwRKBiXq5dQBbAjBeWrFiBampqaxYsYLOnTtH1SRxRc0SgYlq1v5vIu2PP/5g0KBBDB8+nJIlS/LWW29x3XXX+R2WpywRmKhld/8aP6SlpTFy5EhuvfVWRo
0axVlnneV3SJ6zRGCikiUBE0kHDx5k/vz5dOvWjVq1arFhwwZfVwyLNEsEJqrYSCATaYsWLaJnz55s3bqVlJQUkpKSApUEwBKBiRJ2Q5iJtL1799K3b19efvllatSowaeffhozk8QVNUsExnfWIWwiLWuSuLS0NPr378+TTz4ZU5PEFTVLBMY31gxkIm337t2UKFGChIQEhg0bRvny5alTp47fYfku9u+NNjFnzpIt3DTlC56Yv4YlG/+P+hXPtiRgPKWqvPTSS1SrVo1p06YB0KFDB0sCLqsRmIixfgDjh02bNtGzZ08+/PBDGjduTLNmzfwOKepYIjARYf0Axg+zZ8/m7rvvRkSYOHEid911V1xMElfULBEYT1k/gPHTueeeS5MmTZg8eTLlytnfXW4sEZgik9OqYNYMZCLpyJEjDB8+nMzMTAYMGEDLli1p2bKl32FFPUsEpkhkb/rJYgnARMry5cu54447WLVqFV26dDk2SZzJnyUCc9xCawDW9GP88vvvv/PMM88wcuRISpUqxfz582N62Ug/eJoIRKQ18DyQAExX1aHZXi8HzALOdMs8pqoLvYzJFE5OF//6Fc+2b/7GN+np6YwePZru3bszYsSIQEwSV9Q8SwQikgBMAK4CtgFficgCVV0fUuxJ4A1VnSQiycBCoIJXMZnCyd78Yxd/45eff/6Zt99+m+7du1OzZk2+//77uFoxLNK8rBHUA9JUNR1ARF4HOgChiUCBv7nbZwA/ehiPOU428sdEk4ULF9KrVy+2b99O/fr1SUpKsiRQSF4OqL0A2Bqyv819LtRA4BYR2YZTG7g3pw8SkZ4iskxElu3evduLWE0usmoBdgew8duePXvo1q0bbdu2pXjx4nz22WeBnSSuqPndWXwzMFNVR4lIA2C2iNRS1aOhhVR1KjAVICUlRX2IM5BsTQATLbImiUtPT2fAgAE88cQTnHzyyX6HFTe8TATbgbIh+2Xc50KlAq0BVPULESkGlAR2eRiXCYMlARMNfvrpJ0qVKkVCQgIjR46kfPny1K5d2++w4o6XTUNfAVVFpKKInAR0BhZkK7MFaA4gIklAMcDafnxmScD4TVV58cUXqV69OlOnTgWgffv2lgQ84lkiUNUMoA+wCPgGZ3TQOhEZJCLXuMUeAnqIyCrgNaC7qlrTj48sCRi/paen06JFC+68807q1KlDixYt/A4p7nnaR+DeE7Aw23MDQrbXA428jMGEx0YGmWgwa9YsevfuTUJCApMnT6ZHjx42SVwE+N1ZbKKAzQxqokXp0qW58sormTRpEmXKlPE7nMCwRBBw1hRk/HT48GGGDh3K0aNHGThwIFdddRVXXXWV32EFjtW5AsySgPHTV199xSWXXMLTTz9Neno61j3oH0sEAZY1Z5AlARNJv/32G/369eOyyy5j3759LFiwgJdfftlmCvWRJYKAmrNky7G7hS0JmEjauHEj48ePp0ePHqxbt4727dv7HVLgWR9BAIU2CXWok33WD2OK3oEDB3j77be5/fbbqVmzJmlpaZQtWzb/N5qIsBpBAFmTkImk999/n5o1a3LnnXfy7bffAlgSiDKWCALGmoRMpOzevZuuXbvSrl07zjrrLL744gtq1Kjhd1gmB9Y0FBDZbxizJiHjpczMTC6//HI2btzIM888w2OPPcZJJ53kd1gmF5YIAsBuGDORsnPnTs455xwSEhIYNWoUFSpUoFatWn6HZfIRdtOQiJzqZSDGG9nvFZh7VwNLAqbIHT16lClTplCtWjWmTJkCQLt27SwJxIh8E4GINBSR9cC37v5FIjLR88hModkNYyYS0tLSaN68Ob169eLSSy+lVatWfodkCiicGsEYoBWwF0BVVwFNvAzKFA0bHWS89tJLL3HhhReyfPlypk2bxr///W8qVarkd1imgMLqI1DVrdnu+sv0JhxTFLI6htfv+NlGBxlPlStXjlatWjFhwgQuuMAGIMSqcBLBVhFpCKiInAjcj7
O+gIlCOXUMG1NUDh06xHPPPcfRo0cZNGgQzZs3p3nz5n6HZQopnETQC3geZ+H57cC/gN5eBmWOj/UJGC8tWbKE1NRU1q1bx2233Yaq2vxAcSKcPoLqqtpVVc9V1XNU9RYgyevATMFYEjBe+fXXX+nbty8NGjTgwIED/OMf/2DmzJmWBOJIOIlgfJjPGZ9YEjBe2rx5MxMnTqRXr16sW7eOtm3b+h2SKWK5Ng2JSAOgIVBKRPqGvPQ3IMHrwEx4LAkYL+zfv5958+Zx5513kpycTFpamq0YFsfyqhGcBJyOkyyKhzx+Bm7wPjQTDhsiaorau+++S3JyMr169To2SZwlgfiWa41AVT8BPhGRmaq6OYIxmTDZBHKmKO3atYv77ruPuXPnUrt2bRYsWGCTxAVEOKOGfhOREUBNoFjWk6p6pWdRmbBk1QZsiKgprMzMTBo1asSWLVsYPHgwjzzyCCeeeKLfYZkICScRvArMBdrhDCW9DdjtZVAmfFYbMIXx448/ct5555GQkMDzzz9PhQoVSE5O9jssE2HhjBoqoaovAkdU9RNVvQOw2oCP5izZwk1TvmD9jp/9DsXEqKNHjzJp0iRq1KjB5MmTAbj66qstCQRUODWCI+7PHSLSFvgRONu7kExe7M5hU1jfffcdPXr0YPHixbRo0YI2bdr4HZLxWTiJYLCInAE8hHP/wN+AB7wMyvxZ1txBwLGFZWyUkDkeL774In369KFYsWLMmDGD7t27241hJv9EoKr/cDcPAM0ARKSRl0GZ/8leA7CFZUxhVKhQgTZt2jBhwgTOP/98v8MxUSKvG8oSgBtx5hj6QFXXikg74AngFODiyIQYXHazmCmsQ4cO8fe//x2AwYMH2yRxJkd51QheBMoCS4FxIvIjkAI8pqrvRCC2QLMkYArr888/JzU1lW+//ZY77rjDJokzucorEaQAtVX1qIgUA3YClVV1b2RCCza7Y9gcr4MHD9K/f3/Gjx9P2bJl+eCDD2zVMJOnvIaPHlbVowCq+geQXtAkICKtRWSDiKSJyGO5lLlRRNaLyDoRmVOQz493do+AOR5btmxhypQp3HPPPaxdu9aSgMlXXjWCGiKy2t0WoLK7L4Cqau28PtjtY5gAXAVsA74SkQWquj6kTFXgcaCRqu4TkXMKcS5xI3TqCGPCsW/fPt5880169uxJcnIy6enplC5d2u+wTIzIKxEUds2BekCaqqYDiMjrQAdgfUiZHsAEVd0HoKq7CnnMmBfaN2D3CJhwzJ8/n969e7N7926aNm1K9erVLQmYAsm1aUhVN+f1COOzLwC2huxvc58LVQ2oJiKficiXItI6pw8SkZ4iskxElu3eHd+zW1jfgAnXzp076dSpE9dddx3nnXceS5cupXr16n6HZWJQWIvXe3z8qsAVQBlgsYhcqKr7Qwup6lRgKkBKSopGOMaIsdlETbgyMzNp3LgxW7duZciQIfTr188miTPHzctEsB1n+GmWMu5zobYBS1T1CLBRRL7DSQxfeRhXVLImIROObdu2Ubp0aRISEhg3bhwVK1a0qaJNoYUz6RwicoqIFLTO+RVQVUQqishJQGdgQbYy7+DUBhCRkjhNRekFPE7Ms3sGTH6OHj3K+PHjqVGjBpMmTQKgTZs2lgRMkcg3EYhIe2Al8IG7X0dEsl/Q/0JVM4A+wCLgG+ANVV0nIoNE5Bq32CJgr4isB/4LPBy0+xQsCZj8fPvttzRp0oT77ruPyy+/nHbt2vkdkokz4TQNDcQZAfQxgKquFJGK4Xy4qi4EFmZ7bkDItgJ93UcgWeewycv06dPp06cPp556KrNmzaJbt252d7ApcmFNQ62qB7L98cVth20kWeewyU/lypVp3749L7zwAueee67f4Zg4FU4iWCciXYAE9waw+4DPvQ0r/lnnsMnJH3/8waBBgwAYMmQIzZo1o1mzZj5HZeJdOJ3F9+KsV3wImIMzHfUDHsYU17JWF7N+AZPdZ599Rp06dX
juuefYvXs3TsupMd4Lp0ZQQ1X7A/29Dibe5bS6mCUB88svv/DEE08wYcIEypcvz6JFi2jZsqXfYZkACScRjBKR84B5wFxVXetxTHHLOoZNTrZt28b06dO59957efbZZzn99NP9DskETL5NQ6raDGdlst3AFBFZIyJPeh5ZnLKOYQOwd+/eY/cDJCUlkZ6ezvPPP29JwPgirBvKVHWnqo4DeuHcUzAg73eY7LJGCJlgU1XmzZtHcnIy9913Hxs2bACwZSONr8K5oSxJRAaKyBqcxes/x5kuwoTJRggZgB07dnD99dfTqVMnypYty7Jly2ySOBMVwukjmAHMBVqp6o8exxN37M5hA/+bJG779u0MHz6cBx98kMREv+d8NMaR71+iqjaIRCDxyJKA2bp1KxdccAEJCQlMmDCBihUrUq1aNb/DMuZPcm0aEpE33J9rRGR1yGNNyMplJg82Sii4MjMzGTdu3J8miWvVqpUlAROV8qoR3O/+tBmujoNNHxFc33zzDampqXzxxRe0adOG9u3b+x2SMXnKa4WyHe5m7xxWJ+sdmfBiV1ZtwDqHg2Xq1KnUqVOH7777jtmzZ/P+++9Trpx9ETDRLZzho1fl8Fybog4kHlltIHiqVq1Kx44dWb9+PbfccovNFGpiQq5NQyJyN843/0rZ+gSKA595HVgsC20WMvHt999/Z+DAgYgIQ4cOtUniTEzKq0YwB2iPs6pY+5DHJap6SwRii1nWLBQMixcv5qKLLmL48OEcOHDAJokzMSuvRKCqugm4B/gl5IGI2FfdXFgncfz7+eef6d27N02bNiUzM5OPPvqISZMmWTOQiVl5jRqagzNi6GuchWhC/8oVqORhXDHJ7iAOhh9//JGZM2fSt29fBg0axGmnneZ3SMYUSq6JQFXbuT/DWpbS2H0D8WzPnj288cYb9O7dmxo1arBx40ZbMczEjXDmGmokIqe527eIyGgRsatcLqxJKL6oKnPnziU5OZkHHniA7777DsCSgIkr4QwfnQT8JiIXAQ8BPwCzPY0qBtnsovHnxx9/5Nprr6Vz586UL1+er7/+2u4MNnEpnFmvMlRVRaQD8IKqvigiqV4HFmtspFB8yczMpEmTJmzfvp2RI0dy//332yRxJm6F85f9i4g8DnQDGovICcCJ3oYVW2ykUPzYvHkzZcqUISEhgYkTJ1KpUiWqVKnid1jGeCqcpqGbcBauv0NVd+KsRTDC06hiiI0Uig+ZmZmMHj2apKSkY5PEtWzZ0pKACYRwlqrcCbwKnCEi7YA/VPVlzyOLETZSKPatXbuWhg0b8tBDD9G8eXOuvfZav0MyJqLCGTV0I7AU6ATcCCwRkRu8DiwWWJNQ7Js8eTJ169YlPT2dOXPmsGDBAsqUsQX4TLCE00fQH7hUVXcBiEgp4N/APC8Di3bWJBTbVBURISkpiU6dOjF27FhKlSrld1jG+CKcRHBCVhJw7SXMRe/jmTUJxabffvuNAQMGkJCQwLBhw2jatClNmzb1OyxjfBXOBf0DEVkkIt1FpDvwPrDQ27CimzUJxaaPP/6Y2rVrM2rUKA4ePGiTxBnjCqez+GFgClDbfUxV1Ue9DixaWZNQ7Dlw4AB33XXXsemh//Of/zBhwgSbJM4YV17rEVQFRgKVgTVAP1XdHqnAopU1CcWeHTt28Morr9CvXz+eeeYZTj31VL9DMiaq5FUjmAH8A7geZwbS8QX9cBFpLSIbRCRNRB7Lo9z1IqIiklLQY0SSNQnFjt27dzN+vPMnW6NGDTZt2sSIESMsCRiTg7wSQXFVnaaqG1R1JFChIB8sIgnABJxlLZOBm0UkOYdyxYH7gSUF+Xw/2DQS0U9VmTNnDklJSTz00EPHJomzEUHG5C6vRFBMRC4WkboiUhc4Jdt+fuoBaaqarqqHgdeBDjmU+zswDPijwNFHkNUGot/WrVtp3749Xbt2pUqVKqxYscImiTMmDHkNH90BjA7Z3xmyr8CV+Xz2BcDWkP
1tQP3QAm5CKauq74vIw7l9kIj0BHoClCsX+YuwdRBHv4yMDK644gp27tzJmDFjuPfee0lISPA7LGNiQl4L03i6Arc7ed1ooHt+ZVV1KjAVICUlJeJj/qyDOHpt2rSJsmXLkpiYyJQpU6hUqRKVKtniecYUhJc3hm0Hyobsl3Gfy1IcqAV8LCKbgMuABdHaYWxNQtElIyODkSNHkpSUxMSJEwFo0aKFJQFjjoOXE6x/BVQVkYo4CaAz0CXrRVU9AJTM2heRj3GGqC7zMCYTB1avXk1qairLli2jQ4cOXH/99X6HZExM86xGoKoZQB9gEfAN8IaqrhORQSJyjVfHLWq28lh0mThxIpdccgmbN29m7ty5zJ8/n9KlS/sdljExLd8agTi3X3YFKqnqIHe94vNUdWl+71XVhWSbjkJVB+RS9oqwIo4wGzIaHbImiatVqxadO3dmzJgxlCxZMv83GmPyFU7T0ETgKM4ooUHAL8BbwKUexhUVbMio/3799VeefPJJEhMTGTFiBE2aNKFJkyZ+h2VMXAmnaai+qt6DO85fVfcBJ3kaVZSw2oC/PvroIy688ELGjh3LoUOHbJI4YzwSTiI44t4lrHBsPYKjnkYVBaw24J/9+/dz55130qJFCxITE1m8eDHjxo2zSeKM8Ug4iWAcMB84R0SeBf4fMMTTqKKA1Qb889NPP/H666/z6KOPsmrVKho3bux3SMbEtXz7CFT1VRH5GmgOCHCtqn7jeWRRwGoDkZN18b///vupXr06mzZtss5gYyIknDWLywG/Ae8BC4Bf3eeMKTRV5ZVXXiE5OZlHHnmE77//HsCSgDERFE7T0Ps401G/D3wEpAP/9DIov9m9A5GxZcsW2rZtS7du3ahevTorV66katWqfodlTOCE0zR0Yei+O1Fcb88iigLWP+C9rEnidu3axbhx4+jdu7dNEmeMTwo8xYSqLheR+vmXjG3WP+CN9PR0ypcvT2JiItOmTaNy5cpUqFDB77CMCbRw+gj6hjz6icgc4McIxOYLaxbyRkZGBsOGDSM5OZkJEyYA0Lx5c0sCxkSBcGoExUO2M3D6Ct7yJhz/WbNQ0Vu5ciWpqaksX76cjh070qlTJ79DMsaEyDMRuDeSFVfVfhGKJypYs1DReeGFF3jwwQcpUaIE8+bNs5lCjYlCuTYNiUiiqmYCjSIYj4kTWdNB1K5dm65du7J+/XpLAsZEqbxqBEuBusBKEVkAvAn8mvWiqr7tcWwmBh08eJD+/ftz4oknMnLkSJskzpgYEM59BMWAvTizj7YD2rs/jfmTf/3rX9SqVYvx48dz5MgRmyTOmBiRV43gHBHpC6zFmXAudMYv+x9ujtm3bx99+/Zl5syZVK9encWLF3P55Zf7HZYxJkx51QgSgNPdR/GQ7axH3LGho8dn165dzJs3j8cff5yVK1daEjAmxuRVI9ihqoMiFkkUsKGj4du5cyevvfYaDz744LFJ4kqUKOF3WMaY45BXjSBQk7/b+gPhUVVmzZpFcnIyjz/++LFJ4iwJGBO78koEzSMWhc/mLNnCE/PXAFYbyMumTZto3bo13bt3Jzk52SaJMyZO5No0pKqBaSzPahIa0vFCqw3kIiMjg2bNmrFnzx4mTJhAr169OOGEcAadGWOiXYEnnYtX1iSUs7S0NCpWrEhiYiIzZsygUqVKlC9f3u+wjDFFyL7SmRwdOXKEIUOGULNmzWOTxDVr1sySgDFxyGoE5i+WL19OamoqK1eupFOnTtx0001+h2SM8ZDVCMyfjBs3jnr16rFz507efvtt3njjDc4991y/wzLGeCjwicBuInNkTQdx8cUXc+utt7J+/Xo6duzoc1TGmEgIfNNQ0G8i++WXX3j88cc5+eSTGTVqFI0bN6Zx48Z+h2WMiaDA1wgguCOGPvjgA2rVqsXEiRNRVZskzpiACnQiCGqz0N69e7ntttto06YNp512Gp999hmjR49GJFA3kxtjXIFOBEFtFtq7dy
/z58/nqaeeYsWKFTRo0MDvkIwxPvI0EYhIaxHZICJpIvJYDq/3FZH1IrJaRD4SkYgPUg9Ks9COHTsYOXIkqkq1atXYvHkzgwYN4uSTT/Y7NGOMzzxLBO56xxOANkAycLOIJGcrtgJIUdXawDxguFfxBJWqMmPGDJKSknjqqadIS0sD4KyzzvI5MmNMtPCyRlAPSFPVdFU9DLwOdAgtoKr/VdXf3N0vgTIexhM4GzdupGXLlqSmpnLRRRexatUqmyTOGPMXXg4fvQDYGrK/DaifR/lU4J85vSAiPYGeAOXKxX8zTlHIyMjgyiuvZO/evUyaNImePXvaJHHGmBxFxX0EInILkAI0zel1VZ0KTAVISUkpkjGOoesPxJPvv/+eSpUqkZiYyEsvvUTlypUpW7as32EZY6KYl18RtwOhV6Ay7nN/IiItgP7ANap6yMN4/iTeRgwdOXKEwYMHU6tWLV544QUArrjiCksCxph8eVkj+AqoKiIVcRJAZ6BLaAERuRiYArRW1V0expKjeBkxtGzZMlJTU1m9ejWdO3fm5ptv9jskY0wM8axGoKoZQB9gEfAN8IaqrhORQSJyjVtsBHA68KaIrBSRBV7FE6+ef/556tevz549e3j33Xd57bXXOOecc/wOyxgTQzztI1DVhcDCbM8NCNlu4eXx45mqIiKkpKSQmprK8OHDOfPMM/0OyxgTg6Kis9iE7+eff+bRRx+lWLFijBkzhkaNGtGoUSO/wzLGxDAbTxhDFi5cSM2aNZk6dSqJiYk2SZwxpkgEMhHE2mRze/bs4ZZbbqFt27acccYZfP7554wYMcImiTPGFIlAJoJYGzq6b98+3nvvPZ5++mmWL19O/fp53ZdnjDEFE9g+gmgfOrp9+3ZeffVVHn74YapWrcrmzZutM9gY44lA1giimaoybdo0kpOTGThwID/88AOAJQFjjGcsEUSRH374gebNm9OzZ0/q1q3L6tWrqVKlit9hGWPiXOASQbR2FGdkZNC8eXOWLVvGlClT+OijjywJGGMiInB9BNHWUbxhwwYqV65MYmIis2bNonLlypQpY7NxG2MiJ3A1AoiOjuLDhw/zzDPPcOGFFzJhwgQAmjZtaknAGBNxgasRRIOlS5eSmprK2rVr6dKlC127dvU7JGNMgAWyRuCnsWPH0qBBg2P3Brz66quULFnS77CMMQFmiSBCsqaDqFevHj169GDdunW0a9fO56iMMcaahjx34MABHnnkEU455RTGjh1Lw4YNadiwod9hGWPMMVYj8NB7771HcnIy06dP5+STT7ZJ4owxUSlQiSBS9xDs3r2bLl26cM0111CiRAm+/PJLhg0bZpPEGWOiUqASQaTuIThw4AALFy7kmWeeYdmyZVx66aWeHs8YYwojcH0EXt1DsHXrVl555RUee+wxqlSpwubNmznjjDOK/DjGGFPUAlUj8MLRo0eZPHkyNWvWZPDgwccmibMkYIyJFZYICuH777/nyiuv5O6776ZevXqsWbPG5gcyxsScwDUNFZWMjAyuuuoq9u/fz4svvsjtt99uncHGmJhkiaCAvvnmG6pWrUpiYiKzZ8+mcuXKlC5d2u+wjDHmuFnTUJgOHTrE008/Te3atXnhhRcAaNy4sSUBY0zMsxpBGL788ktSU1NZv3493bp1o1u3bn6HZIwxRcZqBPkYNWoUDRs25JdffmHhwoW8/PLLlChRwu+wjDGmyFgiyMXRo0cBaNCgAb169WLt2rW0adPG56iMMaboWdNQNvv37+ehhx7i1FNPZfz48TZJnDEm7gWmRhDOPEPvvPMOycnJzJo1i+LFi9skccaYQAhMIshrnqFdu3Zx44030rFjR84991yWLl3KkCFD7L4AY0wgBCYRQO7zDP388898+OGHPPvssyxdupS6dev6EJ0xxvgjsH0EW7ZsYfbs2TzxxBNUqVKFLVu2ULx4cb/DMsaYiPO0RiAirUVkg4ikichjObx+sojMdV9fIiIVvI
wHnNFAEydOpGbNmgwZMuTYJHGWBIwxQeVZIhCRBGAC0AZIBm4WkeRsxVKBfapaBRgDDPMqHoDff/+NK664gnvuuYcGDRqwbt06myTOGBN4XtYI6gFpqpquqoeB14EO2cp0AGa52/OA5uJRD62qsnr1atasWcNLL73EokWLqFChgheHMsaYmOJlH8EFwNaQ/W1A/dzKqGqGiBwASgB7QguJSE+gJ0C5cse3qEzNC87grPq1GPjses4///zj+gxjjIlHMdFZrKpTgakAKSkpxzW4/+n2NYGaRRmWMcbEBS+bhrYDZUP2y7jP5VhGRBKBM4C9HsZkjDEmGy8TwVdAVRGpKCInAZ2BBdnKLABuc7dvAP6jdjuvMcZElGdNQ26bfx9gEZAAzFDVdSIyCFimqguAF4HZIpIG/B9OsjDGGBNBnvYRqOpCYGG25waEbP8BdPIyBmOMMXkL1BQTxhhj/soSgTHGBJwlAmOMCThLBMYYE3ASa6M1RWQ3sPk4316SbHctB4CdczDYOQdDYc65vKqWyumFmEsEhSEiy1Q1xe84IsnOORjsnIPBq3O2piFjjAk4SwTGGBNwQUsEU/0OwAd2zsFg5xwMnpxzoPoIjDHG/FXQagTGGGOysURgjDEBF5eJQERai8gGEUkTkcdyeP1kEZnrvr5ERCr4EGaRCuOc+4rIehFZLSIfiUh5P+IsSvmdc0i560VERSTmhxqGc84icqP7b71OROZEOsaiFsbfdjkR+a+IrHD/vq/2I86iIiIzRGSXiKzN5XURkXHu72O1iNQt9EFVNa4eOFNe/wBUAk4CVgHJ2cr0Bia7252BuX7HHYFzbgac6m7fHYRzdssVBxYDXwIpfscdgX/nqsAK4Cx3/xy/447AOU8F7na3k4FNfsddyHNuAtQF1uby+tXAPwEBLgOWFPaY8VgjqAekqWq6qh4GXgc6ZCvTAZjlbs8DmouIRDDGopbvOavqf1X1N3f3S5wV42JZOP/OAH8HhgF/RDI4j4Rzzj2ACaq6D0BVd0U4xqIWzjkr8Dd3+wzgxwjGV+RUdTHO+iy56QC8rI4vgTNFpFALscdjIrgA2Bqyv819LscyqpoBHABKRCQ6b4RzzqFScb5RxLJ8z9mtMpdV1fcjGZiHwvl3rgZUE5HPRORLEWkdsei8Ec45DwRuEZFtOOuf3BuZ0HxT0P/v+YqJxetN0RGRW4AUoKnfsXhJRE4ARgPdfQ4l0hJxmoeuwKn1LRaRC1V1v59BeexmYKaqjhKRBjirHtZS1aN+BxYr4rFGsB0oG7Jfxn0uxzIikohTndwbkei8Ec45IyItgP7ANap6KEKxeSW/cy4O1AI+FpFNOG2pC2K8wzicf+dtwAJVPaKqG4HvcBJDrArnnFOBNwBU9QugGM7kbPEqrP/vBRGPieAroKqIVBSRk3A6gxdkK7MAuM3dvgH4j7q9MDEq33MWkYuBKThJINbbjSGfc1bVA6paUlUrqGoFnH6Ra1R1mT/hFolw/rbfwakNICIlcZqK0iMYY1EL55y3AM0BRCQJJxHsjmiUkbUAuNUdPXQZcEBVdxTmA+OuaUhVM0SkD7AIZ8TBDFVdJyKDgGWqugB4Eaf6mIbTKdPZv4gLL8xzHgGcDrzp9otvUdVrfAu6kMI857gS5jkvAlqKyHogE3hYVWO2thvmOT8ETBORB3E6jrvH8hc7EXkNJ5mXdPs9ngZOBFDVyTj9IFcDacBvwO2FPmYM/76MMcYUgXhsGjLGGFMAlgiMMSbgLBEYY0zAWSIwxpiAs0RgjDEBZ4nARCURyRSRlSGPCnmUPVgEx5spIhvdYy1371At6GdMF5Fkd/uJbK99XtgY3c/J+r2sFZH3ROTMfMrXifXZOI33bPioiUoiclBVTy/qsnl8xkzgH6o6T0RaAiNVtXYhPq/QMeX3uSIyC/hOVZ/No3x3nFlX+xR1LCZ+WI3AxAQROd1dR2G5iKwRkb/MNCoi54vI4pBvzI3d51uKyB
fue98Ukfwu0IuBKu57+7qftVZEHnCfO01E3heRVe7zN7nPfywiKSIyFDjFjeNV97WD7s/XRaRtSMwzReQGEUkQkREi8pU7x/xdYfxavsCdbExE6rnnuEJEPheR6u6duIOAm9xYbnJjnyEiS92yOc3YaoLG77m37WGPnB44d8WudB/zce6C/5v7WkmcuyqzarQH3Z8PAf3d7QSc+YZK4lzYT3OffxQYkMPxZgI3uNudgCXAJcAa4DScu7LXARcD1wPTQt57hvvzY9w1D7JiCimTFWNHYJa7fRLOLJKnAD2BJ93nTwaWARVziPNgyPm9CbR29/8GJLrbLYC33O3uwAsh7x8C3OJun4kzF9Fpfv9728PfR9xNMWHixu+qWidrR0ROBIaISBPgKM434XOBnSHv+QqY4ZZ9R1VXikhTnMVKPnOn1jgJ55t0TkaIyJM489Sk4sxfM19Vf3VjeBtoDHwAjBKRYTjNSZ8W4Lz+CTwvIicDrYHFqvq72xxVW0RucMudgTNZ3MZs7z9FRFa65/8N8GFI+VkiUhVnmoUTczl+S+AaEenn7hcDyrmfZQLKEoGJFV2BUsAlqnpEnBlFi4UWUNXFbqJoC8wUkdHAPuBDVb05jGM8rKrzsnZEpHlOhVT1O3HWOrgaGCwiH6nqoHBOQlX/EJGPgVbATTgLrYCz2tS9qroon4/4XVXriMipOPPv3AOMw1mA57+q2tHtWP84l/cLcL2qbggnXhMM1kdgYsUZwC43CTQD/rLmsjjrMP+kqtOA6TjL/X0JNBKRrDb/00SkWpjH/BS4VkROFZHTcJp1PhWR0sBvqvoKzmR+Oa0Ze8StmeRkLs5EYVm1C3Au6ndnvUdEqrnHzJE6q83dBzwk/5tKPWsq4u4hRX/BaSLLsgi4V9zqkTiz0pqAs0RgYsWrQIqIrAFuBb7NocwVwCoRWYHzbft5Vd2Nc2F8TURW4zQL1QjngKq6HKfvYClOn8F0VV0BXAgsdZtongYG5/D2qcDqrM7ibP6FszDQv9VZfhGcxLUeWC7OouVTyKfG7sayGmdhluHAc+65h77vv0ByVmcxTs3hRDe2de6+CTgbPmqMMQFnNQJjjAk4SwTGGBNwlgiMMSbgLBEYY0zAWSIwxpiAs0RgjDEBZ4nAGGMC7v8DPMojSB0aO1YAAAAASUVORK5CYII=", 498 | "text/plain": [ 499 | "
" 500 | ] 501 | }, 502 | "metadata": { 503 | "needs_background": "light" 504 | }, 505 | "output_type": "display_data" 506 | } 507 | ], 508 | "source": [ 509 | "# plot ROC curve\n", 510 | "fpr, tpr, thresholds = roc_curve(y_test, y_scores[:,1])\n", 511 | "fig = plt.figure(figsize=(6, 4))\n", 512 | "# Plot the diagonal 50% line\n", 513 | "plt.plot([0, 1], [0, 1], 'k--')\n", 514 | "# Plot the FPR and TPR achieved by our model\n", 515 | "plt.plot(fpr, tpr)\n", 516 | "plt.xlabel('False Positive Rate')\n", 517 | "plt.ylabel('True Positive Rate')\n", 518 | "plt.title('ROC Curve')" 519 | ] 520 | } 521 | ], 522 | "metadata": { 523 | "interpreter": { 524 | "hash": "f2b2cd046deda8eabef1e765a11d0ec9aa9bd1d31d56ce79c815a38c323e14ec" 525 | }, 526 | "kernel_info": { 527 | "name": "python38-azureml" 528 | }, 529 | "kernelspec": { 530 | "display_name": "Python 3.9.5 ('base')", 531 | "language": "python", 532 | "name": "python3" 533 | }, 534 | "language_info": { 535 | "codemirror_mode": { 536 | "name": "ipython", 537 | "version": 3 538 | }, 539 | "file_extension": ".py", 540 | "mimetype": "text/x-python", 541 | "name": "python", 542 | "nbconvert_exporter": "python", 543 | "pygments_lexer": "ipython3", 544 | "version": "3.9.5" 545 | }, 546 | "nteract": { 547 | "version": "nteract-front-end@1.0.0" 548 | } 549 | }, 550 | "nbformat": 4, 551 | "nbformat_minor": 0 552 | } 553 | -------------------------------------------------------------------------------- /index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Online Hosted Instructions 3 | permalink: index.html 4 | layout: home 5 | --- 6 | 7 | # MLOps Challenges 8 | 9 | This repository contains hands-on challenges for end-to-end machine learning operations (MLOps) with Azure Machine Learning. 10 | 11 | To complete these exercises, you’ll need a Microsoft Azure subscription. 
If your instructor has not provided you with one, you can sign up for a free trial at [https://azure.microsoft.com](https://azure.microsoft.com/). 12 | 13 | ## Challenges 14 | 15 | {% assign challenge = site.pages | where_exp:"page", "page.url contains '/documentation'" %} 16 | | Module | Challenge | 17 | | --- | --- | 18 | {% for activity in challenge %}| {{ activity.challenge.module }} | [{{ activity.challenge.challenge }}{% if activity.challenge.type %} - {{ activity.challenge.type }}{% endif %}]({{ site.github.url }}{{ activity.url }}) | 19 | {% endfor %} -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | pythonpath = src 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pytest==7.1.2 2 | mlflow==1.27.0 3 | pandas==1.4.3 4 | sklearn==0.0 5 | scikit-learn==1.1.1 6 | -------------------------------------------------------------------------------- /src/job.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json 2 | code: model 3 | command: >- 4 | python 5 | -- 6 | -- 7 | inputs: 8 | training_data: 9 | type: uri_folder 10 | path: 11 | reg_rate: 0.01 12 | environment: azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest 13 | compute: 14 | experiment_name: 15 | description: -------------------------------------------------------------------------------- /src/model/train.py: -------------------------------------------------------------------------------- 1 | # Import libraries 2 | 3 | import argparse 4 | import glob 5 | import os 6 | 7 | import pandas as pd 8 | 9 | from sklearn.linear_model import LogisticRegression 10 | 11 | 12 | # define functions 13 | def main(args): 14 | # TO 
DO: enable autologging 15 | 16 | 17 | # read data 18 | df = get_csvs_df(args.training_data) 19 | 20 | # split data 21 | X_train, X_test, y_train, y_test = split_data(df) 22 | 23 | # train model 24 | train_model(args.reg_rate, X_train, X_test, y_train, y_test) 25 | 26 | 27 | def get_csvs_df(path): 28 | if not os.path.exists(path): 29 | raise RuntimeError(f"Cannot use non-existent path provided: {path}") 30 | csv_files = glob.glob(f"{path}/*.csv") 31 | if not csv_files: 32 | raise RuntimeError(f"No CSV files found in provided data path: {path}") 33 | return pd.concat((pd.read_csv(f) for f in csv_files), sort=False) 34 | 35 | 36 | # TO DO: add function to split data 37 | 38 | 39 | def train_model(reg_rate, X_train, X_test, y_train, y_test): 40 | # train model 41 | LogisticRegression(C=1/reg_rate, solver="liblinear").fit(X_train, y_train) 42 | 43 | 44 | def parse_args(): 45 | # setup arg parser 46 | parser = argparse.ArgumentParser() 47 | 48 | # add arguments 49 | parser.add_argument("--training_data", dest='training_data', 50 | type=str) 51 | parser.add_argument("--reg_rate", dest='reg_rate', 52 | type=float, default=0.01) 53 | 54 | # parse args 55 | args = parser.parse_args() 56 | 57 | # return args 58 | return args 59 | 60 | # run script 61 | if __name__ == "__main__": 62 | # add space in logs 63 | print("\n\n") 64 | print("*" * 60) 65 | 66 | # parse args 67 | args = parse_args() 68 | 69 | # run main function 70 | main(args) 71 | 72 | # add space in logs 73 | print("*" * 60) 74 | print("\n\n") 75 | -------------------------------------------------------------------------------- /tests/.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = 3 | W504, 4 | C901, 5 | E41, 6 | E722, 7 | W, 8 | D, 9 | F, 10 | N, 11 | C, 12 | I 13 | max-line-length = 79 14 | exclude = 15 | .tox, 16 | .git, 17 | __pycache__, 18 | *.pyc, 19 | *.egg-info, 20 | .cache, 21 | .eggs, 22 | develop 23 | per-file-ignores = 24 | 
src/__init__.py:D104 25 | src/*/__init__.py:D104 26 | max-complexity = 10 27 | import-order-style = pep8 -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MicrosoftLearning/mslearn-mlops/a103a1bcdc53849e30c8e1952cb1321db97b7248/tests/__init__.py -------------------------------------------------------------------------------- /tests/datasets/first.csv: -------------------------------------------------------------------------------- 1 | index,first,last 2 | 0,Glenn,Hernandez 3 | 1,Sarah,Pedersen 4 | 2,Jill,Tracy 5 | 3,Melissa,Nelson 6 | 4,Hugh,Soto 7 | 5,Frank,Dees 8 | 6,Vita,Singleton 9 | 7,James,Papenfuss 10 | 8,Mary,Smithson 11 | 9,Bonnie,Begor 12 | -------------------------------------------------------------------------------- /tests/datasets/foo.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | current_directory = os.path.dirname(os.path.abspath(__file__)) 4 | 5 | print(current_directory) 6 | -------------------------------------------------------------------------------- /tests/datasets/second.csv: -------------------------------------------------------------------------------- 1 | index,first,last 2 | 0,Tina,Holloway 3 | 1,Katherine,Logan 4 | 2,Juan,Duncan 5 | 3,Doyle,Clyne 6 | 4,Jacob,Kazin 7 | 5,Kimberly,Tomes 8 | 6,Lisa,Cochrane 9 | 7,Troy,Hall 10 | 8,Erin,Johnson 11 | 9,Joan,Laborde 12 | -------------------------------------------------------------------------------- /tests/test_train.py: -------------------------------------------------------------------------------- 1 | from model.train import get_csvs_df 2 | import os 3 | import pytest 4 | 5 | 6 | def test_csvs_no_files(): 7 | with pytest.raises(RuntimeError) as error: 8 | get_csvs_df("./") 9 | assert error.match("No CSV files found in provided data") 10 | 11 | 12 | def 
test_csvs_no_files_invalid_path(): 13 | with pytest.raises(RuntimeError) as error: 14 | get_csvs_df("/invalid/path/does/not/exist/") 15 | assert error.match("Cannot use non-existent path provided") 16 | 17 | 18 | def test_csvs_creates_dataframe(): 19 | current_directory = os.path.dirname(os.path.abspath(__file__)) 20 | datasets_directory = os.path.join(current_directory, 'datasets') 21 | result = get_csvs_df(datasets_directory) 22 | assert len(result) == 20 23 | --------------------------------------------------------------------------------